feat: add builder mode for centralized Nix builds
Add a new "builder" capability to trigger Nix builds on a dedicated build host via NATS messaging. This allows pre-building NixOS configurations before deployment. New components: - Builder mode: subscribes to build.<repo>.* subjects, executes nix build - Build CLI command: triggers builds with progress tracking - MCP build tool: available with --enable-builds flag - Builder metrics: tracks build success/failure per repo and host - NixOS module: services.homelab-deploy.builder The builder uses a YAML config file to define allowed repositories with their URLs and default branches. Builds can target all hosts or specific hosts, with real-time progress updates. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
323
internal/builder/builder.go
Normal file
323
internal/builder/builder.go
Normal file
@@ -0,0 +1,323 @@
|
||||
package builder
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"git.t-juice.club/torjus/homelab-deploy/internal/messages"
|
||||
"git.t-juice.club/torjus/homelab-deploy/internal/metrics"
|
||||
"git.t-juice.club/torjus/homelab-deploy/internal/nats"
|
||||
)
|
||||
|
||||
// BuilderConfig holds the configuration for the builder.
|
||||
type BuilderConfig struct {
|
||||
NATSUrl string
|
||||
NKeyFile string
|
||||
ConfigFile string
|
||||
Timeout time.Duration
|
||||
MetricsEnabled bool
|
||||
MetricsAddr string
|
||||
}
|
||||
|
||||
// Builder handles build requests from NATS.
|
||||
type Builder struct {
|
||||
cfg BuilderConfig
|
||||
repoCfg *Config
|
||||
client *nats.Client
|
||||
executor *Executor
|
||||
lock sync.Mutex
|
||||
busy bool
|
||||
logger *slog.Logger
|
||||
|
||||
// metrics server and collector (nil if metrics disabled)
|
||||
metricsServer *metrics.Server
|
||||
metrics *metrics.BuildCollector
|
||||
}
|
||||
|
||||
// New creates a new builder with the given configuration.
|
||||
func New(cfg BuilderConfig, repoCfg *Config, logger *slog.Logger) *Builder {
|
||||
if logger == nil {
|
||||
logger = slog.Default()
|
||||
}
|
||||
|
||||
b := &Builder{
|
||||
cfg: cfg,
|
||||
repoCfg: repoCfg,
|
||||
executor: NewExecutor(cfg.Timeout),
|
||||
logger: logger,
|
||||
}
|
||||
|
||||
if cfg.MetricsEnabled {
|
||||
b.metricsServer = metrics.NewServer(metrics.ServerConfig{
|
||||
Addr: cfg.MetricsAddr,
|
||||
Logger: logger,
|
||||
})
|
||||
b.metrics = metrics.NewBuildCollector(b.metricsServer.Registry())
|
||||
}
|
||||
|
||||
return b
|
||||
}
|
||||
|
||||
// Run starts the builder and blocks until the context is cancelled.
|
||||
func (b *Builder) Run(ctx context.Context) error {
|
||||
// Start metrics server if enabled
|
||||
if b.metricsServer != nil {
|
||||
if err := b.metricsServer.Start(); err != nil {
|
||||
return fmt.Errorf("failed to start metrics server: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
_ = b.metricsServer.Shutdown(shutdownCtx)
|
||||
}()
|
||||
}
|
||||
|
||||
// Connect to NATS
|
||||
b.logger.Info("connecting to NATS", "url", b.cfg.NATSUrl)
|
||||
|
||||
client, err := nats.Connect(nats.Config{
|
||||
URL: b.cfg.NATSUrl,
|
||||
NKeyFile: b.cfg.NKeyFile,
|
||||
Name: "homelab-deploy-builder",
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to connect to NATS: %w", err)
|
||||
}
|
||||
b.client = client
|
||||
defer b.client.Close()
|
||||
|
||||
b.logger.Info("connected to NATS")
|
||||
|
||||
// Subscribe to build subjects for each repo
|
||||
for repoName := range b.repoCfg.Repos {
|
||||
// Subscribe to build.<repo>.all and build.<repo>.<hostname>
|
||||
allSubject := fmt.Sprintf("build.%s.*", repoName)
|
||||
b.logger.Info("subscribing to build subject", "subject", allSubject)
|
||||
if _, err := b.client.Subscribe(allSubject, b.handleBuildRequest); err != nil {
|
||||
return fmt.Errorf("failed to subscribe to %s: %w", allSubject, err)
|
||||
}
|
||||
}
|
||||
|
||||
b.logger.Info("builder started", "repos", len(b.repoCfg.Repos))
|
||||
|
||||
// Wait for context cancellation
|
||||
<-ctx.Done()
|
||||
b.logger.Info("shutting down builder")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *Builder) handleBuildRequest(subject string, data []byte) {
|
||||
req, err := messages.UnmarshalBuildRequest(data)
|
||||
if err != nil {
|
||||
b.logger.Error("failed to unmarshal build request",
|
||||
"subject", subject,
|
||||
"error", err,
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
b.logger.Info("received build request",
|
||||
"subject", subject,
|
||||
"repo", req.Repo,
|
||||
"target", req.Target,
|
||||
"branch", req.Branch,
|
||||
"reply_to", req.ReplyTo,
|
||||
)
|
||||
|
||||
// Validate request
|
||||
if err := req.Validate(); err != nil {
|
||||
b.logger.Warn("invalid build request", "error", err)
|
||||
b.sendResponse(req.ReplyTo, messages.NewBuildResponse(
|
||||
messages.BuildStatusRejected,
|
||||
err.Error(),
|
||||
))
|
||||
return
|
||||
}
|
||||
|
||||
// Get repo config
|
||||
repo, err := b.repoCfg.GetRepo(req.Repo)
|
||||
if err != nil {
|
||||
b.logger.Warn("unknown repo", "repo", req.Repo)
|
||||
b.sendResponse(req.ReplyTo, messages.NewBuildResponse(
|
||||
messages.BuildStatusRejected,
|
||||
fmt.Sprintf("unknown repo: %s", req.Repo),
|
||||
))
|
||||
return
|
||||
}
|
||||
|
||||
// Try to acquire lock
|
||||
b.lock.Lock()
|
||||
if b.busy {
|
||||
b.lock.Unlock()
|
||||
b.logger.Warn("build already in progress")
|
||||
b.sendResponse(req.ReplyTo, messages.NewBuildResponse(
|
||||
messages.BuildStatusRejected,
|
||||
"another build is already in progress",
|
||||
))
|
||||
return
|
||||
}
|
||||
b.busy = true
|
||||
b.lock.Unlock()
|
||||
|
||||
defer func() {
|
||||
b.lock.Lock()
|
||||
b.busy = false
|
||||
b.lock.Unlock()
|
||||
}()
|
||||
|
||||
// Use default branch if not specified
|
||||
branch := req.Branch
|
||||
if branch == "" {
|
||||
branch = repo.DefaultBranch
|
||||
}
|
||||
|
||||
// Determine hosts to build
|
||||
var hosts []string
|
||||
if req.Target == "all" {
|
||||
// List hosts from flake
|
||||
b.sendResponse(req.ReplyTo, messages.NewBuildResponse(
|
||||
messages.BuildStatusStarted,
|
||||
"discovering hosts...",
|
||||
))
|
||||
|
||||
hosts, err = b.executor.ListHosts(context.Background(), repo.URL, branch)
|
||||
if err != nil {
|
||||
b.logger.Error("failed to list hosts", "error", err)
|
||||
b.sendResponse(req.ReplyTo, messages.NewBuildResponse(
|
||||
messages.BuildStatusFailed,
|
||||
fmt.Sprintf("failed to list hosts: %v", err),
|
||||
).WithError(err.Error()))
|
||||
if b.metrics != nil {
|
||||
b.metrics.RecordBuildFailure(req.Repo, "")
|
||||
}
|
||||
return
|
||||
}
|
||||
// Sort hosts for consistent ordering
|
||||
sort.Strings(hosts)
|
||||
} else {
|
||||
hosts = []string{req.Target}
|
||||
}
|
||||
|
||||
if len(hosts) == 0 {
|
||||
b.sendResponse(req.ReplyTo, messages.NewBuildResponse(
|
||||
messages.BuildStatusFailed,
|
||||
"no hosts to build",
|
||||
))
|
||||
return
|
||||
}
|
||||
|
||||
// Send started response
|
||||
b.sendResponse(req.ReplyTo, &messages.BuildResponse{
|
||||
Status: messages.BuildStatusStarted,
|
||||
Message: fmt.Sprintf("building %d host(s)", len(hosts)),
|
||||
HostsTotal: len(hosts),
|
||||
})
|
||||
|
||||
// Build each host sequentially
|
||||
startTime := time.Now()
|
||||
results := make([]messages.BuildHostResult, 0, len(hosts))
|
||||
succeeded := 0
|
||||
failed := 0
|
||||
|
||||
for i, host := range hosts {
|
||||
hostStart := time.Now()
|
||||
b.logger.Info("building host",
|
||||
"host", host,
|
||||
"progress", fmt.Sprintf("%d/%d", i+1, len(hosts)),
|
||||
"command", b.executor.BuildCommand(repo.URL, branch, host),
|
||||
)
|
||||
|
||||
result := b.executor.Build(context.Background(), repo.URL, branch, host)
|
||||
hostDuration := time.Since(hostStart).Seconds()
|
||||
|
||||
hostResult := messages.BuildHostResult{
|
||||
Host: host,
|
||||
Success: result.Success,
|
||||
DurationSeconds: hostDuration,
|
||||
}
|
||||
if !result.Success {
|
||||
hostResult.Error = result.Stderr
|
||||
if hostResult.Error == "" && result.Error != nil {
|
||||
hostResult.Error = result.Error.Error()
|
||||
}
|
||||
}
|
||||
results = append(results, hostResult)
|
||||
|
||||
if result.Success {
|
||||
succeeded++
|
||||
b.logger.Info("host build succeeded", "host", host, "duration", hostDuration)
|
||||
if b.metrics != nil {
|
||||
b.metrics.RecordHostBuildSuccess(req.Repo, host, hostDuration)
|
||||
}
|
||||
} else {
|
||||
failed++
|
||||
b.logger.Error("host build failed", "host", host, "error", hostResult.Error)
|
||||
if b.metrics != nil {
|
||||
b.metrics.RecordHostBuildFailure(req.Repo, host, hostDuration)
|
||||
}
|
||||
}
|
||||
|
||||
// Send progress update
|
||||
success := result.Success
|
||||
b.sendResponse(req.ReplyTo, &messages.BuildResponse{
|
||||
Status: messages.BuildStatusProgress,
|
||||
Host: host,
|
||||
HostSuccess: &success,
|
||||
HostsCompleted: i + 1,
|
||||
HostsTotal: len(hosts),
|
||||
})
|
||||
}
|
||||
|
||||
totalDuration := time.Since(startTime).Seconds()
|
||||
|
||||
// Send final response
|
||||
status := messages.BuildStatusCompleted
|
||||
message := fmt.Sprintf("built %d/%d hosts successfully", succeeded, len(hosts))
|
||||
if failed > 0 {
|
||||
status = messages.BuildStatusFailed
|
||||
message = fmt.Sprintf("build failed: %d/%d hosts failed", failed, len(hosts))
|
||||
}
|
||||
|
||||
b.sendResponse(req.ReplyTo, &messages.BuildResponse{
|
||||
Status: status,
|
||||
Message: message,
|
||||
Results: results,
|
||||
TotalDurationSeconds: totalDuration,
|
||||
Succeeded: succeeded,
|
||||
Failed: failed,
|
||||
})
|
||||
|
||||
// Record overall build metrics
|
||||
if b.metrics != nil {
|
||||
if failed == 0 {
|
||||
b.metrics.RecordBuildSuccess(req.Repo)
|
||||
} else {
|
||||
b.metrics.RecordBuildFailure(req.Repo, "")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Builder) sendResponse(replyTo string, resp *messages.BuildResponse) {
|
||||
data, err := resp.Marshal()
|
||||
if err != nil {
|
||||
b.logger.Error("failed to marshal build response", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
if err := b.client.Publish(replyTo, data); err != nil {
|
||||
b.logger.Error("failed to publish build response",
|
||||
"reply_to", replyTo,
|
||||
"error", err,
|
||||
)
|
||||
}
|
||||
|
||||
// Flush to ensure response is sent immediately
|
||||
if err := b.client.Flush(); err != nil {
|
||||
b.logger.Error("failed to flush", "error", err)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user