feat: add builder mode for centralized Nix builds
Add a new "builder" capability to trigger Nix builds on a dedicated build host via NATS messaging. This allows pre-building NixOS configurations before deployment. New components: - Builder mode: subscribes to build.<repo>.* subjects, executes nix build - Build CLI command: triggers builds with progress tracking - MCP build tool: available with --enable-builds flag - Builder metrics: tracks build success/failure per repo and host - NixOS module: services.homelab-deploy.builder The builder uses a YAML config file to define allowed repositories with their URLs and default branches. Builds can target all hosts or specific hosts, with real-time progress updates. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
323
internal/builder/builder.go
Normal file
323
internal/builder/builder.go
Normal file
@@ -0,0 +1,323 @@
|
||||
package builder
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"git.t-juice.club/torjus/homelab-deploy/internal/messages"
|
||||
"git.t-juice.club/torjus/homelab-deploy/internal/metrics"
|
||||
"git.t-juice.club/torjus/homelab-deploy/internal/nats"
|
||||
)
|
||||
|
||||
// BuilderConfig holds the configuration for the builder.
|
||||
type BuilderConfig struct {
|
||||
NATSUrl string
|
||||
NKeyFile string
|
||||
ConfigFile string
|
||||
Timeout time.Duration
|
||||
MetricsEnabled bool
|
||||
MetricsAddr string
|
||||
}
|
||||
|
||||
// Builder handles build requests from NATS.
|
||||
type Builder struct {
|
||||
cfg BuilderConfig
|
||||
repoCfg *Config
|
||||
client *nats.Client
|
||||
executor *Executor
|
||||
lock sync.Mutex
|
||||
busy bool
|
||||
logger *slog.Logger
|
||||
|
||||
// metrics server and collector (nil if metrics disabled)
|
||||
metricsServer *metrics.Server
|
||||
metrics *metrics.BuildCollector
|
||||
}
|
||||
|
||||
// New creates a new builder with the given configuration.
|
||||
func New(cfg BuilderConfig, repoCfg *Config, logger *slog.Logger) *Builder {
|
||||
if logger == nil {
|
||||
logger = slog.Default()
|
||||
}
|
||||
|
||||
b := &Builder{
|
||||
cfg: cfg,
|
||||
repoCfg: repoCfg,
|
||||
executor: NewExecutor(cfg.Timeout),
|
||||
logger: logger,
|
||||
}
|
||||
|
||||
if cfg.MetricsEnabled {
|
||||
b.metricsServer = metrics.NewServer(metrics.ServerConfig{
|
||||
Addr: cfg.MetricsAddr,
|
||||
Logger: logger,
|
||||
})
|
||||
b.metrics = metrics.NewBuildCollector(b.metricsServer.Registry())
|
||||
}
|
||||
|
||||
return b
|
||||
}
|
||||
|
||||
// Run starts the builder and blocks until the context is cancelled.
|
||||
func (b *Builder) Run(ctx context.Context) error {
|
||||
// Start metrics server if enabled
|
||||
if b.metricsServer != nil {
|
||||
if err := b.metricsServer.Start(); err != nil {
|
||||
return fmt.Errorf("failed to start metrics server: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
_ = b.metricsServer.Shutdown(shutdownCtx)
|
||||
}()
|
||||
}
|
||||
|
||||
// Connect to NATS
|
||||
b.logger.Info("connecting to NATS", "url", b.cfg.NATSUrl)
|
||||
|
||||
client, err := nats.Connect(nats.Config{
|
||||
URL: b.cfg.NATSUrl,
|
||||
NKeyFile: b.cfg.NKeyFile,
|
||||
Name: "homelab-deploy-builder",
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to connect to NATS: %w", err)
|
||||
}
|
||||
b.client = client
|
||||
defer b.client.Close()
|
||||
|
||||
b.logger.Info("connected to NATS")
|
||||
|
||||
// Subscribe to build subjects for each repo
|
||||
for repoName := range b.repoCfg.Repos {
|
||||
// Subscribe to build.<repo>.all and build.<repo>.<hostname>
|
||||
allSubject := fmt.Sprintf("build.%s.*", repoName)
|
||||
b.logger.Info("subscribing to build subject", "subject", allSubject)
|
||||
if _, err := b.client.Subscribe(allSubject, b.handleBuildRequest); err != nil {
|
||||
return fmt.Errorf("failed to subscribe to %s: %w", allSubject, err)
|
||||
}
|
||||
}
|
||||
|
||||
b.logger.Info("builder started", "repos", len(b.repoCfg.Repos))
|
||||
|
||||
// Wait for context cancellation
|
||||
<-ctx.Done()
|
||||
b.logger.Info("shutting down builder")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *Builder) handleBuildRequest(subject string, data []byte) {
|
||||
req, err := messages.UnmarshalBuildRequest(data)
|
||||
if err != nil {
|
||||
b.logger.Error("failed to unmarshal build request",
|
||||
"subject", subject,
|
||||
"error", err,
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
b.logger.Info("received build request",
|
||||
"subject", subject,
|
||||
"repo", req.Repo,
|
||||
"target", req.Target,
|
||||
"branch", req.Branch,
|
||||
"reply_to", req.ReplyTo,
|
||||
)
|
||||
|
||||
// Validate request
|
||||
if err := req.Validate(); err != nil {
|
||||
b.logger.Warn("invalid build request", "error", err)
|
||||
b.sendResponse(req.ReplyTo, messages.NewBuildResponse(
|
||||
messages.BuildStatusRejected,
|
||||
err.Error(),
|
||||
))
|
||||
return
|
||||
}
|
||||
|
||||
// Get repo config
|
||||
repo, err := b.repoCfg.GetRepo(req.Repo)
|
||||
if err != nil {
|
||||
b.logger.Warn("unknown repo", "repo", req.Repo)
|
||||
b.sendResponse(req.ReplyTo, messages.NewBuildResponse(
|
||||
messages.BuildStatusRejected,
|
||||
fmt.Sprintf("unknown repo: %s", req.Repo),
|
||||
))
|
||||
return
|
||||
}
|
||||
|
||||
// Try to acquire lock
|
||||
b.lock.Lock()
|
||||
if b.busy {
|
||||
b.lock.Unlock()
|
||||
b.logger.Warn("build already in progress")
|
||||
b.sendResponse(req.ReplyTo, messages.NewBuildResponse(
|
||||
messages.BuildStatusRejected,
|
||||
"another build is already in progress",
|
||||
))
|
||||
return
|
||||
}
|
||||
b.busy = true
|
||||
b.lock.Unlock()
|
||||
|
||||
defer func() {
|
||||
b.lock.Lock()
|
||||
b.busy = false
|
||||
b.lock.Unlock()
|
||||
}()
|
||||
|
||||
// Use default branch if not specified
|
||||
branch := req.Branch
|
||||
if branch == "" {
|
||||
branch = repo.DefaultBranch
|
||||
}
|
||||
|
||||
// Determine hosts to build
|
||||
var hosts []string
|
||||
if req.Target == "all" {
|
||||
// List hosts from flake
|
||||
b.sendResponse(req.ReplyTo, messages.NewBuildResponse(
|
||||
messages.BuildStatusStarted,
|
||||
"discovering hosts...",
|
||||
))
|
||||
|
||||
hosts, err = b.executor.ListHosts(context.Background(), repo.URL, branch)
|
||||
if err != nil {
|
||||
b.logger.Error("failed to list hosts", "error", err)
|
||||
b.sendResponse(req.ReplyTo, messages.NewBuildResponse(
|
||||
messages.BuildStatusFailed,
|
||||
fmt.Sprintf("failed to list hosts: %v", err),
|
||||
).WithError(err.Error()))
|
||||
if b.metrics != nil {
|
||||
b.metrics.RecordBuildFailure(req.Repo, "")
|
||||
}
|
||||
return
|
||||
}
|
||||
// Sort hosts for consistent ordering
|
||||
sort.Strings(hosts)
|
||||
} else {
|
||||
hosts = []string{req.Target}
|
||||
}
|
||||
|
||||
if len(hosts) == 0 {
|
||||
b.sendResponse(req.ReplyTo, messages.NewBuildResponse(
|
||||
messages.BuildStatusFailed,
|
||||
"no hosts to build",
|
||||
))
|
||||
return
|
||||
}
|
||||
|
||||
// Send started response
|
||||
b.sendResponse(req.ReplyTo, &messages.BuildResponse{
|
||||
Status: messages.BuildStatusStarted,
|
||||
Message: fmt.Sprintf("building %d host(s)", len(hosts)),
|
||||
HostsTotal: len(hosts),
|
||||
})
|
||||
|
||||
// Build each host sequentially
|
||||
startTime := time.Now()
|
||||
results := make([]messages.BuildHostResult, 0, len(hosts))
|
||||
succeeded := 0
|
||||
failed := 0
|
||||
|
||||
for i, host := range hosts {
|
||||
hostStart := time.Now()
|
||||
b.logger.Info("building host",
|
||||
"host", host,
|
||||
"progress", fmt.Sprintf("%d/%d", i+1, len(hosts)),
|
||||
"command", b.executor.BuildCommand(repo.URL, branch, host),
|
||||
)
|
||||
|
||||
result := b.executor.Build(context.Background(), repo.URL, branch, host)
|
||||
hostDuration := time.Since(hostStart).Seconds()
|
||||
|
||||
hostResult := messages.BuildHostResult{
|
||||
Host: host,
|
||||
Success: result.Success,
|
||||
DurationSeconds: hostDuration,
|
||||
}
|
||||
if !result.Success {
|
||||
hostResult.Error = result.Stderr
|
||||
if hostResult.Error == "" && result.Error != nil {
|
||||
hostResult.Error = result.Error.Error()
|
||||
}
|
||||
}
|
||||
results = append(results, hostResult)
|
||||
|
||||
if result.Success {
|
||||
succeeded++
|
||||
b.logger.Info("host build succeeded", "host", host, "duration", hostDuration)
|
||||
if b.metrics != nil {
|
||||
b.metrics.RecordHostBuildSuccess(req.Repo, host, hostDuration)
|
||||
}
|
||||
} else {
|
||||
failed++
|
||||
b.logger.Error("host build failed", "host", host, "error", hostResult.Error)
|
||||
if b.metrics != nil {
|
||||
b.metrics.RecordHostBuildFailure(req.Repo, host, hostDuration)
|
||||
}
|
||||
}
|
||||
|
||||
// Send progress update
|
||||
success := result.Success
|
||||
b.sendResponse(req.ReplyTo, &messages.BuildResponse{
|
||||
Status: messages.BuildStatusProgress,
|
||||
Host: host,
|
||||
HostSuccess: &success,
|
||||
HostsCompleted: i + 1,
|
||||
HostsTotal: len(hosts),
|
||||
})
|
||||
}
|
||||
|
||||
totalDuration := time.Since(startTime).Seconds()
|
||||
|
||||
// Send final response
|
||||
status := messages.BuildStatusCompleted
|
||||
message := fmt.Sprintf("built %d/%d hosts successfully", succeeded, len(hosts))
|
||||
if failed > 0 {
|
||||
status = messages.BuildStatusFailed
|
||||
message = fmt.Sprintf("build failed: %d/%d hosts failed", failed, len(hosts))
|
||||
}
|
||||
|
||||
b.sendResponse(req.ReplyTo, &messages.BuildResponse{
|
||||
Status: status,
|
||||
Message: message,
|
||||
Results: results,
|
||||
TotalDurationSeconds: totalDuration,
|
||||
Succeeded: succeeded,
|
||||
Failed: failed,
|
||||
})
|
||||
|
||||
// Record overall build metrics
|
||||
if b.metrics != nil {
|
||||
if failed == 0 {
|
||||
b.metrics.RecordBuildSuccess(req.Repo)
|
||||
} else {
|
||||
b.metrics.RecordBuildFailure(req.Repo, "")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Builder) sendResponse(replyTo string, resp *messages.BuildResponse) {
|
||||
data, err := resp.Marshal()
|
||||
if err != nil {
|
||||
b.logger.Error("failed to marshal build response", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
if err := b.client.Publish(replyTo, data); err != nil {
|
||||
b.logger.Error("failed to publish build response",
|
||||
"reply_to", replyTo,
|
||||
"error", err,
|
||||
)
|
||||
}
|
||||
|
||||
// Flush to ensure response is sent immediately
|
||||
if err := b.client.Flush(); err != nil {
|
||||
b.logger.Error("failed to flush", "error", err)
|
||||
}
|
||||
}
|
||||
65
internal/builder/config.go
Normal file
65
internal/builder/config.go
Normal file
@@ -0,0 +1,65 @@
|
||||
package builder
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// RepoConfig holds configuration for a single repository.
|
||||
type RepoConfig struct {
|
||||
URL string `yaml:"url"`
|
||||
DefaultBranch string `yaml:"default_branch"`
|
||||
}
|
||||
|
||||
// Config holds the builder configuration.
|
||||
type Config struct {
|
||||
Repos map[string]RepoConfig `yaml:"repos"`
|
||||
}
|
||||
|
||||
// LoadConfig loads configuration from a YAML file.
|
||||
func LoadConfig(path string) (*Config, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read config file: %w", err)
|
||||
}
|
||||
|
||||
var cfg Config
|
||||
if err := yaml.Unmarshal(data, &cfg); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse config file: %w", err)
|
||||
}
|
||||
|
||||
if err := cfg.Validate(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &cfg, nil
|
||||
}
|
||||
|
||||
// Validate checks that the configuration is valid.
|
||||
func (c *Config) Validate() error {
|
||||
if len(c.Repos) == 0 {
|
||||
return fmt.Errorf("no repos configured")
|
||||
}
|
||||
|
||||
for name, repo := range c.Repos {
|
||||
if repo.URL == "" {
|
||||
return fmt.Errorf("repo %q: url is required", name)
|
||||
}
|
||||
if repo.DefaultBranch == "" {
|
||||
return fmt.Errorf("repo %q: default_branch is required", name)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetRepo returns the configuration for a repository, or an error if not found.
|
||||
func (c *Config) GetRepo(name string) (*RepoConfig, error) {
|
||||
repo, ok := c.Repos[name]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("repo %q not found in configuration", name)
|
||||
}
|
||||
return &repo, nil
|
||||
}
|
||||
116
internal/builder/executor.go
Normal file
116
internal/builder/executor.go
Normal file
@@ -0,0 +1,116 @@
|
||||
package builder
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Executor handles the execution of nix build commands.
|
||||
type Executor struct {
|
||||
timeout time.Duration
|
||||
}
|
||||
|
||||
// NewExecutor creates a new build executor.
|
||||
func NewExecutor(timeout time.Duration) *Executor {
|
||||
return &Executor{
|
||||
timeout: timeout,
|
||||
}
|
||||
}
|
||||
|
||||
// BuildResult contains the result of a build execution.
|
||||
type BuildResult struct {
|
||||
Success bool
|
||||
ExitCode int
|
||||
Stdout string
|
||||
Stderr string
|
||||
Error error
|
||||
}
|
||||
|
||||
// FlakeShowResult contains the parsed output of nix flake show.
|
||||
type FlakeShowResult struct {
|
||||
NixosConfigurations map[string]any `json:"nixosConfigurations"`
|
||||
}
|
||||
|
||||
// ListHosts returns the list of hosts (nixosConfigurations) available in a flake.
|
||||
func (e *Executor) ListHosts(ctx context.Context, flakeURL, branch string) ([]string, error) {
|
||||
ctx, cancel := context.WithTimeout(ctx, 60*time.Second)
|
||||
defer cancel()
|
||||
|
||||
flakeRef := fmt.Sprintf("%s?ref=%s", flakeURL, branch)
|
||||
cmd := exec.CommandContext(ctx, "nix", "flake", "show", "--json", flakeRef)
|
||||
|
||||
var stdout, stderr bytes.Buffer
|
||||
cmd.Stdout = &stdout
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
if ctx.Err() == context.DeadlineExceeded {
|
||||
return nil, fmt.Errorf("timeout listing hosts")
|
||||
}
|
||||
return nil, fmt.Errorf("failed to list hosts: %w\n%s", err, stderr.String())
|
||||
}
|
||||
|
||||
var result FlakeShowResult
|
||||
if err := json.Unmarshal(stdout.Bytes(), &result); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse flake show output: %w", err)
|
||||
}
|
||||
|
||||
hosts := make([]string, 0, len(result.NixosConfigurations))
|
||||
for host := range result.NixosConfigurations {
|
||||
hosts = append(hosts, host)
|
||||
}
|
||||
|
||||
return hosts, nil
|
||||
}
|
||||
|
||||
// Build builds a single host's system configuration.
|
||||
func (e *Executor) Build(ctx context.Context, flakeURL, branch, host string) *BuildResult {
|
||||
ctx, cancel := context.WithTimeout(ctx, e.timeout)
|
||||
defer cancel()
|
||||
|
||||
// Build the flake reference for the system toplevel
|
||||
flakeRef := fmt.Sprintf("%s?ref=%s#nixosConfigurations.%s.config.system.build.toplevel", flakeURL, branch, host)
|
||||
|
||||
cmd := exec.CommandContext(ctx, "nix", "build", "--no-link", flakeRef)
|
||||
|
||||
var stdout, stderr bytes.Buffer
|
||||
cmd.Stdout = &stdout
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
err := cmd.Run()
|
||||
|
||||
result := &BuildResult{
|
||||
Stdout: stdout.String(),
|
||||
Stderr: stderr.String(),
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
result.Success = false
|
||||
result.Error = err
|
||||
|
||||
if ctx.Err() == context.DeadlineExceeded {
|
||||
result.Error = fmt.Errorf("build timed out after %v", e.timeout)
|
||||
}
|
||||
|
||||
if exitErr, ok := err.(*exec.ExitError); ok {
|
||||
result.ExitCode = exitErr.ExitCode()
|
||||
} else {
|
||||
result.ExitCode = -1
|
||||
}
|
||||
} else {
|
||||
result.Success = true
|
||||
result.ExitCode = 0
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// BuildCommand returns the command that would be executed (for logging/debugging).
|
||||
func (e *Executor) BuildCommand(flakeURL, branch, host string) string {
|
||||
flakeRef := fmt.Sprintf("%s?ref=%s#nixosConfigurations.%s.config.system.build.toplevel", flakeURL, branch, host)
|
||||
return fmt.Sprintf("nix build --no-link %s", flakeRef)
|
||||
}
|
||||
140
internal/cli/build.go
Normal file
140
internal/cli/build.go
Normal file
@@ -0,0 +1,140 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
|
||||
"git.t-juice.club/torjus/homelab-deploy/internal/messages"
|
||||
"git.t-juice.club/torjus/homelab-deploy/internal/nats"
|
||||
)
|
||||
|
||||
// BuildConfig holds configuration for a build operation.
|
||||
type BuildConfig struct {
|
||||
NATSUrl string
|
||||
NKeyFile string
|
||||
Repo string
|
||||
Target string
|
||||
Branch string
|
||||
Timeout time.Duration
|
||||
}
|
||||
|
||||
// BuildResult contains the aggregated results from a build.
|
||||
type BuildResult struct {
|
||||
Responses []*messages.BuildResponse
|
||||
FinalResponse *messages.BuildResponse
|
||||
Errors []error
|
||||
}
|
||||
|
||||
// AllSucceeded returns true if the build completed successfully.
|
||||
func (r *BuildResult) AllSucceeded() bool {
|
||||
if len(r.Errors) > 0 {
|
||||
return false
|
||||
}
|
||||
if r.FinalResponse == nil {
|
||||
return false
|
||||
}
|
||||
return r.FinalResponse.Status == messages.BuildStatusCompleted && r.FinalResponse.Failed == 0
|
||||
}
|
||||
|
||||
// MarshalJSON returns the JSON representation of the build result.
|
||||
func (r *BuildResult) MarshalJSON() ([]byte, error) {
|
||||
if r.FinalResponse != nil {
|
||||
return json.Marshal(r.FinalResponse)
|
||||
}
|
||||
return json.Marshal(map[string]any{
|
||||
"status": "unknown",
|
||||
"responses": r.Responses,
|
||||
"errors": r.Errors,
|
||||
})
|
||||
}
|
||||
|
||||
// Build triggers a build and collects responses.
|
||||
func Build(ctx context.Context, cfg BuildConfig, onResponse func(*messages.BuildResponse)) (*BuildResult, error) {
|
||||
// Connect to NATS
|
||||
client, err := nats.Connect(nats.Config{
|
||||
URL: cfg.NATSUrl,
|
||||
NKeyFile: cfg.NKeyFile,
|
||||
Name: "homelab-deploy-build-cli",
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to connect to NATS: %w", err)
|
||||
}
|
||||
defer client.Close()
|
||||
|
||||
// Generate unique reply subject
|
||||
requestID := uuid.New().String()
|
||||
replySubject := fmt.Sprintf("build.responses.%s", requestID)
|
||||
|
||||
var mu sync.Mutex
|
||||
result := &BuildResult{}
|
||||
done := make(chan struct{})
|
||||
|
||||
// Subscribe to reply subject
|
||||
sub, err := client.Subscribe(replySubject, func(subject string, data []byte) {
|
||||
resp, err := messages.UnmarshalBuildResponse(data)
|
||||
if err != nil {
|
||||
mu.Lock()
|
||||
result.Errors = append(result.Errors, fmt.Errorf("failed to unmarshal response: %w", err))
|
||||
mu.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
mu.Lock()
|
||||
result.Responses = append(result.Responses, resp)
|
||||
if resp.Status.IsFinal() {
|
||||
result.FinalResponse = resp
|
||||
select {
|
||||
case <-done:
|
||||
default:
|
||||
close(done)
|
||||
}
|
||||
}
|
||||
mu.Unlock()
|
||||
|
||||
if onResponse != nil {
|
||||
onResponse(resp)
|
||||
}
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to subscribe to reply subject: %w", err)
|
||||
}
|
||||
defer func() { _ = sub.Unsubscribe() }()
|
||||
|
||||
// Build and send request
|
||||
req := &messages.BuildRequest{
|
||||
Repo: cfg.Repo,
|
||||
Target: cfg.Target,
|
||||
Branch: cfg.Branch,
|
||||
ReplyTo: replySubject,
|
||||
}
|
||||
|
||||
data, err := req.Marshal()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal request: %w", err)
|
||||
}
|
||||
|
||||
// Publish to build.<repo>.<target>
|
||||
buildSubject := fmt.Sprintf("build.%s.%s", cfg.Repo, cfg.Target)
|
||||
if err := client.Publish(buildSubject, data); err != nil {
|
||||
return nil, fmt.Errorf("failed to publish request: %w", err)
|
||||
}
|
||||
|
||||
if err := client.Flush(); err != nil {
|
||||
return nil, fmt.Errorf("failed to flush: %w", err)
|
||||
}
|
||||
|
||||
// Wait for final response or timeout
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return result, ctx.Err()
|
||||
case <-done:
|
||||
return result, nil
|
||||
case <-time.After(cfg.Timeout):
|
||||
return result, nil
|
||||
}
|
||||
}
|
||||
109
internal/mcp/build_tools.go
Normal file
109
internal/mcp/build_tools.go
Normal file
@@ -0,0 +1,109 @@
|
||||
package mcp
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/mark3labs/mcp-go/mcp"
|
||||
|
||||
deploycli "git.t-juice.club/torjus/homelab-deploy/internal/cli"
|
||||
"git.t-juice.club/torjus/homelab-deploy/internal/messages"
|
||||
)
|
||||
|
||||
// BuildTool creates the build tool definition.
|
||||
func BuildTool() mcp.Tool {
|
||||
return mcp.NewTool(
|
||||
"build",
|
||||
mcp.WithDescription("Trigger a Nix build on the build server"),
|
||||
mcp.WithString("repo",
|
||||
mcp.Required(),
|
||||
mcp.Description("Repository name (must match builder config)"),
|
||||
),
|
||||
mcp.WithString("target",
|
||||
mcp.Description("Target hostname, or omit to build all hosts"),
|
||||
),
|
||||
mcp.WithBoolean("all",
|
||||
mcp.Description("Build all hosts in the repository (default if no target specified)"),
|
||||
),
|
||||
mcp.WithString("branch",
|
||||
mcp.Description("Git branch to build (uses repo default if not specified)"),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
// HandleBuild handles the build tool.
|
||||
func (h *ToolHandler) HandleBuild(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
|
||||
repo, err := request.RequireString("repo")
|
||||
if err != nil {
|
||||
return mcp.NewToolResultError("repo is required"), nil
|
||||
}
|
||||
|
||||
target := request.GetString("target", "")
|
||||
all := request.GetBool("all", false)
|
||||
branch := request.GetString("branch", "")
|
||||
|
||||
// Default to "all" if no target specified
|
||||
if target == "" {
|
||||
if !all {
|
||||
all = true
|
||||
}
|
||||
target = "all"
|
||||
}
|
||||
if all && target != "all" {
|
||||
return mcp.NewToolResultError("cannot specify both target and all"), nil
|
||||
}
|
||||
|
||||
cfg := deploycli.BuildConfig{
|
||||
NATSUrl: h.cfg.NATSUrl,
|
||||
NKeyFile: h.cfg.NKeyFile,
|
||||
Repo: repo,
|
||||
Target: target,
|
||||
Branch: branch,
|
||||
Timeout: h.cfg.Timeout,
|
||||
}
|
||||
|
||||
var output strings.Builder
|
||||
branchStr := branch
|
||||
if branchStr == "" {
|
||||
branchStr = "(default)"
|
||||
}
|
||||
output.WriteString(fmt.Sprintf("Building %s target=%s branch=%s\n\n", repo, target, branchStr))
|
||||
|
||||
result, err := deploycli.Build(ctx, cfg, func(resp *messages.BuildResponse) {
|
||||
switch resp.Status {
|
||||
case messages.BuildStatusStarted:
|
||||
output.WriteString(fmt.Sprintf("Started: %s\n", resp.Message))
|
||||
case messages.BuildStatusProgress:
|
||||
successStr := "..."
|
||||
if resp.HostSuccess != nil {
|
||||
if *resp.HostSuccess {
|
||||
successStr = "success"
|
||||
} else {
|
||||
successStr = "failed"
|
||||
}
|
||||
}
|
||||
output.WriteString(fmt.Sprintf("[%d/%d] %s: %s\n", resp.HostsCompleted, resp.HostsTotal, resp.Host, successStr))
|
||||
case messages.BuildStatusCompleted, messages.BuildStatusFailed:
|
||||
output.WriteString(fmt.Sprintf("\n%s\n", resp.Message))
|
||||
case messages.BuildStatusRejected:
|
||||
output.WriteString(fmt.Sprintf("Rejected: %s\n", resp.Message))
|
||||
}
|
||||
})
|
||||
if err != nil {
|
||||
return mcp.NewToolResultError(fmt.Sprintf("build failed: %v", err)), nil
|
||||
}
|
||||
|
||||
if result.FinalResponse != nil {
|
||||
output.WriteString(fmt.Sprintf("\nBuild complete: %d succeeded, %d failed (%.1fs)\n",
|
||||
result.FinalResponse.Succeeded,
|
||||
result.FinalResponse.Failed,
|
||||
result.FinalResponse.TotalDurationSeconds))
|
||||
}
|
||||
|
||||
if !result.AllSucceeded() {
|
||||
output.WriteString("WARNING: Some builds failed\n")
|
||||
}
|
||||
|
||||
return mcp.NewToolResultText(output.String()), nil
|
||||
}
|
||||
@@ -12,6 +12,7 @@ type ServerConfig struct {
|
||||
NKeyFile string
|
||||
EnableAdmin bool
|
||||
AdminNKeyFile string
|
||||
EnableBuilds bool
|
||||
DiscoverSubject string
|
||||
Timeout time.Duration
|
||||
}
|
||||
@@ -49,6 +50,11 @@ func New(cfg ServerConfig) *Server {
|
||||
s.AddTool(DeployAdminTool(), handler.HandleDeployAdmin)
|
||||
}
|
||||
|
||||
// Optionally register build tool
|
||||
if cfg.EnableBuilds {
|
||||
s.AddTool(BuildTool(), handler.HandleBuild)
|
||||
}
|
||||
|
||||
return &Server{
|
||||
cfg: cfg,
|
||||
server: s,
|
||||
|
||||
125
internal/messages/build.go
Normal file
125
internal/messages/build.go
Normal file
@@ -0,0 +1,125 @@
|
||||
package messages
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// BuildStatus represents the status of a build response.
|
||||
type BuildStatus string
|
||||
|
||||
const (
|
||||
BuildStatusStarted BuildStatus = "started"
|
||||
BuildStatusProgress BuildStatus = "progress"
|
||||
BuildStatusCompleted BuildStatus = "completed"
|
||||
BuildStatusFailed BuildStatus = "failed"
|
||||
BuildStatusRejected BuildStatus = "rejected"
|
||||
)
|
||||
|
||||
// IsFinal returns true if this status indicates a terminal state.
|
||||
func (s BuildStatus) IsFinal() bool {
|
||||
switch s {
|
||||
case BuildStatusCompleted, BuildStatusFailed, BuildStatusRejected:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// BuildRequest is the message sent to request a build.
|
||||
type BuildRequest struct {
|
||||
Repo string `json:"repo"` // Must match config
|
||||
Target string `json:"target"` // Hostname or "all"
|
||||
Branch string `json:"branch,omitempty"` // Optional, uses repo default
|
||||
ReplyTo string `json:"reply_to"`
|
||||
}
|
||||
|
||||
// Validate checks that the request is valid.
|
||||
func (r *BuildRequest) Validate() error {
|
||||
if r.Repo == "" {
|
||||
return fmt.Errorf("repo is required")
|
||||
}
|
||||
if !revisionRegex.MatchString(r.Repo) {
|
||||
return fmt.Errorf("invalid repo name format: %q", r.Repo)
|
||||
}
|
||||
if r.Target == "" {
|
||||
return fmt.Errorf("target is required")
|
||||
}
|
||||
if r.Branch != "" && !revisionRegex.MatchString(r.Branch) {
|
||||
return fmt.Errorf("invalid branch format: %q", r.Branch)
|
||||
}
|
||||
if r.ReplyTo == "" {
|
||||
return fmt.Errorf("reply_to is required")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Marshal serializes the request to JSON.
|
||||
func (r *BuildRequest) Marshal() ([]byte, error) {
|
||||
return json.Marshal(r)
|
||||
}
|
||||
|
||||
// UnmarshalBuildRequest deserializes a request from JSON.
|
||||
func UnmarshalBuildRequest(data []byte) (*BuildRequest, error) {
|
||||
var r BuildRequest
|
||||
if err := json.Unmarshal(data, &r); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal build request: %w", err)
|
||||
}
|
||||
return &r, nil
|
||||
}
|
||||
|
||||
// BuildHostResult contains the result of building a single host.
|
||||
type BuildHostResult struct {
|
||||
Host string `json:"host"`
|
||||
Success bool `json:"success"`
|
||||
Error string `json:"error,omitempty"`
|
||||
DurationSeconds float64 `json:"duration_seconds"`
|
||||
}
|
||||
|
||||
// BuildResponse is the message sent in response to a build request.
|
||||
type BuildResponse struct {
|
||||
Status BuildStatus `json:"status"`
|
||||
Message string `json:"message,omitempty"`
|
||||
|
||||
// Progress updates
|
||||
Host string `json:"host,omitempty"`
|
||||
HostSuccess *bool `json:"host_success,omitempty"`
|
||||
HostsCompleted int `json:"hosts_completed,omitempty"`
|
||||
HostsTotal int `json:"hosts_total,omitempty"`
|
||||
|
||||
// Final response
|
||||
Results []BuildHostResult `json:"results,omitempty"`
|
||||
TotalDurationSeconds float64 `json:"total_duration_seconds,omitempty"`
|
||||
Succeeded int `json:"succeeded,omitempty"`
|
||||
Failed int `json:"failed,omitempty"`
|
||||
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
// NewBuildResponse creates a new response with the given status and message.
|
||||
func NewBuildResponse(status BuildStatus, message string) *BuildResponse {
|
||||
return &BuildResponse{
|
||||
Status: status,
|
||||
Message: message,
|
||||
}
|
||||
}
|
||||
|
||||
// WithError adds an error message to the response.
|
||||
func (r *BuildResponse) WithError(err string) *BuildResponse {
|
||||
r.Error = err
|
||||
return r
|
||||
}
|
||||
|
||||
// Marshal serializes the response to JSON.
|
||||
func (r *BuildResponse) Marshal() ([]byte, error) {
|
||||
return json.Marshal(r)
|
||||
}
|
||||
|
||||
// UnmarshalBuildResponse deserializes a response from JSON.
|
||||
func UnmarshalBuildResponse(data []byte) (*BuildResponse, error) {
|
||||
var r BuildResponse
|
||||
if err := json.Unmarshal(data, &r); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal build response: %w", err)
|
||||
}
|
||||
return &r, nil
|
||||
}
|
||||
99
internal/metrics/build_metrics.go
Normal file
99
internal/metrics/build_metrics.go
Normal file
@@ -0,0 +1,99 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// BuildCollector holds all Prometheus metrics for the builder.
|
||||
type BuildCollector struct {
|
||||
buildsTotal *prometheus.CounterVec
|
||||
buildHostTotal *prometheus.CounterVec
|
||||
buildDuration *prometheus.HistogramVec
|
||||
buildLastTimestamp *prometheus.GaugeVec
|
||||
buildLastSuccessTime *prometheus.GaugeVec
|
||||
buildLastFailureTime *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
// NewBuildCollector creates a new build metrics collector and registers it with the given registerer.
|
||||
func NewBuildCollector(reg prometheus.Registerer) *BuildCollector {
|
||||
c := &BuildCollector{
|
||||
buildsTotal: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "homelab_deploy_builds_total",
|
||||
Help: "Total builds processed",
|
||||
},
|
||||
[]string{"repo", "status"},
|
||||
),
|
||||
buildHostTotal: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "homelab_deploy_build_host_total",
|
||||
Help: "Total host builds processed",
|
||||
},
|
||||
[]string{"repo", "host", "status"},
|
||||
),
|
||||
buildDuration: prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "homelab_deploy_build_duration_seconds",
|
||||
Help: "Build execution time per host",
|
||||
Buckets: []float64{30, 60, 120, 300, 600, 900, 1200, 1800, 3600},
|
||||
},
|
||||
[]string{"repo", "host"},
|
||||
),
|
||||
buildLastTimestamp: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "homelab_deploy_build_last_timestamp",
|
||||
Help: "Timestamp of last build attempt",
|
||||
},
|
||||
[]string{"repo"},
|
||||
),
|
||||
buildLastSuccessTime: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "homelab_deploy_build_last_success_timestamp",
|
||||
Help: "Timestamp of last successful build",
|
||||
},
|
||||
[]string{"repo"},
|
||||
),
|
||||
buildLastFailureTime: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "homelab_deploy_build_last_failure_timestamp",
|
||||
Help: "Timestamp of last failed build",
|
||||
},
|
||||
[]string{"repo"},
|
||||
),
|
||||
}
|
||||
|
||||
reg.MustRegister(c.buildsTotal)
|
||||
reg.MustRegister(c.buildHostTotal)
|
||||
reg.MustRegister(c.buildDuration)
|
||||
reg.MustRegister(c.buildLastTimestamp)
|
||||
reg.MustRegister(c.buildLastSuccessTime)
|
||||
reg.MustRegister(c.buildLastFailureTime)
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
// RecordBuildSuccess records a successful build.
|
||||
func (c *BuildCollector) RecordBuildSuccess(repo string) {
|
||||
c.buildsTotal.WithLabelValues(repo, "success").Inc()
|
||||
c.buildLastTimestamp.WithLabelValues(repo).SetToCurrentTime()
|
||||
c.buildLastSuccessTime.WithLabelValues(repo).SetToCurrentTime()
|
||||
}
|
||||
|
||||
// RecordBuildFailure records a failed build.
|
||||
func (c *BuildCollector) RecordBuildFailure(repo, errorCode string) {
|
||||
c.buildsTotal.WithLabelValues(repo, "failure").Inc()
|
||||
c.buildLastTimestamp.WithLabelValues(repo).SetToCurrentTime()
|
||||
c.buildLastFailureTime.WithLabelValues(repo).SetToCurrentTime()
|
||||
}
|
||||
|
||||
// RecordHostBuildSuccess records a successful host build.
|
||||
func (c *BuildCollector) RecordHostBuildSuccess(repo, host string, durationSeconds float64) {
|
||||
c.buildHostTotal.WithLabelValues(repo, host, "success").Inc()
|
||||
c.buildDuration.WithLabelValues(repo, host).Observe(durationSeconds)
|
||||
}
|
||||
|
||||
// RecordHostBuildFailure records a failed host build.
|
||||
func (c *BuildCollector) RecordHostBuildFailure(repo, host string, durationSeconds float64) {
|
||||
c.buildHostTotal.WithLabelValues(repo, host, "failure").Inc()
|
||||
c.buildDuration.WithLabelValues(repo, host).Observe(durationSeconds)
|
||||
}
|
||||
@@ -74,6 +74,11 @@ func (s *Server) Collector() *Collector {
|
||||
return s.collector
|
||||
}
|
||||
|
||||
// Registry returns the Prometheus registry.
|
||||
func (s *Server) Registry() *prometheus.Registry {
|
||||
return s.registry
|
||||
}
|
||||
|
||||
// ScrapeCh returns a channel that receives a signal each time the metrics endpoint is scraped.
|
||||
func (s *Server) ScrapeCh() <-chan struct{} {
|
||||
return s.scrapeCh
|
||||
|
||||
Reference in New Issue
Block a user