feat: add builder mode for centralized Nix builds

Add a new "builder" capability to trigger Nix builds on a dedicated
build host via NATS messaging. This allows pre-building NixOS
configurations before deployment.

New components:
- Builder mode: subscribes to build.<repo>.* subjects, executes nix build
- Build CLI command: triggers builds with progress tracking
- MCP build tool: available with --enable-builds flag
- Builder metrics: tracks build success/failure per repo and host
- NixOS module: services.homelab-deploy.builder

The builder uses a YAML config file to define allowed repositories
with their URLs and default branches. Builds can target all hosts
or specific hosts, with real-time progress updates.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-10 22:03:14 +01:00
parent 277a49a666
commit 14f5b31faf
13 changed files with 1535 additions and 57 deletions

323
internal/builder/builder.go Normal file
View File

@@ -0,0 +1,323 @@
package builder
import (
"context"
"fmt"
"log/slog"
"sort"
"sync"
"time"
"git.t-juice.club/torjus/homelab-deploy/internal/messages"
"git.t-juice.club/torjus/homelab-deploy/internal/metrics"
"git.t-juice.club/torjus/homelab-deploy/internal/nats"
)
// BuilderConfig holds the configuration for the builder.
type BuilderConfig struct {
NATSUrl string
NKeyFile string
ConfigFile string
Timeout time.Duration
MetricsEnabled bool
MetricsAddr string
}
// Builder handles build requests from NATS.
type Builder struct {
cfg BuilderConfig
repoCfg *Config
client *nats.Client
executor *Executor
lock sync.Mutex
busy bool
logger *slog.Logger
// metrics server and collector (nil if metrics disabled)
metricsServer *metrics.Server
metrics *metrics.BuildCollector
}
// New creates a new builder with the given configuration.
func New(cfg BuilderConfig, repoCfg *Config, logger *slog.Logger) *Builder {
if logger == nil {
logger = slog.Default()
}
b := &Builder{
cfg: cfg,
repoCfg: repoCfg,
executor: NewExecutor(cfg.Timeout),
logger: logger,
}
if cfg.MetricsEnabled {
b.metricsServer = metrics.NewServer(metrics.ServerConfig{
Addr: cfg.MetricsAddr,
Logger: logger,
})
b.metrics = metrics.NewBuildCollector(b.metricsServer.Registry())
}
return b
}
// Run starts the builder and blocks until the context is cancelled.
func (b *Builder) Run(ctx context.Context) error {
// Start metrics server if enabled
if b.metricsServer != nil {
if err := b.metricsServer.Start(); err != nil {
return fmt.Errorf("failed to start metrics server: %w", err)
}
defer func() {
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
_ = b.metricsServer.Shutdown(shutdownCtx)
}()
}
// Connect to NATS
b.logger.Info("connecting to NATS", "url", b.cfg.NATSUrl)
client, err := nats.Connect(nats.Config{
URL: b.cfg.NATSUrl,
NKeyFile: b.cfg.NKeyFile,
Name: "homelab-deploy-builder",
})
if err != nil {
return fmt.Errorf("failed to connect to NATS: %w", err)
}
b.client = client
defer b.client.Close()
b.logger.Info("connected to NATS")
// Subscribe to build subjects for each repo
for repoName := range b.repoCfg.Repos {
// Subscribe to build.<repo>.all and build.<repo>.<hostname>
allSubject := fmt.Sprintf("build.%s.*", repoName)
b.logger.Info("subscribing to build subject", "subject", allSubject)
if _, err := b.client.Subscribe(allSubject, b.handleBuildRequest); err != nil {
return fmt.Errorf("failed to subscribe to %s: %w", allSubject, err)
}
}
b.logger.Info("builder started", "repos", len(b.repoCfg.Repos))
// Wait for context cancellation
<-ctx.Done()
b.logger.Info("shutting down builder")
return nil
}
func (b *Builder) handleBuildRequest(subject string, data []byte) {
req, err := messages.UnmarshalBuildRequest(data)
if err != nil {
b.logger.Error("failed to unmarshal build request",
"subject", subject,
"error", err,
)
return
}
b.logger.Info("received build request",
"subject", subject,
"repo", req.Repo,
"target", req.Target,
"branch", req.Branch,
"reply_to", req.ReplyTo,
)
// Validate request
if err := req.Validate(); err != nil {
b.logger.Warn("invalid build request", "error", err)
b.sendResponse(req.ReplyTo, messages.NewBuildResponse(
messages.BuildStatusRejected,
err.Error(),
))
return
}
// Get repo config
repo, err := b.repoCfg.GetRepo(req.Repo)
if err != nil {
b.logger.Warn("unknown repo", "repo", req.Repo)
b.sendResponse(req.ReplyTo, messages.NewBuildResponse(
messages.BuildStatusRejected,
fmt.Sprintf("unknown repo: %s", req.Repo),
))
return
}
// Try to acquire lock
b.lock.Lock()
if b.busy {
b.lock.Unlock()
b.logger.Warn("build already in progress")
b.sendResponse(req.ReplyTo, messages.NewBuildResponse(
messages.BuildStatusRejected,
"another build is already in progress",
))
return
}
b.busy = true
b.lock.Unlock()
defer func() {
b.lock.Lock()
b.busy = false
b.lock.Unlock()
}()
// Use default branch if not specified
branch := req.Branch
if branch == "" {
branch = repo.DefaultBranch
}
// Determine hosts to build
var hosts []string
if req.Target == "all" {
// List hosts from flake
b.sendResponse(req.ReplyTo, messages.NewBuildResponse(
messages.BuildStatusStarted,
"discovering hosts...",
))
hosts, err = b.executor.ListHosts(context.Background(), repo.URL, branch)
if err != nil {
b.logger.Error("failed to list hosts", "error", err)
b.sendResponse(req.ReplyTo, messages.NewBuildResponse(
messages.BuildStatusFailed,
fmt.Sprintf("failed to list hosts: %v", err),
).WithError(err.Error()))
if b.metrics != nil {
b.metrics.RecordBuildFailure(req.Repo, "")
}
return
}
// Sort hosts for consistent ordering
sort.Strings(hosts)
} else {
hosts = []string{req.Target}
}
if len(hosts) == 0 {
b.sendResponse(req.ReplyTo, messages.NewBuildResponse(
messages.BuildStatusFailed,
"no hosts to build",
))
return
}
// Send started response
b.sendResponse(req.ReplyTo, &messages.BuildResponse{
Status: messages.BuildStatusStarted,
Message: fmt.Sprintf("building %d host(s)", len(hosts)),
HostsTotal: len(hosts),
})
// Build each host sequentially
startTime := time.Now()
results := make([]messages.BuildHostResult, 0, len(hosts))
succeeded := 0
failed := 0
for i, host := range hosts {
hostStart := time.Now()
b.logger.Info("building host",
"host", host,
"progress", fmt.Sprintf("%d/%d", i+1, len(hosts)),
"command", b.executor.BuildCommand(repo.URL, branch, host),
)
result := b.executor.Build(context.Background(), repo.URL, branch, host)
hostDuration := time.Since(hostStart).Seconds()
hostResult := messages.BuildHostResult{
Host: host,
Success: result.Success,
DurationSeconds: hostDuration,
}
if !result.Success {
hostResult.Error = result.Stderr
if hostResult.Error == "" && result.Error != nil {
hostResult.Error = result.Error.Error()
}
}
results = append(results, hostResult)
if result.Success {
succeeded++
b.logger.Info("host build succeeded", "host", host, "duration", hostDuration)
if b.metrics != nil {
b.metrics.RecordHostBuildSuccess(req.Repo, host, hostDuration)
}
} else {
failed++
b.logger.Error("host build failed", "host", host, "error", hostResult.Error)
if b.metrics != nil {
b.metrics.RecordHostBuildFailure(req.Repo, host, hostDuration)
}
}
// Send progress update
success := result.Success
b.sendResponse(req.ReplyTo, &messages.BuildResponse{
Status: messages.BuildStatusProgress,
Host: host,
HostSuccess: &success,
HostsCompleted: i + 1,
HostsTotal: len(hosts),
})
}
totalDuration := time.Since(startTime).Seconds()
// Send final response
status := messages.BuildStatusCompleted
message := fmt.Sprintf("built %d/%d hosts successfully", succeeded, len(hosts))
if failed > 0 {
status = messages.BuildStatusFailed
message = fmt.Sprintf("build failed: %d/%d hosts failed", failed, len(hosts))
}
b.sendResponse(req.ReplyTo, &messages.BuildResponse{
Status: status,
Message: message,
Results: results,
TotalDurationSeconds: totalDuration,
Succeeded: succeeded,
Failed: failed,
})
// Record overall build metrics
if b.metrics != nil {
if failed == 0 {
b.metrics.RecordBuildSuccess(req.Repo)
} else {
b.metrics.RecordBuildFailure(req.Repo, "")
}
}
}
func (b *Builder) sendResponse(replyTo string, resp *messages.BuildResponse) {
data, err := resp.Marshal()
if err != nil {
b.logger.Error("failed to marshal build response", "error", err)
return
}
if err := b.client.Publish(replyTo, data); err != nil {
b.logger.Error("failed to publish build response",
"reply_to", replyTo,
"error", err,
)
}
// Flush to ensure response is sent immediately
if err := b.client.Flush(); err != nil {
b.logger.Error("failed to flush", "error", err)
}
}

View File

@@ -0,0 +1,65 @@
package builder
import (
"fmt"
"os"
"gopkg.in/yaml.v3"
)
// RepoConfig holds configuration for a single repository.
type RepoConfig struct {
URL string `yaml:"url"`
DefaultBranch string `yaml:"default_branch"`
}
// Config holds the builder configuration.
type Config struct {
Repos map[string]RepoConfig `yaml:"repos"`
}
// LoadConfig loads configuration from a YAML file.
func LoadConfig(path string) (*Config, error) {
data, err := os.ReadFile(path)
if err != nil {
return nil, fmt.Errorf("failed to read config file: %w", err)
}
var cfg Config
if err := yaml.Unmarshal(data, &cfg); err != nil {
return nil, fmt.Errorf("failed to parse config file: %w", err)
}
if err := cfg.Validate(); err != nil {
return nil, err
}
return &cfg, nil
}
// Validate checks that the configuration is valid.
func (c *Config) Validate() error {
if len(c.Repos) == 0 {
return fmt.Errorf("no repos configured")
}
for name, repo := range c.Repos {
if repo.URL == "" {
return fmt.Errorf("repo %q: url is required", name)
}
if repo.DefaultBranch == "" {
return fmt.Errorf("repo %q: default_branch is required", name)
}
}
return nil
}
// GetRepo returns the configuration for a repository, or an error if not found.
func (c *Config) GetRepo(name string) (*RepoConfig, error) {
repo, ok := c.Repos[name]
if !ok {
return nil, fmt.Errorf("repo %q not found in configuration", name)
}
return &repo, nil
}

View File

@@ -0,0 +1,116 @@
package builder
import (
"bytes"
"context"
"encoding/json"
"fmt"
"os/exec"
"time"
)
// Executor handles the execution of nix build commands.
type Executor struct {
timeout time.Duration
}
// NewExecutor creates a new build executor.
func NewExecutor(timeout time.Duration) *Executor {
return &Executor{
timeout: timeout,
}
}
// BuildResult contains the result of a build execution.
type BuildResult struct {
Success bool
ExitCode int
Stdout string
Stderr string
Error error
}
// FlakeShowResult contains the parsed output of nix flake show.
type FlakeShowResult struct {
NixosConfigurations map[string]any `json:"nixosConfigurations"`
}
// ListHosts returns the list of hosts (nixosConfigurations) available in a flake.
func (e *Executor) ListHosts(ctx context.Context, flakeURL, branch string) ([]string, error) {
ctx, cancel := context.WithTimeout(ctx, 60*time.Second)
defer cancel()
flakeRef := fmt.Sprintf("%s?ref=%s", flakeURL, branch)
cmd := exec.CommandContext(ctx, "nix", "flake", "show", "--json", flakeRef)
var stdout, stderr bytes.Buffer
cmd.Stdout = &stdout
cmd.Stderr = &stderr
if err := cmd.Run(); err != nil {
if ctx.Err() == context.DeadlineExceeded {
return nil, fmt.Errorf("timeout listing hosts")
}
return nil, fmt.Errorf("failed to list hosts: %w\n%s", err, stderr.String())
}
var result FlakeShowResult
if err := json.Unmarshal(stdout.Bytes(), &result); err != nil {
return nil, fmt.Errorf("failed to parse flake show output: %w", err)
}
hosts := make([]string, 0, len(result.NixosConfigurations))
for host := range result.NixosConfigurations {
hosts = append(hosts, host)
}
return hosts, nil
}
// Build builds a single host's system configuration.
func (e *Executor) Build(ctx context.Context, flakeURL, branch, host string) *BuildResult {
ctx, cancel := context.WithTimeout(ctx, e.timeout)
defer cancel()
// Build the flake reference for the system toplevel
flakeRef := fmt.Sprintf("%s?ref=%s#nixosConfigurations.%s.config.system.build.toplevel", flakeURL, branch, host)
cmd := exec.CommandContext(ctx, "nix", "build", "--no-link", flakeRef)
var stdout, stderr bytes.Buffer
cmd.Stdout = &stdout
cmd.Stderr = &stderr
err := cmd.Run()
result := &BuildResult{
Stdout: stdout.String(),
Stderr: stderr.String(),
}
if err != nil {
result.Success = false
result.Error = err
if ctx.Err() == context.DeadlineExceeded {
result.Error = fmt.Errorf("build timed out after %v", e.timeout)
}
if exitErr, ok := err.(*exec.ExitError); ok {
result.ExitCode = exitErr.ExitCode()
} else {
result.ExitCode = -1
}
} else {
result.Success = true
result.ExitCode = 0
}
return result
}
// BuildCommand returns the command that would be executed (for logging/debugging).
func (e *Executor) BuildCommand(flakeURL, branch, host string) string {
flakeRef := fmt.Sprintf("%s?ref=%s#nixosConfigurations.%s.config.system.build.toplevel", flakeURL, branch, host)
return fmt.Sprintf("nix build --no-link %s", flakeRef)
}

140
internal/cli/build.go Normal file
View File

@@ -0,0 +1,140 @@
package cli
import (
"context"
"encoding/json"
"fmt"
"sync"
"time"
"github.com/google/uuid"
"git.t-juice.club/torjus/homelab-deploy/internal/messages"
"git.t-juice.club/torjus/homelab-deploy/internal/nats"
)
// BuildConfig holds configuration for a build operation.
type BuildConfig struct {
NATSUrl string
NKeyFile string
Repo string
Target string
Branch string
Timeout time.Duration
}
// BuildResult contains the aggregated results from a build.
type BuildResult struct {
Responses []*messages.BuildResponse
FinalResponse *messages.BuildResponse
Errors []error
}
// AllSucceeded returns true if the build completed successfully.
func (r *BuildResult) AllSucceeded() bool {
if len(r.Errors) > 0 {
return false
}
if r.FinalResponse == nil {
return false
}
return r.FinalResponse.Status == messages.BuildStatusCompleted && r.FinalResponse.Failed == 0
}
// MarshalJSON returns the JSON representation of the build result.
func (r *BuildResult) MarshalJSON() ([]byte, error) {
if r.FinalResponse != nil {
return json.Marshal(r.FinalResponse)
}
return json.Marshal(map[string]any{
"status": "unknown",
"responses": r.Responses,
"errors": r.Errors,
})
}
// Build triggers a build and collects responses.
func Build(ctx context.Context, cfg BuildConfig, onResponse func(*messages.BuildResponse)) (*BuildResult, error) {
// Connect to NATS
client, err := nats.Connect(nats.Config{
URL: cfg.NATSUrl,
NKeyFile: cfg.NKeyFile,
Name: "homelab-deploy-build-cli",
})
if err != nil {
return nil, fmt.Errorf("failed to connect to NATS: %w", err)
}
defer client.Close()
// Generate unique reply subject
requestID := uuid.New().String()
replySubject := fmt.Sprintf("build.responses.%s", requestID)
var mu sync.Mutex
result := &BuildResult{}
done := make(chan struct{})
// Subscribe to reply subject
sub, err := client.Subscribe(replySubject, func(subject string, data []byte) {
resp, err := messages.UnmarshalBuildResponse(data)
if err != nil {
mu.Lock()
result.Errors = append(result.Errors, fmt.Errorf("failed to unmarshal response: %w", err))
mu.Unlock()
return
}
mu.Lock()
result.Responses = append(result.Responses, resp)
if resp.Status.IsFinal() {
result.FinalResponse = resp
select {
case <-done:
default:
close(done)
}
}
mu.Unlock()
if onResponse != nil {
onResponse(resp)
}
})
if err != nil {
return nil, fmt.Errorf("failed to subscribe to reply subject: %w", err)
}
defer func() { _ = sub.Unsubscribe() }()
// Build and send request
req := &messages.BuildRequest{
Repo: cfg.Repo,
Target: cfg.Target,
Branch: cfg.Branch,
ReplyTo: replySubject,
}
data, err := req.Marshal()
if err != nil {
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
// Publish to build.<repo>.<target>
buildSubject := fmt.Sprintf("build.%s.%s", cfg.Repo, cfg.Target)
if err := client.Publish(buildSubject, data); err != nil {
return nil, fmt.Errorf("failed to publish request: %w", err)
}
if err := client.Flush(); err != nil {
return nil, fmt.Errorf("failed to flush: %w", err)
}
// Wait for final response or timeout
select {
case <-ctx.Done():
return result, ctx.Err()
case <-done:
return result, nil
case <-time.After(cfg.Timeout):
return result, nil
}
}

109
internal/mcp/build_tools.go Normal file
View File

@@ -0,0 +1,109 @@
package mcp
import (
"context"
"fmt"
"strings"
"github.com/mark3labs/mcp-go/mcp"
deploycli "git.t-juice.club/torjus/homelab-deploy/internal/cli"
"git.t-juice.club/torjus/homelab-deploy/internal/messages"
)
// BuildTool creates the build tool definition.
func BuildTool() mcp.Tool {
return mcp.NewTool(
"build",
mcp.WithDescription("Trigger a Nix build on the build server"),
mcp.WithString("repo",
mcp.Required(),
mcp.Description("Repository name (must match builder config)"),
),
mcp.WithString("target",
mcp.Description("Target hostname, or omit to build all hosts"),
),
mcp.WithBoolean("all",
mcp.Description("Build all hosts in the repository (default if no target specified)"),
),
mcp.WithString("branch",
mcp.Description("Git branch to build (uses repo default if not specified)"),
),
)
}
// HandleBuild handles the build tool.
func (h *ToolHandler) HandleBuild(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
repo, err := request.RequireString("repo")
if err != nil {
return mcp.NewToolResultError("repo is required"), nil
}
target := request.GetString("target", "")
all := request.GetBool("all", false)
branch := request.GetString("branch", "")
// Default to "all" if no target specified
if target == "" {
if !all {
all = true
}
target = "all"
}
if all && target != "all" {
return mcp.NewToolResultError("cannot specify both target and all"), nil
}
cfg := deploycli.BuildConfig{
NATSUrl: h.cfg.NATSUrl,
NKeyFile: h.cfg.NKeyFile,
Repo: repo,
Target: target,
Branch: branch,
Timeout: h.cfg.Timeout,
}
var output strings.Builder
branchStr := branch
if branchStr == "" {
branchStr = "(default)"
}
output.WriteString(fmt.Sprintf("Building %s target=%s branch=%s\n\n", repo, target, branchStr))
result, err := deploycli.Build(ctx, cfg, func(resp *messages.BuildResponse) {
switch resp.Status {
case messages.BuildStatusStarted:
output.WriteString(fmt.Sprintf("Started: %s\n", resp.Message))
case messages.BuildStatusProgress:
successStr := "..."
if resp.HostSuccess != nil {
if *resp.HostSuccess {
successStr = "success"
} else {
successStr = "failed"
}
}
output.WriteString(fmt.Sprintf("[%d/%d] %s: %s\n", resp.HostsCompleted, resp.HostsTotal, resp.Host, successStr))
case messages.BuildStatusCompleted, messages.BuildStatusFailed:
output.WriteString(fmt.Sprintf("\n%s\n", resp.Message))
case messages.BuildStatusRejected:
output.WriteString(fmt.Sprintf("Rejected: %s\n", resp.Message))
}
})
if err != nil {
return mcp.NewToolResultError(fmt.Sprintf("build failed: %v", err)), nil
}
if result.FinalResponse != nil {
output.WriteString(fmt.Sprintf("\nBuild complete: %d succeeded, %d failed (%.1fs)\n",
result.FinalResponse.Succeeded,
result.FinalResponse.Failed,
result.FinalResponse.TotalDurationSeconds))
}
if !result.AllSucceeded() {
output.WriteString("WARNING: Some builds failed\n")
}
return mcp.NewToolResultText(output.String()), nil
}

View File

@@ -12,6 +12,7 @@ type ServerConfig struct {
NKeyFile string
EnableAdmin bool
AdminNKeyFile string
EnableBuilds bool
DiscoverSubject string
Timeout time.Duration
}
@@ -49,6 +50,11 @@ func New(cfg ServerConfig) *Server {
s.AddTool(DeployAdminTool(), handler.HandleDeployAdmin)
}
// Optionally register build tool
if cfg.EnableBuilds {
s.AddTool(BuildTool(), handler.HandleBuild)
}
return &Server{
cfg: cfg,
server: s,

125
internal/messages/build.go Normal file
View File

@@ -0,0 +1,125 @@
package messages
import (
"encoding/json"
"fmt"
)
// BuildStatus represents the status of a build response.
type BuildStatus string
const (
BuildStatusStarted BuildStatus = "started"
BuildStatusProgress BuildStatus = "progress"
BuildStatusCompleted BuildStatus = "completed"
BuildStatusFailed BuildStatus = "failed"
BuildStatusRejected BuildStatus = "rejected"
)
// IsFinal returns true if this status indicates a terminal state.
func (s BuildStatus) IsFinal() bool {
switch s {
case BuildStatusCompleted, BuildStatusFailed, BuildStatusRejected:
return true
default:
return false
}
}
// BuildRequest is the message sent to request a build.
type BuildRequest struct {
Repo string `json:"repo"` // Must match config
Target string `json:"target"` // Hostname or "all"
Branch string `json:"branch,omitempty"` // Optional, uses repo default
ReplyTo string `json:"reply_to"`
}
// Validate checks that the request is valid.
func (r *BuildRequest) Validate() error {
if r.Repo == "" {
return fmt.Errorf("repo is required")
}
if !revisionRegex.MatchString(r.Repo) {
return fmt.Errorf("invalid repo name format: %q", r.Repo)
}
if r.Target == "" {
return fmt.Errorf("target is required")
}
if r.Branch != "" && !revisionRegex.MatchString(r.Branch) {
return fmt.Errorf("invalid branch format: %q", r.Branch)
}
if r.ReplyTo == "" {
return fmt.Errorf("reply_to is required")
}
return nil
}
// Marshal serializes the request to JSON.
func (r *BuildRequest) Marshal() ([]byte, error) {
return json.Marshal(r)
}
// UnmarshalBuildRequest deserializes a request from JSON.
func UnmarshalBuildRequest(data []byte) (*BuildRequest, error) {
var r BuildRequest
if err := json.Unmarshal(data, &r); err != nil {
return nil, fmt.Errorf("failed to unmarshal build request: %w", err)
}
return &r, nil
}
// BuildHostResult contains the result of building a single host.
type BuildHostResult struct {
Host string `json:"host"`
Success bool `json:"success"`
Error string `json:"error,omitempty"`
DurationSeconds float64 `json:"duration_seconds"`
}
// BuildResponse is the message sent in response to a build request.
type BuildResponse struct {
Status BuildStatus `json:"status"`
Message string `json:"message,omitempty"`
// Progress updates
Host string `json:"host,omitempty"`
HostSuccess *bool `json:"host_success,omitempty"`
HostsCompleted int `json:"hosts_completed,omitempty"`
HostsTotal int `json:"hosts_total,omitempty"`
// Final response
Results []BuildHostResult `json:"results,omitempty"`
TotalDurationSeconds float64 `json:"total_duration_seconds,omitempty"`
Succeeded int `json:"succeeded,omitempty"`
Failed int `json:"failed,omitempty"`
Error string `json:"error,omitempty"`
}
// NewBuildResponse creates a new response with the given status and message.
func NewBuildResponse(status BuildStatus, message string) *BuildResponse {
return &BuildResponse{
Status: status,
Message: message,
}
}
// WithError adds an error message to the response.
func (r *BuildResponse) WithError(err string) *BuildResponse {
r.Error = err
return r
}
// Marshal serializes the response to JSON.
func (r *BuildResponse) Marshal() ([]byte, error) {
return json.Marshal(r)
}
// UnmarshalBuildResponse deserializes a response from JSON.
func UnmarshalBuildResponse(data []byte) (*BuildResponse, error) {
var r BuildResponse
if err := json.Unmarshal(data, &r); err != nil {
return nil, fmt.Errorf("failed to unmarshal build response: %w", err)
}
return &r, nil
}

View File

@@ -0,0 +1,99 @@
package metrics
import (
"github.com/prometheus/client_golang/prometheus"
)
// BuildCollector holds all Prometheus metrics for the builder.
type BuildCollector struct {
buildsTotal *prometheus.CounterVec
buildHostTotal *prometheus.CounterVec
buildDuration *prometheus.HistogramVec
buildLastTimestamp *prometheus.GaugeVec
buildLastSuccessTime *prometheus.GaugeVec
buildLastFailureTime *prometheus.GaugeVec
}
// NewBuildCollector creates a new build metrics collector and registers it with the given registerer.
func NewBuildCollector(reg prometheus.Registerer) *BuildCollector {
c := &BuildCollector{
buildsTotal: prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "homelab_deploy_builds_total",
Help: "Total builds processed",
},
[]string{"repo", "status"},
),
buildHostTotal: prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "homelab_deploy_build_host_total",
Help: "Total host builds processed",
},
[]string{"repo", "host", "status"},
),
buildDuration: prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "homelab_deploy_build_duration_seconds",
Help: "Build execution time per host",
Buckets: []float64{30, 60, 120, 300, 600, 900, 1200, 1800, 3600},
},
[]string{"repo", "host"},
),
buildLastTimestamp: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "homelab_deploy_build_last_timestamp",
Help: "Timestamp of last build attempt",
},
[]string{"repo"},
),
buildLastSuccessTime: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "homelab_deploy_build_last_success_timestamp",
Help: "Timestamp of last successful build",
},
[]string{"repo"},
),
buildLastFailureTime: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "homelab_deploy_build_last_failure_timestamp",
Help: "Timestamp of last failed build",
},
[]string{"repo"},
),
}
reg.MustRegister(c.buildsTotal)
reg.MustRegister(c.buildHostTotal)
reg.MustRegister(c.buildDuration)
reg.MustRegister(c.buildLastTimestamp)
reg.MustRegister(c.buildLastSuccessTime)
reg.MustRegister(c.buildLastFailureTime)
return c
}
// RecordBuildSuccess records a successful build.
func (c *BuildCollector) RecordBuildSuccess(repo string) {
c.buildsTotal.WithLabelValues(repo, "success").Inc()
c.buildLastTimestamp.WithLabelValues(repo).SetToCurrentTime()
c.buildLastSuccessTime.WithLabelValues(repo).SetToCurrentTime()
}
// RecordBuildFailure records a failed build.
func (c *BuildCollector) RecordBuildFailure(repo, errorCode string) {
c.buildsTotal.WithLabelValues(repo, "failure").Inc()
c.buildLastTimestamp.WithLabelValues(repo).SetToCurrentTime()
c.buildLastFailureTime.WithLabelValues(repo).SetToCurrentTime()
}
// RecordHostBuildSuccess records a successful host build.
func (c *BuildCollector) RecordHostBuildSuccess(repo, host string, durationSeconds float64) {
c.buildHostTotal.WithLabelValues(repo, host, "success").Inc()
c.buildDuration.WithLabelValues(repo, host).Observe(durationSeconds)
}
// RecordHostBuildFailure records a failed host build.
func (c *BuildCollector) RecordHostBuildFailure(repo, host string, durationSeconds float64) {
c.buildHostTotal.WithLabelValues(repo, host, "failure").Inc()
c.buildDuration.WithLabelValues(repo, host).Observe(durationSeconds)
}

View File

@@ -74,6 +74,11 @@ func (s *Server) Collector() *Collector {
return s.collector
}
// Registry returns the Prometheus registry.
func (s *Server) Registry() *prometheus.Registry {
return s.registry
}
// ScrapeCh returns a channel that receives a signal each time the metrics endpoint is scraped.
func (s *Server) ScrapeCh() <-chan struct{} {
return s.scrapeCh