This repository has been archived on 2026-03-09. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
homelab-deploy/cmd/homelab-deploy/main.go
Torjus Håkestad 14f5b31faf feat: add builder mode for centralized Nix builds
Add a new "builder" capability to trigger Nix builds on a dedicated
build host via NATS messaging. This allows pre-building NixOS
configurations before deployment.

New components:
- Builder mode: subscribes to build.<repo>.* subjects, executes nix build
- Build CLI command: triggers builds with progress tracking
- MCP build tool: available with --enable-builds flag
- Builder metrics: tracks build success/failure per repo and host
- NixOS module: services.homelab-deploy.builder

The builder uses a YAML config file to define allowed repositories
with their URLs and default branches. Builds can target all hosts
or specific hosts, with real-time progress updates.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-10 22:03:14 +01:00

578 lines
15 KiB
Go

package main
import (
"context"
"fmt"
"log/slog"
"os"
"os/signal"
"syscall"
"time"
"git.t-juice.club/torjus/homelab-deploy/internal/builder"
deploycli "git.t-juice.club/torjus/homelab-deploy/internal/cli"
"git.t-juice.club/torjus/homelab-deploy/internal/listener"
"git.t-juice.club/torjus/homelab-deploy/internal/mcp"
"git.t-juice.club/torjus/homelab-deploy/internal/messages"
"github.com/urfave/cli/v3"
)
// version is the application version; it is reported by the root command and
// passed to the listener configuration.
const version = "0.2.0"
// main assembles the root homelab-deploy command with all of its subcommands
// and runs it against the process arguments. An error returned by the command
// is written to stderr and the process exits with status 1.
func main() {
	subcommands := []*cli.Command{
		listenerCommand(),
		builderCommand(),
		mcpCommand(),
		deployCommand(),
		buildCommand(),
		listHostsCommand(),
	}

	root := &cli.Command{
		Name:     "homelab-deploy",
		Usage:    "Message-based NixOS deployment system using NATS",
		Version:  version,
		Commands: subcommands,
	}

	err := root.Run(context.Background(), os.Args)
	if err == nil {
		return
	}
	fmt.Fprintf(os.Stderr, "error: %v\n", err)
	os.Exit(1)
}
// listenerCommand builds the "listener" subcommand.
//
// Its action validates --tier (only "test" and "prod" are accepted), maps the
// flag values onto a listener.Config, creates a JSON logger on stdout at info
// level, and runs the listener with a context that is cancelled on SIGINT or
// SIGTERM.
func listenerCommand() *cli.Command {
	flags := []cli.Flag{
		&cli.StringFlag{Name: "hostname", Usage: "Hostname for this listener", Required: true},
		&cli.StringFlag{Name: "tier", Usage: "Deployment tier (test or prod)", Required: true},
		&cli.StringFlag{Name: "role", Usage: "Role for role-based deployment targeting"},
		&cli.StringFlag{Name: "nats-url", Usage: "NATS server URL", Required: true},
		&cli.StringFlag{
			Name:     "nkey-file",
			Usage:    "Path to NKey seed file for NATS authentication",
			Required: true,
		},
		&cli.StringFlag{Name: "flake-url", Usage: "Git flake URL for nixos-rebuild", Required: true},
		&cli.IntFlag{Name: "timeout", Usage: "Deployment timeout in seconds", Value: 600},
		&cli.StringSliceFlag{
			Name:  "deploy-subject",
			Usage: "NATS subject to subscribe to for deployments (can be repeated)",
			// NOTE(review): the <tier>/<hostname>/<role> placeholders are
			// presumably expanded by the listener package — confirm.
			Value: []string{
				"deploy.<tier>.<hostname>",
				"deploy.<tier>.all",
				"deploy.<tier>.role.<role>",
			},
		},
		&cli.StringFlag{
			Name:  "discover-subject",
			Usage: "NATS subject for host discovery requests",
			Value: "deploy.discover",
		},
		&cli.BoolFlag{Name: "metrics-enabled", Usage: "Enable Prometheus metrics endpoint"},
		&cli.StringFlag{
			Name:  "metrics-addr",
			Usage: "Address for Prometheus metrics HTTP server",
			Value: ":9972",
		},
		&cli.IntFlag{
			Name:  "heartbeat-interval",
			Usage: "Interval in seconds for sending status updates during deployment (0 to disable)",
			Value: 15,
		},
	}

	run := func(ctx context.Context, c *cli.Command) error {
		tier := c.String("tier")
		switch tier {
		case "test", "prod":
			// Accepted tiers.
		default:
			return fmt.Errorf("tier must be 'test' or 'prod', got %q", tier)
		}

		// Integer flags given in whole seconds are converted to durations.
		seconds := func(name string) time.Duration {
			return time.Duration(c.Int(name)) * time.Second
		}

		logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
			Level: slog.LevelInfo,
		}))

		l := listener.New(listener.Config{
			Hostname:          c.String("hostname"),
			Tier:              tier,
			Role:              c.String("role"),
			NATSUrl:           c.String("nats-url"),
			NKeyFile:          c.String("nkey-file"),
			FlakeURL:          c.String("flake-url"),
			Timeout:           seconds("timeout"),
			HeartbeatInterval: seconds("heartbeat-interval"),
			DeploySubjects:    c.StringSlice("deploy-subject"),
			DiscoverSubject:   c.String("discover-subject"),
			MetricsEnabled:    c.Bool("metrics-enabled"),
			MetricsAddr:       c.String("metrics-addr"),
			Version:           version,
		}, logger)

		// Cancel the run context when a shutdown signal arrives.
		sigCtx, stop := signal.NotifyContext(ctx, syscall.SIGINT, syscall.SIGTERM)
		defer stop()

		return l.Run(sigCtx)
	}

	return &cli.Command{
		Name:   "listener",
		Usage:  "Run as a deployment listener (systemd service mode)",
		Flags:  flags,
		Action: run,
	}
}
// mcpCommand builds the "mcp" subcommand.
//
// Its action rejects --enable-admin when --admin-nkey-file is empty, maps the
// flag values onto an mcp.ServerConfig, and runs the server. The context
// handed to the action is not used.
func mcpCommand() *cli.Command {
	flags := []cli.Flag{
		&cli.StringFlag{Name: "nats-url", Usage: "NATS server URL", Required: true},
		&cli.StringFlag{
			Name:     "nkey-file",
			Usage:    "Path to NKey seed file for NATS authentication",
			Required: true,
		},
		&cli.BoolFlag{Name: "enable-admin", Usage: "Enable admin deployment tool for all tiers"},
		&cli.StringFlag{
			Name:  "admin-nkey-file",
			Usage: "Path to admin NKey seed file (required if --enable-admin)",
		},
		&cli.StringFlag{
			Name:  "discover-subject",
			Usage: "NATS subject for host discovery",
			Value: "deploy.discover",
		},
		&cli.IntFlag{
			Name:  "timeout",
			Usage: "Timeout in seconds for deployment operations",
			Value: 900,
		},
		&cli.BoolFlag{Name: "enable-builds", Usage: "Enable build tool"},
	}

	run := func(_ context.Context, c *cli.Command) error {
		adminEnabled := c.Bool("enable-admin")
		adminKeyPath := c.String("admin-nkey-file")

		// Admin mode cannot work without its own key file.
		if adminEnabled {
			if adminKeyPath == "" {
				return fmt.Errorf("--admin-nkey-file is required when --enable-admin is set")
			}
		}

		s := mcp.New(mcp.ServerConfig{
			NATSUrl:         c.String("nats-url"),
			NKeyFile:        c.String("nkey-file"),
			EnableAdmin:     adminEnabled,
			AdminNKeyFile:   adminKeyPath,
			EnableBuilds:    c.Bool("enable-builds"),
			DiscoverSubject: c.String("discover-subject"),
			Timeout:         time.Duration(c.Int("timeout")) * time.Second,
		})
		return s.Run()
	}

	return &cli.Command{
		Name:   "mcp",
		Usage:  "Run as an MCP server for AI assistants",
		Flags:  flags,
		Action: run,
	}
}
// deployCommand builds the "deploy" subcommand, which takes a target subject
// (or alias) as its first positional argument.
//
// Its action resolves the alias, validates --action, prints what is about to
// be deployed, and calls deploycli.Deploy with a context that is cancelled on
// SIGINT or SIGTERM. Each response is printed as it is delivered to the
// callback. An error is returned when the deploy call fails or when the
// result reports that not every deployment succeeded.
func deployCommand() *cli.Command {
	flags := []cli.Flag{
		&cli.StringFlag{
			Name:     "nats-url",
			Usage:    "NATS server URL",
			Sources:  cli.EnvVars("HOMELAB_DEPLOY_NATS_URL"),
			Required: true,
		},
		&cli.StringFlag{
			Name:     "nkey-file",
			Usage:    "Path to NKey seed file for NATS authentication",
			Sources:  cli.EnvVars("HOMELAB_DEPLOY_NKEY_FILE"),
			Required: true,
		},
		&cli.StringFlag{
			Name:    "branch",
			Usage:   "Git branch or commit to deploy",
			Sources: cli.EnvVars("HOMELAB_DEPLOY_BRANCH"),
			Value:   "master",
		},
		&cli.StringFlag{
			Name:    "action",
			Usage:   "nixos-rebuild action (switch, boot, test, dry-activate)",
			Sources: cli.EnvVars("HOMELAB_DEPLOY_ACTION"),
			Value:   "switch",
		},
		&cli.IntFlag{
			Name:    "timeout",
			Usage:   "Timeout in seconds for collecting responses",
			Sources: cli.EnvVars("HOMELAB_DEPLOY_TIMEOUT"),
			Value:   900,
		},
	}

	// printResponse writes one line per response; an error value, when
	// present, is appended to the status in parentheses.
	printResponse := func(resp *messages.DeployResponse) {
		label := string(resp.Status)
		if resp.Error != nil {
			label = fmt.Sprintf("%s (%s)", label, *resp.Error)
		}
		fmt.Printf("[%s] %s: %s\n", resp.Hostname, label, resp.Message)
	}

	run := func(ctx context.Context, c *cli.Command) error {
		args := c.Args()
		if args.Len() < 1 {
			return fmt.Errorf("subject argument required")
		}

		rawSubject := args.First()
		subject := deploycli.ResolveAlias(rawSubject)
		if deploycli.IsAlias(rawSubject) && subject != rawSubject {
			fmt.Printf("Resolved alias %q to %q\n", rawSubject, subject)
		}

		action := messages.Action(c.String("action"))
		if !action.Valid() {
			return fmt.Errorf("invalid action: %q", action)
		}

		revision := c.String("branch")
		cfg := deploycli.DeployConfig{
			NATSUrl:  c.String("nats-url"),
			NKeyFile: c.String("nkey-file"),
			Subject:  subject,
			Action:   action,
			Revision: revision,
			Timeout:  time.Duration(c.Int("timeout")) * time.Second,
		}
		fmt.Printf("Deploying to %s (action=%s, revision=%s)\n", subject, action, revision)

		// Cancel the deploy context when a shutdown signal arrives.
		sigCtx, stop := signal.NotifyContext(ctx, syscall.SIGINT, syscall.SIGTERM)
		defer stop()

		result, err := deploycli.Deploy(sigCtx, cfg, printResponse)
		if err != nil {
			return fmt.Errorf("deploy failed: %w", err)
		}

		fmt.Printf("\nDeployment complete: %d hosts responded\n", result.HostCount())
		if result.AllSucceeded() {
			return nil
		}
		return fmt.Errorf("some deployments failed")
	}

	return &cli.Command{
		Name:      "deploy",
		Usage:     "Deploy to a target subject",
		ArgsUsage: "<subject>",
		Flags:     flags,
		Action:    run,
	}
}
// listHostsCommand builds the "list-hosts" subcommand, which sends a discovery
// request and prints each responding host with its tier, role and deploy
// subjects.
//
// The optional --tier flag ("test" or "prod") restricts the listing to hosts
// of that tier; any other non-empty value is rejected. When no host responds,
// or when the tier filter excludes every responder, a single explanatory line
// is printed instead of a header followed by an empty listing.
func listHostsCommand() *cli.Command {
	return &cli.Command{
		Name:  "list-hosts",
		Usage: "List available deployment targets",
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:     "nats-url",
				Usage:    "NATS server URL",
				Sources:  cli.EnvVars("HOMELAB_DEPLOY_NATS_URL"),
				Required: true,
			},
			&cli.StringFlag{
				Name:     "nkey-file",
				Usage:    "Path to NKey seed file for NATS authentication",
				Sources:  cli.EnvVars("HOMELAB_DEPLOY_NKEY_FILE"),
				Required: true,
			},
			&cli.StringFlag{
				Name:    "tier",
				Usage:   "Filter by tier (test or prod)",
				Sources: cli.EnvVars("HOMELAB_DEPLOY_TIER"),
			},
			&cli.StringFlag{
				Name:    "discover-subject",
				Usage:   "NATS subject for host discovery",
				Sources: cli.EnvVars("HOMELAB_DEPLOY_DISCOVER_SUBJECT"),
				Value:   "deploy.discover",
			},
			&cli.IntFlag{
				Name:    "timeout",
				Usage:   "Timeout in seconds for discovery",
				Sources: cli.EnvVars("HOMELAB_DEPLOY_DISCOVER_TIMEOUT"),
				Value:   5,
			},
		},
		Action: func(ctx context.Context, c *cli.Command) error {
			tierFilter := c.String("tier")
			if tierFilter != "" && tierFilter != "test" && tierFilter != "prod" {
				return fmt.Errorf("tier must be 'test' or 'prod', got %q", tierFilter)
			}

			// Handle shutdown signals
			ctx, cancel := signal.NotifyContext(ctx, syscall.SIGINT, syscall.SIGTERM)
			defer cancel()

			responses, err := deploycli.Discover(
				ctx,
				c.String("nats-url"),
				c.String("nkey-file"),
				c.String("discover-subject"),
				time.Duration(c.Int("timeout"))*time.Second,
			)
			if err != nil {
				return fmt.Errorf("discovery failed: %w", err)
			}

			if len(responses) == 0 {
				fmt.Println("No hosts responded to discovery request")
				return nil
			}

			// The header is printed lazily, on the first host that passes the
			// tier filter, so a filter that matches nothing does not leave a
			// header with an empty list underneath it.
			shown := 0
			for _, resp := range responses {
				if tierFilter != "" && resp.Tier != tierFilter {
					continue
				}
				if shown == 0 {
					fmt.Println("Available deployment targets:")
					fmt.Println()
				}
				shown++

				role := resp.Role
				if role == "" {
					role = "(none)"
				}
				fmt.Printf("- %s (tier=%s, role=%s)\n", resp.Hostname, resp.Tier, role)
				for _, subj := range resp.DeploySubjects {
					fmt.Printf(" %s\n", subj)
				}
			}

			// Hosts responded, so nothing shown means the filter excluded all of them.
			if shown == 0 {
				fmt.Printf("No hosts matched tier %q\n", tierFilter)
			}
			return nil
		},
	}
}
// builderCommand builds the "builder" subcommand.
//
// Its action loads the builder configuration file named by --config, maps the
// remaining flags onto a builder.BuilderConfig, creates a JSON logger on
// stdout at info level, and runs the builder with a context that is cancelled
// on SIGINT or SIGTERM.
func builderCommand() *cli.Command {
	flags := []cli.Flag{
		&cli.StringFlag{Name: "nats-url", Usage: "NATS server URL", Required: true},
		&cli.StringFlag{
			Name:     "nkey-file",
			Usage:    "Path to NKey seed file for NATS authentication",
			Required: true,
		},
		&cli.StringFlag{Name: "config", Usage: "Path to builder configuration file", Required: true},
		&cli.IntFlag{Name: "timeout", Usage: "Build timeout in seconds per host", Value: 1800},
		&cli.BoolFlag{Name: "metrics-enabled", Usage: "Enable Prometheus metrics endpoint"},
		&cli.StringFlag{
			Name:  "metrics-addr",
			Usage: "Address for Prometheus metrics HTTP server",
			Value: ":9973",
		},
	}

	run := func(ctx context.Context, c *cli.Command) error {
		configPath := c.String("config")

		// Load the configuration before anything else so a bad file fails fast.
		repoCfg, err := builder.LoadConfig(configPath)
		if err != nil {
			return fmt.Errorf("failed to load config: %w", err)
		}

		logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
			Level: slog.LevelInfo,
		}))

		b := builder.New(builder.BuilderConfig{
			NATSUrl:        c.String("nats-url"),
			NKeyFile:       c.String("nkey-file"),
			ConfigFile:     configPath,
			Timeout:        time.Duration(c.Int("timeout")) * time.Second,
			MetricsEnabled: c.Bool("metrics-enabled"),
			MetricsAddr:    c.String("metrics-addr"),
		}, repoCfg, logger)

		// Cancel the run context when a shutdown signal arrives.
		sigCtx, stop := signal.NotifyContext(ctx, syscall.SIGINT, syscall.SIGTERM)
		defer stop()

		return b.Run(sigCtx)
	}

	return &cli.Command{
		Name:   "builder",
		Usage:  "Run as a build server (systemd service mode)",
		Flags:  flags,
		Action: run,
	}
}
// buildCommand builds the "build" subcommand, which takes a repo as its first
// positional argument and an optional hostname as its second.
//
// Exactly one of a hostname or --all must be given; with --all the target is
// set to "all". The action calls deploycli.Build with a context that is
// cancelled on SIGINT or SIGTERM. In the default mode it prints progress lines
// as responses are delivered and a summary at the end; with --json all
// progress output is suppressed and only the marshalled result is printed. An
// error is returned when the build call fails, when marshalling fails, or when
// the result reports that not every build succeeded.
func buildCommand() *cli.Command {
	flags := []cli.Flag{
		&cli.StringFlag{
			Name:     "nats-url",
			Usage:    "NATS server URL",
			Sources:  cli.EnvVars("HOMELAB_DEPLOY_NATS_URL"),
			Required: true,
		},
		&cli.StringFlag{
			Name:     "nkey-file",
			Usage:    "Path to NKey seed file for NATS authentication",
			Sources:  cli.EnvVars("HOMELAB_DEPLOY_NKEY_FILE"),
			Required: true,
		},
		&cli.StringFlag{
			Name:    "branch",
			Usage:   "Git branch to build (uses repo default if not specified)",
			Sources: cli.EnvVars("HOMELAB_DEPLOY_BRANCH"),
		},
		&cli.BoolFlag{Name: "all", Usage: "Build all hosts in the repo"},
		&cli.IntFlag{
			Name:    "timeout",
			Usage:   "Timeout in seconds for collecting responses",
			Sources: cli.EnvVars("HOMELAB_DEPLOY_BUILD_TIMEOUT"),
			Value:   3600,
		},
		&cli.BoolFlag{Name: "json", Usage: "Output results as JSON"},
	}

	run := func(ctx context.Context, c *cli.Command) error {
		args := c.Args()
		if args.Len() < 1 {
			return fmt.Errorf("repo argument required")
		}
		repo, target := args.First(), args.Get(1)

		// A hostname and --all are mutually exclusive, and one is required.
		buildAll := c.Bool("all")
		switch {
		case target == "" && !buildAll:
			return fmt.Errorf("must specify hostname or --all")
		case target != "" && buildAll:
			return fmt.Errorf("cannot specify both hostname and --all")
		case buildAll:
			target = "all"
		}

		branch := c.String("branch")
		cfg := deploycli.BuildConfig{
			NATSUrl:  c.String("nats-url"),
			NKeyFile: c.String("nkey-file"),
			Repo:     repo,
			Target:   target,
			Branch:   branch,
			Timeout:  time.Duration(c.Int("timeout")) * time.Second,
		}

		jsonOutput := c.Bool("json")
		if !jsonOutput {
			branchLabel := branch
			if branchLabel == "" {
				branchLabel = "(default)"
			}
			fmt.Printf("Building %s target=%s branch=%s\n", repo, target, branchLabel)
		}

		// onResponse prints human-readable progress; it stays silent in JSON
		// mode so that stdout carries only the final JSON document.
		onResponse := func(resp *messages.BuildResponse) {
			if jsonOutput {
				return
			}
			switch resp.Status {
			case messages.BuildStatusStarted:
				fmt.Printf("Started: %s\n", resp.Message)
			case messages.BuildStatusRejected:
				fmt.Printf("Rejected: %s\n", resp.Message)
			case messages.BuildStatusCompleted, messages.BuildStatusFailed:
				fmt.Printf("\n%s\n", resp.Message)
			case messages.BuildStatusProgress:
				// "..." is shown while the host carries no outcome yet.
				outcome := "..."
				switch {
				case resp.HostSuccess == nil:
				case *resp.HostSuccess:
					outcome = "success"
				default:
					outcome = "failed"
				}
				fmt.Printf("[%d/%d] %s: %s\n", resp.HostsCompleted, resp.HostsTotal, resp.Host, outcome)
			}
		}

		// Cancel the build context when a shutdown signal arrives.
		sigCtx, stop := signal.NotifyContext(ctx, syscall.SIGINT, syscall.SIGTERM)
		defer stop()

		result, err := deploycli.Build(sigCtx, cfg, onResponse)
		if err != nil {
			return fmt.Errorf("build failed: %w", err)
		}

		switch {
		case jsonOutput:
			payload, marshalErr := result.MarshalJSON()
			if marshalErr != nil {
				return fmt.Errorf("failed to marshal result: %w", marshalErr)
			}
			fmt.Println(string(payload))
		case result.FinalResponse != nil:
			final := result.FinalResponse
			fmt.Printf("\nBuild complete: %d succeeded, %d failed (%.1fs)\n",
				final.Succeeded, final.Failed, final.TotalDurationSeconds)
		}

		if result.AllSucceeded() {
			return nil
		}
		return fmt.Errorf("some builds failed")
	}

	return &cli.Command{
		Name:      "build",
		Usage:     "Trigger a build on the build server",
		ArgsUsage: "<repo> [hostname]",
		Flags:     flags,
		Action:    run,
	}
}