feat: implement NATS-based NixOS deployment system
Implement the complete homelab-deploy system with three operational modes:

- Listener mode: Runs on NixOS hosts as a systemd service, subscribes to NATS subjects with configurable templates, executes nixos-rebuild on deployment requests with concurrency control
- MCP mode: MCP server exposing deploy, deploy_admin, and list_hosts tools for AI assistants with tiered access control
- CLI mode: Manual deployment commands with subject alias support via environment variables

Key components:

- internal/messages: Request/response types with validation
- internal/nats: Client wrapper with NKey authentication
- internal/deploy: Executor with timeout and lock for concurrency
- internal/listener: Subject template expansion and request handling
- internal/cli: Deploy logic with alias resolution
- internal/mcp: MCP server with mcp-go integration
- nixos/module.nix: NixOS module with hardened systemd service

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
267
cmd/homelab-deploy/main.go
Normal file
267
cmd/homelab-deploy/main.go
Normal file
@@ -0,0 +1,267 @@
|
||||
// Command homelab-deploy is the entry point of the homelab-deploy CLI. It
// wires the listener, mcp, and deploy subcommands into a single root command.
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
deploycli "git.t-juice.club/torjus/homelab-deploy/internal/cli"
|
||||
"git.t-juice.club/torjus/homelab-deploy/internal/listener"
|
||||
"git.t-juice.club/torjus/homelab-deploy/internal/mcp"
|
||||
"git.t-juice.club/torjus/homelab-deploy/internal/messages"
|
||||
"github.com/urfave/cli/v3"
|
||||
)
|
||||
|
||||
// version is the version string set on the root command.
const version = "0.1.0"
|
||||
|
||||
func main() {
|
||||
app := &cli.Command{
|
||||
Name: "homelab-deploy",
|
||||
Usage: "Message-based NixOS deployment system using NATS",
|
||||
Version: version,
|
||||
Commands: []*cli.Command{
|
||||
listenerCommand(),
|
||||
mcpCommand(),
|
||||
deployCommand(),
|
||||
},
|
||||
}
|
||||
|
||||
if err := app.Run(context.Background(), os.Args); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func listenerCommand() *cli.Command {
|
||||
return &cli.Command{
|
||||
Name: "listener",
|
||||
Usage: "Run as a deployment listener (systemd service mode)",
|
||||
Flags: []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "hostname",
|
||||
Usage: "Hostname for this listener",
|
||||
Required: true,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "tier",
|
||||
Usage: "Deployment tier (test or prod)",
|
||||
Required: true,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "role",
|
||||
Usage: "Role for role-based deployment targeting",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nats-url",
|
||||
Usage: "NATS server URL",
|
||||
Required: true,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nkey-file",
|
||||
Usage: "Path to NKey seed file for NATS authentication",
|
||||
Required: true,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "flake-url",
|
||||
Usage: "Git flake URL for nixos-rebuild",
|
||||
Required: true,
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: "timeout",
|
||||
Usage: "Deployment timeout in seconds",
|
||||
Value: 600,
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "deploy-subject",
|
||||
Usage: "NATS subject to subscribe to for deployments (can be repeated)",
|
||||
Value: []string{
|
||||
"deploy.<tier>.<hostname>",
|
||||
"deploy.<tier>.all",
|
||||
"deploy.<tier>.role.<role>",
|
||||
},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "discover-subject",
|
||||
Usage: "NATS subject for host discovery requests",
|
||||
Value: "deploy.discover",
|
||||
},
|
||||
},
|
||||
Action: func(ctx context.Context, c *cli.Command) error {
|
||||
tier := c.String("tier")
|
||||
if tier != "test" && tier != "prod" {
|
||||
return fmt.Errorf("tier must be 'test' or 'prod', got %q", tier)
|
||||
}
|
||||
|
||||
cfg := listener.Config{
|
||||
Hostname: c.String("hostname"),
|
||||
Tier: tier,
|
||||
Role: c.String("role"),
|
||||
NATSUrl: c.String("nats-url"),
|
||||
NKeyFile: c.String("nkey-file"),
|
||||
FlakeURL: c.String("flake-url"),
|
||||
Timeout: time.Duration(c.Int("timeout")) * time.Second,
|
||||
DeploySubjects: c.StringSlice("deploy-subject"),
|
||||
DiscoverSubject: c.String("discover-subject"),
|
||||
}
|
||||
|
||||
logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
|
||||
Level: slog.LevelInfo,
|
||||
}))
|
||||
|
||||
l := listener.New(cfg, logger)
|
||||
|
||||
// Handle shutdown signals
|
||||
ctx, cancel := signal.NotifyContext(ctx, syscall.SIGINT, syscall.SIGTERM)
|
||||
defer cancel()
|
||||
|
||||
return l.Run(ctx)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func mcpCommand() *cli.Command {
|
||||
return &cli.Command{
|
||||
Name: "mcp",
|
||||
Usage: "Run as an MCP server for AI assistants",
|
||||
Flags: []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "nats-url",
|
||||
Usage: "NATS server URL",
|
||||
Required: true,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nkey-file",
|
||||
Usage: "Path to NKey seed file for NATS authentication",
|
||||
Required: true,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "enable-admin",
|
||||
Usage: "Enable admin deployment tool for all tiers",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "admin-nkey-file",
|
||||
Usage: "Path to admin NKey seed file (required if --enable-admin)",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "discover-subject",
|
||||
Usage: "NATS subject for host discovery",
|
||||
Value: "deploy.discover",
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: "timeout",
|
||||
Usage: "Timeout in seconds for deployment operations",
|
||||
Value: 900,
|
||||
},
|
||||
},
|
||||
Action: func(_ context.Context, c *cli.Command) error {
|
||||
enableAdmin := c.Bool("enable-admin")
|
||||
adminNKeyFile := c.String("admin-nkey-file")
|
||||
|
||||
if enableAdmin && adminNKeyFile == "" {
|
||||
return fmt.Errorf("--admin-nkey-file is required when --enable-admin is set")
|
||||
}
|
||||
|
||||
cfg := mcp.ServerConfig{
|
||||
NATSUrl: c.String("nats-url"),
|
||||
NKeyFile: c.String("nkey-file"),
|
||||
EnableAdmin: enableAdmin,
|
||||
AdminNKeyFile: adminNKeyFile,
|
||||
DiscoverSubject: c.String("discover-subject"),
|
||||
Timeout: time.Duration(c.Int("timeout")) * time.Second,
|
||||
}
|
||||
|
||||
s := mcp.New(cfg)
|
||||
return s.Run()
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func deployCommand() *cli.Command {
|
||||
return &cli.Command{
|
||||
Name: "deploy",
|
||||
Usage: "Deploy to a target subject",
|
||||
ArgsUsage: "<subject>",
|
||||
Flags: []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "nats-url",
|
||||
Usage: "NATS server URL",
|
||||
Required: true,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nkey-file",
|
||||
Usage: "Path to NKey seed file for NATS authentication",
|
||||
Required: true,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "branch",
|
||||
Usage: "Git branch or commit to deploy",
|
||||
Value: "master",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "action",
|
||||
Usage: "nixos-rebuild action (switch, boot, test, dry-activate)",
|
||||
Value: "switch",
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: "timeout",
|
||||
Usage: "Timeout in seconds for collecting responses",
|
||||
Value: 900,
|
||||
},
|
||||
},
|
||||
Action: func(ctx context.Context, c *cli.Command) error {
|
||||
if c.Args().Len() < 1 {
|
||||
return fmt.Errorf("subject argument required")
|
||||
}
|
||||
|
||||
subjectArg := c.Args().First()
|
||||
subject := deploycli.ResolveAlias(subjectArg)
|
||||
|
||||
if deploycli.IsAlias(subjectArg) && subject != subjectArg {
|
||||
fmt.Printf("Resolved alias %q to %q\n", subjectArg, subject)
|
||||
}
|
||||
|
||||
action := messages.Action(c.String("action"))
|
||||
if !action.Valid() {
|
||||
return fmt.Errorf("invalid action: %q", action)
|
||||
}
|
||||
|
||||
cfg := deploycli.DeployConfig{
|
||||
NATSUrl: c.String("nats-url"),
|
||||
NKeyFile: c.String("nkey-file"),
|
||||
Subject: subject,
|
||||
Action: action,
|
||||
Revision: c.String("branch"),
|
||||
Timeout: time.Duration(c.Int("timeout")) * time.Second,
|
||||
}
|
||||
|
||||
fmt.Printf("Deploying to %s (action=%s, revision=%s)\n", subject, action, cfg.Revision)
|
||||
|
||||
// Handle shutdown signals
|
||||
ctx, cancel := signal.NotifyContext(ctx, syscall.SIGINT, syscall.SIGTERM)
|
||||
defer cancel()
|
||||
|
||||
result, err := deploycli.Deploy(ctx, cfg, func(resp *messages.DeployResponse) {
|
||||
status := string(resp.Status)
|
||||
if resp.Error != nil {
|
||||
status = fmt.Sprintf("%s (%s)", status, *resp.Error)
|
||||
}
|
||||
fmt.Printf("[%s] %s: %s\n", resp.Hostname, status, resp.Message)
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("deploy failed: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("\nDeployment complete: %d hosts responded\n", result.HostCount())
|
||||
|
||||
if !result.AllSucceeded() {
|
||||
return fmt.Errorf("some deployments failed")
|
||||
}
|
||||
|
||||
return nil
|
||||
},
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user