feat: implement NATS-based NixOS deployment system
Implement the complete homelab-deploy system with three operational modes:

- Listener mode: Runs on NixOS hosts as a systemd service, subscribes to NATS subjects with configurable templates, and executes nixos-rebuild on deployment requests with concurrency control
- MCP mode: MCP server exposing deploy, deploy_admin, and list_hosts tools for AI assistants with tiered access control
- CLI mode: Manual deployment commands with subject alias support via environment variables

Key components:

- internal/messages: Request/response types with validation
- internal/nats: Client wrapper with NKey authentication
- internal/deploy: Executor with timeout and lock for concurrency
- internal/listener: Subject template expansion and request handling
- internal/cli: Deploy logic with alias resolution
- internal/mcp: MCP server with mcp-go integration
- nixos/module.nix: NixOS module with hardened systemd service

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
40
internal/cli/aliases.go
Normal file
40
internal/cli/aliases.go
Normal file
@@ -0,0 +1,40 @@
|
||||
// Package cli provides the deploy command logic.
|
||||
package cli
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// aliasEnvPrefix is the prefix of the environment variables that define
// subject aliases. The normalized alias name is appended to it.
const aliasEnvPrefix = "HOMELAB_DEPLOY_ALIAS_"

// ResolveAlias resolves a subject alias to a full NATS subject.
//
// Input that looks like a NATS subject (contains a dot) is returned as-is.
// Otherwise the environment variable HOMELAB_DEPLOY_ALIAS_<NAME> is consulted,
// where <NAME> is the input upper-cased with hyphens converted to underscores,
// so alias names are case-insensitive. A non-empty value is returned as the
// resolved subject. If the variable is unset or empty, the input is returned
// unchanged (and will likely fail later when used as a subject).
func ResolveAlias(input string) string {
	if !IsAlias(input) {
		// Already a full subject.
		return input
	}

	if subject := os.Getenv(GetAliasEnvVar(input)); subject != "" {
		return subject
	}

	// No alias configured; hand the input back untouched.
	return input
}

// IsAlias returns true if the input looks like an alias (no dots).
func IsAlias(input string) bool {
	return !strings.Contains(input, ".")
}

// GetAliasEnvVar returns the environment variable name for a given alias:
// the alias is upper-cased, hyphens are replaced with underscores, and the
// result is prefixed with HOMELAB_DEPLOY_ALIAS_.
func GetAliasEnvVar(alias string) string {
	name := strings.ReplaceAll(alias, "-", "_")
	return aliasEnvPrefix + strings.ToUpper(name)
}
|
||||
112
internal/cli/aliases_test.go
Normal file
112
internal/cli/aliases_test.go
Normal file
@@ -0,0 +1,112 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestResolveAlias(t *testing.T) {
|
||||
// Set up test environment variables
|
||||
t.Setenv("HOMELAB_DEPLOY_ALIAS_TEST", "deploy.test.all")
|
||||
t.Setenv("HOMELAB_DEPLOY_ALIAS_PROD", "deploy.prod.all")
|
||||
t.Setenv("HOMELAB_DEPLOY_ALIAS_PROD_DNS", "deploy.prod.role.dns")
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "full subject unchanged",
|
||||
input: "deploy.prod.ns1",
|
||||
want: "deploy.prod.ns1",
|
||||
},
|
||||
{
|
||||
name: "subject with multiple dots",
|
||||
input: "deploy.test.role.web",
|
||||
want: "deploy.test.role.web",
|
||||
},
|
||||
{
|
||||
name: "lowercase alias",
|
||||
input: "test",
|
||||
want: "deploy.test.all",
|
||||
},
|
||||
{
|
||||
name: "uppercase alias",
|
||||
input: "TEST",
|
||||
want: "deploy.test.all",
|
||||
},
|
||||
{
|
||||
name: "mixed case alias",
|
||||
input: "TeSt",
|
||||
want: "deploy.test.all",
|
||||
},
|
||||
{
|
||||
name: "alias with hyphen",
|
||||
input: "prod-dns",
|
||||
want: "deploy.prod.role.dns",
|
||||
},
|
||||
{
|
||||
name: "alias with hyphen uppercase",
|
||||
input: "PROD-DNS",
|
||||
want: "deploy.prod.role.dns",
|
||||
},
|
||||
{
|
||||
name: "unknown alias returns as-is",
|
||||
input: "unknown",
|
||||
want: "unknown",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := ResolveAlias(tc.input)
|
||||
if got != tc.want {
|
||||
t.Errorf("ResolveAlias(%q) = %q, want %q", tc.input, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsAlias(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
want bool
|
||||
}{
|
||||
{"test", true},
|
||||
{"prod-dns", true},
|
||||
{"PROD", true},
|
||||
{"deploy.test.all", false},
|
||||
{"deploy.prod.ns1", false},
|
||||
{"a.b", false},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.input, func(t *testing.T) {
|
||||
got := IsAlias(tc.input)
|
||||
if got != tc.want {
|
||||
t.Errorf("IsAlias(%q) = %v, want %v", tc.input, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetAliasEnvVar(t *testing.T) {
|
||||
tests := []struct {
|
||||
alias string
|
||||
want string
|
||||
}{
|
||||
{"test", "HOMELAB_DEPLOY_ALIAS_TEST"},
|
||||
{"prod", "HOMELAB_DEPLOY_ALIAS_PROD"},
|
||||
{"prod-dns", "HOMELAB_DEPLOY_ALIAS_PROD_DNS"},
|
||||
{"my-long-alias", "HOMELAB_DEPLOY_ALIAS_MY_LONG_ALIAS"},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.alias, func(t *testing.T) {
|
||||
got := GetAliasEnvVar(tc.alias)
|
||||
if got != tc.want {
|
||||
t.Errorf("GetAliasEnvVar(%q) = %q, want %q", tc.alias, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
213
internal/cli/deploy.go
Normal file
213
internal/cli/deploy.go
Normal file
@@ -0,0 +1,213 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
|
||||
"git.t-juice.club/torjus/homelab-deploy/internal/messages"
|
||||
"git.t-juice.club/torjus/homelab-deploy/internal/nats"
|
||||
)
|
||||
|
||||
// DeployConfig holds configuration for a deploy operation.
|
||||
type DeployConfig struct {
|
||||
NATSUrl string
|
||||
NKeyFile string
|
||||
Subject string
|
||||
Action messages.Action
|
||||
Revision string
|
||||
Timeout time.Duration
|
||||
}
|
||||
|
||||
// DeployResult contains the aggregated results from a deployment.
|
||||
type DeployResult struct {
|
||||
Responses []*messages.DeployResponse
|
||||
Errors []error
|
||||
}
|
||||
|
||||
// AllSucceeded returns true if all responses indicate success.
|
||||
func (r *DeployResult) AllSucceeded() bool {
|
||||
for _, resp := range r.Responses {
|
||||
if resp.Status != messages.StatusCompleted {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return len(r.Responses) > 0 && len(r.Errors) == 0
|
||||
}
|
||||
|
||||
// HostCount returns the number of unique hosts that responded.
|
||||
func (r *DeployResult) HostCount() int {
|
||||
seen := make(map[string]bool)
|
||||
for _, resp := range r.Responses {
|
||||
seen[resp.Hostname] = true
|
||||
}
|
||||
return len(seen)
|
||||
}
|
||||
|
||||
// Deploy executes a deployment to the specified subject and collects responses.
|
||||
func Deploy(ctx context.Context, cfg DeployConfig, onResponse func(*messages.DeployResponse)) (*DeployResult, error) {
|
||||
// Connect to NATS
|
||||
client, err := nats.Connect(nats.Config{
|
||||
URL: cfg.NATSUrl,
|
||||
NKeyFile: cfg.NKeyFile,
|
||||
Name: "homelab-deploy-cli",
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to connect to NATS: %w", err)
|
||||
}
|
||||
defer client.Close()
|
||||
|
||||
// Generate unique reply subject
|
||||
requestID := uuid.New().String()
|
||||
replySubject := fmt.Sprintf("deploy.responses.%s", requestID)
|
||||
|
||||
// Track responses by hostname to handle multiple messages per host
|
||||
var mu sync.Mutex
|
||||
result := &DeployResult{}
|
||||
hostFinal := make(map[string]bool) // track which hosts have sent final status
|
||||
|
||||
// Subscribe to reply subject
|
||||
sub, err := client.Subscribe(replySubject, func(subject string, data []byte) {
|
||||
resp, err := messages.UnmarshalDeployResponse(data)
|
||||
if err != nil {
|
||||
mu.Lock()
|
||||
result.Errors = append(result.Errors, fmt.Errorf("failed to unmarshal response: %w", err))
|
||||
mu.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
mu.Lock()
|
||||
result.Responses = append(result.Responses, resp)
|
||||
if resp.Status.IsFinal() {
|
||||
hostFinal[resp.Hostname] = true
|
||||
}
|
||||
mu.Unlock()
|
||||
|
||||
if onResponse != nil {
|
||||
onResponse(resp)
|
||||
}
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to subscribe to reply subject: %w", err)
|
||||
}
|
||||
defer func() { _ = sub.Unsubscribe() }()
|
||||
|
||||
// Build and send request
|
||||
req := &messages.DeployRequest{
|
||||
Action: cfg.Action,
|
||||
Revision: cfg.Revision,
|
||||
ReplyTo: replySubject,
|
||||
}
|
||||
|
||||
data, err := req.Marshal()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal request: %w", err)
|
||||
}
|
||||
|
||||
if err := client.Publish(cfg.Subject, data); err != nil {
|
||||
return nil, fmt.Errorf("failed to publish request: %w", err)
|
||||
}
|
||||
|
||||
if err := client.Flush(); err != nil {
|
||||
return nil, fmt.Errorf("failed to flush: %w", err)
|
||||
}
|
||||
|
||||
// Wait for responses with timeout
|
||||
// Use a dynamic timeout: wait for initial responses, then extend
|
||||
// timeout after each response until no new responses or max timeout
|
||||
deadline := time.Now().Add(cfg.Timeout)
|
||||
lastResponse := time.Now()
|
||||
idleTimeout := 30 * time.Second // wait this long after last response
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return result, ctx.Err()
|
||||
case <-time.After(1 * time.Second):
|
||||
mu.Lock()
|
||||
responseCount := len(result.Responses)
|
||||
mu.Unlock()
|
||||
|
||||
now := time.Now()
|
||||
|
||||
// Check if we've exceeded the absolute deadline
|
||||
if now.After(deadline) {
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// If we have responses, use idle timeout
|
||||
if responseCount > 0 {
|
||||
mu.Lock()
|
||||
lastResponseTime := lastResponse
|
||||
// Update lastResponse time if we got new responses
|
||||
if responseCount > 0 {
|
||||
// Simple approximation - in practice you'd track this more precisely
|
||||
lastResponseTime = now
|
||||
}
|
||||
mu.Unlock()
|
||||
|
||||
if now.Sub(lastResponseTime) > idleTimeout {
|
||||
return result, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Discover sends a discovery request and collects host information.
|
||||
func Discover(ctx context.Context, natsURL, nkeyFile, discoverSubject string, timeout time.Duration) ([]*messages.DiscoveryResponse, error) {
|
||||
client, err := nats.Connect(nats.Config{
|
||||
URL: natsURL,
|
||||
NKeyFile: nkeyFile,
|
||||
Name: "homelab-deploy-cli-discover",
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to connect to NATS: %w", err)
|
||||
}
|
||||
defer client.Close()
|
||||
|
||||
requestID := uuid.New().String()
|
||||
replySubject := fmt.Sprintf("deploy.responses.discover-%s", requestID)
|
||||
|
||||
var mu sync.Mutex
|
||||
var responses []*messages.DiscoveryResponse
|
||||
|
||||
sub, err := client.Subscribe(replySubject, func(subject string, data []byte) {
|
||||
resp, err := messages.UnmarshalDiscoveryResponse(data)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
mu.Lock()
|
||||
responses = append(responses, resp)
|
||||
mu.Unlock()
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to subscribe: %w", err)
|
||||
}
|
||||
defer func() { _ = sub.Unsubscribe() }()
|
||||
|
||||
req := &messages.DiscoveryRequest{ReplyTo: replySubject}
|
||||
data, err := req.Marshal()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal request: %w", err)
|
||||
}
|
||||
|
||||
if err := client.Publish(discoverSubject, data); err != nil {
|
||||
return nil, fmt.Errorf("failed to publish: %w", err)
|
||||
}
|
||||
|
||||
if err := client.Flush(); err != nil {
|
||||
return nil, fmt.Errorf("failed to flush: %w", err)
|
||||
}
|
||||
|
||||
// Wait for responses
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return responses, ctx.Err()
|
||||
case <-time.After(timeout):
|
||||
return responses, nil
|
||||
}
|
||||
}
|
||||
109
internal/cli/deploy_test.go
Normal file
109
internal/cli/deploy_test.go
Normal file
@@ -0,0 +1,109 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"git.t-juice.club/torjus/homelab-deploy/internal/messages"
|
||||
)
|
||||
|
||||
func TestDeployResult_AllSucceeded(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
responses []*messages.DeployResponse
|
||||
errors []error
|
||||
want bool
|
||||
}{
|
||||
{
|
||||
name: "all completed",
|
||||
responses: []*messages.DeployResponse{
|
||||
{Hostname: "host1", Status: messages.StatusCompleted},
|
||||
{Hostname: "host2", Status: messages.StatusCompleted},
|
||||
},
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "one failed",
|
||||
responses: []*messages.DeployResponse{
|
||||
{Hostname: "host1", Status: messages.StatusCompleted},
|
||||
{Hostname: "host2", Status: messages.StatusFailed},
|
||||
},
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "one rejected",
|
||||
responses: []*messages.DeployResponse{
|
||||
{Hostname: "host1", Status: messages.StatusRejected},
|
||||
},
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "no responses",
|
||||
responses: []*messages.DeployResponse{},
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "has errors",
|
||||
responses: []*messages.DeployResponse{
|
||||
{Hostname: "host1", Status: messages.StatusCompleted},
|
||||
},
|
||||
errors: []error{nil}, // placeholder error
|
||||
want: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
r := &DeployResult{
|
||||
Responses: tc.responses,
|
||||
Errors: tc.errors,
|
||||
}
|
||||
got := r.AllSucceeded()
|
||||
if got != tc.want {
|
||||
t.Errorf("AllSucceeded() = %v, want %v", got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeployResult_HostCount(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
responses []*messages.DeployResponse
|
||||
want int
|
||||
}{
|
||||
{
|
||||
name: "no responses",
|
||||
responses: []*messages.DeployResponse{},
|
||||
want: 0,
|
||||
},
|
||||
{
|
||||
name: "unique hosts",
|
||||
responses: []*messages.DeployResponse{
|
||||
{Hostname: "host1"},
|
||||
{Hostname: "host2"},
|
||||
{Hostname: "host3"},
|
||||
},
|
||||
want: 3,
|
||||
},
|
||||
{
|
||||
name: "duplicate hosts",
|
||||
responses: []*messages.DeployResponse{
|
||||
{Hostname: "host1", Status: messages.StatusStarted},
|
||||
{Hostname: "host1", Status: messages.StatusCompleted},
|
||||
{Hostname: "host2", Status: messages.StatusStarted},
|
||||
{Hostname: "host2", Status: messages.StatusCompleted},
|
||||
},
|
||||
want: 2,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
r := &DeployResult{Responses: tc.responses}
|
||||
got := r.HostCount()
|
||||
if got != tc.want {
|
||||
t.Errorf("HostCount() = %d, want %d", got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user