fix: add nixos-rebuild to PATH and fix CLI hanging after deploy failure

- Add nixos-rebuild to listener service PATH in NixOS module
- Fix CLI deploy command hanging after receiving the final status: record the
  lastResponse time in the reply handler and exit once every host that has
  responded has sent a final status (after a short grace period)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-07 05:53:22 +01:00
parent c9b85435ba
commit 5f3cfc3d21
3 changed files with 21 additions and 18 deletions

View File

@@ -67,7 +67,9 @@ func Deploy(ctx context.Context, cfg DeployConfig, onResponse func(*messages.Dep
// Track responses by hostname to handle multiple messages per host
var mu sync.Mutex
result := &DeployResult{}
hostFinal := make(map[string]bool) // track which hosts have sent final status
hostFinal := make(map[string]bool) // track which hosts have sent final status
hostSeen := make(map[string]bool) // track all hosts that have responded
lastResponse := time.Now()
// Subscribe to reply subject
sub, err := client.Subscribe(replySubject, func(subject string, data []byte) {
@@ -81,9 +83,11 @@ func Deploy(ctx context.Context, cfg DeployConfig, onResponse func(*messages.Dep
mu.Lock()
result.Responses = append(result.Responses, resp)
hostSeen[resp.Hostname] = true
if resp.Status.IsFinal() {
hostFinal[resp.Hostname] = true
}
lastResponse = time.Now()
mu.Unlock()
if onResponse != nil {
@@ -119,8 +123,7 @@ func Deploy(ctx context.Context, cfg DeployConfig, onResponse func(*messages.Dep
// Use a dynamic timeout: wait for initial responses, then extend the
// timeout after each response until responses stop arriving or the max timeout is reached
deadline := time.Now().Add(cfg.Timeout)
lastResponse := time.Now()
idleTimeout := 30 * time.Second // wait this long after last response
idleTimeout := 30 * time.Second // wait this long after last response for new hosts
for {
select {
@@ -128,7 +131,9 @@ func Deploy(ctx context.Context, cfg DeployConfig, onResponse func(*messages.Dep
return result, ctx.Err()
case <-time.After(1 * time.Second):
mu.Lock()
responseCount := len(result.Responses)
seenCount := len(hostSeen)
finalCount := len(hostFinal)
lastResponseTime := lastResponse
mu.Unlock()
now := time.Now()
@@ -138,21 +143,19 @@ func Deploy(ctx context.Context, cfg DeployConfig, onResponse func(*messages.Dep
return result, nil
}
// If we have responses, use idle timeout
if responseCount > 0 {
mu.Lock()
lastResponseTime := lastResponse
// Update lastResponse time if we got new responses
if responseCount > 0 {
// Simple approximation - in practice you'd track this more precisely
lastResponseTime = now
}
mu.Unlock()
if now.Sub(lastResponseTime) > idleTimeout {
// If all hosts that responded have sent final status, we're done
// Add a short grace period for late arrivals from other hosts
if seenCount > 0 && seenCount == finalCount {
// Wait a bit for any other hosts to respond
if now.Sub(lastResponseTime) > 2*time.Second {
return result, nil
}
}
// If we have responses but are still waiting for more hosts, use the idle timeout
if seenCount > 0 && now.Sub(lastResponseTime) > idleTimeout {
return result, nil
}
}
}
}