fix: add nixos-rebuild to PATH and fix CLI hanging after deploy failure
- Add nixos-rebuild to the listener service PATH in the NixOS module.
- Fix the CLI deploy command hanging after receiving the final status by properly tracking the lastResponse time and exiting once all hosts have responded.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -16,7 +16,7 @@ import (
|
|||||||
"github.com/urfave/cli/v3"
|
"github.com/urfave/cli/v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
const version = "0.1.3"
|
const version = "0.1.4"
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
app := &cli.Command{
|
app := &cli.Command{
|
||||||
|
|||||||
@@ -68,6 +68,8 @@ func Deploy(ctx context.Context, cfg DeployConfig, onResponse func(*messages.Dep
|
|||||||
var mu sync.Mutex
|
var mu sync.Mutex
|
||||||
result := &DeployResult{}
|
result := &DeployResult{}
|
||||||
hostFinal := make(map[string]bool) // track which hosts have sent final status
|
hostFinal := make(map[string]bool) // track which hosts have sent final status
|
||||||
|
hostSeen := make(map[string]bool) // track all hosts that have responded
|
||||||
|
lastResponse := time.Now()
|
||||||
|
|
||||||
// Subscribe to reply subject
|
// Subscribe to reply subject
|
||||||
sub, err := client.Subscribe(replySubject, func(subject string, data []byte) {
|
sub, err := client.Subscribe(replySubject, func(subject string, data []byte) {
|
||||||
@@ -81,9 +83,11 @@ func Deploy(ctx context.Context, cfg DeployConfig, onResponse func(*messages.Dep
|
|||||||
|
|
||||||
mu.Lock()
|
mu.Lock()
|
||||||
result.Responses = append(result.Responses, resp)
|
result.Responses = append(result.Responses, resp)
|
||||||
|
hostSeen[resp.Hostname] = true
|
||||||
if resp.Status.IsFinal() {
|
if resp.Status.IsFinal() {
|
||||||
hostFinal[resp.Hostname] = true
|
hostFinal[resp.Hostname] = true
|
||||||
}
|
}
|
||||||
|
lastResponse = time.Now()
|
||||||
mu.Unlock()
|
mu.Unlock()
|
||||||
|
|
||||||
if onResponse != nil {
|
if onResponse != nil {
|
||||||
@@ -119,8 +123,7 @@ func Deploy(ctx context.Context, cfg DeployConfig, onResponse func(*messages.Dep
|
|||||||
// Use a dynamic timeout: wait for initial responses, then extend
|
// Use a dynamic timeout: wait for initial responses, then extend
|
||||||
// timeout after each response until no new responses or max timeout
|
// timeout after each response until no new responses or max timeout
|
||||||
deadline := time.Now().Add(cfg.Timeout)
|
deadline := time.Now().Add(cfg.Timeout)
|
||||||
lastResponse := time.Now()
|
idleTimeout := 30 * time.Second // wait this long after last response for new hosts
|
||||||
idleTimeout := 30 * time.Second // wait this long after last response
|
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
@@ -128,7 +131,9 @@ func Deploy(ctx context.Context, cfg DeployConfig, onResponse func(*messages.Dep
|
|||||||
return result, ctx.Err()
|
return result, ctx.Err()
|
||||||
case <-time.After(1 * time.Second):
|
case <-time.After(1 * time.Second):
|
||||||
mu.Lock()
|
mu.Lock()
|
||||||
responseCount := len(result.Responses)
|
seenCount := len(hostSeen)
|
||||||
|
finalCount := len(hostFinal)
|
||||||
|
lastResponseTime := lastResponse
|
||||||
mu.Unlock()
|
mu.Unlock()
|
||||||
|
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
@@ -138,21 +143,19 @@ func Deploy(ctx context.Context, cfg DeployConfig, onResponse func(*messages.Dep
|
|||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we have responses, use idle timeout
|
// If all hosts that responded have sent final status, we're done
|
||||||
if responseCount > 0 {
|
// Add a short grace period for late arrivals from other hosts
|
||||||
mu.Lock()
|
if seenCount > 0 && seenCount == finalCount {
|
||||||
lastResponseTime := lastResponse
|
// Wait a bit for any other hosts to respond
|
||||||
// Update lastResponse time if we got new responses
|
if now.Sub(lastResponseTime) > 2*time.Second {
|
||||||
if responseCount > 0 {
|
|
||||||
// Simple approximation - in practice you'd track this more precisely
|
|
||||||
lastResponseTime = now
|
|
||||||
}
|
|
||||||
mu.Unlock()
|
|
||||||
|
|
||||||
if now.Sub(lastResponseTime) > idleTimeout {
|
|
||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we have responses but waiting for more hosts, use idle timeout
|
||||||
|
if seenCount > 0 && now.Sub(lastResponseTime) > idleTimeout {
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -105,7 +105,7 @@ in
|
|||||||
|
|
||||||
environment = cfg.environment;
|
environment = cfg.environment;
|
||||||
|
|
||||||
path = [ pkgs.git ];
|
path = [ pkgs.git config.system.build.nixos-rebuild ];
|
||||||
|
|
||||||
serviceConfig = {
|
serviceConfig = {
|
||||||
Type = "simple";
|
Type = "simple";
|
||||||
|
|||||||
Reference in New Issue
Block a user