feat: add --debug flag for metrics troubleshooting

Add a --debug flag to the listener command that enables debug-level
logging. When enabled, the listener logs detailed information about
metrics recording including:

- When deployment start/end metrics are recorded
- The action, success status, and duration being recorded
- Whether metrics are enabled or disabled (skipped)

This helps troubleshoot issues where deployment metrics appear to
remain at zero after deployments.

Also add extraArgs option to the NixOS module to allow passing
additional arguments like --debug to the service.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-09 01:26:12 +01:00
parent 723a1f769f
commit c934d1ba38
3 changed files with 60 additions and 4 deletions

View File

@@ -27,6 +27,7 @@ type Config struct {
MetricsEnabled bool
MetricsAddr string
Version string
Debug bool
}
// Listener handles deployment requests from NATS.
@@ -203,7 +204,14 @@ func (l *Listener) handleDeployRequest(subject string, data []byte) {
// Record deployment start for metrics
if l.metrics != nil {
l.logger.Debug("recording deployment start metric",
"metrics_enabled", true,
)
l.metrics.RecordDeploymentStart()
} else {
l.logger.Debug("skipping deployment start metric",
"metrics_enabled", false,
)
}
startTime := time.Now()
@@ -219,9 +227,19 @@ func (l *Listener) handleDeployRequest(subject string, data []byte) {
messages.StatusFailed,
fmt.Sprintf("revision validation failed: %v", err),
).WithError(messages.ErrorInvalidRevision))
duration := time.Since(startTime).Seconds()
if l.metrics != nil {
duration := time.Since(startTime).Seconds()
l.logger.Debug("recording deployment failure metric (revision validation)",
"action", req.Action,
"error_code", messages.ErrorInvalidRevision,
"duration_seconds", duration,
)
l.metrics.RecordDeploymentFailure(req.Action, messages.ErrorInvalidRevision, duration)
} else {
l.logger.Debug("skipping deployment failure metric",
"metrics_enabled", false,
"duration_seconds", duration,
)
}
return
}
@@ -265,7 +283,17 @@ func (l *Listener) handleDeployRequest(subject string, data []byte) {
l.logger.Error("failed to flush completed response", "error", err)
}
if l.metrics != nil {
l.logger.Debug("recording deployment end metric (success)",
"action", req.Action,
"success", true,
"duration_seconds", duration,
)
l.metrics.RecordDeploymentEnd(req.Action, true, duration)
} else {
l.logger.Debug("skipping deployment end metric",
"metrics_enabled", false,
"duration_seconds", duration,
)
}
// After a successful switch, signal restart so we pick up any new version
@@ -305,7 +333,17 @@ func (l *Listener) handleDeployRequest(subject string, data []byte) {
fmt.Sprintf("deployment failed (exit code %d): %s", result.ExitCode, result.Stderr),
).WithError(errorCode))
if l.metrics != nil {
l.logger.Debug("recording deployment failure metric",
"action", req.Action,
"error_code", errorCode,
"duration_seconds", duration,
)
l.metrics.RecordDeploymentFailure(req.Action, errorCode, duration)
} else {
l.logger.Debug("skipping deployment failure metric",
"metrics_enabled", false,
"duration_seconds", duration,
)
}
}
}