feat: add --debug flag for metrics troubleshooting
Add a --debug flag to the listener command that enables debug-level logging.
When enabled, the listener logs detailed information about metrics recording, including:

- When deployment start/end metrics are recorded
- The action, success status, and duration being recorded
- Whether metrics are enabled or disabled (skipped)

This helps troubleshoot issues where deployment metrics appear to remain at zero after deployments.

Also add an extraArgs option to the NixOS module to allow passing additional arguments like --debug to the service.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -16,7 +16,7 @@ import (
|
|||||||
"github.com/urfave/cli/v3"
|
"github.com/urfave/cli/v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
const version = "0.1.13"
|
const version = "0.1.14"
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
app := &cli.Command{
|
app := &cli.Command{
|
||||||
@@ -42,6 +42,10 @@ func listenerCommand() *cli.Command {
|
|||||||
Name: "listener",
|
Name: "listener",
|
||||||
Usage: "Run as a deployment listener (systemd service mode)",
|
Usage: "Run as a deployment listener (systemd service mode)",
|
||||||
Flags: []cli.Flag{
|
Flags: []cli.Flag{
|
||||||
|
&cli.BoolFlag{
|
||||||
|
Name: "debug",
|
||||||
|
Usage: "Enable debug logging for troubleshooting",
|
||||||
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "hostname",
|
Name: "hostname",
|
||||||
Usage: "Hostname for this listener",
|
Usage: "Hostname for this listener",
|
||||||
@@ -125,10 +129,16 @@ func listenerCommand() *cli.Command {
|
|||||||
MetricsEnabled: c.Bool("metrics-enabled"),
|
MetricsEnabled: c.Bool("metrics-enabled"),
|
||||||
MetricsAddr: c.String("metrics-addr"),
|
MetricsAddr: c.String("metrics-addr"),
|
||||||
Version: version,
|
Version: version,
|
||||||
|
Debug: c.Bool("debug"),
|
||||||
|
}
|
||||||
|
|
||||||
|
logLevel := slog.LevelInfo
|
||||||
|
if c.Bool("debug") {
|
||||||
|
logLevel = slog.LevelDebug
|
||||||
}
|
}
|
||||||
|
|
||||||
logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
|
logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
|
||||||
Level: slog.LevelInfo,
|
Level: logLevel,
|
||||||
}))
|
}))
|
||||||
|
|
||||||
l := listener.New(cfg, logger)
|
l := listener.New(cfg, logger)
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ type Config struct {
|
|||||||
MetricsEnabled bool
|
MetricsEnabled bool
|
||||||
MetricsAddr string
|
MetricsAddr string
|
||||||
Version string
|
Version string
|
||||||
|
Debug bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// Listener handles deployment requests from NATS.
|
// Listener handles deployment requests from NATS.
|
||||||
@@ -203,7 +204,14 @@ func (l *Listener) handleDeployRequest(subject string, data []byte) {
|
|||||||
|
|
||||||
// Record deployment start for metrics
|
// Record deployment start for metrics
|
||||||
if l.metrics != nil {
|
if l.metrics != nil {
|
||||||
|
l.logger.Debug("recording deployment start metric",
|
||||||
|
"metrics_enabled", true,
|
||||||
|
)
|
||||||
l.metrics.RecordDeploymentStart()
|
l.metrics.RecordDeploymentStart()
|
||||||
|
} else {
|
||||||
|
l.logger.Debug("skipping deployment start metric",
|
||||||
|
"metrics_enabled", false,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
|
|
||||||
@@ -219,9 +227,19 @@ func (l *Listener) handleDeployRequest(subject string, data []byte) {
|
|||||||
messages.StatusFailed,
|
messages.StatusFailed,
|
||||||
fmt.Sprintf("revision validation failed: %v", err),
|
fmt.Sprintf("revision validation failed: %v", err),
|
||||||
).WithError(messages.ErrorInvalidRevision))
|
).WithError(messages.ErrorInvalidRevision))
|
||||||
if l.metrics != nil {
|
|
||||||
duration := time.Since(startTime).Seconds()
|
duration := time.Since(startTime).Seconds()
|
||||||
|
if l.metrics != nil {
|
||||||
|
l.logger.Debug("recording deployment failure metric (revision validation)",
|
||||||
|
"action", req.Action,
|
||||||
|
"error_code", messages.ErrorInvalidRevision,
|
||||||
|
"duration_seconds", duration,
|
||||||
|
)
|
||||||
l.metrics.RecordDeploymentFailure(req.Action, messages.ErrorInvalidRevision, duration)
|
l.metrics.RecordDeploymentFailure(req.Action, messages.ErrorInvalidRevision, duration)
|
||||||
|
} else {
|
||||||
|
l.logger.Debug("skipping deployment failure metric",
|
||||||
|
"metrics_enabled", false,
|
||||||
|
"duration_seconds", duration,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -265,7 +283,17 @@ func (l *Listener) handleDeployRequest(subject string, data []byte) {
|
|||||||
l.logger.Error("failed to flush completed response", "error", err)
|
l.logger.Error("failed to flush completed response", "error", err)
|
||||||
}
|
}
|
||||||
if l.metrics != nil {
|
if l.metrics != nil {
|
||||||
|
l.logger.Debug("recording deployment end metric (success)",
|
||||||
|
"action", req.Action,
|
||||||
|
"success", true,
|
||||||
|
"duration_seconds", duration,
|
||||||
|
)
|
||||||
l.metrics.RecordDeploymentEnd(req.Action, true, duration)
|
l.metrics.RecordDeploymentEnd(req.Action, true, duration)
|
||||||
|
} else {
|
||||||
|
l.logger.Debug("skipping deployment end metric",
|
||||||
|
"metrics_enabled", false,
|
||||||
|
"duration_seconds", duration,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// After a successful switch, signal restart so we pick up any new version
|
// After a successful switch, signal restart so we pick up any new version
|
||||||
@@ -305,7 +333,17 @@ func (l *Listener) handleDeployRequest(subject string, data []byte) {
|
|||||||
fmt.Sprintf("deployment failed (exit code %d): %s", result.ExitCode, result.Stderr),
|
fmt.Sprintf("deployment failed (exit code %d): %s", result.ExitCode, result.Stderr),
|
||||||
).WithError(errorCode))
|
).WithError(errorCode))
|
||||||
if l.metrics != nil {
|
if l.metrics != nil {
|
||||||
|
l.logger.Debug("recording deployment failure metric",
|
||||||
|
"action", req.Action,
|
||||||
|
"error_code", errorCode,
|
||||||
|
"duration_seconds", duration,
|
||||||
|
)
|
||||||
l.metrics.RecordDeploymentFailure(req.Action, errorCode, duration)
|
l.metrics.RecordDeploymentFailure(req.Action, errorCode, duration)
|
||||||
|
} else {
|
||||||
|
l.logger.Debug("skipping deployment failure metric",
|
||||||
|
"metrics_enabled", false,
|
||||||
|
"duration_seconds", duration,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,7 +19,8 @@ let
|
|||||||
++ lib.optionals cfg.metrics.enable [
|
++ lib.optionals cfg.metrics.enable [
|
||||||
"--metrics-enabled"
|
"--metrics-enabled"
|
||||||
"--metrics-addr ${lib.escapeShellArg cfg.metrics.address}"
|
"--metrics-addr ${lib.escapeShellArg cfg.metrics.address}"
|
||||||
]);
|
]
|
||||||
|
++ cfg.extraArgs);
|
||||||
|
|
||||||
# Extract port from metrics address for firewall rule
|
# Extract port from metrics address for firewall rule
|
||||||
metricsPort = let
|
metricsPort = let
|
||||||
@@ -122,6 +123,13 @@ in
|
|||||||
description = "Open firewall for metrics port";
|
description = "Open firewall for metrics port";
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
extraArgs = lib.mkOption {
|
||||||
|
type = lib.types.listOf lib.types.str;
|
||||||
|
default = [ ];
|
||||||
|
description = "Extra command line arguments to pass to the listener";
|
||||||
|
example = [ "--debug" ];
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
config = lib.mkIf cfg.enable {
|
config = lib.mkIf cfg.enable {
|
||||||
|
|||||||
Reference in New Issue
Block a user