Compare commits

..

2 Commits

Author SHA1 Message Date
277a49a666 chore: update flake inputs
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-09 15:44:18 +01:00
bc02393c5a fix: wait for metrics scrape before restarting after switch deployment
After a successful switch deployment, the listener now waits for Prometheus
to scrape the /metrics endpoint before exiting for restart. This ensures
deployment metrics are captured before the process restarts and resets
in-memory counters. Falls back to a 60 second timeout if no scrape occurs.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-09 15:44:14 +01:00
8 changed files with 8 additions and 560 deletions

View File

@@ -63,8 +63,6 @@ homelab-deploy listener \
| `--discover-subject` | No | Discovery subject (default: `deploy.discover`) |
| `--metrics-enabled` | No | Enable Prometheus metrics endpoint |
| `--metrics-addr` | No | Metrics HTTP server address (default: `:9972`) |
| `--heartbeat-interval` | No | Status update interval in seconds during deployment (default: 15) |
| `--debug` | No | Enable debug logging for troubleshooting |
#### Subject Templates
@@ -216,7 +214,6 @@ Add the module to your NixOS configuration:
| `metrics.enable` | bool | `false` | Enable Prometheus metrics endpoint |
| `metrics.address` | string | `":9972"` | Metrics HTTP server address |
| `metrics.openFirewall` | bool | `false` | Open firewall for metrics port |
| `extraArgs` | list of string | `[]` | Extra command line arguments (e.g., `["--debug"]`) |
Default `deploySubjects`:
```nix
@@ -301,57 +298,6 @@ histogram_quantile(0.95, rate(homelab_deploy_deployment_duration_seconds_bucket[
sum(homelab_deploy_deployment_in_progress)
```
## Troubleshooting
### Debug Logging
Enable debug logging to diagnose issues with deployments or metrics:
**CLI:**
```bash
homelab-deploy listener --debug \
--hostname myhost \
--tier prod \
--nats-url nats://nats.example.com:4222 \
--nkey-file /run/secrets/listener.nkey \
--flake-url git+https://git.example.com/user/nixos-configs.git \
--metrics-enabled
```
**NixOS module:**
```nix
services.homelab-deploy.listener = {
enable = true;
tier = "prod";
natsUrl = "nats://nats.example.com:4222";
nkeyFile = "/run/secrets/homelab-deploy-nkey";
flakeUrl = "git+https://git.example.com/user/nixos-configs.git";
metrics.enable = true;
extraArgs = [ "--debug" ];
};
```
With debug logging enabled, the listener outputs detailed information about metrics recording:
```json
{"level":"DEBUG","msg":"recording deployment start metric","metrics_enabled":true}
{"level":"DEBUG","msg":"recording deployment end metric (success)","action":"switch","success":true,"duration_seconds":120.5}
```
### Metrics Showing Zero
If deployment metrics remain at zero after deployments:
1. **Check metrics are enabled**: Verify `--metrics-enabled` is set and the metrics endpoint is accessible at `/metrics`
2. **Enable debug logging**: Use `--debug` to confirm metrics recording is being called
3. **Check deployment status**: Deployment duration is only recorded for deployments that run to completion (success or failure). Rejected requests (e.g., a deployment is already running) increment the counter with `status="rejected"` but do not record a duration.
4. **Check after restart**: After a successful `switch` deployment, the listener restarts, and all metrics reset to zero in the new instance. Before restarting, the listener waits up to 60 seconds for a Prometheus scrape so that the final metrics are captured.
5. **Verify Prometheus scrape timing**: Ensure the Prometheus scrape interval is short enough (under the listener's 60-second wait) for a scrape to land before the listener restarts.
## Message Protocol
### Deploy Request

View File

@@ -16,7 +16,7 @@ import (
"github.com/urfave/cli/v3"
)
const version = "0.1.14"
const version = "0.1.13"
func main() {
app := &cli.Command{
@@ -42,10 +42,6 @@ func listenerCommand() *cli.Command {
Name: "listener",
Usage: "Run as a deployment listener (systemd service mode)",
Flags: []cli.Flag{
&cli.BoolFlag{
Name: "debug",
Usage: "Enable debug logging for troubleshooting",
},
&cli.StringFlag{
Name: "hostname",
Usage: "Hostname for this listener",
@@ -129,16 +125,10 @@ func listenerCommand() *cli.Command {
MetricsEnabled: c.Bool("metrics-enabled"),
MetricsAddr: c.String("metrics-addr"),
Version: version,
Debug: c.Bool("debug"),
}
logLevel := slog.LevelInfo
if c.Bool("debug") {
logLevel = slog.LevelDebug
}
logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
Level: logLevel,
Level: slog.LevelInfo,
}))
l := listener.New(cfg, logger)

6
flake.lock generated
View File

@@ -2,11 +2,11 @@
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1770197578,
"narHash": "sha256-AYqlWrX09+HvGs8zM6ebZ1pwUqjkfpnv8mewYwAo+iM=",
"lastModified": 1770562336,
"narHash": "sha256-ub1gpAONMFsT/GU2hV6ZWJjur8rJ6kKxdm9IlCT0j84=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "00c21e4c93d963c50d4c0c89bfa84ed6e0694df2",
"rev": "d6c71932130818840fc8fe9509cf50be8c64634f",
"type": "github"
},
"original": {

View File

@@ -27,7 +27,6 @@ type Config struct {
MetricsEnabled bool
MetricsAddr string
Version string
Debug bool
}
// Listener handles deployment requests from NATS.
@@ -204,14 +203,7 @@ func (l *Listener) handleDeployRequest(subject string, data []byte) {
// Record deployment start for metrics
if l.metrics != nil {
l.logger.Debug("recording deployment start metric",
"metrics_enabled", true,
)
l.metrics.RecordDeploymentStart()
} else {
l.logger.Debug("skipping deployment start metric",
"metrics_enabled", false,
)
}
startTime := time.Now()
@@ -227,19 +219,9 @@ func (l *Listener) handleDeployRequest(subject string, data []byte) {
messages.StatusFailed,
fmt.Sprintf("revision validation failed: %v", err),
).WithError(messages.ErrorInvalidRevision))
duration := time.Since(startTime).Seconds()
if l.metrics != nil {
l.logger.Debug("recording deployment failure metric (revision validation)",
"action", req.Action,
"error_code", messages.ErrorInvalidRevision,
"duration_seconds", duration,
)
duration := time.Since(startTime).Seconds()
l.metrics.RecordDeploymentFailure(req.Action, messages.ErrorInvalidRevision, duration)
} else {
l.logger.Debug("skipping deployment failure metric",
"metrics_enabled", false,
"duration_seconds", duration,
)
}
return
}
@@ -283,17 +265,7 @@ func (l *Listener) handleDeployRequest(subject string, data []byte) {
l.logger.Error("failed to flush completed response", "error", err)
}
if l.metrics != nil {
l.logger.Debug("recording deployment end metric (success)",
"action", req.Action,
"success", true,
"duration_seconds", duration,
)
l.metrics.RecordDeploymentEnd(req.Action, true, duration)
} else {
l.logger.Debug("skipping deployment end metric",
"metrics_enabled", false,
"duration_seconds", duration,
)
}
// After a successful switch, signal restart so we pick up any new version
@@ -333,17 +305,7 @@ func (l *Listener) handleDeployRequest(subject string, data []byte) {
fmt.Sprintf("deployment failed (exit code %d): %s", result.ExitCode, result.Stderr),
).WithError(errorCode))
if l.metrics != nil {
l.logger.Debug("recording deployment failure metric",
"action", req.Action,
"error_code", errorCode,
"duration_seconds", duration,
)
l.metrics.RecordDeploymentFailure(req.Action, errorCode, duration)
} else {
l.logger.Debug("skipping deployment failure metric",
"metrics_enabled", false,
"duration_seconds", duration,
)
}
}
}

View File

@@ -2,14 +2,8 @@ package listener
import (
"log/slog"
"strings"
"testing"
"time"
"git.t-juice.club/torjus/homelab-deploy/internal/messages"
"git.t-juice.club/torjus/homelab-deploy/internal/metrics"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
)
func TestNew(t *testing.T) {
@@ -57,148 +51,3 @@ func TestNew_WithLogger(t *testing.T) {
t.Error("should use provided logger")
}
}
func TestNew_WithMetricsEnabled(t *testing.T) {
cfg := Config{
Hostname: "test-host",
Tier: "test",
MetricsEnabled: true,
MetricsAddr: ":0",
}
l := New(cfg, nil)
if l.metricsServer == nil {
t.Error("metricsServer should not be nil when MetricsEnabled is true")
}
if l.metrics == nil {
t.Error("metrics should not be nil when MetricsEnabled is true")
}
}
// TestListener_MetricsRecordedOnDeployment checks the metrics produced by the
// collector calls that handleDeployRequest makes for a successful switch
// deployment.
//
// Note that no Listener is constructed and handleDeployRequest is never
// invoked: the test drives a metrics collector directly and then compares
// the gathered counter and histogram output against fixed expectations.
func TestListener_MetricsRecordedOnDeployment(t *testing.T) {
// This test verifies that the listener correctly calls metrics functions
// when processing deployments. We test this by directly calling the internal
// metrics recording logic that handleDeployRequest uses.
// A dedicated registry is created for this test and handed to the collector.
reg := prometheus.NewRegistry()
collector := metrics.NewCollector(reg)
// Simulate what handleDeployRequest does for a successful deployment:
// a start record followed by an end record for a "switch" that took 120.5s.
// The effect of RecordDeploymentStart itself is not asserted below; only the
// deployments counter and the duration histogram are compared.
collector.RecordDeploymentStart()
collector.RecordDeploymentEnd(messages.ActionSwitch, true, 120.5)
// Verify counter was incremented: only the switch/completed series is 1,
// every other action/status series listed is expected to be 0.
counterExpected := `
# HELP homelab_deploy_deployments_total Total deployment requests processed
# TYPE homelab_deploy_deployments_total counter
homelab_deploy_deployments_total{action="boot",error_code="",status="completed"} 0
homelab_deploy_deployments_total{action="boot",error_code="",status="failed"} 0
homelab_deploy_deployments_total{action="dry-activate",error_code="",status="completed"} 0
homelab_deploy_deployments_total{action="dry-activate",error_code="",status="failed"} 0
homelab_deploy_deployments_total{action="switch",error_code="",status="completed"} 1
homelab_deploy_deployments_total{action="switch",error_code="",status="failed"} 0
homelab_deploy_deployments_total{action="test",error_code="",status="completed"} 0
homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0
`
// The comparison is limited to the homelab_deploy_deployments_total metric.
if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil {
t.Errorf("unexpected counter metrics: %v", err)
}
// Verify histogram was updated (120.5 seconds falls into le="300" and higher buckets).
// Only the action="switch",success="true" series carries an observation; its
// sum is 120.5 and its count is 1. All other series are expected to be empty.
histogramExpected := `
# HELP homelab_deploy_deployment_duration_seconds Deployment execution time
# TYPE homelab_deploy_deployment_duration_seconds histogram
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="boot",success="false"} 0
homelab_deploy_deployment_duration_seconds_count{action="boot",success="false"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="boot",success="true"} 0
homelab_deploy_deployment_duration_seconds_count{action="boot",success="true"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="false"} 0
homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="false"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="true"} 0
homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="true"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="switch",success="false"} 0
homelab_deploy_deployment_duration_seconds_count{action="switch",success="false"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="300"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="600"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="900"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1200"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1800"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="+Inf"} 1
homelab_deploy_deployment_duration_seconds_sum{action="switch",success="true"} 120.5
homelab_deploy_deployment_duration_seconds_count{action="switch",success="true"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="test",success="false"} 0
homelab_deploy_deployment_duration_seconds_count{action="test",success="false"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="test",success="true"} 0
homelab_deploy_deployment_duration_seconds_count{action="test",success="true"} 0
`
// The comparison is limited to the homelab_deploy_deployment_duration_seconds metric.
if err := testutil.GatherAndCompare(reg, strings.NewReader(histogramExpected), "homelab_deploy_deployment_duration_seconds"); err != nil {
t.Errorf("unexpected histogram metrics: %v", err)
}
}

View File

@@ -78,103 +78,6 @@ homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0
if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil {
t.Errorf("unexpected counter metrics: %v", err)
}
// Check histogram recorded the duration (120.5 seconds falls into le="300" and higher buckets)
histogramExpected := `
# HELP homelab_deploy_deployment_duration_seconds Deployment execution time
# TYPE homelab_deploy_deployment_duration_seconds histogram
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="boot",success="false"} 0
homelab_deploy_deployment_duration_seconds_count{action="boot",success="false"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="boot",success="true"} 0
homelab_deploy_deployment_duration_seconds_count{action="boot",success="true"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="false"} 0
homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="false"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="true"} 0
homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="true"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="switch",success="false"} 0
homelab_deploy_deployment_duration_seconds_count{action="switch",success="false"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="300"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="600"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="900"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1200"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1800"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="+Inf"} 1
homelab_deploy_deployment_duration_seconds_sum{action="switch",success="true"} 120.5
homelab_deploy_deployment_duration_seconds_count{action="switch",success="true"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="test",success="false"} 0
homelab_deploy_deployment_duration_seconds_count{action="test",success="false"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="test",success="true"} 0
homelab_deploy_deployment_duration_seconds_count{action="test",success="true"} 0
`
if err := testutil.GatherAndCompare(reg, strings.NewReader(histogramExpected), "homelab_deploy_deployment_duration_seconds"); err != nil {
t.Errorf("unexpected histogram metrics: %v", err)
}
}
func TestCollector_RecordDeploymentEnd_Failure(t *testing.T) {
@@ -199,103 +102,6 @@ homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0
if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil {
t.Errorf("unexpected counter metrics: %v", err)
}
// Check histogram recorded the duration (60.0 seconds falls into le="60" and higher buckets)
histogramExpected := `
# HELP homelab_deploy_deployment_duration_seconds Deployment execution time
# TYPE homelab_deploy_deployment_duration_seconds histogram
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="60"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="120"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="300"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="600"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="900"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1200"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1800"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="+Inf"} 1
homelab_deploy_deployment_duration_seconds_sum{action="boot",success="false"} 60
homelab_deploy_deployment_duration_seconds_count{action="boot",success="false"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="boot",success="true"} 0
homelab_deploy_deployment_duration_seconds_count{action="boot",success="true"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="false"} 0
homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="false"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="true"} 0
homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="true"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="switch",success="false"} 0
homelab_deploy_deployment_duration_seconds_count{action="switch",success="false"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="switch",success="true"} 0
homelab_deploy_deployment_duration_seconds_count{action="switch",success="true"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="test",success="false"} 0
homelab_deploy_deployment_duration_seconds_count{action="test",success="false"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="test",success="true"} 0
homelab_deploy_deployment_duration_seconds_count{action="test",success="true"} 0
`
if err := testutil.GatherAndCompare(reg, strings.NewReader(histogramExpected), "homelab_deploy_deployment_duration_seconds"); err != nil {
t.Errorf("unexpected histogram metrics: %v", err)
}
}
func TestCollector_RecordDeploymentFailure(t *testing.T) {
@@ -321,103 +127,6 @@ homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0
if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil {
t.Errorf("unexpected counter metrics: %v", err)
}
// Check histogram recorded the duration (300.0 seconds falls into le="300" and higher buckets)
histogramExpected := `
# HELP homelab_deploy_deployment_duration_seconds Deployment execution time
# TYPE homelab_deploy_deployment_duration_seconds histogram
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="boot",success="false"} 0
homelab_deploy_deployment_duration_seconds_count{action="boot",success="false"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="boot",success="true"} 0
homelab_deploy_deployment_duration_seconds_count{action="boot",success="true"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="false"} 0
homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="false"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="true"} 0
homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="true"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="300"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="600"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="900"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1200"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1800"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="+Inf"} 1
homelab_deploy_deployment_duration_seconds_sum{action="switch",success="false"} 300
homelab_deploy_deployment_duration_seconds_count{action="switch",success="false"} 1
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="switch",success="true"} 0
homelab_deploy_deployment_duration_seconds_count{action="switch",success="true"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="test",success="false"} 0
homelab_deploy_deployment_duration_seconds_count{action="test",success="false"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="30"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="60"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="120"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="300"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="600"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="900"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1200"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1800"} 0
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="+Inf"} 0
homelab_deploy_deployment_duration_seconds_sum{action="test",success="true"} 0
homelab_deploy_deployment_duration_seconds_count{action="test",success="true"} 0
`
if err := testutil.GatherAndCompare(reg, strings.NewReader(histogramExpected), "homelab_deploy_deployment_duration_seconds"); err != nil {
t.Errorf("unexpected histogram metrics: %v", err)
}
}
func TestCollector_RecordRejection(t *testing.T) {

View File

@@ -36,7 +36,7 @@ func NewServer(cfg ServerConfig) *Server {
registry := prometheus.NewRegistry()
collector := NewCollector(registry)
scrapeCh := make(chan struct{}, 1)
scrapeCh := make(chan struct{})
metricsHandler := promhttp.HandlerFor(registry, promhttp.HandlerOpts{
Registry: registry,

View File

@@ -19,8 +19,7 @@ let
++ lib.optionals cfg.metrics.enable [
"--metrics-enabled"
"--metrics-addr ${lib.escapeShellArg cfg.metrics.address}"
]
++ cfg.extraArgs);
]);
# Extract port from metrics address for firewall rule
metricsPort = let
@@ -123,13 +122,6 @@ in
description = "Open firewall for metrics port";
};
};
extraArgs = lib.mkOption {
type = lib.types.listOf lib.types.str;
default = [ ];
description = "Extra command line arguments to pass to the listener";
example = [ "--debug" ];
};
};
config = lib.mkIf cfg.enable {