From 746e30b24f52dfbdaba1dce5d765082f231bb305 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= Date: Sun, 8 Feb 2026 21:29:36 +0100 Subject: [PATCH] fix: initialize counter and histogram metrics at startup Counter and histogram metrics were absent from Prometheus scrapes until the first deployment occurred, making it impossible to distinguish "no deployments" from "exporter not running" in dashboards and alerts. Initialize all expected label combinations with zero values when the collector is created so metrics appear in every scrape from startup. Co-Authored-By: Claude Opus 4.5 --- cmd/homelab-deploy/main.go | 2 +- internal/metrics/metrics.go | 27 ++++++ internal/metrics/metrics_test.go | 151 ++++++++++++++++++++++++++++++- 3 files changed, 178 insertions(+), 2 deletions(-) diff --git a/cmd/homelab-deploy/main.go b/cmd/homelab-deploy/main.go index 71891a0..837014a 100644 --- a/cmd/homelab-deploy/main.go +++ b/cmd/homelab-deploy/main.go @@ -16,7 +16,7 @@ import ( "github.com/urfave/cli/v3" ) -const version = "0.1.10" +const version = "0.1.11" func main() { app := &cli.Command{ diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index c4e872b..bbb9535 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -53,9 +53,36 @@ func NewCollector(reg prometheus.Registerer) *Collector { reg.MustRegister(c.deploymentInProgress) reg.MustRegister(c.info) + c.initMetrics() + return c } +// initMetrics pre-creates the completed/failed (empty error_code) counter series and the success true/false histogram series for every listed action. +// This ensures those series appear with zero values in Prometheus scrapes before any deployments occur; rejected and error-coded series are still created on first use. 
+func (c *Collector) initMetrics() { + actions := []messages.Action{ + messages.ActionSwitch, + messages.ActionBoot, + messages.ActionTest, + messages.ActionDryActivate, + } + + // Pre-create the deployment counter for the common status/action combinations; the WithLabelValues results are discarded because only child creation matters here + for _, action := range actions { + // Successful completions (no error code) + c.deploymentsTotal.WithLabelValues("completed", string(action), "") + // Failed deployments (no error code - from RecordDeploymentEnd) + c.deploymentsTotal.WithLabelValues("failed", string(action), "") + } + + // Pre-create the duration histogram for every action/success combination + for _, action := range actions { + c.deploymentDuration.WithLabelValues(string(action), "true") + c.deploymentDuration.WithLabelValues(string(action), "false") + } +} + // SetInfo sets the static instance metadata. func (c *Collector) SetInfo(hostname, tier, role, version string) { c.info.WithLabelValues(hostname, tier, role, version).Set(1) diff --git a/internal/metrics/metrics_test.go b/internal/metrics/metrics_test.go index 3acc383..2fe7a74 100644 --- a/internal/metrics/metrics_test.go +++ b/internal/metrics/metrics_test.go @@ -62,11 +62,18 @@ homelab_deploy_deployment_in_progress 0 t.Errorf("unexpected in_progress metrics: %v", err) } - // Check counter incremented + // Check counter incremented (includes all pre-initialized metrics) counterExpected := ` # HELP homelab_deploy_deployments_total Total deployment requests processed # TYPE homelab_deploy_deployments_total counter +homelab_deploy_deployments_total{action="boot",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="boot",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="failed"} 0 homelab_deploy_deployments_total{action="switch",error_code="",status="completed"} 1 
+homelab_deploy_deployments_total{action="switch",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="test",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0 ` if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil { t.Errorf("unexpected counter metrics: %v", err) @@ -83,7 +90,14 @@ func TestCollector_RecordDeploymentEnd_Failure(t *testing.T) { counterExpected := ` # HELP homelab_deploy_deployments_total Total deployment requests processed # TYPE homelab_deploy_deployments_total counter +homelab_deploy_deployments_total{action="boot",error_code="",status="completed"} 0 homelab_deploy_deployments_total{action="boot",error_code="",status="failed"} 1 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="switch",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="switch",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="test",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0 ` if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil { t.Errorf("unexpected counter metrics: %v", err) @@ -100,7 +114,15 @@ func TestCollector_RecordDeploymentFailure(t *testing.T) { counterExpected := ` # HELP homelab_deploy_deployments_total Total deployment requests processed # TYPE homelab_deploy_deployments_total counter +homelab_deploy_deployments_total{action="boot",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="boot",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="completed"} 0 
+homelab_deploy_deployments_total{action="dry-activate",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="switch",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="switch",error_code="",status="failed"} 0 homelab_deploy_deployments_total{action="switch",error_code="build_failed",status="failed"} 1 +homelab_deploy_deployments_total{action="test",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0 ` if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil { t.Errorf("unexpected counter metrics: %v", err) @@ -116,13 +138,140 @@ func TestCollector_RecordRejection(t *testing.T) { expected := ` # HELP homelab_deploy_deployments_total Total deployment requests processed # TYPE homelab_deploy_deployments_total counter +homelab_deploy_deployments_total{action="boot",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="boot",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="switch",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="switch",error_code="",status="failed"} 0 homelab_deploy_deployments_total{action="switch",error_code="already_running",status="rejected"} 1 +homelab_deploy_deployments_total{action="test",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0 ` if err := testutil.GatherAndCompare(reg, strings.NewReader(expected), "homelab_deploy_deployments_total"); err != nil { t.Errorf("unexpected metrics: %v", err) } } +func TestCollector_MetricsInitializedAtStartup(t *testing.T) { + reg := prometheus.NewRegistry() + _ = NewCollector(reg) + + // Verify counter 
metrics are initialized with zero values before any deployments + counterExpected := ` +# HELP homelab_deploy_deployments_total Total deployment requests processed +# TYPE homelab_deploy_deployments_total counter +homelab_deploy_deployments_total{action="boot",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="boot",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="switch",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="switch",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="test",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0 +` + if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil { + t.Errorf("counter metrics not initialized: %v", err) + } + + // Verify histogram metrics are initialized with zero values before any deployments + histogramExpected := ` +# HELP homelab_deploy_deployment_duration_seconds Deployment execution time +# TYPE homelab_deploy_deployment_duration_seconds histogram +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="30"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="60"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="120"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="300"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="600"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="900"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1200"} 0 
+homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1800"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="+Inf"} 0 +homelab_deploy_deployment_duration_seconds_sum{action="boot",success="false"} 0 +homelab_deploy_deployment_duration_seconds_count{action="boot",success="false"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="30"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="60"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="120"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="300"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="600"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="900"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1200"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1800"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="+Inf"} 0 +homelab_deploy_deployment_duration_seconds_sum{action="boot",success="true"} 0 +homelab_deploy_deployment_duration_seconds_count{action="boot",success="true"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="30"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="60"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="120"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="300"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="600"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="900"} 0 
+homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1200"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1800"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="+Inf"} 0 +homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="false"} 0 +homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="false"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="30"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="60"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="120"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="300"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="600"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="900"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1200"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1800"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="+Inf"} 0 +homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="true"} 0 +homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="true"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="30"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="60"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="120"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="300"} 0 
+homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="600"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="900"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1200"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1800"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="+Inf"} 0 +homelab_deploy_deployment_duration_seconds_sum{action="switch",success="false"} 0 +homelab_deploy_deployment_duration_seconds_count{action="switch",success="false"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="30"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="60"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="120"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="300"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="600"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="900"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1200"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1800"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="+Inf"} 0 +homelab_deploy_deployment_duration_seconds_sum{action="switch",success="true"} 0 +homelab_deploy_deployment_duration_seconds_count{action="switch",success="true"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="30"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="60"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="120"} 0 
+homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="300"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="600"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="900"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1200"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1800"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="+Inf"} 0 +homelab_deploy_deployment_duration_seconds_sum{action="test",success="false"} 0 +homelab_deploy_deployment_duration_seconds_count{action="test",success="false"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="30"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="60"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="120"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="300"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="600"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="900"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1200"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1800"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="+Inf"} 0 +homelab_deploy_deployment_duration_seconds_sum{action="test",success="true"} 0 +homelab_deploy_deployment_duration_seconds_count{action="test",success="true"} 0 +` + if err := testutil.GatherAndCompare(reg, strings.NewReader(histogramExpected), "homelab_deploy_deployment_duration_seconds"); err != nil { + t.Errorf("histogram metrics not initialized: %v", err) + } +} + func TestServer_StartShutdown(t *testing.T) { srv := 
NewServer(ServerConfig{ Addr: ":0", // Let OS pick a free port