diff --git a/cmd/homelab-deploy/main.go b/cmd/homelab-deploy/main.go index 71891a0..837014a 100644 --- a/cmd/homelab-deploy/main.go +++ b/cmd/homelab-deploy/main.go @@ -16,7 +16,7 @@ import ( "github.com/urfave/cli/v3" ) -const version = "0.1.10" +const version = "0.1.11" func main() { app := &cli.Command{ diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index c4e872b..bbb9535 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -53,9 +53,36 @@ func NewCollector(reg prometheus.Registerer) *Collector { reg.MustRegister(c.deploymentInProgress) reg.MustRegister(c.info) + c.initMetrics() + return c } +// initMetrics initializes all metric label combinations with zero values. +// This ensures metrics appear in Prometheus scrapes before any deployments occur. +func (c *Collector) initMetrics() { + actions := []messages.Action{ + messages.ActionSwitch, + messages.ActionBoot, + messages.ActionTest, + messages.ActionDryActivate, + } + + // Initialize deployment counter for common status/action combinations + for _, action := range actions { + // Successful completions (no error code) + c.deploymentsTotal.WithLabelValues("completed", string(action), "") + // Failed deployments (no error code - from RecordDeploymentEnd) + c.deploymentsTotal.WithLabelValues("failed", string(action), "") + } + + // Initialize histogram for all action/success combinations + for _, action := range actions { + c.deploymentDuration.WithLabelValues(string(action), "true") + c.deploymentDuration.WithLabelValues(string(action), "false") + } +} + // SetInfo sets the static instance metadata. func (c *Collector) SetInfo(hostname, tier, role, version string) { c.info.WithLabelValues(hostname, tier, role, version).Set(1) diff --git a/internal/metrics/metrics_test.go b/internal/metrics/metrics_test.go index 3acc383..2fe7a74 100644 --- a/internal/metrics/metrics_test.go +++ b/internal/metrics/metrics_test.go @@ -62,11 +62,18 @@ homelab_deploy_deployment_in_progress 0 t.Errorf("unexpected in_progress metrics: %v", err) } - // Check counter incremented + // Check counter incremented (includes all pre-initialized metrics) counterExpected := ` # HELP homelab_deploy_deployments_total Total deployment requests processed # TYPE homelab_deploy_deployments_total counter +homelab_deploy_deployments_total{action="boot",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="boot",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="failed"} 0 homelab_deploy_deployments_total{action="switch",error_code="",status="completed"} 1 +homelab_deploy_deployments_total{action="switch",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="test",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0 ` if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil { t.Errorf("unexpected counter metrics: %v", err) @@ -83,7 +90,14 @@ func TestCollector_RecordDeploymentEnd_Failure(t *testing.T) { counterExpected := ` # HELP homelab_deploy_deployments_total Total deployment requests processed # TYPE homelab_deploy_deployments_total counter +homelab_deploy_deployments_total{action="boot",error_code="",status="completed"} 0 homelab_deploy_deployments_total{action="boot",error_code="",status="failed"} 1 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="switch",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="switch",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="test",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0 ` if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil { t.Errorf("unexpected counter metrics: %v", err) @@ -100,7 +114,15 @@ func TestCollector_RecordDeploymentFailure(t *testing.T) { counterExpected := ` # HELP homelab_deploy_deployments_total Total deployment requests processed # TYPE homelab_deploy_deployments_total counter +homelab_deploy_deployments_total{action="boot",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="boot",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="switch",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="switch",error_code="",status="failed"} 0 homelab_deploy_deployments_total{action="switch",error_code="build_failed",status="failed"} 1 +homelab_deploy_deployments_total{action="test",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0 ` if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil { t.Errorf("unexpected counter metrics: %v", err) @@ -116,13 +138,140 @@ func TestCollector_RecordRejection(t *testing.T) { expected := ` # HELP homelab_deploy_deployments_total Total deployment requests processed # TYPE homelab_deploy_deployments_total counter +homelab_deploy_deployments_total{action="boot",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="boot",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="switch",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="switch",error_code="",status="failed"} 0 homelab_deploy_deployments_total{action="switch",error_code="already_running",status="rejected"} 1 +homelab_deploy_deployments_total{action="test",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0 ` if err := testutil.GatherAndCompare(reg, strings.NewReader(expected), "homelab_deploy_deployments_total"); err != nil { t.Errorf("unexpected metrics: %v", err) } } +func TestCollector_MetricsInitializedAtStartup(t *testing.T) { + reg := prometheus.NewRegistry() + _ = NewCollector(reg) + + // Verify counter metrics are initialized with zero values before any deployments + counterExpected := ` +# HELP homelab_deploy_deployments_total Total deployment requests processed +# TYPE homelab_deploy_deployments_total counter +homelab_deploy_deployments_total{action="boot",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="boot",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="dry-activate",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="switch",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="switch",error_code="",status="failed"} 0 +homelab_deploy_deployments_total{action="test",error_code="",status="completed"} 0 +homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0 +` + if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil { + t.Errorf("counter metrics not initialized: %v", err) + } + + // Verify histogram metrics are initialized with zero values before any deployments + histogramExpected := ` +# HELP homelab_deploy_deployment_duration_seconds Deployment execution time +# TYPE homelab_deploy_deployment_duration_seconds histogram +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="30"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="60"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="120"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="300"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="600"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="900"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1200"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1800"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="+Inf"} 0 +homelab_deploy_deployment_duration_seconds_sum{action="boot",success="false"} 0 +homelab_deploy_deployment_duration_seconds_count{action="boot",success="false"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="30"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="60"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="120"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="300"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="600"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="900"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1200"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1800"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="+Inf"} 0 +homelab_deploy_deployment_duration_seconds_sum{action="boot",success="true"} 0 +homelab_deploy_deployment_duration_seconds_count{action="boot",success="true"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="30"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="60"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="120"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="300"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="600"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="900"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1200"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1800"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="+Inf"} 0 +homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="false"} 0 +homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="false"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="30"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="60"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="120"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="300"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="600"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="900"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1200"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1800"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="+Inf"} 0 +homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="true"} 0 +homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="true"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="30"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="60"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="120"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="300"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="600"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="900"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1200"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1800"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="+Inf"} 0 +homelab_deploy_deployment_duration_seconds_sum{action="switch",success="false"} 0 +homelab_deploy_deployment_duration_seconds_count{action="switch",success="false"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="30"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="60"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="120"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="300"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="600"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="900"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1200"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1800"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="+Inf"} 0 +homelab_deploy_deployment_duration_seconds_sum{action="switch",success="true"} 0 +homelab_deploy_deployment_duration_seconds_count{action="switch",success="true"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="30"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="60"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="120"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="300"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="600"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="900"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1200"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1800"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="+Inf"} 0 +homelab_deploy_deployment_duration_seconds_sum{action="test",success="false"} 0 +homelab_deploy_deployment_duration_seconds_count{action="test",success="false"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="30"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="60"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="120"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="300"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="600"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="900"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1200"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1800"} 0 +homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="+Inf"} 0 +homelab_deploy_deployment_duration_seconds_sum{action="test",success="true"} 0 +homelab_deploy_deployment_duration_seconds_count{action="test",success="true"} 0 +` + if err := testutil.GatherAndCompare(reg, strings.NewReader(histogramExpected), "homelab_deploy_deployment_duration_seconds"); err != nil { + t.Errorf("histogram metrics not initialized: %v", err) + } +} + func TestServer_StartShutdown(t *testing.T) { srv := NewServer(ServerConfig{ Addr: ":0", // Let OS pick a free port