This repository has been archived on 2026-03-09. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
homelab-deploy/internal/metrics/metrics.go
Torjus Håkestad 746e30b24f fix: initialize counter and histogram metrics at startup
Counter and histogram metrics were absent from Prometheus scrapes until
the first deployment occurred, making it impossible to distinguish
"no deployments" from "exporter not running" in dashboards and alerts.

Initialize all expected label combinations with zero values when the
collector is created so metrics appear in every scrape from startup.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-08 21:29:36 +01:00

126 lines
4.0 KiB
Go

// Package metrics provides Prometheus metrics for the homelab-deploy listener.
package metrics
import (
"git.t-juice.club/torjus/homelab-deploy/internal/messages"
"github.com/prometheus/client_golang/prometheus"
)
// Collector bundles every Prometheus metric exported by the listener.
// Instances are built by NewCollector, which also registers the metrics.
type Collector struct {
	// info is a labelled gauge (hostname, tier, role, version) that SetInfo
	// sets to 1 to expose instance metadata.
	info *prometheus.GaugeVec

	// deploymentInProgress is set to 1 when a deployment starts and back to
	// 0 when it ends or fails.
	deploymentInProgress prometheus.Gauge

	// deploymentsTotal counts processed deployment requests, labelled by
	// status, action and error_code.
	deploymentsTotal *prometheus.CounterVec

	// deploymentDuration observes deployment execution time in seconds,
	// labelled by action and success.
	deploymentDuration *prometheus.HistogramVec
}
// NewCollector builds the listener metrics, registers each of them with reg
// and pre-creates the expected label combinations via initMetrics.
//
// Registration uses MustRegister, so a registration failure (for example a
// duplicate metric on the same registerer) panics instead of returning an error.
func NewCollector(reg prometheus.Registerer) *Collector {
	total := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "homelab_deploy_deployments_total",
			Help: "Total deployment requests processed",
		},
		[]string{"status", "action", "error_code"},
	)

	duration := prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name: "homelab_deploy_deployment_duration_seconds",
			Help: "Deployment execution time",
			// Bucket boundaries (seconds) for typical NixOS build times.
			Buckets: []float64{30, 60, 120, 300, 600, 900, 1200, 1800},
		},
		[]string{"action", "success"},
	)

	inProgress := prometheus.NewGauge(
		prometheus.GaugeOpts{
			Name: "homelab_deploy_deployment_in_progress",
			Help: "1 if deployment running, 0 otherwise",
		},
	)

	info := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "homelab_deploy_info",
			Help: "Static instance metadata",
		},
		[]string{"hostname", "tier", "role", "version"},
	)

	c := &Collector{
		deploymentsTotal:     total,
		deploymentDuration:   duration,
		deploymentInProgress: inProgress,
		info:                 info,
	}

	// Register one metric at a time, in a fixed order, so a panic points at
	// the first metric that failed to register.
	for _, m := range []prometheus.Collector{total, duration, inProgress, info} {
		reg.MustRegister(m)
	}

	c.initMetrics()
	return c
}
// initMetrics pre-creates the label combinations that are expected during
// normal operation, so the corresponding series exist (at zero) before the
// first deployment is recorded.
//
// For every known action it creates:
//   - counter children for status "completed" and "failed" with an empty
//     error_code, matching what RecordDeploymentEnd emits;
//   - histogram children for success "true" and "false".
//
// Combinations carrying a non-empty error code, and the "rejected" status,
// are not pre-created here.
func (c *Collector) initMetrics() {
	knownActions := [...]messages.Action{
		messages.ActionSwitch,
		messages.ActionBoot,
		messages.ActionTest,
		messages.ActionDryActivate,
	}

	for _, a := range knownActions {
		name := string(a)

		// Calling WithLabelValues is enough to create the child metric; the
		// returned handle is deliberately discarded.
		for _, status := range [...]string{"completed", "failed"} {
			c.deploymentsTotal.WithLabelValues(status, name, "")
		}
		for _, success := range [...]string{"true", "false"} {
			c.deploymentDuration.WithLabelValues(name, success)
		}
	}
}
// SetInfo publishes the instance metadata by setting the info gauge to 1 for
// the given hostname, tier, role and version label values.
func (c *Collector) SetInfo(hostname, tier, role, version string) {
	infoGauge := c.info.WithLabelValues(hostname, tier, role, version)
	infoGauge.Set(1)
}
// RecordDeploymentStart flags that a deployment is now running by setting the
// in-progress gauge to 1.
func (c *Collector) RecordDeploymentStart() {
	const running = 1
	c.deploymentInProgress.Set(running)
}
// RecordDeploymentEnd records a finished deployment.
//
// It clears the in-progress gauge, observes durationSeconds in the duration
// histogram under success "true" or "false", and increments the deployments
// counter with status "completed" or "failed" and an empty error_code.
func (c *Collector) RecordDeploymentEnd(action messages.Action, success bool, durationSeconds float64) {
	c.deploymentInProgress.Set(0)

	// Default to the failure labels and flip both together on success.
	status, successLabel := "failed", "false"
	if success {
		status, successLabel = "completed", "true"
	}

	name := string(action)
	c.deploymentDuration.WithLabelValues(name, successLabel).Observe(durationSeconds)
	c.deploymentsTotal.WithLabelValues(status, name, "").Inc()
}
// RecordDeploymentFailure records a deployment that failed with a specific
// error code.
//
// It clears the in-progress gauge, observes durationSeconds under
// success "false", and increments the deployments counter with status
// "failed" and the given error code as the error_code label.
func (c *Collector) RecordDeploymentFailure(action messages.Action, errorCode messages.ErrorCode, durationSeconds float64) {
	name := string(action)

	c.deploymentInProgress.Set(0)
	c.deploymentDuration.WithLabelValues(name, "false").Observe(durationSeconds)
	c.deploymentsTotal.WithLabelValues("failed", name, string(errorCode)).Inc()
}
// RecordRejection counts a deployment request that was rejected, incrementing
// the deployments counter with status "rejected" and the given error code.
// The in-progress gauge and the duration histogram are not touched.
func (c *Collector) RecordRejection(action messages.Action, errorCode messages.ErrorCode) {
	name, code := string(action), string(errorCode)
	c.deploymentsTotal.WithLabelValues("rejected", name, code).Inc()
}