// Package metrics provides Prometheus metrics for the homelab-deploy listener. package metrics import ( "code.t-juice.club/torjus/homelab-deploy/internal/messages" "github.com/prometheus/client_golang/prometheus" ) // Collector holds all Prometheus metrics for the listener. type Collector struct { deploymentsTotal *prometheus.CounterVec deploymentDuration *prometheus.HistogramVec deploymentInProgress prometheus.Gauge info *prometheus.GaugeVec } // NewCollector creates a new metrics collector and registers it with the given registerer. func NewCollector(reg prometheus.Registerer) *Collector { c := &Collector{ deploymentsTotal: prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "homelab_deploy_deployments_total", Help: "Total deployment requests processed", }, []string{"status", "action", "error_code"}, ), deploymentDuration: prometheus.NewHistogramVec( prometheus.HistogramOpts{ Name: "homelab_deploy_deployment_duration_seconds", Help: "Deployment execution time", // Bucket boundaries for typical NixOS build times Buckets: []float64{30, 60, 120, 300, 600, 900, 1200, 1800}, }, []string{"action", "success"}, ), deploymentInProgress: prometheus.NewGauge( prometheus.GaugeOpts{ Name: "homelab_deploy_deployment_in_progress", Help: "1 if deployment running, 0 otherwise", }, ), info: prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "homelab_deploy_info", Help: "Static instance metadata", }, []string{"hostname", "tier", "role", "version"}, ), } reg.MustRegister(c.deploymentsTotal) reg.MustRegister(c.deploymentDuration) reg.MustRegister(c.deploymentInProgress) reg.MustRegister(c.info) c.initMetrics() return c } // initMetrics initializes all metric label combinations with zero values. // This ensures metrics appear in Prometheus scrapes before any deployments occur. 
func (c *Collector) initMetrics() { actions := []messages.Action{ messages.ActionSwitch, messages.ActionBoot, messages.ActionTest, messages.ActionDryActivate, } // Initialize deployment counter for common status/action combinations for _, action := range actions { // Successful completions (no error code) c.deploymentsTotal.WithLabelValues("completed", string(action), "") // Failed deployments (no error code - from RecordDeploymentEnd) c.deploymentsTotal.WithLabelValues("failed", string(action), "") } // Initialize histogram for all action/success combinations for _, action := range actions { c.deploymentDuration.WithLabelValues(string(action), "true") c.deploymentDuration.WithLabelValues(string(action), "false") } } // SetInfo sets the static instance metadata. func (c *Collector) SetInfo(hostname, tier, role, version string) { c.info.WithLabelValues(hostname, tier, role, version).Set(1) } // RecordDeploymentStart marks the start of a deployment. func (c *Collector) RecordDeploymentStart() { c.deploymentInProgress.Set(1) } // RecordDeploymentEnd records the completion of a deployment. func (c *Collector) RecordDeploymentEnd(action messages.Action, success bool, durationSeconds float64) { c.deploymentInProgress.Set(0) successLabel := "false" if success { successLabel = "true" } c.deploymentDuration.WithLabelValues(string(action), successLabel).Observe(durationSeconds) status := "completed" if !success { status = "failed" } c.deploymentsTotal.WithLabelValues(status, string(action), "").Inc() } // RecordDeploymentFailure records a deployment failure with an error code. func (c *Collector) RecordDeploymentFailure(action messages.Action, errorCode messages.ErrorCode, durationSeconds float64) { c.deploymentInProgress.Set(0) c.deploymentDuration.WithLabelValues(string(action), "false").Observe(durationSeconds) c.deploymentsTotal.WithLabelValues("failed", string(action), string(errorCode)).Inc() } // RecordRejection records a rejected deployment request. 
func (c *Collector) RecordRejection(action messages.Action, errorCode messages.ErrorCode) {
	// Rejections only touch the request counter; the in-progress gauge and the
	// duration histogram are left untouched here.
	name, code := string(action), string(errorCode)
	c.deploymentsTotal.WithLabelValues("rejected", name, code).Inc()
}