Add an optional Prometheus metrics HTTP endpoint to the listener for monitoring deployment operations.

Includes four metrics:
- homelab_deploy_deployments_total (counter with status/action/error_code)
- homelab_deploy_deployment_duration_seconds (histogram with action/success)
- homelab_deploy_deployment_in_progress (gauge)
- homelab_deploy_info (gauge with hostname/tier/role/version)

New CLI flags: --metrics-enabled, --metrics-addr (default :9972)
New NixOS options: metrics.enable, metrics.address, metrics.openFirewall

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
99 lines
3.1 KiB
Go
99 lines
3.1 KiB
Go
// Package metrics provides Prometheus metrics for the homelab-deploy listener.
|
|
package metrics
|
|
|
|
import (
|
|
"git.t-juice.club/torjus/homelab-deploy/internal/messages"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
)
|
|
|
|
// Collector holds all Prometheus metrics for the listener.
|
|
type Collector struct {
|
|
deploymentsTotal *prometheus.CounterVec
|
|
deploymentDuration *prometheus.HistogramVec
|
|
deploymentInProgress prometheus.Gauge
|
|
info *prometheus.GaugeVec
|
|
}
|
|
|
|
// NewCollector creates a new metrics collector and registers it with the given registerer.
|
|
func NewCollector(reg prometheus.Registerer) *Collector {
|
|
c := &Collector{
|
|
deploymentsTotal: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "homelab_deploy_deployments_total",
|
|
Help: "Total deployment requests processed",
|
|
},
|
|
[]string{"status", "action", "error_code"},
|
|
),
|
|
deploymentDuration: prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: "homelab_deploy_deployment_duration_seconds",
|
|
Help: "Deployment execution time",
|
|
// Bucket boundaries for typical NixOS build times
|
|
Buckets: []float64{30, 60, 120, 300, 600, 900, 1200, 1800},
|
|
},
|
|
[]string{"action", "success"},
|
|
),
|
|
deploymentInProgress: prometheus.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "homelab_deploy_deployment_in_progress",
|
|
Help: "1 if deployment running, 0 otherwise",
|
|
},
|
|
),
|
|
info: prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "homelab_deploy_info",
|
|
Help: "Static instance metadata",
|
|
},
|
|
[]string{"hostname", "tier", "role", "version"},
|
|
),
|
|
}
|
|
|
|
reg.MustRegister(c.deploymentsTotal)
|
|
reg.MustRegister(c.deploymentDuration)
|
|
reg.MustRegister(c.deploymentInProgress)
|
|
reg.MustRegister(c.info)
|
|
|
|
return c
|
|
}
|
|
|
|
// SetInfo sets the static instance metadata.
|
|
func (c *Collector) SetInfo(hostname, tier, role, version string) {
|
|
c.info.WithLabelValues(hostname, tier, role, version).Set(1)
|
|
}
|
|
|
|
// RecordDeploymentStart marks the start of a deployment.
|
|
func (c *Collector) RecordDeploymentStart() {
|
|
c.deploymentInProgress.Set(1)
|
|
}
|
|
|
|
// RecordDeploymentEnd records the completion of a deployment.
|
|
func (c *Collector) RecordDeploymentEnd(action messages.Action, success bool, durationSeconds float64) {
|
|
c.deploymentInProgress.Set(0)
|
|
|
|
successLabel := "false"
|
|
if success {
|
|
successLabel = "true"
|
|
}
|
|
|
|
c.deploymentDuration.WithLabelValues(string(action), successLabel).Observe(durationSeconds)
|
|
|
|
status := "completed"
|
|
if !success {
|
|
status = "failed"
|
|
}
|
|
|
|
c.deploymentsTotal.WithLabelValues(status, string(action), "").Inc()
|
|
}
|
|
|
|
// RecordDeploymentFailure records a deployment failure with an error code.
|
|
func (c *Collector) RecordDeploymentFailure(action messages.Action, errorCode messages.ErrorCode, durationSeconds float64) {
|
|
c.deploymentInProgress.Set(0)
|
|
c.deploymentDuration.WithLabelValues(string(action), "false").Observe(durationSeconds)
|
|
c.deploymentsTotal.WithLabelValues("failed", string(action), string(errorCode)).Inc()
|
|
}
|
|
|
|
// RecordRejection records a rejected deployment request.
|
|
func (c *Collector) RecordRejection(action messages.Action, errorCode messages.ErrorCode) {
|
|
c.deploymentsTotal.WithLabelValues("rejected", string(action), string(errorCode)).Inc()
|
|
}
|