Counter and histogram metrics were absent from Prometheus scrapes until the first deployment occurred, making it impossible to distinguish "no deployments" from "exporter not running" in dashboards and alerts. Initialize all expected label combinations with zero values when the collector is created so metrics appear in every scrape from startup. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
360 lines
18 KiB
Go
360 lines
18 KiB
Go
package metrics
|
|
|
|
import (
|
|
"context"
|
|
"io"
|
|
"net/http"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"git.t-juice.club/torjus/homelab-deploy/internal/messages"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/client_golang/prometheus/testutil"
|
|
)
|
|
|
|
func TestCollector_SetInfo(t *testing.T) {
|
|
reg := prometheus.NewRegistry()
|
|
c := NewCollector(reg)
|
|
|
|
c.SetInfo("testhost", "test", "web", "1.0.0")
|
|
|
|
expected := `
|
|
# HELP homelab_deploy_info Static instance metadata
|
|
# TYPE homelab_deploy_info gauge
|
|
homelab_deploy_info{hostname="testhost",role="web",tier="test",version="1.0.0"} 1
|
|
`
|
|
if err := testutil.GatherAndCompare(reg, strings.NewReader(expected), "homelab_deploy_info"); err != nil {
|
|
t.Errorf("unexpected metrics: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestCollector_RecordDeploymentStart(t *testing.T) {
|
|
reg := prometheus.NewRegistry()
|
|
c := NewCollector(reg)
|
|
|
|
c.RecordDeploymentStart()
|
|
|
|
expected := `
|
|
# HELP homelab_deploy_deployment_in_progress 1 if deployment running, 0 otherwise
|
|
# TYPE homelab_deploy_deployment_in_progress gauge
|
|
homelab_deploy_deployment_in_progress 1
|
|
`
|
|
if err := testutil.GatherAndCompare(reg, strings.NewReader(expected), "homelab_deploy_deployment_in_progress"); err != nil {
|
|
t.Errorf("unexpected metrics: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestCollector_RecordDeploymentEnd_Success(t *testing.T) {
|
|
reg := prometheus.NewRegistry()
|
|
c := NewCollector(reg)
|
|
|
|
c.RecordDeploymentStart()
|
|
c.RecordDeploymentEnd(messages.ActionSwitch, true, 120.5)
|
|
|
|
// Check in_progress is 0
|
|
inProgressExpected := `
|
|
# HELP homelab_deploy_deployment_in_progress 1 if deployment running, 0 otherwise
|
|
# TYPE homelab_deploy_deployment_in_progress gauge
|
|
homelab_deploy_deployment_in_progress 0
|
|
`
|
|
if err := testutil.GatherAndCompare(reg, strings.NewReader(inProgressExpected), "homelab_deploy_deployment_in_progress"); err != nil {
|
|
t.Errorf("unexpected in_progress metrics: %v", err)
|
|
}
|
|
|
|
// Check counter incremented (includes all pre-initialized metrics)
|
|
counterExpected := `
|
|
# HELP homelab_deploy_deployments_total Total deployment requests processed
|
|
# TYPE homelab_deploy_deployments_total counter
|
|
homelab_deploy_deployments_total{action="boot",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="boot",error_code="",status="failed"} 0
|
|
homelab_deploy_deployments_total{action="dry-activate",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="dry-activate",error_code="",status="failed"} 0
|
|
homelab_deploy_deployments_total{action="switch",error_code="",status="completed"} 1
|
|
homelab_deploy_deployments_total{action="switch",error_code="",status="failed"} 0
|
|
homelab_deploy_deployments_total{action="test",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0
|
|
`
|
|
if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil {
|
|
t.Errorf("unexpected counter metrics: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestCollector_RecordDeploymentEnd_Failure(t *testing.T) {
|
|
reg := prometheus.NewRegistry()
|
|
c := NewCollector(reg)
|
|
|
|
c.RecordDeploymentStart()
|
|
c.RecordDeploymentEnd(messages.ActionBoot, false, 60.0)
|
|
|
|
counterExpected := `
|
|
# HELP homelab_deploy_deployments_total Total deployment requests processed
|
|
# TYPE homelab_deploy_deployments_total counter
|
|
homelab_deploy_deployments_total{action="boot",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="boot",error_code="",status="failed"} 1
|
|
homelab_deploy_deployments_total{action="dry-activate",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="dry-activate",error_code="",status="failed"} 0
|
|
homelab_deploy_deployments_total{action="switch",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="switch",error_code="",status="failed"} 0
|
|
homelab_deploy_deployments_total{action="test",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0
|
|
`
|
|
if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil {
|
|
t.Errorf("unexpected counter metrics: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestCollector_RecordDeploymentFailure(t *testing.T) {
|
|
reg := prometheus.NewRegistry()
|
|
c := NewCollector(reg)
|
|
|
|
c.RecordDeploymentStart()
|
|
c.RecordDeploymentFailure(messages.ActionSwitch, messages.ErrorBuildFailed, 300.0)
|
|
|
|
counterExpected := `
|
|
# HELP homelab_deploy_deployments_total Total deployment requests processed
|
|
# TYPE homelab_deploy_deployments_total counter
|
|
homelab_deploy_deployments_total{action="boot",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="boot",error_code="",status="failed"} 0
|
|
homelab_deploy_deployments_total{action="dry-activate",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="dry-activate",error_code="",status="failed"} 0
|
|
homelab_deploy_deployments_total{action="switch",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="switch",error_code="",status="failed"} 0
|
|
homelab_deploy_deployments_total{action="switch",error_code="build_failed",status="failed"} 1
|
|
homelab_deploy_deployments_total{action="test",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0
|
|
`
|
|
if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil {
|
|
t.Errorf("unexpected counter metrics: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestCollector_RecordRejection(t *testing.T) {
|
|
reg := prometheus.NewRegistry()
|
|
c := NewCollector(reg)
|
|
|
|
c.RecordRejection(messages.ActionSwitch, messages.ErrorAlreadyRunning)
|
|
|
|
expected := `
|
|
# HELP homelab_deploy_deployments_total Total deployment requests processed
|
|
# TYPE homelab_deploy_deployments_total counter
|
|
homelab_deploy_deployments_total{action="boot",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="boot",error_code="",status="failed"} 0
|
|
homelab_deploy_deployments_total{action="dry-activate",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="dry-activate",error_code="",status="failed"} 0
|
|
homelab_deploy_deployments_total{action="switch",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="switch",error_code="",status="failed"} 0
|
|
homelab_deploy_deployments_total{action="switch",error_code="already_running",status="rejected"} 1
|
|
homelab_deploy_deployments_total{action="test",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0
|
|
`
|
|
if err := testutil.GatherAndCompare(reg, strings.NewReader(expected), "homelab_deploy_deployments_total"); err != nil {
|
|
t.Errorf("unexpected metrics: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestCollector_MetricsInitializedAtStartup(t *testing.T) {
|
|
reg := prometheus.NewRegistry()
|
|
_ = NewCollector(reg)
|
|
|
|
// Verify counter metrics are initialized with zero values before any deployments
|
|
counterExpected := `
|
|
# HELP homelab_deploy_deployments_total Total deployment requests processed
|
|
# TYPE homelab_deploy_deployments_total counter
|
|
homelab_deploy_deployments_total{action="boot",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="boot",error_code="",status="failed"} 0
|
|
homelab_deploy_deployments_total{action="dry-activate",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="dry-activate",error_code="",status="failed"} 0
|
|
homelab_deploy_deployments_total{action="switch",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="switch",error_code="",status="failed"} 0
|
|
homelab_deploy_deployments_total{action="test",error_code="",status="completed"} 0
|
|
homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0
|
|
`
|
|
if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil {
|
|
t.Errorf("counter metrics not initialized: %v", err)
|
|
}
|
|
|
|
// Verify histogram metrics are initialized with zero values before any deployments
|
|
histogramExpected := `
|
|
# HELP homelab_deploy_deployment_duration_seconds Deployment execution time
|
|
# TYPE homelab_deploy_deployment_duration_seconds histogram
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="30"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="60"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="120"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="300"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="600"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="900"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1200"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1800"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="+Inf"} 0
|
|
homelab_deploy_deployment_duration_seconds_sum{action="boot",success="false"} 0
|
|
homelab_deploy_deployment_duration_seconds_count{action="boot",success="false"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="30"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="60"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="120"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="300"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="600"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="900"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1200"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1800"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="+Inf"} 0
|
|
homelab_deploy_deployment_duration_seconds_sum{action="boot",success="true"} 0
|
|
homelab_deploy_deployment_duration_seconds_count{action="boot",success="true"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="30"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="60"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="120"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="300"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="600"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="900"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1200"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1800"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="+Inf"} 0
|
|
homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="false"} 0
|
|
homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="false"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="30"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="60"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="120"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="300"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="600"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="900"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1200"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1800"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="+Inf"} 0
|
|
homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="true"} 0
|
|
homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="true"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="30"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="60"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="120"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="300"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="600"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="900"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1200"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1800"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="+Inf"} 0
|
|
homelab_deploy_deployment_duration_seconds_sum{action="switch",success="false"} 0
|
|
homelab_deploy_deployment_duration_seconds_count{action="switch",success="false"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="30"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="60"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="120"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="300"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="600"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="900"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1200"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1800"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="+Inf"} 0
|
|
homelab_deploy_deployment_duration_seconds_sum{action="switch",success="true"} 0
|
|
homelab_deploy_deployment_duration_seconds_count{action="switch",success="true"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="30"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="60"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="120"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="300"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="600"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="900"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1200"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1800"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="+Inf"} 0
|
|
homelab_deploy_deployment_duration_seconds_sum{action="test",success="false"} 0
|
|
homelab_deploy_deployment_duration_seconds_count{action="test",success="false"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="30"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="60"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="120"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="300"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="600"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="900"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1200"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1800"} 0
|
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="+Inf"} 0
|
|
homelab_deploy_deployment_duration_seconds_sum{action="test",success="true"} 0
|
|
homelab_deploy_deployment_duration_seconds_count{action="test",success="true"} 0
|
|
`
|
|
if err := testutil.GatherAndCompare(reg, strings.NewReader(histogramExpected), "homelab_deploy_deployment_duration_seconds"); err != nil {
|
|
t.Errorf("histogram metrics not initialized: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestServer_StartShutdown(t *testing.T) {
|
|
srv := NewServer(ServerConfig{
|
|
Addr: ":0", // Let OS pick a free port
|
|
})
|
|
|
|
if err := srv.Start(); err != nil {
|
|
t.Fatalf("failed to start server: %v", err)
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
|
|
if err := srv.Shutdown(ctx); err != nil {
|
|
t.Errorf("failed to shutdown server: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestServer_Endpoints(t *testing.T) {
|
|
srv := NewServer(ServerConfig{
|
|
Addr: "127.0.0.1:19972", // Use a fixed port for testing
|
|
})
|
|
|
|
if err := srv.Start(); err != nil {
|
|
t.Fatalf("failed to start server: %v", err)
|
|
}
|
|
|
|
defer func() {
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
_ = srv.Shutdown(ctx)
|
|
}()
|
|
|
|
// Give server time to start
|
|
time.Sleep(50 * time.Millisecond)
|
|
|
|
t.Run("health endpoint", func(t *testing.T) {
|
|
resp, err := http.Get("http://127.0.0.1:19972/health")
|
|
if err != nil {
|
|
t.Fatalf("failed to get health endpoint: %v", err)
|
|
}
|
|
defer func() { _ = resp.Body.Close() }()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
t.Errorf("expected status 200, got %d", resp.StatusCode)
|
|
}
|
|
|
|
body, _ := io.ReadAll(resp.Body)
|
|
if string(body) != "ok" {
|
|
t.Errorf("expected body 'ok', got %q", string(body))
|
|
}
|
|
})
|
|
|
|
t.Run("metrics endpoint", func(t *testing.T) {
|
|
// Set some info to have metrics to display
|
|
srv.Collector().SetInfo("testhost", "test", "web", "1.0.0")
|
|
|
|
resp, err := http.Get("http://127.0.0.1:19972/metrics")
|
|
if err != nil {
|
|
t.Fatalf("failed to get metrics endpoint: %v", err)
|
|
}
|
|
defer func() { _ = resp.Body.Close() }()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
t.Errorf("expected status 200, got %d", resp.StatusCode)
|
|
}
|
|
|
|
body, _ := io.ReadAll(resp.Body)
|
|
bodyStr := string(body)
|
|
|
|
if !strings.Contains(bodyStr, "homelab_deploy_info") {
|
|
t.Error("expected metrics to contain homelab_deploy_info")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestServer_Collector(t *testing.T) {
|
|
srv := NewServer(ServerConfig{
|
|
Addr: ":0",
|
|
})
|
|
|
|
collector := srv.Collector()
|
|
if collector == nil {
|
|
t.Error("expected non-nil collector")
|
|
}
|
|
}
|