Compare commits
4 Commits
master
...
fix/metric
| Author | SHA1 | Date | |
|---|---|---|---|
|
c272ce6903
|
|||
|
c934d1ba38
|
|||
|
723a1f769f
|
|||
|
46fc6a7e96
|
54
README.md
54
README.md
@@ -63,6 +63,8 @@ homelab-deploy listener \
|
|||||||
| `--discover-subject` | No | Discovery subject (default: `deploy.discover`) |
|
| `--discover-subject` | No | Discovery subject (default: `deploy.discover`) |
|
||||||
| `--metrics-enabled` | No | Enable Prometheus metrics endpoint |
|
| `--metrics-enabled` | No | Enable Prometheus metrics endpoint |
|
||||||
| `--metrics-addr` | No | Metrics HTTP server address (default: `:9972`) |
|
| `--metrics-addr` | No | Metrics HTTP server address (default: `:9972`) |
|
||||||
|
| `--heartbeat-interval` | No | Status update interval in seconds during deployment (default: `15`) |
|
||||||
|
| `--debug` | No | Enable debug logging for troubleshooting |
|
||||||
|
|
||||||
#### Subject Templates
|
#### Subject Templates
|
||||||
|
|
||||||
@@ -214,6 +216,7 @@ Add the module to your NixOS configuration:
|
|||||||
| `metrics.enable` | bool | `false` | Enable Prometheus metrics endpoint |
|
| `metrics.enable` | bool | `false` | Enable Prometheus metrics endpoint |
|
||||||
| `metrics.address` | string | `":9972"` | Metrics HTTP server address |
|
| `metrics.address` | string | `":9972"` | Metrics HTTP server address |
|
||||||
| `metrics.openFirewall` | bool | `false` | Open firewall for metrics port |
|
| `metrics.openFirewall` | bool | `false` | Open firewall for metrics port |
|
||||||
|
| `extraArgs` | list of string | `[]` | Extra command-line arguments (e.g., `["--debug"]`) |
|
||||||
|
|
||||||
Default `deploySubjects`:
|
Default `deploySubjects`:
|
||||||
```nix
|
```nix
|
||||||
@@ -298,6 +301,57 @@ histogram_quantile(0.95, rate(homelab_deploy_deployment_duration_seconds_bucket[
|
|||||||
sum(homelab_deploy_deployment_in_progress)
|
sum(homelab_deploy_deployment_in_progress)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Debug Logging
|
||||||
|
|
||||||
|
Enable debug logging to diagnose issues with deployments or metrics:
|
||||||
|
|
||||||
|
**CLI:**
|
||||||
|
```bash
|
||||||
|
homelab-deploy listener --debug \
|
||||||
|
--hostname myhost \
|
||||||
|
--tier prod \
|
||||||
|
--nats-url nats://nats.example.com:4222 \
|
||||||
|
--nkey-file /run/secrets/listener.nkey \
|
||||||
|
--flake-url git+https://git.example.com/user/nixos-configs.git \
|
||||||
|
--metrics-enabled
|
||||||
|
```
|
||||||
|
|
||||||
|
**NixOS module:**
|
||||||
|
```nix
|
||||||
|
services.homelab-deploy.listener = {
|
||||||
|
enable = true;
|
||||||
|
tier = "prod";
|
||||||
|
natsUrl = "nats://nats.example.com:4222";
|
||||||
|
nkeyFile = "/run/secrets/homelab-deploy-nkey";
|
||||||
|
flakeUrl = "git+https://git.example.com/user/nixos-configs.git";
|
||||||
|
metrics.enable = true;
|
||||||
|
extraArgs = [ "--debug" ];
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
With debug logging enabled, the listener outputs detailed information about metrics recording:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"level":"DEBUG","msg":"recording deployment start metric","metrics_enabled":true}
|
||||||
|
{"level":"DEBUG","msg":"recording deployment end metric (success)","action":"switch","success":true,"duration_seconds":120.5}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Metrics Showing Zero
|
||||||
|
|
||||||
|
If deployment metrics remain at zero after deployments:
|
||||||
|
|
||||||
|
1. **Check metrics are enabled**: Verify `--metrics-enabled` is set and the metrics endpoint is accessible at `/metrics`
|
||||||
|
|
||||||
|
2. **Enable debug logging**: Use `--debug` to confirm metrics recording is being called
|
||||||
|
|
||||||
|
3. **Check deployment status**: Deployment metrics are recorded only when a deployment finishes (success or failure). Rejected requests (e.g., because another deployment is already running) still increment the counter with `status="rejected"`, but no duration is recorded for them.
|
||||||
|
|
||||||
|
4. **Check after restart**: After a successful `switch` deployment, the listener restarts, and metrics reset to zero in the new instance. Before restarting, the listener waits up to 60 seconds for a Prometheus scrape so that the final metrics can be captured.
|
||||||
|
|
||||||
|
5. **Verify Prometheus scrape timing**: Ensure the Prometheus scrape interval is shorter than the listener's 60-second wait, so that a scrape captures the metrics before the listener restarts
|
||||||
|
|
||||||
## Message Protocol
|
## Message Protocol
|
||||||
|
|
||||||
### Deploy Request
|
### Deploy Request
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ import (
|
|||||||
"github.com/urfave/cli/v3"
|
"github.com/urfave/cli/v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
const version = "0.1.11"
|
const version = "0.1.14"
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
app := &cli.Command{
|
app := &cli.Command{
|
||||||
@@ -42,6 +42,10 @@ func listenerCommand() *cli.Command {
|
|||||||
Name: "listener",
|
Name: "listener",
|
||||||
Usage: "Run as a deployment listener (systemd service mode)",
|
Usage: "Run as a deployment listener (systemd service mode)",
|
||||||
Flags: []cli.Flag{
|
Flags: []cli.Flag{
|
||||||
|
&cli.BoolFlag{
|
||||||
|
Name: "debug",
|
||||||
|
Usage: "Enable debug logging for troubleshooting",
|
||||||
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "hostname",
|
Name: "hostname",
|
||||||
Usage: "Hostname for this listener",
|
Usage: "Hostname for this listener",
|
||||||
@@ -125,10 +129,16 @@ func listenerCommand() *cli.Command {
|
|||||||
MetricsEnabled: c.Bool("metrics-enabled"),
|
MetricsEnabled: c.Bool("metrics-enabled"),
|
||||||
MetricsAddr: c.String("metrics-addr"),
|
MetricsAddr: c.String("metrics-addr"),
|
||||||
Version: version,
|
Version: version,
|
||||||
|
Debug: c.Bool("debug"),
|
||||||
|
}
|
||||||
|
|
||||||
|
logLevel := slog.LevelInfo
|
||||||
|
if c.Bool("debug") {
|
||||||
|
logLevel = slog.LevelDebug
|
||||||
}
|
}
|
||||||
|
|
||||||
logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
|
logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
|
||||||
Level: slog.LevelInfo,
|
Level: logLevel,
|
||||||
}))
|
}))
|
||||||
|
|
||||||
l := listener.New(cfg, logger)
|
l := listener.New(cfg, logger)
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ type Config struct {
|
|||||||
MetricsEnabled bool
|
MetricsEnabled bool
|
||||||
MetricsAddr string
|
MetricsAddr string
|
||||||
Version string
|
Version string
|
||||||
|
Debug bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// Listener handles deployment requests from NATS.
|
// Listener handles deployment requests from NATS.
|
||||||
@@ -203,7 +204,14 @@ func (l *Listener) handleDeployRequest(subject string, data []byte) {
|
|||||||
|
|
||||||
// Record deployment start for metrics
|
// Record deployment start for metrics
|
||||||
if l.metrics != nil {
|
if l.metrics != nil {
|
||||||
|
l.logger.Debug("recording deployment start metric",
|
||||||
|
"metrics_enabled", true,
|
||||||
|
)
|
||||||
l.metrics.RecordDeploymentStart()
|
l.metrics.RecordDeploymentStart()
|
||||||
|
} else {
|
||||||
|
l.logger.Debug("skipping deployment start metric",
|
||||||
|
"metrics_enabled", false,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
|
|
||||||
@@ -219,9 +227,19 @@ func (l *Listener) handleDeployRequest(subject string, data []byte) {
|
|||||||
messages.StatusFailed,
|
messages.StatusFailed,
|
||||||
fmt.Sprintf("revision validation failed: %v", err),
|
fmt.Sprintf("revision validation failed: %v", err),
|
||||||
).WithError(messages.ErrorInvalidRevision))
|
).WithError(messages.ErrorInvalidRevision))
|
||||||
|
duration := time.Since(startTime).Seconds()
|
||||||
if l.metrics != nil {
|
if l.metrics != nil {
|
||||||
duration := time.Since(startTime).Seconds()
|
l.logger.Debug("recording deployment failure metric (revision validation)",
|
||||||
|
"action", req.Action,
|
||||||
|
"error_code", messages.ErrorInvalidRevision,
|
||||||
|
"duration_seconds", duration,
|
||||||
|
)
|
||||||
l.metrics.RecordDeploymentFailure(req.Action, messages.ErrorInvalidRevision, duration)
|
l.metrics.RecordDeploymentFailure(req.Action, messages.ErrorInvalidRevision, duration)
|
||||||
|
} else {
|
||||||
|
l.logger.Debug("skipping deployment failure metric",
|
||||||
|
"metrics_enabled", false,
|
||||||
|
"duration_seconds", duration,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -265,11 +283,32 @@ func (l *Listener) handleDeployRequest(subject string, data []byte) {
|
|||||||
l.logger.Error("failed to flush completed response", "error", err)
|
l.logger.Error("failed to flush completed response", "error", err)
|
||||||
}
|
}
|
||||||
if l.metrics != nil {
|
if l.metrics != nil {
|
||||||
|
l.logger.Debug("recording deployment end metric (success)",
|
||||||
|
"action", req.Action,
|
||||||
|
"success", true,
|
||||||
|
"duration_seconds", duration,
|
||||||
|
)
|
||||||
l.metrics.RecordDeploymentEnd(req.Action, true, duration)
|
l.metrics.RecordDeploymentEnd(req.Action, true, duration)
|
||||||
|
} else {
|
||||||
|
l.logger.Debug("skipping deployment end metric",
|
||||||
|
"metrics_enabled", false,
|
||||||
|
"duration_seconds", duration,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// After a successful switch, signal restart so we pick up any new version
|
// After a successful switch, signal restart so we pick up any new version
|
||||||
if req.Action == messages.ActionSwitch {
|
if req.Action == messages.ActionSwitch {
|
||||||
|
// Wait for metrics scrape before restarting (if metrics enabled)
|
||||||
|
if l.metricsServer != nil {
|
||||||
|
l.logger.Info("waiting for metrics scrape before restart")
|
||||||
|
select {
|
||||||
|
case <-l.metricsServer.ScrapeCh():
|
||||||
|
l.logger.Info("metrics scraped, proceeding with restart")
|
||||||
|
case <-time.After(60 * time.Second):
|
||||||
|
l.logger.Warn("no metrics scrape within timeout, proceeding with restart anyway")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
select {
|
select {
|
||||||
case l.restartCh <- struct{}{}:
|
case l.restartCh <- struct{}{}:
|
||||||
default:
|
default:
|
||||||
@@ -294,7 +333,17 @@ func (l *Listener) handleDeployRequest(subject string, data []byte) {
|
|||||||
fmt.Sprintf("deployment failed (exit code %d): %s", result.ExitCode, result.Stderr),
|
fmt.Sprintf("deployment failed (exit code %d): %s", result.ExitCode, result.Stderr),
|
||||||
).WithError(errorCode))
|
).WithError(errorCode))
|
||||||
if l.metrics != nil {
|
if l.metrics != nil {
|
||||||
|
l.logger.Debug("recording deployment failure metric",
|
||||||
|
"action", req.Action,
|
||||||
|
"error_code", errorCode,
|
||||||
|
"duration_seconds", duration,
|
||||||
|
)
|
||||||
l.metrics.RecordDeploymentFailure(req.Action, errorCode, duration)
|
l.metrics.RecordDeploymentFailure(req.Action, errorCode, duration)
|
||||||
|
} else {
|
||||||
|
l.logger.Debug("skipping deployment failure metric",
|
||||||
|
"metrics_enabled", false,
|
||||||
|
"duration_seconds", duration,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,8 +2,14 @@ package listener
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"git.t-juice.club/torjus/homelab-deploy/internal/messages"
|
||||||
|
"git.t-juice.club/torjus/homelab-deploy/internal/metrics"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestNew(t *testing.T) {
|
func TestNew(t *testing.T) {
|
||||||
@@ -51,3 +57,148 @@ func TestNew_WithLogger(t *testing.T) {
|
|||||||
t.Error("should use provided logger")
|
t.Error("should use provided logger")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNew_WithMetricsEnabled(t *testing.T) {
|
||||||
|
cfg := Config{
|
||||||
|
Hostname: "test-host",
|
||||||
|
Tier: "test",
|
||||||
|
MetricsEnabled: true,
|
||||||
|
MetricsAddr: ":0",
|
||||||
|
}
|
||||||
|
|
||||||
|
l := New(cfg, nil)
|
||||||
|
|
||||||
|
if l.metricsServer == nil {
|
||||||
|
t.Error("metricsServer should not be nil when MetricsEnabled is true")
|
||||||
|
}
|
||||||
|
if l.metrics == nil {
|
||||||
|
t.Error("metrics should not be nil when MetricsEnabled is true")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestListener_MetricsRecordedOnDeployment(t *testing.T) {
|
||||||
|
// This test verifies that the listener correctly calls metrics functions
|
||||||
|
// when processing deployments. We test this by directly calling the internal
|
||||||
|
// metrics recording logic that handleDeployRequest uses.
|
||||||
|
|
||||||
|
reg := prometheus.NewRegistry()
|
||||||
|
collector := metrics.NewCollector(reg)
|
||||||
|
|
||||||
|
// Simulate what handleDeployRequest does for a successful deployment
|
||||||
|
collector.RecordDeploymentStart()
|
||||||
|
collector.RecordDeploymentEnd(messages.ActionSwitch, true, 120.5)
|
||||||
|
|
||||||
|
// Verify counter was incremented
|
||||||
|
counterExpected := `
|
||||||
|
# HELP homelab_deploy_deployments_total Total deployment requests processed
|
||||||
|
# TYPE homelab_deploy_deployments_total counter
|
||||||
|
homelab_deploy_deployments_total{action="boot",error_code="",status="completed"} 0
|
||||||
|
homelab_deploy_deployments_total{action="boot",error_code="",status="failed"} 0
|
||||||
|
homelab_deploy_deployments_total{action="dry-activate",error_code="",status="completed"} 0
|
||||||
|
homelab_deploy_deployments_total{action="dry-activate",error_code="",status="failed"} 0
|
||||||
|
homelab_deploy_deployments_total{action="switch",error_code="",status="completed"} 1
|
||||||
|
homelab_deploy_deployments_total{action="switch",error_code="",status="failed"} 0
|
||||||
|
homelab_deploy_deployments_total{action="test",error_code="",status="completed"} 0
|
||||||
|
homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0
|
||||||
|
`
|
||||||
|
if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil {
|
||||||
|
t.Errorf("unexpected counter metrics: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify histogram was updated (120.5 seconds falls into le="300" and higher buckets)
|
||||||
|
histogramExpected := `
|
||||||
|
# HELP homelab_deploy_deployment_duration_seconds Deployment execution time
|
||||||
|
# TYPE homelab_deploy_deployment_duration_seconds histogram
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="boot",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="boot",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="boot",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="boot",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="switch",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="switch",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="300"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="600"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="900"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1200"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1800"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="+Inf"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="switch",success="true"} 120.5
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="switch",success="true"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="test",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="test",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="test",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="test",success="true"} 0
|
||||||
|
`
|
||||||
|
if err := testutil.GatherAndCompare(reg, strings.NewReader(histogramExpected), "homelab_deploy_deployment_duration_seconds"); err != nil {
|
||||||
|
t.Errorf("unexpected histogram metrics: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -78,6 +78,103 @@ homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0
|
|||||||
if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil {
|
if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil {
|
||||||
t.Errorf("unexpected counter metrics: %v", err)
|
t.Errorf("unexpected counter metrics: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check histogram recorded the duration (120.5 seconds falls into le="300" and higher buckets)
|
||||||
|
histogramExpected := `
|
||||||
|
# HELP homelab_deploy_deployment_duration_seconds Deployment execution time
|
||||||
|
# TYPE homelab_deploy_deployment_duration_seconds histogram
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="boot",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="boot",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="boot",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="boot",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="switch",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="switch",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="300"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="600"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="900"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1200"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1800"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="+Inf"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="switch",success="true"} 120.5
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="switch",success="true"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="test",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="test",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="test",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="test",success="true"} 0
|
||||||
|
`
|
||||||
|
if err := testutil.GatherAndCompare(reg, strings.NewReader(histogramExpected), "homelab_deploy_deployment_duration_seconds"); err != nil {
|
||||||
|
t.Errorf("unexpected histogram metrics: %v", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCollector_RecordDeploymentEnd_Failure(t *testing.T) {
|
func TestCollector_RecordDeploymentEnd_Failure(t *testing.T) {
|
||||||
@@ -102,6 +199,103 @@ homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0
|
|||||||
if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil {
|
if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil {
|
||||||
t.Errorf("unexpected counter metrics: %v", err)
|
t.Errorf("unexpected counter metrics: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check histogram recorded the duration (60.0 seconds falls into le="60" and higher buckets)
|
||||||
|
histogramExpected := `
|
||||||
|
# HELP homelab_deploy_deployment_duration_seconds Deployment execution time
|
||||||
|
# TYPE homelab_deploy_deployment_duration_seconds histogram
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="60"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="120"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="300"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="600"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="900"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1200"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1800"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="+Inf"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="boot",success="false"} 60
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="boot",success="false"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="boot",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="boot",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="switch",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="switch",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="switch",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="switch",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="test",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="test",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="test",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="test",success="true"} 0
|
||||||
|
`
|
||||||
|
if err := testutil.GatherAndCompare(reg, strings.NewReader(histogramExpected), "homelab_deploy_deployment_duration_seconds"); err != nil {
|
||||||
|
t.Errorf("unexpected histogram metrics: %v", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCollector_RecordDeploymentFailure(t *testing.T) {
|
func TestCollector_RecordDeploymentFailure(t *testing.T) {
|
||||||
@@ -127,6 +321,103 @@ homelab_deploy_deployments_total{action="test",error_code="",status="failed"} 0
|
|||||||
if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil {
|
if err := testutil.GatherAndCompare(reg, strings.NewReader(counterExpected), "homelab_deploy_deployments_total"); err != nil {
|
||||||
t.Errorf("unexpected counter metrics: %v", err)
|
t.Errorf("unexpected counter metrics: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check histogram recorded the duration (300.0 seconds falls into le="300" and higher buckets)
|
||||||
|
histogramExpected := `
|
||||||
|
# HELP homelab_deploy_deployment_duration_seconds Deployment execution time
|
||||||
|
# TYPE homelab_deploy_deployment_duration_seconds histogram
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="false",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="boot",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="boot",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="boot",success="true",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="boot",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="boot",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="false",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="dry-activate",success="true",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="dry-activate",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="dry-activate",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="300"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="600"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="900"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1200"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="1800"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="false",le="+Inf"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="switch",success="false"} 300
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="switch",success="false"} 1
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="switch",success="true",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="switch",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="switch",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="false",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="test",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="test",success="false"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="30"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="60"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="120"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="300"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="600"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="900"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1200"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="1800"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_bucket{action="test",success="true",le="+Inf"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_sum{action="test",success="true"} 0
|
||||||
|
homelab_deploy_deployment_duration_seconds_count{action="test",success="true"} 0
|
||||||
|
`
|
||||||
|
if err := testutil.GatherAndCompare(reg, strings.NewReader(histogramExpected), "homelab_deploy_deployment_duration_seconds"); err != nil {
|
||||||
|
t.Errorf("unexpected histogram metrics: %v", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCollector_RecordRejection(t *testing.T) {
|
func TestCollector_RecordRejection(t *testing.T) {
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ type Server struct {
|
|||||||
registry *prometheus.Registry
|
registry *prometheus.Registry
|
||||||
collector *Collector
|
collector *Collector
|
||||||
logger *slog.Logger
|
logger *slog.Logger
|
||||||
|
scrapeCh chan struct{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewServer creates a new metrics server.
|
// NewServer creates a new metrics server.
|
||||||
@@ -35,9 +36,20 @@ func NewServer(cfg ServerConfig) *Server {
|
|||||||
registry := prometheus.NewRegistry()
|
registry := prometheus.NewRegistry()
|
||||||
collector := NewCollector(registry)
|
collector := NewCollector(registry)
|
||||||
|
|
||||||
mux := http.NewServeMux()
|
scrapeCh := make(chan struct{}, 1)
|
||||||
mux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{
|
|
||||||
|
metricsHandler := promhttp.HandlerFor(registry, promhttp.HandlerOpts{
|
||||||
Registry: registry,
|
Registry: registry,
|
||||||
|
})
|
||||||
|
|
||||||
|
mux := http.NewServeMux()
|
||||||
|
mux.Handle("/metrics", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
metricsHandler.ServeHTTP(w, r)
|
||||||
|
// Signal that a scrape occurred; the send is non-blocking, so the signal is dropped if one is already pending
|
||||||
|
select {
|
||||||
|
case scrapeCh <- struct{}{}:
|
||||||
|
default:
|
||||||
|
}
|
||||||
}))
|
}))
|
||||||
mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) {
|
mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
@@ -53,6 +65,7 @@ func NewServer(cfg ServerConfig) *Server {
|
|||||||
registry: registry,
|
registry: registry,
|
||||||
collector: collector,
|
collector: collector,
|
||||||
logger: logger,
|
logger: logger,
|
||||||
|
scrapeCh: scrapeCh,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -61,6 +74,11 @@ func (s *Server) Collector() *Collector {
|
|||||||
return s.collector
|
return s.collector
|
||||||
}
|
}
|
||||||
|
|
||||||
|
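// ScrapeCh returns a channel that is signaled when the metrics endpoint is scraped; the send is non-blocking on a one-slot buffer, so scrapes that occur while a signal is still pending are coalesced into it.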
|
||||||
|
func (s *Server) ScrapeCh() <-chan struct{} {
|
||||||
|
return s.scrapeCh
|
||||||
|
}
|
||||||
|
|
||||||
// Start starts the HTTP server in a goroutine.
|
// Start starts the HTTP server in a goroutine.
|
||||||
func (s *Server) Start() error {
|
func (s *Server) Start() error {
|
||||||
s.logger.Info("starting metrics server", "addr", s.httpServer.Addr)
|
s.logger.Info("starting metrics server", "addr", s.httpServer.Addr)
|
||||||
|
|||||||
@@ -19,7 +19,8 @@ let
|
|||||||
++ lib.optionals cfg.metrics.enable [
|
++ lib.optionals cfg.metrics.enable [
|
||||||
"--metrics-enabled"
|
"--metrics-enabled"
|
||||||
"--metrics-addr ${lib.escapeShellArg cfg.metrics.address}"
|
"--metrics-addr ${lib.escapeShellArg cfg.metrics.address}"
|
||||||
]);
|
]
|
||||||
|
++ cfg.extraArgs);
|
||||||
|
|
||||||
# Extract port from metrics address for firewall rule
|
# Extract port from metrics address for firewall rule
|
||||||
metricsPort = let
|
metricsPort = let
|
||||||
@@ -122,6 +123,13 @@ in
|
|||||||
description = "Open firewall for metrics port";
|
description = "Open firewall for metrics port";
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
extraArgs = lib.mkOption {
|
||||||
|
type = lib.types.listOf lib.types.str;
|
||||||
|
default = [ ];
|
||||||
|
description = "Extra command line arguments to pass to the listener";
|
||||||
|
example = [ "--debug" ];
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
config = lib.mkIf cfg.enable {
|
config = lib.mkIf cfg.enable {
|
||||||
|
|||||||
Reference in New Issue
Block a user