This repository has been archived on 2026-03-09. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
homelab-deploy/internal/metrics/server.go
Torjus Håkestad 46fc6a7e96 fix: wait for metrics scrape before restarting after switch deployment
After a successful switch deployment, the listener now waits for Prometheus
to scrape the /metrics endpoint before exiting for restart. This ensures
deployment metrics are captured before the process restarts and resets
in-memory counters. Falls back to a 60 second timeout if no scrape occurs.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-08 23:38:26 +01:00

103 lines
2.4 KiB
Go

package metrics
import (
	"context"
	"fmt"
	"log/slog"
	"net"
	"net/http"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)
// ServerConfig holds configuration for the metrics server.
//
// It is consumed by NewServer; the zero value is accepted.
type ServerConfig struct {
// Addr is the listen address; NewServer copies it into http.Server.Addr.
Addr string
// Logger receives the server's log output. When nil, NewServer falls back
// to slog.Default().
Logger *slog.Logger
}
// Server serves Prometheus metrics over HTTP.
//
// Instances are built by NewServer, which wires every field below.
type Server struct {
// httpServer is the HTTP server whose mux exposes /metrics and /health.
httpServer *http.Server
// registry is the Prometheus registry created for this server and served
// by the /metrics handler.
registry *prometheus.Registry
// collector is the Collector built from registry; exposed via Collector().
collector *Collector
// logger is the configured logger, or slog.Default() if none was supplied.
logger *slog.Logger
// scrapeCh is a capacity-1 channel that the /metrics handler sends to,
// without blocking, after it has served a request.
scrapeCh chan struct{}
}
// NewServer assembles a metrics Server from cfg without starting it.
//
// A fresh Prometheus registry is created and handed to NewCollector. The
// returned server routes two paths:
//
//   - /metrics serves the registry through promhttp and, once the response
//     has been handled, posts a non-blocking signal on the scrape channel.
//   - /health always answers 200 with the body "ok".
//
// If cfg.Logger is nil, slog.Default() is used.
func NewServer(cfg ServerConfig) *Server {
	lg := cfg.Logger
	if lg == nil {
		lg = slog.Default()
	}

	reg := prometheus.NewRegistry()
	coll := NewCollector(reg)

	// Capacity 1 lets a single pending scrape notification be held while
	// nobody is receiving; further notifications are dropped by the select.
	scraped := make(chan struct{}, 1)

	promHandler := promhttp.HandlerFor(reg, promhttp.HandlerOpts{Registry: reg})

	serveMetrics := func(w http.ResponseWriter, r *http.Request) {
		promHandler.ServeHTTP(w, r)
		// Notify after serving; never block the HTTP handler on a slow or
		// absent receiver.
		select {
		case scraped <- struct{}{}:
		default:
		}
	}

	serveHealth := func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
		_, _ = w.Write([]byte("ok"))
	}

	routes := http.NewServeMux()
	routes.HandleFunc("/metrics", serveMetrics)
	routes.HandleFunc("/health", serveHealth)

	srv := &Server{
		registry:  reg,
		collector: coll,
		logger:    lg,
		scrapeCh:  scraped,
	}
	srv.httpServer = &http.Server{
		Addr:              cfg.Addr,
		Handler:           routes,
		ReadHeaderTimeout: 10 * time.Second,
	}
	return srv
}
// Collector returns the metrics collector.
//
// This is the Collector that NewServer built from the server's own registry,
// so it is the same instance for the lifetime of the Server.
func (s *Server) Collector() *Collector {
return s.collector
}
// ScrapeCh returns a receive-only view of the channel that is signalled after
// the /metrics endpoint has served a request.
//
// The underlying channel has capacity 1 and the handler sends without
// blocking, so signals coalesce: if one is already pending, later scrapes do
// not queue additional values. A value may also already be buffered from a
// scrape that happened before the caller started receiving.
func (s *Server) ScrapeCh() <-chan struct{} {
return s.scrapeCh
}
// Start binds the listen address and then serves HTTP in a background
// goroutine.
//
// The listener is opened synchronously so that bind failures (for example the
// address already being in use) are returned to the caller rather than only
// being logged from the goroutine while Start reports success. An empty
// address falls back to ":http", matching http.Server.ListenAndServe.
//
// Errors that occur later while serving are logged, except
// http.ErrServerClosed, which is the normal result of Shutdown. The goroutine
// exits when Serve returns.
func (s *Server) Start() error {
	addr := s.httpServer.Addr
	if addr == "" {
		addr = ":http"
	}

	s.logger.Info("starting metrics server", "addr", s.httpServer.Addr)

	ln, err := net.Listen("tcp", addr)
	if err != nil {
		return fmt.Errorf("listening on %q: %w", addr, err)
	}

	go func() {
		// Serve takes ownership of ln and closes it when it returns.
		if err := s.httpServer.Serve(ln); err != nil && err != http.ErrServerClosed {
			s.logger.Error("metrics server error", "error", err)
		}
	}()
	return nil
}
// Shutdown stops the HTTP server gracefully, delegating to
// http.Server.Shutdown with the supplied context.
//
// It returns nil on success; otherwise the underlying error is wrapped so
// callers can still inspect it with errors.Is / errors.As.
func (s *Server) Shutdown(ctx context.Context) error {
	s.logger.Info("shutting down metrics server")

	err := s.httpServer.Shutdown(ctx)
	if err == nil {
		return nil
	}
	return fmt.Errorf("failed to shutdown metrics server: %w", err)
}