After a successful switch deployment, the listener now waits for Prometheus to scrape the /metrics endpoint before exiting for restart. This ensures deployment metrics are captured before the process restarts and resets in-memory counters. Falls back to a 60 second timeout if no scrape occurs. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
103 lines
2.4 KiB
Go
103 lines
2.4 KiB
Go
package metrics
|
|
|
|
import (
	"context"
	"errors"
	"fmt"
	"log/slog"
	"net"
	"net/http"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)
|
|
|
|
// ServerConfig carries the settings NewServer needs to build a metrics Server.
type ServerConfig struct {
	// Addr is the listen address handed to the underlying http.Server
	// (for example ":9090").
	Addr string

	// Logger receives the server's log output. NewServer substitutes
	// slog.Default() when it is nil.
	Logger *slog.Logger
}
|
|
|
|
// Server serves Prometheus metrics over HTTP.
|
|
type Server struct {
|
|
httpServer *http.Server
|
|
registry *prometheus.Registry
|
|
collector *Collector
|
|
logger *slog.Logger
|
|
scrapeCh chan struct{}
|
|
}
|
|
|
|
// NewServer creates a new metrics server.
|
|
func NewServer(cfg ServerConfig) *Server {
|
|
logger := cfg.Logger
|
|
if logger == nil {
|
|
logger = slog.Default()
|
|
}
|
|
|
|
registry := prometheus.NewRegistry()
|
|
collector := NewCollector(registry)
|
|
|
|
scrapeCh := make(chan struct{})
|
|
|
|
metricsHandler := promhttp.HandlerFor(registry, promhttp.HandlerOpts{
|
|
Registry: registry,
|
|
})
|
|
|
|
mux := http.NewServeMux()
|
|
mux.Handle("/metrics", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
metricsHandler.ServeHTTP(w, r)
|
|
// Signal that a scrape occurred (non-blocking)
|
|
select {
|
|
case scrapeCh <- struct{}{}:
|
|
default:
|
|
}
|
|
}))
|
|
mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) {
|
|
w.WriteHeader(http.StatusOK)
|
|
_, _ = w.Write([]byte("ok"))
|
|
})
|
|
|
|
return &Server{
|
|
httpServer: &http.Server{
|
|
Addr: cfg.Addr,
|
|
Handler: mux,
|
|
ReadHeaderTimeout: 10 * time.Second,
|
|
},
|
|
registry: registry,
|
|
collector: collector,
|
|
logger: logger,
|
|
scrapeCh: scrapeCh,
|
|
}
|
|
}
|
|
|
|
// Collector returns the metrics collector.
|
|
func (s *Server) Collector() *Collector {
|
|
return s.collector
|
|
}
|
|
|
|
// ScrapeCh returns a channel that receives a signal each time the metrics endpoint is scraped.
|
|
func (s *Server) ScrapeCh() <-chan struct{} {
|
|
return s.scrapeCh
|
|
}
|
|
|
|
// Start starts the HTTP server in a goroutine.
|
|
func (s *Server) Start() error {
|
|
s.logger.Info("starting metrics server", "addr", s.httpServer.Addr)
|
|
|
|
go func() {
|
|
if err := s.httpServer.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
|
s.logger.Error("metrics server error", "error", err)
|
|
}
|
|
}()
|
|
|
|
return nil
|
|
}
|
|
|
|
// Shutdown gracefully shuts down the server.
|
|
func (s *Server) Shutdown(ctx context.Context) error {
|
|
s.logger.Info("shutting down metrics server")
|
|
if err := s.httpServer.Shutdown(ctx); err != nil {
|
|
return fmt.Errorf("failed to shutdown metrics server: %w", err)
|
|
}
|
|
return nil
|
|
}
|