fix: wait for metrics scrape before restarting after switch deployment

After a successful switch deployment, the listener now waits for Prometheus
to scrape the /metrics endpoint before exiting for restart. This ensures
deployment metrics are captured before the process restarts and resets
in-memory counters. If no scrape occurs within 60 seconds, it restarts anyway.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-08 23:38:26 +01:00
parent 746e30b24f
commit bc02393c5a
3 changed files with 32 additions and 3 deletions

View File

@@ -270,6 +270,17 @@ func (l *Listener) handleDeployRequest(subject string, data []byte) {
// After a successful switch, signal restart so we pick up any new version
if req.Action == messages.ActionSwitch {
// Wait for metrics scrape before restarting (if metrics enabled)
if l.metricsServer != nil {
l.logger.Info("waiting for metrics scrape before restart")
select {
case <-l.metricsServer.ScrapeCh():
l.logger.Info("metrics scraped, proceeding with restart")
case <-time.After(60 * time.Second):
l.logger.Warn("no metrics scrape within timeout, proceeding with restart anyway")
}
}
select {
case l.restartCh <- struct{}{}:
default:

View File

@@ -23,6 +23,7 @@ type Server struct {
registry *prometheus.Registry
collector *Collector
logger *slog.Logger
scrapeCh chan struct{}
}
// NewServer creates a new metrics server.
@@ -35,9 +36,20 @@ func NewServer(cfg ServerConfig) *Server {
registry := prometheus.NewRegistry()
collector := NewCollector(registry)
mux := http.NewServeMux()
mux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{
scrapeCh := make(chan struct{})
metricsHandler := promhttp.HandlerFor(registry, promhttp.HandlerOpts{
Registry: registry,
})
mux := http.NewServeMux()
mux.Handle("/metrics", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
metricsHandler.ServeHTTP(w, r)
// Signal that a scrape occurred (non-blocking)
select {
case scrapeCh <- struct{}{}:
default:
}
}))
mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusOK)
@@ -53,6 +65,7 @@ func NewServer(cfg ServerConfig) *Server {
registry: registry,
collector: collector,
logger: logger,
scrapeCh: scrapeCh,
}
}
@@ -61,6 +74,11 @@ func (s *Server) Collector() *Collector {
return s.collector
}
// ScrapeCh returns the receive-only channel that the /metrics handler signals
// after it has finished serving a scrape.
//
// The channel is unbuffered and the handler's send is non-blocking, so a
// signal is delivered only when a receiver is already waiting; scrapes that
// complete while nobody is receiving are dropped rather than queued. Treat a
// receive as "a scrape finished while I was waiting", not as a scrape count.
func (s *Server) ScrapeCh() <-chan struct{} {
return s.scrapeCh
}
// Start starts the HTTP server in a goroutine.
func (s *Server) Start() error {
s.logger.Info("starting metrics server", "addr", s.httpServer.Addr)