Compare commits

...

2 Commits

Author SHA1 Message Date
277a49a666 chore: update flake inputs
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-09 15:44:18 +01:00
bc02393c5a fix: wait for metrics scrape before restarting after switch deployment
After a successful switch deployment, the listener now waits for Prometheus
to scrape the /metrics endpoint before exiting for restart. This ensures
deployment metrics are captured before the process restarts and resets
in-memory counters. Falls back to a 60 second timeout if no scrape occurs.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-09 15:44:14 +01:00
4 changed files with 35 additions and 6 deletions

View File

@@ -16,7 +16,7 @@ import (
"github.com/urfave/cli/v3"
)
const version = "0.1.11"
const version = "0.1.13"
func main() {
app := &cli.Command{

6
flake.lock generated
View File

@@ -2,11 +2,11 @@
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1770197578,
"narHash": "sha256-AYqlWrX09+HvGs8zM6ebZ1pwUqjkfpnv8mewYwAo+iM=",
"lastModified": 1770562336,
"narHash": "sha256-ub1gpAONMFsT/GU2hV6ZWJjur8rJ6kKxdm9IlCT0j84=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "00c21e4c93d963c50d4c0c89bfa84ed6e0694df2",
"rev": "d6c71932130818840fc8fe9509cf50be8c64634f",
"type": "github"
},
"original": {

View File

@@ -270,6 +270,17 @@ func (l *Listener) handleDeployRequest(subject string, data []byte) {
// After a successful switch, signal restart so we pick up any new version
if req.Action == messages.ActionSwitch {
// Wait for metrics scrape before restarting (if metrics enabled)
if l.metricsServer != nil {
l.logger.Info("waiting for metrics scrape before restart")
select {
case <-l.metricsServer.ScrapeCh():
l.logger.Info("metrics scraped, proceeding with restart")
case <-time.After(60 * time.Second):
l.logger.Warn("no metrics scrape within timeout, proceeding with restart anyway")
}
}
select {
case l.restartCh <- struct{}{}:
default:

View File

@@ -23,6 +23,7 @@ type Server struct {
registry *prometheus.Registry
collector *Collector
logger *slog.Logger
scrapeCh chan struct{}
}
// NewServer creates a new metrics server.
@@ -35,9 +36,20 @@ func NewServer(cfg ServerConfig) *Server {
registry := prometheus.NewRegistry()
collector := NewCollector(registry)
mux := http.NewServeMux()
mux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{
scrapeCh := make(chan struct{})
metricsHandler := promhttp.HandlerFor(registry, promhttp.HandlerOpts{
Registry: registry,
})
mux := http.NewServeMux()
mux.Handle("/metrics", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
metricsHandler.ServeHTTP(w, r)
// Signal that a scrape occurred (non-blocking)
select {
case scrapeCh <- struct{}{}:
default:
}
}))
mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusOK)
@@ -53,6 +65,7 @@ func NewServer(cfg ServerConfig) *Server {
registry: registry,
collector: collector,
logger: logger,
scrapeCh: scrapeCh,
}
}
@@ -61,6 +74,11 @@ func (s *Server) Collector() *Collector {
return s.collector
}
// ScrapeCh returns a channel that receives a signal each time the metrics endpoint is scraped.
func (s *Server) ScrapeCh() <-chan struct{} {
return s.scrapeCh
}
// Start starts the HTTP server in a goroutine.
func (s *Server) Start() error {
s.logger.Info("starting metrics server", "addr", s.httpServer.Addr)