feat: add NATS cache sharing and smart cache refresh

Add two complementary features to reduce remote revision cache staleness:

1. Smart local cache: When the current system revision matches the
   cached remote revision, force an immediate cache refresh to check for
   newer revisions.

2. NATS integration: Share cache updates across hosts via NATS pub/sub.
   Hosts publish revision updates when they fetch new data, and subscribe
   to receive updates from other hosts. Features include:
   - Auto-reconnect with infinite retries
   - Graceful fallback when NATS is unavailable
   - Filtering by flake URL and hostname

New CLI flags:
  --flake.nats.enable
  --flake.nats.url
  --flake.nats.subject
  --flake.nats.credentials-file

New NixOS module options under services.prometheus.exporters.nixos.flake.nats

Bumps version to 0.3.0.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-08 23:05:52 +01:00
parent cf0ce85899
commit 5cc0e7eadd
7 changed files with 315 additions and 18 deletions

26
main.go
View File

@@ -15,7 +15,7 @@ import (
"github.com/prometheus/client_golang/prometheus/promhttp"
)
const version = "0.2.3"
const version = "0.3.0"
func main() {
cfg, err := config.Parse()
@@ -30,10 +30,23 @@ func main() {
slog.Info("Registered generation collector")
// Register flake collector if enabled
var flakeCollector *collector.FlakeCollector
if cfg.FlakeCollector {
flakeCollector := collector.NewFlakeCollector(cfg.FlakeURL, cfg.FlakeCheckInterval)
var err error
flakeCollector, err = collector.NewFlakeCollectorWithNATS(collector.FlakeCollectorConfig{
FlakeURL: cfg.FlakeURL,
CheckInterval: cfg.FlakeCheckInterval,
NATSEnabled: cfg.FlakeNATSEnable,
NATSURL: cfg.FlakeNATSURL,
NATSSubject: cfg.FlakeNATSSubject,
NATSCredentialsFile: cfg.FlakeNATSCredentialsFile,
})
if err != nil {
slog.Error("Failed to create flake collector", "error", err)
os.Exit(1)
}
prometheus.MustRegister(flakeCollector)
slog.Info("Registered flake collector", "url", cfg.FlakeURL, "check_interval", cfg.FlakeCheckInterval)
slog.Info("Registered flake collector", "url", cfg.FlakeURL, "check_interval", cfg.FlakeCheckInterval, "nats_enabled", cfg.FlakeNATSEnable)
}
mux := http.NewServeMux()
@@ -71,6 +84,13 @@ func main() {
<-ctx.Done()
slog.Info("Shutting down server")
// Close flake collector (NATS connection)
if flakeCollector != nil {
if err := flakeCollector.Close(); err != nil {
slog.Error("Failed to close flake collector", "error", err)
}
}
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()