package collector

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"log/slog"
	"os"
	"os/exec"
	"regexp"
	"strings"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

const (
	// nixosVersionPath is the file the running system's version string is read from.
	nixosVersionPath = "/run/current-system/nixos-version"

	// shortRevLen is the number of leading hash characters kept when a
	// revision is shortened for labels and comparisons.
	shortRevLen = 7

	// fetchTimeout bounds a single `nix flake metadata` invocation.
	fetchTimeout = 30 * time.Second
)

// revisionPattern extracts the git hash from nixos-version.
// Formats: "25.11.20260203.e576e3c" or "1994-294a625"
var revisionPattern = regexp.MustCompile(`[.-]([a-f0-9]{7,40})$`)

// Compile-time check that FlakeCollector implements prometheus.Collector.
var _ prometheus.Collector = (*FlakeCollector)(nil)

// FlakeCollector is a Prometheus collector that exposes metrics derived from
// the output of `nix flake metadata --json` for a configured flake URL. The
// parsed metadata is cached and refreshed at most once per checkInterval.
type FlakeCollector struct {
	flakeURL      string
	checkInterval time.Duration

	inputAge       *prometheus.Desc
	inputInfo      *prometheus.Desc
	revisionBehind *prometheus.Desc

	// mu guards cachedData, lastFetch and fetchError.
	mu         sync.RWMutex
	cachedData *flakeMetadata
	lastFetch  time.Time
	fetchError error
}

// flakeMetadata is the subset of the `nix flake metadata --json` output that
// this collector decodes.
type flakeMetadata struct {
	Revision string     `json:"revision"`
	Locks    flakeLocks `json:"locks"`
}

// flakeLocks mirrors the "locks" object of the metadata output.
type flakeLocks struct {
	Nodes map[string]flakeLockNode `json:"nodes"`
	Root  string                   `json:"root"`
}

// flakeLockNode is one entry of the "nodes" map.
type flakeLockNode struct {
	Inputs   map[string]any `json:"inputs,omitempty"`
	Locked   *lockedInfo    `json:"locked,omitempty"`
	Original *originalInfo  `json:"original,omitempty"`
}

// lockedInfo holds the decoded "locked" fields of a node.
type lockedInfo struct {
	LastModified int64  `json:"lastModified"`
	Rev          string `json:"rev"`
	Type         string `json:"type"`
}

// originalInfo holds the decoded "original" fields of a node.
type originalInfo struct {
	Type string `json:"type"`
}

// NewFlakeCollector returns a FlakeCollector for flakeURL whose cached
// metadata is considered fresh for checkInterval after a successful fetch.
func NewFlakeCollector(flakeURL string, checkInterval time.Duration) *FlakeCollector {
	return &FlakeCollector{
		flakeURL:      flakeURL,
		checkInterval: checkInterval,
		inputAge: prometheus.NewDesc(
			"nixos_flake_input_age_seconds",
			"Age of flake input in seconds",
			[]string{"input"}, nil,
		),
		inputInfo: prometheus.NewDesc(
			"nixos_flake_input_info",
			"Info gauge with revision and type labels",
			[]string{"input", "rev", "type"}, nil,
		),
		revisionBehind: prometheus.NewDesc(
			"nixos_flake_revision_behind",
			"1 if current system revision differs from remote latest, 0 if match",
			nil, nil,
		),
	}
}

// Describe sends the descriptors of all metrics this collector can emit.
func (c *FlakeCollector) Describe(ch chan<- *prometheus.Desc) {
	ch <- c.inputAge
	ch <- c.inputInfo
	ch <- c.revisionBehind
}

// Collect emits the input and revision metrics. If no flake data can be
// obtained the error is logged and no metrics are sent.
func (c *FlakeCollector) Collect(ch chan<- prometheus.Metric) {
	data, err := c.getFlakeData()
	if err != nil {
		slog.Error("Failed to get flake data", "error", err)
		return
	}

	c.collectInputMetrics(ch, data)
	c.collectRevisionBehind(ch, data)
}

// cacheFresh reports whether cached metadata exists and was fetched less than
// checkInterval ago. The caller must hold c.mu (read or write).
func (c *FlakeCollector) cacheFresh() bool {
	return c.cachedData != nil && time.Since(c.lastFetch) < c.checkInterval
}

// getFlakeData returns the cached metadata while it is fresh and otherwise
// fetches it again. When a refresh fails but older data is cached, the stale
// data is returned (with a warning logged) instead of an error.
//
// NOTE(review): lastFetch only advances on success, so while fetches keep
// failing every call re-runs the nix command while holding the write lock —
// confirm this is the intended retry policy.
func (c *FlakeCollector) getFlakeData() (*flakeMetadata, error) {
	c.mu.RLock()
	if c.cacheFresh() {
		data := c.cachedData
		c.mu.RUnlock()
		return data, nil
	}
	c.mu.RUnlock()

	c.mu.Lock()
	defer c.mu.Unlock()

	// Double-check after acquiring write lock: another caller may have
	// refreshed the cache between the RUnlock and the Lock above.
	if c.cacheFresh() {
		return c.cachedData, nil
	}

	data, err := fetchFlakeMetadata(c.flakeURL)
	if err != nil {
		c.fetchError = err
		// Return cached data if available, even if stale.
		if c.cachedData != nil {
			slog.Warn("Using stale flake data due to fetch error", "error", err)
			return c.cachedData, nil
		}
		return nil, err
	}

	c.cachedData = data
	c.lastFetch = time.Now()
	c.fetchError = nil
	return data, nil
}

// collectInputMetrics emits, for every lock node that has a "locked" entry,
// an info sample labelled with the shortened revision and type, plus an age
// sample when the node carries a lastModified timestamp.
func (c *FlakeCollector) collectInputMetrics(ch chan<- prometheus.Metric, data *flakeMetadata) {
	// The lock file names its own root node; fall back to the conventional
	// name when the field is absent.
	rootName := data.Locks.Root
	if rootName == "" {
		rootName = "root"
	}

	now := time.Now().Unix()

	for name, node := range data.Locks.Nodes {
		if name == rootName || node.Locked == nil {
			continue
		}

		// A zero timestamp means lastModified was missing from the JSON;
		// emitting now-0 would report a nonsensical multi-decade age.
		if node.Locked.LastModified > 0 {
			age := float64(now - node.Locked.LastModified)
			ch <- prometheus.MustNewConstMetric(c.inputAge, prometheus.GaugeValue, age, name)
		}

		ch <- prometheus.MustNewConstMetric(
			c.inputInfo, prometheus.GaugeValue, 1,
			name, shortRev(node.Locked.Rev), node.Locked.Type,
		)
	}
}

// collectRevisionBehind emits 1 when the running system's revision is not a
// prefix of the flake's revision and 0 otherwise. When either revision is
// unknown (empty) the gauge is reported as 0. Nothing is emitted if the
// current system revision cannot be read.
func (c *FlakeCollector) collectRevisionBehind(ch chan<- prometheus.Metric, data *flakeMetadata) {
	currentRev, err := getCurrentSystemRevision()
	if err != nil {
		slog.Error("Failed to get current system revision", "error", err)
		return
	}

	behind := 0.0
	// currentRev is at most shortRevLen characters long, so a prefix match
	// against the full remote revision is equivalent to comparing the two
	// shortened hashes.
	if currentRev != "" && data.Revision != "" && !strings.HasPrefix(data.Revision, currentRev) {
		behind = 1.0
	}

	ch <- prometheus.MustNewConstMetric(c.revisionBehind, prometheus.GaugeValue, behind)
}

// fetchFlakeMetadata runs `nix flake metadata --json flakeURL` with a
// fetchTimeout deadline and decodes its standard output.
//
// The returned error wraps the underlying cause: the context error when the
// deadline expired, otherwise the command error, with the command's stderr
// appended when it produced any.
func fetchFlakeMetadata(flakeURL string) (*flakeMetadata, error) {
	ctx, cancel := context.WithTimeout(context.Background(), fetchTimeout)
	defer cancel()

	cmd := exec.CommandContext(ctx, "nix", "flake", "metadata", "--json", flakeURL)
	output, err := cmd.Output()
	if err != nil {
		// A command killed by the deadline surfaces as an ExitError with
		// little or no stderr; report the context error so the cause is clear.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return nil, fmt.Errorf("nix flake metadata failed: %w", ctxErr)
		}

		var exitErr *exec.ExitError
		if errors.As(err, &exitErr) {
			if stderr := strings.TrimSpace(string(exitErr.Stderr)); stderr != "" {
				return nil, fmt.Errorf("nix flake metadata failed: %w: %s", err, stderr)
			}
		}
		return nil, fmt.Errorf("nix flake metadata failed: %w", err)
	}

	var data flakeMetadata
	if err := json.Unmarshal(output, &data); err != nil {
		return nil, fmt.Errorf("failed to parse flake metadata: %w", err)
	}

	return &data, nil
}

// getCurrentSystemRevision reads nixosVersionPath and returns the shortened
// git hash found at the end of the version string. It returns an empty string
// and a nil error when the version string does not end in a hash matching
// revisionPattern, and an error only when the file cannot be read.
func getCurrentSystemRevision() (string, error) {
	data, err := os.ReadFile(nixosVersionPath)
	if err != nil {
		return "", err
	}

	version := strings.TrimSpace(string(data))
	matches := revisionPattern.FindStringSubmatch(version)
	if matches == nil {
		return "", nil
	}

	return shortRev(matches[1]), nil
}

// shortRev returns at most the first shortRevLen characters of rev.
func shortRev(rev string) string {
	if len(rev) > shortRevLen {
		return rev[:shortRevLen]
	}
	return rev
}