Files
nixos-exporter/collector/flake.go
Torjus Håkestad 86eaeb4b2a fix: use configuration-revision for current_rev in flake info metric
The nixos_flake_info metric's current_rev label was incorrectly showing
the nixpkgs input revision (from /run/current-system/nixos-version)
instead of the flake's own revision.

Now reads from /run/current-system/configuration-revision which contains
the flake's self.rev when system.configurationRevision is set in the
NixOS configuration.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-07 00:27:38 +01:00

228 lines
5.5 KiB
Go

package collector
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"os"
"os/exec"
"strings"
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
)
// configRevisionPath is the file getCurrentSystemRevision reads to obtain the
// revision of the running system's configuration. The file is absent when the
// NixOS configuration does not set system.configurationRevision.
const configRevisionPath = "/run/current-system/configuration-revision"
// FlakeCollector is a Prometheus collector that reports per-input and
// revision metrics for a single flake. Metadata obtained from nix is cached
// and reused until it is older than checkInterval.
type FlakeCollector struct {
// flakeURL is the flake reference handed to `nix flake metadata`.
flakeURL string
// checkInterval is how long cached metadata is served before a refetch is attempted.
checkInterval time.Duration
// Metric descriptors, created once in NewFlakeCollector.
inputAge *prometheus.Desc
inputInfo *prometheus.Desc
flakeInfo *prometheus.Desc
revisionBehind *prometheus.Desc
// mu is held by getFlakeData while it reads or writes the cache fields below.
mu sync.RWMutex
// cachedData is the most recent successfully fetched metadata (nil until the first success).
cachedData *flakeMetadata
// lastFetch is the time of the most recent successful fetch.
lastFetch time.Time
// fetchError is the error from the most recent fetch attempt, reset to nil on success.
fetchError error
}
// flakeMetadata is the subset of the `nix flake metadata --json` output that
// this collector decodes.
type flakeMetadata struct {
// Revision is the flake revision reported by nix; it is compared against the
// running system's revision.
Revision string `json:"revision"`
// Locks is the lock graph from which the per-input metrics are derived.
Locks flakeLocks `json:"locks"`
}
// flakeLocks is the "locks" object of the flake metadata.
type flakeLocks struct {
// Nodes maps a lock node's name to its entry.
Nodes map[string]flakeLockNode `json:"nodes"`
// Root is the value of the "root" key; presumably the name of the root node
// within Nodes (confirm against the flake.lock format).
Root string `json:"root"`
}
// flakeLockNode is a single entry of the lock graph's "nodes" object.
type flakeLockNode struct {
	// Inputs is decoded untyped because the shape of its values is not fixed
	// by this file.
	Inputs map[string]any `json:"inputs,omitempty"`
	// Locked is the pinned source of the node; nil when the node has no
	// "locked" object.
	Locked *lockedInfo `json:"locked,omitempty"`
	// Original is the node's "original" object; nil when absent.
	Original *originalInfo `json:"original,omitempty"`
}
// lockedInfo is the "locked" object of a lock node.
type lockedInfo struct {
// LastModified is treated as a Unix timestamp in seconds when the input's age is computed.
LastModified int64 `json:"lastModified"`
// Rev is the locked revision; it is shortened to 7 characters before being used as a label.
Rev string `json:"rev"`
// Type is exported unchanged as the "type" label.
Type string `json:"type"`
}
// originalInfo is the "original" object of a lock node; only its type is decoded.
type originalInfo struct {
Type string `json:"type"`
}
// NewFlakeCollector returns a FlakeCollector for the flake at flakeURL.
// Flake metadata is refetched once the cached copy is older than
// checkInterval. No fetch happens here; the cache starts out empty.
func NewFlakeCollector(flakeURL string, checkInterval time.Duration) *FlakeCollector {
	// None of the descriptors carry constant labels.
	ageDesc := prometheus.NewDesc(
		"nixos_flake_input_age_seconds",
		"Age of flake input in seconds",
		[]string{"input"}, nil,
	)
	inputInfoDesc := prometheus.NewDesc(
		"nixos_flake_input_info",
		"Info gauge with revision and type labels",
		[]string{"input", "rev", "type"}, nil,
	)
	flakeInfoDesc := prometheus.NewDesc(
		"nixos_flake_info",
		"Info gauge with current and remote flake revisions",
		[]string{"current_rev", "remote_rev"}, nil,
	)
	behindDesc := prometheus.NewDesc(
		"nixos_flake_revision_behind",
		"1 if current system revision differs from remote latest, 0 if match",
		nil, nil,
	)

	collector := new(FlakeCollector)
	collector.flakeURL = flakeURL
	collector.checkInterval = checkInterval
	collector.inputAge = ageDesc
	collector.inputInfo = inputInfoDesc
	collector.flakeInfo = flakeInfoDesc
	collector.revisionBehind = behindDesc
	return collector
}
// Describe sends the descriptors of all four metrics this collector can emit
// to ch, in a fixed order.
func (c *FlakeCollector) Describe(ch chan<- *prometheus.Desc) {
	descs := [...]*prometheus.Desc{
		c.inputAge,
		c.inputInfo,
		c.flakeInfo,
		c.revisionBehind,
	}
	for _, desc := range descs {
		ch <- desc
	}
}
// Collect emits the per-input metrics followed by the revision metrics.
// When no flake metadata can be obtained the failure is logged and nothing
// is sent to ch.
func (c *FlakeCollector) Collect(ch chan<- prometheus.Metric) {
	metadata, fetchErr := c.getFlakeData()
	if fetchErr == nil {
		c.collectInputMetrics(ch, metadata)
		c.collectRevisionBehind(ch, metadata)
		return
	}
	slog.Error("Failed to get flake data", "error", fetchErr)
}
// getFlakeData returns the flake metadata, serving the cached copy while it
// is younger than checkInterval and refetching otherwise.
//
// If the refetch fails and an older copy exists, that stale copy is returned
// with a nil error and a warning is logged. The fetch error is returned only
// when nothing has ever been cached.
//
// NOTE(review): a failed fetch does not advance lastFetch, so every call
// retries the fetch (under the write lock) until one succeeds.
func (c *FlakeCollector) getFlakeData() (*flakeMetadata, error) {
	// cacheIsFresh must only be called while c.mu is held.
	cacheIsFresh := func() bool {
		return c.cachedData != nil && time.Since(c.lastFetch) < c.checkInterval
	}

	// Fast path: a read lock is enough to serve a fresh cache entry.
	c.mu.RLock()
	cached, fresh := c.cachedData, cacheIsFresh()
	c.mu.RUnlock()
	if fresh {
		return cached, nil
	}

	c.mu.Lock()
	defer c.mu.Unlock()

	// Another caller may have refreshed the cache while this one was waiting
	// for the write lock.
	if cacheIsFresh() {
		return c.cachedData, nil
	}

	fetched, fetchErr := fetchFlakeMetadata(c.flakeURL)
	if fetchErr == nil {
		c.cachedData = fetched
		c.lastFetch = time.Now()
		c.fetchError = nil
		return fetched, nil
	}

	c.fetchError = fetchErr
	if c.cachedData == nil {
		return nil, fetchErr
	}
	// Stale data is preferred over no data.
	slog.Warn("Using stale flake data due to fetch error", "error", fetchErr)
	return c.cachedData, nil
}
// collectInputMetrics emits, for every locked node in data other than the
// root node, an age gauge (seconds since the node's lastModified) and an info
// gauge labelled with the node's shortened revision and its type.
func (c *FlakeCollector) collectInputMetrics(ch chan<- prometheus.Metric, data *flakeMetadata) {
	const shortRevLen = 7

	// The lock graph names its own root node; fall back to the conventional
	// key when that field was not present in the metadata.
	rootName := data.Locks.Root
	if rootName == "" {
		rootName = "root"
	}

	now := time.Now().Unix()
	for name, node := range data.Locks.Nodes {
		if name == rootName || node.Locked == nil {
			continue
		}
		locked := node.Locked

		// A node without lastModified decodes to 0; reporting it would yield a
		// bogus age equal to the current Unix time, so the age sample is
		// omitted for such nodes while the info sample is still emitted.
		if locked.LastModified > 0 {
			age := float64(now - locked.LastModified)
			ch <- prometheus.MustNewConstMetric(c.inputAge, prometheus.GaugeValue, age, name)
		}

		rev := locked.Rev
		if len(rev) > shortRevLen {
			rev = rev[:shortRevLen]
		}
		ch <- prometheus.MustNewConstMetric(c.inputInfo, prometheus.GaugeValue, 1, name, rev, locked.Type)
	}
}
// collectRevisionBehind emits the flake info gauge (labelled with the running
// system's revision and the remote revision, both shortened to 7 characters)
// and the revision-behind gauge.
//
// The behind gauge is 1 only when both revisions are known and the system
// revision is not a prefix of the remote one; it is 0 otherwise, including
// when either revision is empty. If the system revision cannot be read the
// error is logged and neither metric is emitted.
func (c *FlakeCollector) collectRevisionBehind(ch chan<- prometheus.Metric, data *flakeMetadata) {
	localRev, revErr := getCurrentSystemRevision()
	if revErr != nil {
		slog.Error("Failed to get current system revision", "error", revErr)
		return
	}

	const shortLen = 7
	fullRemote := data.Revision
	shortRemote := fullRemote
	if len(shortRemote) > shortLen {
		shortRemote = shortRemote[:shortLen]
	}

	ch <- prometheus.MustNewConstMetric(c.flakeInfo, prometheus.GaugeValue, 1, localRev, shortRemote)

	var behind float64
	if localRev != "" && fullRemote != "" {
		upToDate := localRev == shortRemote || strings.HasPrefix(fullRemote, localRev)
		if !upToDate {
			behind = 1
		}
	}
	ch <- prometheus.MustNewConstMetric(c.revisionBehind, prometheus.GaugeValue, behind)
}
// fetchFlakeMetadata runs `nix flake metadata --json <flakeURL>` with a
// 30-second timeout and decodes its standard output.
//
// It returns an error when the command cannot be started, times out, exits
// unsuccessfully, or prints output that is not valid JSON for flakeMetadata.
// The returned error wraps the underlying cause; for an unsuccessful exit it
// also carries the command's trimmed stderr when there is any.
func fetchFlakeMetadata(flakeURL string) (*flakeMetadata, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	cmd := exec.CommandContext(ctx, "nix", "flake", "metadata", "--json", flakeURL)
	output, err := cmd.Output()
	if err != nil {
		// When the timeout kills the process, err is just "signal: killed" and
		// stderr is frequently empty, so report the context error instead.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return nil, fmt.Errorf("nix flake metadata failed: %w", ctxErr)
		}
		// Output captures stderr into the ExitError when cmd.Stderr is unset.
		if exitErr, ok := err.(*exec.ExitError); ok {
			if stderr := strings.TrimSpace(string(exitErr.Stderr)); stderr != "" {
				return nil, fmt.Errorf("nix flake metadata failed: %w: %s", err, stderr)
			}
		}
		return nil, fmt.Errorf("nix flake metadata failed: %w", err)
	}

	var data flakeMetadata
	if err := json.Unmarshal(output, &data); err != nil {
		return nil, fmt.Errorf("failed to parse flake metadata: %w", err)
	}
	return &data, nil
}
// getCurrentSystemRevision reads the running system's configuration revision
// from configRevisionPath and returns it with surrounding whitespace removed
// and shortened to at most 7 characters.
//
// A missing file is not an error: it yields an empty revision, which is what
// happens when system.configurationRevision is not set. Any other read
// failure is returned unchanged.
func getCurrentSystemRevision() (string, error) {
	const shortLen = 7

	contents, readErr := os.ReadFile(configRevisionPath)
	if os.IsNotExist(readErr) {
		return "", nil
	}
	if readErr != nil {
		return "", readErr
	}

	revision := strings.TrimSpace(string(contents))
	if len(revision) <= shortLen {
		return revision, nil
	}
	return revision[:shortLen], nil
}