Files
nixos-exporter/collector/flake.go
Torjus Håkestad 9c29505814 feat: add nixos_flake_info metric with current and remote revisions
Add a new info metric that exposes the current system's flake revision
and the latest remote revision as labels. This makes it easier to see
exactly which revision is deployed vs available.

Also adds version constant to Go code and extracts it in flake.nix,
providing a single source of truth for the version.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-07 00:16:19 +01:00

235 lines
5.7 KiB
Go

package collector
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"os"
"os/exec"
"regexp"
"strings"
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
)
// nixosVersionPath is the file read by getCurrentSystemRevision to obtain the
// version string of the currently activated system.
const nixosVersionPath = "/run/current-system/nixos-version"
// revisionPattern extracts the git hash from a nixos-version string.
//
// It matches a '.' or '-' separator followed by 7 to 40 lowercase hexadecimal
// characters anchored at the end of the string, and captures the hex run.
// Example inputs: "25.11.20260203.e576e3c" or "1994-294a625".
//
// NOTE(review): digits are valid hex characters, so a version string that ends
// in a purely numeric component of 7+ digits (and no hash) would also match.
var revisionPattern = regexp.MustCompile(`[.-]([a-f0-9]{7,40})$`)
// FlakeCollector is a Prometheus collector (it provides Describe and Collect)
// that reports metrics derived from a flake's metadata and from the revision
// of the currently running system. Fetched metadata is cached between scrapes.
type FlakeCollector struct {
	// flakeURL is the flake reference handed to fetchFlakeMetadata.
	flakeURL string
	// checkInterval is how long cached metadata is served before a new fetch
	// is attempted.
	checkInterval time.Duration

	// Metric descriptors, created once in NewFlakeCollector.
	inputAge       *prometheus.Desc
	inputInfo      *prometheus.Desc
	flakeInfo      *prometheus.Desc
	revisionBehind *prometheus.Desc

	// mu guards cachedData, lastFetch and fetchError.
	mu sync.RWMutex
	// cachedData is the result of the most recent successful fetch, or nil if
	// no fetch has succeeded yet.
	cachedData *flakeMetadata
	// lastFetch is the time of the most recent successful fetch; it is not
	// advanced when a fetch fails.
	lastFetch time.Time
	// fetchError is the error from the most recent fetch attempt, reset to nil
	// on success. It is only written in the code visible here.
	fetchError error
}
// flakeMetadata holds the fields decoded from the JSON output of
// `nix flake metadata --json`.
type flakeMetadata struct {
	// Revision is the value of the "revision" key; it is compared against the
	// running system's revision and may be empty if the key is absent.
	Revision string `json:"revision"`
	// Locks is the value of the "locks" key.
	Locks flakeLocks `json:"locks"`
}
// flakeLocks is the decoded "locks" object of the flake metadata.
type flakeLocks struct {
	// Nodes maps a node name to its lock entry; collectInputMetrics iterates
	// over it to emit per-input metrics.
	Nodes map[string]flakeLockNode `json:"nodes"`
	// Root is the value of the "root" key — presumably the name of the root
	// node in Nodes. It is not read by the code visible here.
	Root string `json:"root"`
}
// flakeLockNode is a single entry of the "nodes" map in the lock data.
type flakeLockNode struct {
	// Inputs is decoded loosely because its values are not inspected by the
	// code visible here.
	Inputs map[string]interface{} `json:"inputs,omitempty"`
	// Locked is nil when the node has no "locked" object; such nodes are
	// skipped when emitting input metrics.
	Locked *lockedInfo `json:"locked,omitempty"`
	// Original is decoded from the "original" object; it is not read by the
	// code visible here.
	Original *originalInfo `json:"original,omitempty"`
}
// lockedInfo is the "locked" object of a lock node.
type lockedInfo struct {
	// LastModified is subtracted from the current Unix time (in seconds) to
	// compute the input's age.
	LastModified int64 `json:"lastModified"`
	// Rev is the locked revision; it is shortened to 7 characters before being
	// used as a metric label.
	Rev string `json:"rev"`
	// Type is exported unchanged as the "type" label of the input info metric.
	Type string `json:"type"`
}
// originalInfo is the "original" object of a lock node. Only its "type" key is
// decoded, and the value is not read by the code visible here.
type originalInfo struct {
	Type string `json:"type"`
}
// NewFlakeCollector returns a FlakeCollector for the flake at flakeURL.
//
// checkInterval is the maximum age of cached flake metadata before the
// collector tries to fetch it again. The constructor only builds the metric
// descriptors; no fetch is performed until metrics are collected.
func NewFlakeCollector(flakeURL string, checkInterval time.Duration) *FlakeCollector {
	c := new(FlakeCollector)
	c.flakeURL = flakeURL
	c.checkInterval = checkInterval

	// Per-input metrics, keyed by the input's node name.
	c.inputAge = prometheus.NewDesc(
		"nixos_flake_input_age_seconds",
		"Age of flake input in seconds",
		[]string{"input"},
		nil,
	)
	c.inputInfo = prometheus.NewDesc(
		"nixos_flake_input_info",
		"Info gauge with revision and type labels",
		[]string{"input", "rev", "type"},
		nil,
	)

	// Flake-wide metrics comparing the running system with the remote flake.
	c.flakeInfo = prometheus.NewDesc(
		"nixos_flake_info",
		"Info gauge with current and remote flake revisions",
		[]string{"current_rev", "remote_rev"},
		nil,
	)
	c.revisionBehind = prometheus.NewDesc(
		"nixos_flake_revision_behind",
		"1 if current system revision differs from remote latest, 0 if match",
		nil,
		nil,
	)

	return c
}
// Describe sends the descriptor of every metric this collector can emit to ch,
// in the order: input age, input info, flake info, revision behind.
func (c *FlakeCollector) Describe(ch chan<- *prometheus.Desc) {
	descs := []*prometheus.Desc{
		c.inputAge,
		c.inputInfo,
		c.flakeInfo,
		c.revisionBehind,
	}
	for _, d := range descs {
		ch <- d
	}
}
// Collect obtains the (possibly cached) flake metadata and emits the per-input
// metrics followed by the revision metrics on ch.
//
// If no metadata can be obtained the failure is logged and nothing is emitted
// for this scrape.
func (c *FlakeCollector) Collect(ch chan<- prometheus.Metric) {
	meta, fetchErr := c.getFlakeData()
	if fetchErr != nil {
		slog.Error("Failed to get flake data", "error", fetchErr)
		return
	}

	c.collectInputMetrics(ch, meta)
	c.collectRevisionBehind(ch, meta)
}
// getFlakeData returns the flake metadata, served from the cache while it is
// younger than checkInterval and re-fetched otherwise.
//
// When a fetch fails and earlier data exists, the stale data is returned with
// a warning instead of an error. An error is returned only when the fetch
// fails and nothing has ever been cached. lastFetch is not advanced on
// failure, so the next call attempts the fetch again.
func (c *FlakeCollector) getFlakeData() (*flakeMetadata, error) {
	// fresh reports whether the cache is populated and still within
	// checkInterval. The caller must hold c.mu (read or write).
	fresh := func() bool {
		return c.cachedData != nil && time.Since(c.lastFetch) < c.checkInterval
	}

	// First attempt under the read lock only.
	c.mu.RLock()
	if fresh() {
		cached := c.cachedData
		c.mu.RUnlock()
		return cached, nil
	}
	c.mu.RUnlock()

	c.mu.Lock()
	defer c.mu.Unlock()

	// Another caller may have refreshed the cache between releasing the read
	// lock and acquiring the write lock, so check once more.
	if fresh() {
		return c.cachedData, nil
	}

	fetched, err := fetchFlakeMetadata(c.flakeURL)
	if err == nil {
		c.cachedData, c.lastFetch, c.fetchError = fetched, time.Now(), nil
		return fetched, nil
	}

	c.fetchError = err
	if c.cachedData == nil {
		return nil, err
	}

	// Prefer stale data over no data.
	slog.Warn("Using stale flake data due to fetch error", "error", err)
	return c.cachedData, nil
}
// collectInputMetrics emits, for every lock node that carries a "locked"
// entry, an age gauge (seconds since lastModified) and an info gauge labelled
// with the node name, the revision shortened to 7 characters, and the type.
//
// The node named "root" and nodes without a "locked" entry are skipped.
func (c *FlakeCollector) collectInputMetrics(ch chan<- prometheus.Metric, data *flakeMetadata) {
	const shortRevLen = 7

	// Take the timestamp once so every input's age is relative to the same instant.
	nowUnix := time.Now().Unix()

	for input, node := range data.Locks.Nodes {
		locked := node.Locked
		if input == "root" || locked == nil {
			continue
		}

		ageSeconds := float64(nowUnix - locked.LastModified)
		ch <- prometheus.MustNewConstMetric(c.inputAge, prometheus.GaugeValue, ageSeconds, input)

		shortRev := locked.Rev
		if len(shortRev) > shortRevLen {
			shortRev = shortRev[:shortRevLen]
		}
		ch <- prometheus.MustNewConstMetric(c.inputInfo, prometheus.GaugeValue, 1, input, shortRev, locked.Type)
	}
}
// collectRevisionBehind emits the flake info gauge (labelled with the running
// system's revision and the remote revision shortened to 7 characters) and the
// revision-behind gauge.
//
// The behind gauge is 1 only when both revisions are non-empty and the current
// revision neither equals the shortened remote revision nor is a prefix of the
// full one; otherwise it is 0. If the current system revision cannot be read,
// the error is logged and neither metric is emitted.
func (c *FlakeCollector) collectRevisionBehind(ch chan<- prometheus.Metric, data *flakeMetadata) {
	current, err := getCurrentSystemRevision()
	if err != nil {
		slog.Error("Failed to get current system revision", "error", err)
		return
	}

	fullRemote := data.Revision
	shortRemote := fullRemote
	if len(shortRemote) > 7 {
		shortRemote = shortRemote[:7]
	}

	// Info metric first, then the comparison result.
	ch <- prometheus.MustNewConstMetric(c.flakeInfo, prometheus.GaugeValue, 1, current, shortRemote)

	// An unknown revision on either side is reported as "not behind".
	bothKnown := current != "" && fullRemote != ""
	sameRevision := current == shortRemote || strings.HasPrefix(fullRemote, current)

	var behind float64
	if bothKnown && !sameRevision {
		behind = 1
	}
	ch <- prometheus.MustNewConstMetric(c.revisionBehind, prometheus.GaugeValue, behind)
}
// fetchFlakeMetadata runs `nix flake metadata --json <flakeURL>` and decodes
// its standard output into a flakeMetadata value.
//
// The command is given 30 seconds to complete. The returned error is:
//   - the context error (e.g. deadline exceeded) when the timeout expired,
//   - the exit error followed by the trimmed stderr text when nix exited
//     unsuccessfully and wrote to stderr,
//   - the underlying error otherwise (for example when nix cannot be started),
//   - a parse error when the output is not valid JSON for flakeMetadata.
//
// On any error the returned pointer is nil.
func fetchFlakeMetadata(flakeURL string) (*flakeMetadata, error) {
	const fetchTimeout = 30 * time.Second

	ctx, cancel := context.WithTimeout(context.Background(), fetchTimeout)
	defer cancel()

	cmd := exec.CommandContext(ctx, "nix", "flake", "metadata", "--json", flakeURL)
	output, err := cmd.Output()
	if err != nil {
		// When the deadline fires the process is interrupted and stderr is
		// frequently empty; surface the timeout itself rather than an empty
		// message.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return nil, fmt.Errorf("nix flake metadata failed: %w", ctxErr)
		}
		// Output captures stderr into ExitError.Stderr when cmd.Stderr is nil.
		// Keep the exit error in the chain and append what nix reported.
		if exitErr, ok := err.(*exec.ExitError); ok {
			if stderr := strings.TrimSpace(string(exitErr.Stderr)); stderr != "" {
				return nil, fmt.Errorf("nix flake metadata failed: %w: %s", err, stderr)
			}
		}
		return nil, fmt.Errorf("nix flake metadata failed: %w", err)
	}

	var data flakeMetadata
	if err := json.Unmarshal(output, &data); err != nil {
		return nil, fmt.Errorf("failed to parse flake metadata: %w", err)
	}
	return &data, nil
}
// getCurrentSystemRevision reads the version string at nixosVersionPath and
// returns the git hash found by revisionPattern, shortened to at most 7
// characters.
//
// It returns an empty string and a nil error when the version string contains
// no recognisable hash, and the read error when the file cannot be read.
func getCurrentSystemRevision() (string, error) {
	const shortRevLen = 7

	raw, err := os.ReadFile(nixosVersionPath)
	if err != nil {
		return "", err
	}

	groups := revisionPattern.FindStringSubmatch(strings.TrimSpace(string(raw)))
	if groups == nil {
		return "", nil
	}

	hash := groups[1]
	if len(hash) <= shortRevLen {
		return hash, nil
	}
	return hash[:shortRevLen], nil
}