feat: implement nixos-exporter

Prometheus exporter for NixOS-specific metrics including:
- Generation collector: count, current, booted, age, config mismatch
- Flake collector: input age, input info, revision behind

Includes NixOS module, flake packaging, and documentation.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-06 22:50:14 +01:00
commit f637da487c
14 changed files with 1345 additions and 0 deletions

216
collector/flake.go Normal file
View File

@@ -0,0 +1,216 @@
package collector
import (
	"encoding/json"
	"errors"
	"fmt"
	"log/slog"
	"os"
	"os/exec"
	"regexp"
	"strings"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)
// nixosVersionPath is the file the running system's version string is read from.
const nixosVersionPath = "/run/current-system/nixos-version"

// revisionPattern captures the trailing git hash (7 to 40 lowercase hex
// characters preceded by '.' or '-') of a nixos-version string.
// Examples: "25.11.20260203.e576e3c" or "1994-294a625".
var revisionPattern = regexp.MustCompile(`[.-]([a-f0-9]{7,40})$`)
// FlakeCollector is a Prometheus collector exposing flake input metrics and
// a "revision behind" gauge. Flake metadata is cached between scrapes.
type FlakeCollector struct {
	// Configuration, set once by NewFlakeCollector.
	flakeURL      string
	checkInterval time.Duration

	// Metric descriptors.
	inputAge, inputInfo, revisionBehind *prometheus.Desc

	// mu guards the cache fields below.
	mu         sync.RWMutex
	cachedData *flakeMetadata // last successfully fetched metadata, nil until the first success
	lastFetch  time.Time      // time of the last successful fetch
	fetchError error          // error of the most recent fetch attempt, nil after a success
}
// The types below mirror the subset of the `nix flake metadata --json` output
// that the collector decodes.
type (
	// flakeMetadata is the top-level JSON document.
	flakeMetadata struct {
		Revision string     `json:"revision"`
		Locks    flakeLocks `json:"locks"`
	}

	// flakeLocks holds the lock nodes keyed by name plus the name of the root node.
	flakeLocks struct {
		Nodes map[string]flakeLockNode `json:"nodes"`
		Root  string                   `json:"root"`
	}

	// flakeLockNode is a single entry of the "nodes" object. Locked and
	// Original stay nil when the corresponding key is absent.
	flakeLockNode struct {
		Inputs   map[string]any `json:"inputs,omitempty"`
		Locked   *lockedInfo    `json:"locked,omitempty"`
		Original *originalInfo  `json:"original,omitempty"`
	}

	// lockedInfo is the "locked" object of a node.
	lockedInfo struct {
		LastModified int64  `json:"lastModified"`
		Rev          string `json:"rev"`
		Type         string `json:"type"`
	}

	// originalInfo is the "original" object of a node.
	originalInfo struct {
		Type string `json:"type"`
	}
)
// NewFlakeCollector returns a FlakeCollector for flakeURL. Cached flake
// metadata is considered fresh for checkInterval after a successful fetch.
func NewFlakeCollector(flakeURL string, checkInterval time.Duration) *FlakeCollector {
	collector := &FlakeCollector{
		flakeURL:      flakeURL,
		checkInterval: checkInterval,
	}

	collector.inputAge = prometheus.NewDesc(
		"nixos_flake_input_age_seconds",
		"Age of flake input in seconds",
		[]string{"input"}, nil,
	)
	collector.inputInfo = prometheus.NewDesc(
		"nixos_flake_input_info",
		"Info gauge with revision and type labels",
		[]string{"input", "rev", "type"}, nil,
	)
	collector.revisionBehind = prometheus.NewDesc(
		"nixos_flake_revision_behind",
		"1 if current system revision differs from remote latest, 0 if match",
		nil, nil,
	)

	return collector
}
// Describe sends the descriptors of all metrics this collector can emit.
func (c *FlakeCollector) Describe(ch chan<- *prometheus.Desc) {
	descs := []*prometheus.Desc{c.inputAge, c.inputInfo, c.revisionBehind}
	for _, desc := range descs {
		ch <- desc
	}
}
// Collect obtains the (possibly cached) flake metadata and emits the input
// and revision metrics. If no metadata is available the error is logged and
// nothing is emitted.
func (c *FlakeCollector) Collect(ch chan<- prometheus.Metric) {
	metadata, err := c.getFlakeData()
	if err != nil {
		slog.Error("Failed to get flake data", "error", err)
		return
	}

	c.collectInputMetrics(ch, metadata)
	c.collectRevisionBehind(ch, metadata)
}
// getFlakeData returns the cached flake metadata while it is younger than
// checkInterval and otherwise fetches it again. When a fetch fails but older
// data exists, the stale data is returned with a nil error; an error is only
// returned when there is nothing cached at all.
func (c *FlakeCollector) getFlakeData() (*flakeMetadata, error) {
	// isFresh must be called with c.mu held (for reading or writing).
	isFresh := func() bool {
		return c.cachedData != nil && time.Since(c.lastFetch) < c.checkInterval
	}

	// Fast path: shared lock only.
	c.mu.RLock()
	if isFresh() {
		cached := c.cachedData
		c.mu.RUnlock()
		return cached, nil
	}
	c.mu.RUnlock()

	c.mu.Lock()
	defer c.mu.Unlock()

	// Another goroutine may have refreshed the cache while we waited for the
	// write lock.
	if isFresh() {
		return c.cachedData, nil
	}

	fetched, err := fetchFlakeMetadata(c.flakeURL)
	switch {
	case err == nil:
		c.cachedData = fetched
		c.lastFetch = time.Now()
		c.fetchError = nil
		return fetched, nil
	case c.cachedData != nil:
		// Prefer stale data over no data.
		c.fetchError = err
		slog.Warn("Using stale flake data due to fetch error", "error", err)
		return c.cachedData, nil
	default:
		c.fetchError = err
		return nil, err
	}
}
// collectInputMetrics emits, for every lock node that has "locked" info and
// is not named "root", an age gauge (now minus lastModified, in seconds) and
// an info gauge labelled with the revision (cut to 7 characters) and type.
func (c *FlakeCollector) collectInputMetrics(ch chan<- prometheus.Metric, data *flakeMetadata) {
	nowUnix := time.Now().Unix()

	for input, node := range data.Locks.Nodes {
		locked := node.Locked
		if input == "root" || locked == nil {
			continue
		}

		ageSeconds := float64(nowUnix - locked.LastModified)
		ch <- prometheus.MustNewConstMetric(c.inputAge, prometheus.GaugeValue, ageSeconds, input)

		shortRev := locked.Rev
		if len(shortRev) > 7 {
			shortRev = shortRev[:7]
		}
		ch <- prometheus.MustNewConstMetric(c.inputInfo, prometheus.GaugeValue, 1, input, shortRev, locked.Type)
	}
}
// collectRevisionBehind emits 1 when the revision read from the running
// system differs from the flake's revision, and 0 otherwise. It also emits 0
// when either revision is empty. Nothing is emitted if the system revision
// cannot be read.
func (c *FlakeCollector) collectRevisionBehind(ch chan<- prometheus.Metric, data *flakeMetadata) {
	current, err := getCurrentSystemRevision()
	if err != nil {
		slog.Error("Failed to get current system revision", "error", err)
		return
	}

	remote := data.Revision
	var behind float64
	if current != "" && remote != "" {
		// The system revision is a short hash, so compare against the
		// shortened remote revision (or a prefix of the full one).
		shortRemote := remote
		if len(shortRemote) > 7 {
			shortRemote = shortRemote[:7]
		}
		same := current == shortRemote || strings.HasPrefix(remote, current)
		if !same {
			behind = 1.0
		}
	}

	ch <- prometheus.MustNewConstMetric(c.revisionBehind, prometheus.GaugeValue, behind)
}
// fetchFlakeMetadata runs `nix flake metadata --json <flakeURL>` and decodes
// its standard output into a flakeMetadata value.
//
// Every error is wrapped with the flake URL. When nix exits unsuccessfully,
// the text it wrote to stderr is appended, because the bare *exec.ExitError
// message ("exit status 1") does not say what went wrong. The original error
// stays reachable through errors.Is / errors.As.
//
// NOTE(review): the command runs without a timeout; consider
// exec.CommandContext if a hung nix process must not stall the caller.
func fetchFlakeMetadata(flakeURL string) (*flakeMetadata, error) {
	cmd := exec.Command("nix", "flake", "metadata", "--json", flakeURL)
	output, err := cmd.Output()
	if err != nil {
		// Output stores the command's stderr in ExitError.Stderr when
		// cmd.Stderr is nil, which is the case here.
		var exitErr *exec.ExitError
		if errors.As(err, &exitErr) {
			if stderr := strings.TrimSpace(string(exitErr.Stderr)); stderr != "" {
				return nil, fmt.Errorf("nix flake metadata %q: %w: %s", flakeURL, err, stderr)
			}
		}
		return nil, fmt.Errorf("nix flake metadata %q: %w", flakeURL, err)
	}

	var data flakeMetadata
	if err := json.Unmarshal(output, &data); err != nil {
		return nil, fmt.Errorf("decoding nix flake metadata for %q: %w", flakeURL, err)
	}
	return &data, nil
}
// getCurrentSystemRevision reads nixosVersionPath and returns the trailing
// git hash of the version string, cut to at most 7 characters. It returns an
// empty string and a nil error when the version carries no recognizable hash,
// and an error only when the file cannot be read.
func getCurrentSystemRevision() (string, error) {
	raw, err := os.ReadFile(nixosVersionPath)
	if err != nil {
		return "", err
	}

	m := revisionPattern.FindStringSubmatch(strings.TrimSpace(string(raw)))
	if m == nil {
		return "", nil
	}

	hash := m[1]
	if len(hash) > 7 {
		hash = hash[:7]
	}
	return hash, nil
}

151
collector/flake_test.go Normal file
View File

@@ -0,0 +1,151 @@
package collector
import (
"encoding/json"
"os"
"path/filepath"
"testing"
)
// TestRevisionPattern checks which hash, if any, revisionPattern captures
// from a range of version strings.
func TestRevisionPattern(t *testing.T) {
	type testCase struct {
		version string
		wantRev string
	}
	cases := []testCase{
		{"25.11.20260203.e576e3c", "e576e3c"},
		{"1994-294a625", "294a625"},
		{"25.05.20250101.abcdef1234567890", "abcdef1234567890"},
		{"no-revision-here", ""},
		{"", ""},
	}

	for _, tc := range cases {
		t.Run(tc.version, func(t *testing.T) {
			got := ""
			if m := revisionPattern.FindStringSubmatch(tc.version); m != nil {
				got = m[1]
			}
			if got != tc.wantRev {
				t.Errorf("revisionPattern.FindStringSubmatch(%q) = %q, want %q", tc.version, got, tc.wantRev)
			}
		})
	}
}
// TestGetCurrentSystemRevision calls getCurrentSystemRevision against the
// real system file. It is skipped when that file does not exist.
func TestGetCurrentSystemRevision(t *testing.T) {
	_, statErr := os.Stat(nixosVersionPath)
	if os.IsNotExist(statErr) {
		t.Skip("not running on NixOS")
	}

	revision, err := getCurrentSystemRevision()
	if err != nil {
		t.Fatal(err)
	}

	// No fixed expectation: the value depends on the host.
	t.Logf("current system revision: %s", revision)
}
// TestGetCurrentSystemRevisionFromFile replays the extraction steps of
// getCurrentSystemRevision (strip trailing newline, match, cut to 7 chars)
// on sample file contents. The production function itself is not called
// because its path is a constant.
func TestGetCurrentSystemRevisionFromFile(t *testing.T) {
	// Stand-in for /run/current-system/nixos-version.
	versionPath := filepath.Join(t.TempDir(), "nixos-version")

	cases := []struct {
		content string
		wantRev string
	}{
		{"25.11.20260203.e576e3c\n", "e576e3c"},
		{"1994-294a625\n", "294a625"},
		{"25.05.20250101.abcdef1234567890\n", "abcdef1"},
		{"no-hash", ""},
	}

	for _, tc := range cases {
		t.Run(tc.content, func(t *testing.T) {
			if err := os.WriteFile(versionPath, []byte(tc.content), 0644); err != nil {
				t.Fatal(err)
			}

			// NOTE(review): the file written above is never read back; the
			// extraction below works on tc.content directly.
			version := tc.content
			if n := len(version); n > 0 && version[n-1] == '\n' {
				version = version[:n-1]
			}

			var rev string
			if m := revisionPattern.FindStringSubmatch(version); m != nil {
				rev = m[1]
				if len(rev) > 7 {
					rev = rev[:7]
				}
			}

			if rev != tc.wantRev {
				t.Errorf("got revision %q, want %q", rev, tc.wantRev)
			}
		})
	}
}
// TestFlakeLocksUnmarshal decodes a sample flake metadata document and checks
// the revision, the node count and the locked info of the nixpkgs node.
func TestFlakeLocksUnmarshal(t *testing.T) {
	const sample = `{
"revision": "abc1234567890",
"locks": {
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1700000000,
"rev": "def4567890123",
"type": "github"
}
},
"home-manager": {
"locked": {
"lastModified": 1699000000,
"rev": "ghi7890123456",
"type": "github"
}
},
"root": {
"inputs": {
"nixpkgs": "nixpkgs",
"home-manager": "home-manager"
}
}
},
"root": "root"
}
}`

	var decoded flakeMetadata
	if err := json.Unmarshal([]byte(sample), &decoded); err != nil {
		t.Fatal(err)
	}

	if decoded.Revision != "abc1234567890" {
		t.Errorf("expected revision abc1234567890, got %s", decoded.Revision)
	}
	if n := len(decoded.Locks.Nodes); n != 3 {
		t.Errorf("expected 3 nodes, got %d", n)
	}

	locked := decoded.Locks.Nodes["nixpkgs"].Locked
	if locked == nil {
		t.Fatal("expected nixpkgs to have locked info")
	}
	if locked.LastModified != 1700000000 {
		t.Errorf("expected lastModified 1700000000, got %d", locked.LastModified)
	}
	if locked.Rev != "def4567890123" {
		t.Errorf("expected rev def4567890123, got %s", locked.Rev)
	}
	if locked.Type != "github" {
		t.Errorf("expected type github, got %s", locked.Type)
	}
}

218
collector/generation.go Normal file
View File

@@ -0,0 +1,218 @@
package collector
import (
"log/slog"
"os"
"path/filepath"
"regexp"
"strconv"
"time"
"github.com/prometheus/client_golang/prometheus"
)
// Filesystem locations inspected by the generation collector.
const (
	// profileDir contains the "system" link and the "system-N-link" generation links.
	profileDir = "/nix/var/nix/profiles"
	// currentSystemDir is the link to the currently active system.
	currentSystemDir = "/run/current-system"
	// bootedSystemDir is the link to the system that was booted.
	bootedSystemDir = "/run/booted-system"
)

// generationPattern matches generation link names such as "system-42-link"
// and captures the generation number.
var generationPattern = regexp.MustCompile(`^system-(\d+)-link$`)
// GenerationCollector is a Prometheus collector for NixOS system generation
// metrics. It holds one descriptor per exported metric.
type GenerationCollector struct {
	generationCount *prometheus.Desc // nixos_generation_count
	currentGen      *prometheus.Desc // nixos_current_generation
	bootedGen       *prometheus.Desc // nixos_booted_generation
	generationAge   *prometheus.Desc // nixos_generation_age_seconds
	configMismatch  *prometheus.Desc // nixos_config_mismatch
}
// NewGenerationCollector returns a GenerationCollector with all metric
// descriptors initialised. None of the metrics carry labels.
func NewGenerationCollector() *GenerationCollector {
	// unlabelled builds a descriptor without variable or constant labels.
	unlabelled := func(name, help string) *prometheus.Desc {
		return prometheus.NewDesc(name, help, nil, nil)
	}

	return &GenerationCollector{
		generationCount: unlabelled("nixos_generation_count", "Total number of system generations"),
		currentGen:      unlabelled("nixos_current_generation", "Currently active generation number"),
		bootedGen:       unlabelled("nixos_booted_generation", "Generation that was booted"),
		generationAge:   unlabelled("nixos_generation_age_seconds", "Age of current generation in seconds"),
		configMismatch:  unlabelled("nixos_config_mismatch", "1 if booted generation differs from current"),
	}
}
// Describe sends the descriptors of all metrics this collector can emit.
func (c *GenerationCollector) Describe(ch chan<- *prometheus.Desc) {
	descs := []*prometheus.Desc{
		c.generationCount,
		c.currentGen,
		c.bootedGen,
		c.generationAge,
		c.configMismatch,
	}
	for _, desc := range descs {
		ch <- desc
	}
}
// Collect runs every per-metric collection step in a fixed order. Each step
// logs its own failure and simply omits its metric, so one failing step does
// not prevent the others from reporting.
func (c *GenerationCollector) Collect(ch chan<- prometheus.Metric) {
	steps := []func(chan<- prometheus.Metric){
		c.collectGenerationCount,
		c.collectCurrentGeneration,
		c.collectBootedGeneration,
		c.collectGenerationAge,
		c.collectConfigMismatch,
	}
	for _, step := range steps {
		step(ch)
	}
}
// collectGenerationCount emits the number of generation links found in
// profileDir. On failure the error is logged and no metric is sent.
func (c *GenerationCollector) collectGenerationCount(ch chan<- prometheus.Metric) {
	total, err := countGenerations(profileDir)
	if err != nil {
		slog.Error("Failed to count generations", "error", err)
		return
	}

	metric := prometheus.MustNewConstMetric(c.generationCount, prometheus.GaugeValue, float64(total))
	ch <- metric
}
// collectCurrentGeneration emits the generation number the "system" profile
// link points at. On failure the error is logged and no metric is sent.
func (c *GenerationCollector) collectCurrentGeneration(ch chan<- prometheus.Metric) {
	current, err := getCurrentGeneration(profileDir)
	if err != nil {
		slog.Error("Failed to get current generation", "error", err)
		return
	}

	metric := prometheus.MustNewConstMetric(c.currentGen, prometheus.GaugeValue, float64(current))
	ch <- metric
}
// collectBootedGeneration emits the generation number whose link target
// equals the booted system's target. On failure the error is logged and no
// metric is sent.
func (c *GenerationCollector) collectBootedGeneration(ch chan<- prometheus.Metric) {
	booted, err := getBootedGeneration(profileDir, bootedSystemDir)
	if err != nil {
		slog.Error("Failed to get booted generation", "error", err)
		return
	}

	metric := prometheus.MustNewConstMetric(c.bootedGen, prometheus.GaugeValue, float64(booted))
	ch <- metric
}
// collectGenerationAge emits the age, in seconds, of the "system" profile
// link. On failure the error is logged and no metric is sent.
func (c *GenerationCollector) collectGenerationAge(ch chan<- prometheus.Metric) {
	seconds, err := getGenerationAge(profileDir)
	if err != nil {
		slog.Error("Failed to get generation age", "error", err)
		return
	}

	metric := prometheus.MustNewConstMetric(c.generationAge, prometheus.GaugeValue, seconds)
	ch <- metric
}
// collectConfigMismatch emits 1 when the current and booted system links
// point at different targets and 0 when they agree. On failure the error is
// logged and no metric is sent.
func (c *GenerationCollector) collectConfigMismatch(ch chan<- prometheus.Metric) {
	differs, err := checkConfigMismatch(currentSystemDir, bootedSystemDir)
	if err != nil {
		slog.Error("Failed to check config mismatch", "error", err)
		return
	}

	var gauge float64
	if differs {
		gauge = 1.0
	}
	ch <- prometheus.MustNewConstMetric(c.configMismatch, prometheus.GaugeValue, gauge)
}
// countGenerations returns how many entries of profileDir have a name of the
// form system-<number>-link. It fails only if the directory cannot be read.
func countGenerations(profileDir string) (int, error) {
	entries, err := os.ReadDir(profileDir)
	if err != nil {
		return 0, err
	}

	var total int
	for i := range entries {
		if !generationPattern.MatchString(entries[i].Name()) {
			continue
		}
		total++
	}
	return total, nil
}
// getCurrentGeneration reads the "system" symlink inside profileDir and
// extracts the generation number from the base name of its target (for
// example "system-123-link"). If the target name does not look like a
// generation link it returns 0 with a nil error.
func getCurrentGeneration(profileDir string) (int, error) {
	target, err := os.Readlink(filepath.Join(profileDir, "system"))
	if err != nil {
		return 0, err
	}

	// Only the final path element carries the generation number.
	m := generationPattern.FindStringSubmatch(filepath.Base(target))
	if m == nil {
		return 0, nil
	}
	return strconv.Atoi(m[1])
}
// getBootedGeneration returns the number of the first generation link in
// profileDir whose symlink target is identical to the target of
// bootedSystemDir. Links that cannot be read are skipped. If no generation
// matches it returns 0 with a nil error.
func getBootedGeneration(profileDir, bootedSystemDir string) (int, error) {
	bootedTarget, err := os.Readlink(bootedSystemDir)
	if err != nil {
		return 0, err
	}

	entries, err := os.ReadDir(profileDir)
	if err != nil {
		return 0, err
	}

	for _, entry := range entries {
		name := entry.Name()
		m := generationPattern.FindStringSubmatch(name)
		if m == nil {
			continue
		}

		target, err := os.Readlink(filepath.Join(profileDir, name))
		if err != nil || target != bootedTarget {
			continue
		}
		return strconv.Atoi(m[1])
	}

	return 0, nil
}
// getGenerationAge returns the number of seconds elapsed since the
// modification time of the "system" link in profileDir. Lstat is used, so the
// timestamp is that of the link itself rather than of its target.
func getGenerationAge(profileDir string) (float64, error) {
	info, err := os.Lstat(filepath.Join(profileDir, "system"))
	if err != nil {
		return 0, err
	}
	return time.Since(info.ModTime()).Seconds(), nil
}
// checkConfigMismatch reports whether the symlinks currentSystemDir and
// bootedSystemDir point at different targets. The current link is read first;
// the first read error is returned together with false.
func checkConfigMismatch(currentSystemDir, bootedSystemDir string) (bool, error) {
	var targets [2]string
	for i, link := range [...]string{currentSystemDir, bootedSystemDir} {
		target, err := os.Readlink(link)
		if err != nil {
			return false, err
		}
		targets[i] = target
	}
	return targets[0] != targets[1], nil
}

View File

@@ -0,0 +1,178 @@
package collector
import (
"os"
"path/filepath"
"testing"
)
// TestCountGenerations verifies that only system-<number>-link entries are
// counted.
func TestCountGenerations(t *testing.T) {
	dir := t.TempDir()

	names := []string{
		"system-1-link",
		"system-2-link",
		"system-10-link",
		"system",        // current system link, should not be counted
		"other-file",    // unrelated file
		"system-x-link", // malformed, should not be counted
	}
	for _, name := range names {
		if err := os.Symlink("/nix/store/dummy", filepath.Join(dir, name)); err != nil {
			t.Fatal(err)
		}
	}

	got, err := countGenerations(dir)
	if err != nil {
		t.Fatal(err)
	}
	if got != 3 {
		t.Errorf("expected 3 generations, got %d", got)
	}
}
// TestGetCurrentGeneration verifies that the generation number is parsed
// from the target of the "system" link.
func TestGetCurrentGeneration(t *testing.T) {
	dir := t.TempDir()

	systemLink := filepath.Join(dir, "system")
	if err := os.Symlink("system-42-link", systemLink); err != nil {
		t.Fatal(err)
	}

	got, err := getCurrentGeneration(dir)
	if err != nil {
		t.Fatal(err)
	}
	if got != 42 {
		t.Errorf("expected generation 42, got %d", got)
	}
}
// TestGetBootedGeneration verifies that the generation whose link target
// equals the booted-system target is reported.
func TestGetBootedGeneration(t *testing.T) {
	profileDir := t.TempDir()
	bootedDir := t.TempDir()
	storePath := "/nix/store/abc123-nixos-system"

	// Three generations; only generation 2 points at the booted store path.
	links := []struct{ name, target string }{
		{"system-1-link", "/nix/store/other"},
		{"system-2-link", storePath},
		{"system-3-link", "/nix/store/another"},
	}
	for _, link := range links {
		if err := os.Symlink(link.target, filepath.Join(profileDir, link.name)); err != nil {
			t.Fatal(err)
		}
	}

	bootedSystemPath := filepath.Join(bootedDir, "booted-system")
	if err := os.Symlink(storePath, bootedSystemPath); err != nil {
		t.Fatal(err)
	}

	got, err := getBootedGeneration(profileDir, bootedSystemPath)
	if err != nil {
		t.Fatal(err)
	}
	if got != 2 {
		t.Errorf("expected booted generation 2, got %d", got)
	}
}
// TestCheckConfigMismatch covers both outcomes: identical link targets (no
// mismatch) and different link targets (mismatch).
func TestCheckConfigMismatch(t *testing.T) {
	dir := t.TempDir()
	currentPath := filepath.Join(dir, "current-system")
	bootedPath := filepath.Join(dir, "booted-system")

	// Both links point at the same target.
	for _, link := range []string{currentPath, bootedPath} {
		if err := os.Symlink("/nix/store/same", link); err != nil {
			t.Fatal(err)
		}
	}

	got, err := checkConfigMismatch(currentPath, bootedPath)
	if err != nil {
		t.Fatal(err)
	}
	if got {
		t.Error("expected no mismatch when targets are the same")
	}

	// Re-point the current link at a different target.
	if err := os.Remove(currentPath); err != nil {
		t.Fatal(err)
	}
	if err := os.Symlink("/nix/store/different", currentPath); err != nil {
		t.Fatal(err)
	}

	got, err = checkConfigMismatch(currentPath, bootedPath)
	if err != nil {
		t.Fatal(err)
	}
	if !got {
		t.Error("expected mismatch when targets differ")
	}
}
// TestGetGenerationAge verifies that a freshly created "system" link has an
// age between 0 and 1 second.
func TestGetGenerationAge(t *testing.T) {
	dir := t.TempDir()

	systemLink := filepath.Join(dir, "system")
	if err := os.Symlink("system-1-link", systemLink); err != nil {
		t.Fatal(err)
	}

	seconds, err := getGenerationAge(dir)
	if err != nil {
		t.Fatal(err)
	}

	// The link was created just now, so its age must be tiny.
	if seconds < 0 || seconds > 1 {
		t.Errorf("expected age close to 0, got %f", seconds)
	}
}
// TestGenerationPattern checks which names generationPattern accepts and
// which generation number it captures.
func TestGenerationPattern(t *testing.T) {
	cases := []struct {
		name   string
		match  bool
		genNum string
	}{
		{"system-1-link", true, "1"},
		{"system-42-link", true, "42"},
		{"system-123-link", true, "123"},
		{"system", false, ""},
		{"system-link", false, ""},
		{"system--link", false, ""},
		{"system-abc-link", false, ""},
		{"other-1-link", false, ""},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			m := generationPattern.FindStringSubmatch(tc.name)
			switch {
			case tc.match && m == nil:
				t.Errorf("expected %q to match", tc.name)
			case tc.match && m[1] != tc.genNum:
				t.Errorf("expected generation %q, got %q", tc.genNum, m[1])
			case !tc.match && m != nil:
				t.Errorf("expected %q not to match", tc.name)
			}
		})
	}
}