feat: implement nixos-exporter
Prometheus exporter for NixOS-specific metrics, including:
- Generation collector: count, current, booted, age, config mismatch
- Flake collector: input age, input info, revision behind

Includes NixOS module, flake packaging, and documentation.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
216
collector/flake.go
Normal file
216
collector/flake.go
Normal file
@@ -0,0 +1,216 @@
|
||||
package collector
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"log/slog"
	"os"
	"os/exec"
	"regexp"
	"strings"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)
|
||||
|
||||
// nixosVersionPath is the file read to determine the version string of the
// currently running system.
const nixosVersionPath = "/run/current-system/nixos-version"

// revisionPattern captures the trailing git hash of a nixos-version string:
// 7 to 40 lowercase hex characters at the very end of the string, preceded by
// either "." or "-".
//
// Example inputs: "25.11.20260203.e576e3c" or "1994-294a625".
var revisionPattern = regexp.MustCompile(`[.-]([a-f0-9]{7,40})$`)
|
||||
|
||||
type FlakeCollector struct {
|
||||
flakeURL string
|
||||
checkInterval time.Duration
|
||||
|
||||
inputAge *prometheus.Desc
|
||||
inputInfo *prometheus.Desc
|
||||
revisionBehind *prometheus.Desc
|
||||
|
||||
mu sync.RWMutex
|
||||
cachedData *flakeMetadata
|
||||
lastFetch time.Time
|
||||
fetchError error
|
||||
}
|
||||
|
||||
type flakeMetadata struct {
|
||||
Revision string `json:"revision"`
|
||||
Locks flakeLocks `json:"locks"`
|
||||
}
|
||||
|
||||
type flakeLocks struct {
|
||||
Nodes map[string]flakeLockNode `json:"nodes"`
|
||||
Root string `json:"root"`
|
||||
}
|
||||
|
||||
type flakeLockNode struct {
|
||||
Inputs map[string]interface{} `json:"inputs,omitempty"`
|
||||
Locked *lockedInfo `json:"locked,omitempty"`
|
||||
Original *originalInfo `json:"original,omitempty"`
|
||||
}
|
||||
|
||||
// lockedInfo is the "locked" object of a lock node: the pinned state of one
// flake input.
type lockedInfo struct {
	// LastModified is the "lastModified" value; callers treat it as a Unix
	// timestamp in seconds when computing input age.
	LastModified int64 `json:"lastModified"`
	// Rev is the "rev" value (the pinned revision identifier).
	Rev string `json:"rev"`
	// Type is the "type" value of the locked input.
	Type string `json:"type"`
}
|
||||
|
||||
// originalInfo is the "original" object of a lock node. Only its "type" key
// is decoded.
type originalInfo struct {
	// Type is the "type" value of the original input reference.
	Type string `json:"type"`
}
|
||||
|
||||
func NewFlakeCollector(flakeURL string, checkInterval time.Duration) *FlakeCollector {
|
||||
return &FlakeCollector{
|
||||
flakeURL: flakeURL,
|
||||
checkInterval: checkInterval,
|
||||
inputAge: prometheus.NewDesc(
|
||||
"nixos_flake_input_age_seconds",
|
||||
"Age of flake input in seconds",
|
||||
[]string{"input"}, nil,
|
||||
),
|
||||
inputInfo: prometheus.NewDesc(
|
||||
"nixos_flake_input_info",
|
||||
"Info gauge with revision and type labels",
|
||||
[]string{"input", "rev", "type"}, nil,
|
||||
),
|
||||
revisionBehind: prometheus.NewDesc(
|
||||
"nixos_flake_revision_behind",
|
||||
"1 if current system revision differs from remote latest, 0 if match",
|
||||
nil, nil,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
func (c *FlakeCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||
ch <- c.inputAge
|
||||
ch <- c.inputInfo
|
||||
ch <- c.revisionBehind
|
||||
}
|
||||
|
||||
func (c *FlakeCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
data, err := c.getFlakeData()
|
||||
if err != nil {
|
||||
slog.Error("Failed to get flake data", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
c.collectInputMetrics(ch, data)
|
||||
c.collectRevisionBehind(ch, data)
|
||||
}
|
||||
|
||||
func (c *FlakeCollector) getFlakeData() (*flakeMetadata, error) {
|
||||
c.mu.RLock()
|
||||
if c.cachedData != nil && time.Since(c.lastFetch) < c.checkInterval {
|
||||
data := c.cachedData
|
||||
c.mu.RUnlock()
|
||||
return data, nil
|
||||
}
|
||||
c.mu.RUnlock()
|
||||
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
// Double-check after acquiring write lock
|
||||
if c.cachedData != nil && time.Since(c.lastFetch) < c.checkInterval {
|
||||
return c.cachedData, nil
|
||||
}
|
||||
|
||||
data, err := fetchFlakeMetadata(c.flakeURL)
|
||||
if err != nil {
|
||||
c.fetchError = err
|
||||
// Return cached data if available, even if stale
|
||||
if c.cachedData != nil {
|
||||
slog.Warn("Using stale flake data due to fetch error", "error", err)
|
||||
return c.cachedData, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
c.cachedData = data
|
||||
c.lastFetch = time.Now()
|
||||
c.fetchError = nil
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func (c *FlakeCollector) collectInputMetrics(ch chan<- prometheus.Metric, data *flakeMetadata) {
|
||||
now := time.Now().Unix()
|
||||
|
||||
for name, node := range data.Locks.Nodes {
|
||||
// Skip the root node
|
||||
if name == "root" {
|
||||
continue
|
||||
}
|
||||
|
||||
if node.Locked == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Input age
|
||||
age := float64(now - node.Locked.LastModified)
|
||||
ch <- prometheus.MustNewConstMetric(c.inputAge, prometheus.GaugeValue, age, name)
|
||||
|
||||
// Input info
|
||||
rev := node.Locked.Rev
|
||||
if len(rev) > 7 {
|
||||
rev = rev[:7]
|
||||
}
|
||||
inputType := node.Locked.Type
|
||||
ch <- prometheus.MustNewConstMetric(c.inputInfo, prometheus.GaugeValue, 1, name, rev, inputType)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *FlakeCollector) collectRevisionBehind(ch chan<- prometheus.Metric, data *flakeMetadata) {
|
||||
currentRev, err := getCurrentSystemRevision()
|
||||
if err != nil {
|
||||
slog.Error("Failed to get current system revision", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
behind := 0.0
|
||||
if currentRev != "" && data.Revision != "" {
|
||||
// Compare short hashes
|
||||
remoteShort := data.Revision
|
||||
if len(remoteShort) > 7 {
|
||||
remoteShort = remoteShort[:7]
|
||||
}
|
||||
if currentRev != remoteShort && !strings.HasPrefix(data.Revision, currentRev) {
|
||||
behind = 1.0
|
||||
}
|
||||
}
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(c.revisionBehind, prometheus.GaugeValue, behind)
|
||||
}
|
||||
|
||||
func fetchFlakeMetadata(flakeURL string) (*flakeMetadata, error) {
|
||||
cmd := exec.Command("nix", "flake", "metadata", "--json", flakeURL)
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var data flakeMetadata
|
||||
if err := json.Unmarshal(output, &data); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &data, nil
|
||||
}
|
||||
|
||||
func getCurrentSystemRevision() (string, error) {
|
||||
data, err := os.ReadFile(nixosVersionPath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
version := strings.TrimSpace(string(data))
|
||||
matches := revisionPattern.FindStringSubmatch(version)
|
||||
if matches == nil {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
rev := matches[1]
|
||||
if len(rev) > 7 {
|
||||
rev = rev[:7]
|
||||
}
|
||||
return rev, nil
|
||||
}
|
||||
Reference in New Issue
Block a user