Files
nixos-servers/lib/monitoring.nix
Torjus Håkestad 813c5c0f29
Some checks failed
Run nix flake check / flake-check (push) Failing after 3m7s
monitoring: separate node-exporter-only external targets
Add nodeExporterOnly list to external-targets.nix for hosts that
have node-exporter but not systemd-exporter (e.g. pve1). This
prevents a down target in the systemd-exporter scrape job.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-03 19:17:39 +01:00

229 lines
7.9 KiB
Nix

{ lib }:
let
# Extract IP address from CIDR notation (e.g., "10.69.13.5/24" -> "10.69.13.5")
extractIP = address:
let
parts = lib.splitString "/" address;
in
builtins.head parts;
# Check if a network interface name looks like a VPN/tunnel interface
isVpnInterface = ifaceName:
lib.hasPrefix "wg" ifaceName ||
lib.hasPrefix "tun" ifaceName ||
lib.hasPrefix "tap" ifaceName ||
lib.hasPrefix "vti" ifaceName;
# Extract monitoring info from a single host configuration
# Returns null if host should not be included
extractHostMonitoring = name: hostConfig:
let
cfg = hostConfig.config;
monConfig = (cfg.homelab or { }).monitoring or { enable = true; scrapeTargets = [ ]; };
dnsConfig = (cfg.homelab or { }).dns or { enable = true; };
hostConfig' = (cfg.homelab or { }).host or { };
hostname = cfg.networking.hostName;
networks = cfg.systemd.network.networks or { };
# Filter out VPN interfaces and find networks with static addresses
physicalNetworks = lib.filterAttrs
(netName: netCfg:
let
ifaceName = netCfg.matchConfig.Name or "";
in
!(isVpnInterface ifaceName) && (netCfg.address or [ ]) != [ ])
networks;
# Get addresses from physical networks only
networkAddresses = lib.flatten (
lib.mapAttrsToList
(netName: netCfg: netCfg.address or [ ])
physicalNetworks
);
firstAddress = if networkAddresses != [ ] then builtins.head networkAddresses else null;
in
if !(monConfig.enable or true) || !(dnsConfig.enable or true) || firstAddress == null then
null
else
{
inherit hostname;
ip = extractIP firstAddress;
scrapeTargets = monConfig.scrapeTargets or [ ];
# Host metadata for label propagation
tier = hostConfig'.tier or "prod";
priority = hostConfig'.priority or "high";
role = hostConfig'.role or null;
labels = hostConfig'.labels or { };
};
# Build effective labels for a host
# Always includes hostname and tier; only includes priority/role if non-default
buildEffectiveLabels = host:
{ hostname = host.hostname; tier = host.tier; }
// (lib.optionalAttrs (host.priority != "high") { priority = host.priority; })
// (lib.optionalAttrs (host.role != null) { role = host.role; })
// host.labels;
# Generate node-exporter targets from all flake hosts
# Returns a list of static_configs entries with labels
generateNodeExporterTargets = self: externalTargets:
let
nixosConfigs = self.nixosConfigurations or { };
hostList = lib.filter (x: x != null) (
lib.mapAttrsToList extractHostMonitoring nixosConfigs
);
# Extract hostname from a target string like "gunter.home.2rjus.net:9100"
extractHostnameFromTarget = target:
builtins.head (lib.splitString "." target);
# Build target entries with labels for each host
flakeEntries = map
(host: {
target = "${host.hostname}.home.2rjus.net:9100";
labels = buildEffectiveLabels host;
})
hostList;
# External targets get hostname extracted from the target string
externalEntries = map
(target: {
inherit target;
labels = { hostname = extractHostnameFromTarget target; };
})
(externalTargets.nodeExporter or [ ]);
# Node-exporter-only external targets (no systemd-exporter)
externalOnlyEntries = map
(target: {
inherit target;
labels = { hostname = extractHostnameFromTarget target; };
})
(externalTargets.nodeExporterOnly or [ ]);
allEntries = flakeEntries ++ externalEntries ++ externalOnlyEntries;
# Group entries by their label set for efficient static_configs
# Convert labels attrset to a string key for grouping
labelKey = entry: builtins.toJSON entry.labels;
grouped = lib.groupBy labelKey allEntries;
# Convert groups to static_configs format
# Every flake host now has at least a hostname label
staticConfigs = lib.mapAttrsToList
(key: entries:
let
labels = (builtins.head entries).labels;
in
{ targets = map (e: e.target) entries; labels = labels; }
)
grouped;
in
staticConfigs;
# Generate scrape configs from all flake hosts and external targets
# Host labels are propagated to service targets for semantic alert filtering
generateScrapeConfigs = self: externalTargets:
let
nixosConfigs = self.nixosConfigurations or { };
hostList = lib.filter (x: x != null) (
lib.mapAttrsToList extractHostMonitoring nixosConfigs
);
# Collect all scrapeTargets from all hosts, including host labels
allTargets = lib.flatten (map
(host:
map
(target: {
inherit (target) job_name port metrics_path scheme scrape_interval honor_labels;
hostname = host.hostname;
hostLabels = buildEffectiveLabels host;
})
host.scrapeTargets
)
hostList
);
# Group targets by job_name
grouped = lib.groupBy (t: t.job_name) allTargets;
# Generate a scrape config for each job
# Within each job, group targets by their host labels for efficient static_configs
flakeScrapeConfigs = lib.mapAttrsToList
(jobName: targets:
let
first = builtins.head targets;
# Group targets within this job by their host labels
labelKey = t: builtins.toJSON t.hostLabels;
groupedByLabels = lib.groupBy labelKey targets;
# Every flake host now has at least a hostname label
staticConfigs = lib.mapAttrsToList
(key: labelTargets:
let
labels = (builtins.head labelTargets).hostLabels;
targetAddrs = map
(t: "${t.hostname}.home.2rjus.net:${toString t.port}")
labelTargets;
in
{ targets = targetAddrs; labels = labels; }
)
groupedByLabels;
config = {
job_name = jobName;
static_configs = staticConfigs;
}
// (lib.optionalAttrs (first.metrics_path != "/metrics") {
metrics_path = first.metrics_path;
})
// (lib.optionalAttrs (first.scheme != "http") {
scheme = first.scheme;
})
// (lib.optionalAttrs (first.scrape_interval != null) {
scrape_interval = first.scrape_interval;
})
// (lib.optionalAttrs first.honor_labels {
honor_labels = true;
});
in
config
)
grouped;
# External scrape configs
externalScrapeConfigs = map
(ext: {
job_name = ext.job_name;
static_configs = [{
targets = ext.targets;
}];
} // (lib.optionalAttrs (ext ? metrics_path) {
metrics_path = ext.metrics_path;
}) // (lib.optionalAttrs (ext ? scheme) {
scheme = ext.scheme;
}) // (lib.optionalAttrs (ext ? scrape_interval) {
scrape_interval = ext.scrape_interval;
}))
(externalTargets.scrapeConfigs or [ ]);
in
flakeScrapeConfigs ++ externalScrapeConfigs;
# Generate systemd-exporter targets (excludes nodeExporterOnly hosts)
generateSystemdExporterTargets = self: externalTargets:
let
nodeTargets = generateNodeExporterTargets self (externalTargets // { nodeExporterOnly = [ ]; });
in
map
(cfg: cfg // {
targets = map (t: builtins.replaceStrings [ ":9100" ] [ ":9558" ] t) cfg.targets;
})
nodeTargets;
in
{
inherit extractHostMonitoring generateNodeExporterTargets generateScrapeConfigs generateSystemdExporterTargets;
}