Files
nixos-servers/lib/monitoring.nix
Torjus Håkestad 7d291f85bf monitoring: propagate host labels to Prometheus scrape targets
Extract homelab.host metadata (tier, priority, role, labels) from host
configurations and propagate them to Prometheus scrape targets. This
enables semantic alert filtering using labels instead of hardcoded
instance names.

Changes:
- lib/monitoring.nix: Extract host metadata, group targets by labels
- prometheus.nix: Use structured static_configs with labels
- rules.yml: Replace instance filters with role-based filters

Example labels in Prometheus:
- ns1/ns2: role=dns, dns_role=primary/secondary
- nix-cache01: role=build-host
- testvm*: tier=test

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-07 17:04:50 +01:00

202 lines
6.8 KiB
Nix

# Helpers that derive Prometheus scrape targets (node-exporter and per-service
# jobs) from the NixOS host configurations exposed by a flake.
# `lib` must provide splitString, hasPrefix, filterAttrs, mapAttrsToList,
# flatten, filter, groupBy and optionalAttrs (presumably nixpkgs lib -- confirm
# against the caller).
{ lib }:
let
# Drop the prefix length from a CIDR string and keep only the address part
# (e.g., "10.69.13.5/24" -> "10.69.13.5"). Everything before the first "/" is returned.
extractIP = address: builtins.head (lib.splitString "/" address);
# True when the interface name begins with one of the VPN/tunnel prefixes
# listed below (wg, tun, tap, vti); false otherwise.
isVpnInterface = ifaceName:
  lib.any (prefix: lib.hasPrefix prefix ifaceName) [ "wg" "tun" "tap" "vti" ];
# Build the monitoring record for a single flake host.
# Arguments:
#   name       - attribute name the host is registered under (not used here;
#                the hostname is read from networking.hostName)
#   hostConfig - the host's evaluated configuration (its `.config` is inspected)
# Result: an attrset with hostname, ip, scrapeTargets, tier, priority, role and
# labels, or null when the host must be left out (monitoring disabled, DNS
# disabled, or no non-VPN network with a static address).
extractHostMonitoring = name: hostConfig:
  let
    cfg = hostConfig.config;
    homelab = cfg.homelab or { };
    # Hosts that do not declare these option sets are treated as enabled
    monitoring = homelab.monitoring or { enable = true; scrapeTargets = [ ]; };
    dns = homelab.dns or { enable = true; };
    hostMeta = homelab.host or { };

    # A network is usable when its matched interface is not a VPN/tunnel
    # interface and it carries at least one static address
    isPhysical = netCfg: !(isVpnInterface (netCfg.matchConfig.Name or ""));
    hasAddress = netCfg: (netCfg.address or [ ]) != [ ];
    usableNetworks = lib.filterAttrs
      (_: netCfg: isPhysical netCfg && hasAddress netCfg)
      (cfg.systemd.network.networks or { });

    # All static addresses of the usable networks, in attribute-name order
    addresses = lib.flatten (
      map (netCfg: netCfg.address or [ ]) (lib.attrValues usableNetworks)
    );
    firstAddress = if addresses == [ ] then null else builtins.head addresses;

    included =
      (monitoring.enable or true)
      && (dns.enable or true)
      && firstAddress != null;
  in
  if included then
    {
      hostname = cfg.networking.hostName;
      ip = extractIP firstAddress;
      scrapeTargets = monitoring.scrapeTargets or [ ];
      # Host metadata, later turned into Prometheus labels
      tier = hostMeta.tier or "prod";
      priority = hostMeta.priority or "high";
      role = hostMeta.role or null;
      labels = hostMeta.labels or { };
    }
  else
    null;
# Compute the label set attached to a host's scrape targets.
# tier, priority and role are emitted only when they differ from their
# defaults ("prod", "high", null). The host's free-form labels are merged last,
# so they win on a name clash.
buildEffectiveLabels = host:
  let
    tierLabel = if host.tier == "prod" then { } else { tier = host.tier; };
    priorityLabel = if host.priority == "high" then { } else { priority = host.priority; };
    roleLabel = if host.role == null then { } else { role = host.role; };
  in
  tierLabel // priorityLabel // roleLabel // host.labels;
# Produce the node-exporter static_configs list for every monitored flake host
# plus any externally supplied targets.
# Arguments:
#   self            - the flake; its nixosConfigurations (if any) are scanned
#   externalTargets - attrset whose optional `nodeExporter` list holds extra
#                     "host:port" targets; these get no labels
# Result: a list of { targets = [ ... ]; labels = { ... }; } entries, one per
# distinct label set. `labels` is omitted when the set is empty.
generateNodeExporterTargets = self: externalTargets:
  let
    monitoredHosts = lib.filter (h: h != null) (
      lib.mapAttrsToList extractHostMonitoring (self.nixosConfigurations or { })
    );

    mkEntry = target: labels: { inherit target labels; };

    fromFlake = map
      (h: mkEntry "${h.hostname}.home.2rjus.net:9100" (buildEffectiveLabels h))
      monitoredHosts;
    fromExternal = map (t: mkEntry t { }) (externalTargets.nodeExporter or [ ]);

    # Bucket entries by label set; the JSON form of the labels is the bucket key
    byLabelSet = lib.groupBy
      (entry: builtins.toJSON entry.labels)
      (fromFlake ++ fromExternal);

    # Every member of a bucket shares the same labels, so read them from the first
    toStaticConfig = members:
      let
        labels = (builtins.head members).labels;
      in
      { targets = lib.catAttrs "target" members; }
      // lib.optionalAttrs (labels != { }) { inherit labels; };
  in
  map toStaticConfig (lib.attrValues byLabelSet);
# Produce Prometheus scrape configs for the service targets declared by the
# flake hosts, followed by externally supplied scrape configs.
# Arguments:
#   self            - the flake; its nixosConfigurations (if any) are scanned
#   externalTargets - attrset whose optional `scrapeConfigs` list holds entries
#                     with job_name, targets and optionally metrics_path,
#                     scheme and scrape_interval
# Result: one scrape config per job_name found on the flake hosts (targets
# grouped into static_configs by host label set), then one per external entry.
generateScrapeConfigs = self: externalTargets:
  let
    monitoredHosts = lib.filter (h: h != null) (
      lib.mapAttrsToList extractHostMonitoring (self.nixosConfigurations or { })
    );

    # Pair a scrape target with the hostname and labels of the host declaring it
    annotate = host: target: {
      inherit (target) job_name port metrics_path scheme scrape_interval honor_labels;
      inherit (host) hostname;
      hostLabels = buildEffectiveLabels host;
    };
    annotatedTargets = lib.concatMap
      (host: map (annotate host) host.scrapeTargets)
      monitoredHosts;

    jobs = lib.groupBy (t: t.job_name) annotatedTargets;

    # Every member of a label bucket shares the same labels, so read them from the first
    toStaticConfig = members:
      let
        labels = (builtins.head members).hostLabels;
      in
      {
        targets = map
          (t: "${t.hostname}.home.2rjus.net:${toString t.port}")
          members;
      }
      // lib.optionalAttrs (labels != { }) { inherit labels; };

    # Job-level settings are read from the first target of the job and are
    # emitted only when they differ from "/metrics", "http", null and false
    toJobConfig = jobName: members:
      let
        exemplar = builtins.head members;
        byLabelSet = lib.groupBy (t: builtins.toJSON t.hostLabels) members;
      in
      {
        job_name = jobName;
        static_configs = map toStaticConfig (lib.attrValues byLabelSet);
      }
      // lib.optionalAttrs (exemplar.metrics_path != "/metrics") {
        inherit (exemplar) metrics_path;
      }
      // lib.optionalAttrs (exemplar.scheme != "http") {
        inherit (exemplar) scheme;
      }
      // lib.optionalAttrs (exemplar.scrape_interval != null) {
        inherit (exemplar) scrape_interval;
      }
      // lib.optionalAttrs exemplar.honor_labels {
        honor_labels = true;
      };

    # External entries are passed through; the optional keys are copied only when present
    toExternalConfig = ext:
      {
        job_name = ext.job_name;
        static_configs = [{ targets = ext.targets; }];
      }
      // builtins.intersectAttrs
        { metrics_path = null; scheme = null; scrape_interval = null; }
        ext;
  in
  lib.mapAttrsToList toJobConfig jobs
  ++ map toExternalConfig (externalTargets.scrapeConfigs or [ ]);
in
{
# Exported functions. extractIP, isVpnInterface and buildEffectiveLabels are
# not inherited here and stay local to this file.
inherit extractHostMonitoring generateNodeExporterTargets generateScrapeConfigs;
}