monitoring: propagate host labels to Prometheus scrape targets
Extract homelab.host metadata (tier, priority, role, labels) from host configurations and propagate them to Prometheus scrape targets. This enables semantic alert filtering using labels instead of hardcoded instance names.

Changes:
- lib/monitoring.nix: Extract host metadata, group targets by labels
- prometheus.nix: Use structured static_configs with labels
- rules.yml: Replace instance filters with role-based filters

Example labels in Prometheus:
- ns1/ns2: role=dns, dns_role=primary/secondary
- nix-cache01: role=build-host
- testvm*: tier=test

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -21,6 +21,7 @@ let
|
||||
cfg = hostConfig.config;
|
||||
monConfig = (cfg.homelab or { }).monitoring or { enable = true; scrapeTargets = [ ]; };
|
||||
dnsConfig = (cfg.homelab or { }).dns or { enable = true; };
|
||||
hostConfig' = (cfg.homelab or { }).host or { };
|
||||
hostname = cfg.networking.hostName;
|
||||
networks = cfg.systemd.network.networks or { };
|
||||
|
||||
@@ -49,20 +50,64 @@ let
|
||||
inherit hostname;
|
||||
ip = extractIP firstAddress;
|
||||
scrapeTargets = monConfig.scrapeTargets or [ ];
|
||||
# Host metadata for label propagation
|
||||
tier = hostConfig'.tier or "prod";
|
||||
priority = hostConfig'.priority or "high";
|
||||
role = hostConfig'.role or null;
|
||||
labels = hostConfig'.labels or { };
|
||||
};
|
||||
|
||||
# Build effective labels for a host (only include non-default values)
#
# Takes a host record carrying `tier`, `priority`, `role` and `labels`
# attributes and returns a single attrset of labels:
#   - `tier` is emitted only when it differs from "prod"
#   - `priority` is emitted only when it differs from "high"
#   - `role` is emitted only when it is not null
#   - `host.labels` is merged last, so on a key collision (`//` is
#     right-biased) a value from `host.labels` replaces the derived one
# A host with all-default metadata and no extra labels yields `{ }`.
|
||||
buildEffectiveLabels = host:
|
||||
(lib.optionalAttrs (host.tier != "prod") { tier = host.tier; })
|
||||
// (lib.optionalAttrs (host.priority != "high") { priority = host.priority; })
|
||||
// (lib.optionalAttrs (host.role != null) { role = host.role; })
|
||||
// host.labels;
|
||||
|
||||
# Generate node-exporter targets from all flake hosts
|
||||
# Returns a list of static_configs entries with labels
|
||||
generateNodeExporterTargets = self: externalTargets:
|
||||
let
|
||||
nixosConfigs = self.nixosConfigurations or { };
|
||||
hostList = lib.filter (x: x != null) (
|
||||
lib.mapAttrsToList extractHostMonitoring nixosConfigs
|
||||
);
|
||||
flakeTargets = map (host: "${host.hostname}.home.2rjus.net:9100") hostList;
|
||||
|
||||
# Build target entries with labels for each host
|
||||
flakeEntries = map
|
||||
(host: {
|
||||
target = "${host.hostname}.home.2rjus.net:9100";
|
||||
labels = buildEffectiveLabels host;
|
||||
})
|
||||
hostList;
|
||||
|
||||
# External targets have no labels
|
||||
externalEntries = map
|
||||
(target: { inherit target; labels = { }; })
|
||||
(externalTargets.nodeExporter or [ ]);
|
||||
|
||||
allEntries = flakeEntries ++ externalEntries;
|
||||
|
||||
# Group entries by their label set for efficient static_configs
|
||||
# Convert labels attrset to a string key for grouping
|
||||
labelKey = entry: builtins.toJSON entry.labels;
|
||||
grouped = lib.groupBy labelKey allEntries;
|
||||
|
||||
# Convert groups to static_configs format
|
||||
staticConfigs = lib.mapAttrsToList
|
||||
(key: entries:
|
||||
let
|
||||
labels = (builtins.head entries).labels;
|
||||
in
|
||||
{ targets = map (e: e.target) entries; }
|
||||
// (lib.optionalAttrs (labels != { }) { inherit labels; })
|
||||
)
|
||||
grouped;
|
||||
in
|
||||
flakeTargets ++ (externalTargets.nodeExporter or [ ]);
|
||||
staticConfigs;
|
||||
|
||||
# Generate scrape configs from all flake hosts and external targets
|
||||
# Host labels are propagated to service targets for semantic alert filtering
|
||||
generateScrapeConfigs = self: externalTargets:
|
||||
let
|
||||
nixosConfigs = self.nixosConfigurations or { };
|
||||
@@ -70,13 +115,14 @@ let
|
||||
lib.mapAttrsToList extractHostMonitoring nixosConfigs
|
||||
);
|
||||
|
||||
# Collect all scrapeTargets from all hosts, grouped by job_name
|
||||
# Collect all scrapeTargets from all hosts, including host labels
|
||||
allTargets = lib.flatten (map
|
||||
(host:
|
||||
map
|
||||
(target: {
|
||||
inherit (target) job_name port metrics_path scheme scrape_interval honor_labels;
|
||||
hostname = host.hostname;
|
||||
hostLabels = buildEffectiveLabels host;
|
||||
})
|
||||
host.scrapeTargets
|
||||
)
|
||||
@@ -87,22 +133,32 @@ let
|
||||
grouped = lib.groupBy (t: t.job_name) allTargets;
|
||||
|
||||
# Generate a scrape config for each job
|
||||
# Within each job, group targets by their host labels for efficient static_configs
|
||||
flakeScrapeConfigs = lib.mapAttrsToList
|
||||
(jobName: targets:
|
||||
let
|
||||
first = builtins.head targets;
|
||||
targetAddrs = map
|
||||
(t:
|
||||
|
||||
# Group targets within this job by their host labels
|
||||
labelKey = t: builtins.toJSON t.hostLabels;
|
||||
groupedByLabels = lib.groupBy labelKey targets;
|
||||
|
||||
staticConfigs = lib.mapAttrsToList
|
||||
(key: labelTargets:
|
||||
let
|
||||
portStr = toString t.port;
|
||||
labels = (builtins.head labelTargets).hostLabels;
|
||||
targetAddrs = map
|
||||
(t: "${t.hostname}.home.2rjus.net:${toString t.port}")
|
||||
labelTargets;
|
||||
in
|
||||
"${t.hostname}.home.2rjus.net:${portStr}")
|
||||
targets;
|
||||
{ targets = targetAddrs; }
|
||||
// (lib.optionalAttrs (labels != { }) { inherit labels; })
|
||||
)
|
||||
groupedByLabels;
|
||||
|
||||
config = {
|
||||
job_name = jobName;
|
||||
static_configs = [{
|
||||
targets = targetAddrs;
|
||||
}];
|
||||
static_configs = staticConfigs;
|
||||
}
|
||||
// (lib.optionalAttrs (first.metrics_path != "/metrics") {
|
||||
metrics_path = first.metrics_path;
|
||||
|
||||
Reference in New Issue
Block a user