monitoring: auto-generate Prometheus scrape targets from host configs
Add homelab.monitoring NixOS options (enable, scrapeTargets) following the same pattern as homelab.dns. Prometheus scrape configs are now auto-generated from the flake's host configurations and external targets, replacing the hardcoded target lists.

Also cleans up the alert rules:
- rename alerts to snake_case
- fix the zigbee2mqtt typo
- remove the duplicate pushgateway alert
- add "for" clauses to monitoring_rules
- remove the hardcoded WireGuard public key
- add new alerts for certificates, proxmox, caddy, smartctl temperature, filesystem prediction, systemd state, file descriptors, and host reboots

Fixes the grafana scrape target port from 3100 to 3000.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
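For reference, a minimal sketch of what the host-side usage might look like. The option path and the enable/scrapeTargets names come from the description above, and the per-target attribute names match what lib/monitoring.nix reads below; the file path, job name, port, and defaults are illustrative assumptions, not taken from this commit.

# hosts/example-host/monitoring.nix (hypothetical)
{
  homelab.monitoring = {
    enable = true; # setting this to false should drop the host from the generated targets
    scrapeTargets = [
      {
        job_name = "caddy";        # example job name
        port = 2019;               # example port
        metrics_path = "/metrics"; # values matching the defaults are elided from the generated config
        scheme = "http";
        scrape_interval = null;
        honor_labels = false;
      }
    ];
  };
}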
lib/monitoring.nix (new file, 145 lines)
@@ -0,0 +1,145 @@
{ lib }:
let
  # Extract IP address from CIDR notation (e.g., "10.69.13.5/24" -> "10.69.13.5")
  extractIP = address:
    let
      parts = lib.splitString "/" address;
    in
    builtins.head parts;

  # Check if a network interface name looks like a VPN/tunnel interface
  isVpnInterface = ifaceName:
    lib.hasPrefix "wg" ifaceName ||
    lib.hasPrefix "tun" ifaceName ||
    lib.hasPrefix "tap" ifaceName ||
    lib.hasPrefix "vti" ifaceName;

  # Extract monitoring info from a single host configuration
  # Returns null if host should not be included
  extractHostMonitoring = name: hostConfig:
    let
      cfg = hostConfig.config;
      monConfig = (cfg.homelab or { }).monitoring or { enable = true; scrapeTargets = [ ]; };
      dnsConfig = (cfg.homelab or { }).dns or { enable = true; };
      hostname = cfg.networking.hostName;
      networks = cfg.systemd.network.networks or { };

      # Filter out VPN interfaces and find networks with static addresses
      physicalNetworks = lib.filterAttrs
        (netName: netCfg:
          let
            ifaceName = netCfg.matchConfig.Name or "";
          in
          !(isVpnInterface ifaceName) && (netCfg.address or [ ]) != [ ])
        networks;

      # Get addresses from physical networks only
      networkAddresses = lib.flatten (
        lib.mapAttrsToList
          (netName: netCfg: netCfg.address or [ ])
          physicalNetworks
      );

      firstAddress = if networkAddresses != [ ] then builtins.head networkAddresses else null;
    in
    if !(monConfig.enable or true) || !(dnsConfig.enable or true) || firstAddress == null then
      null
    else
      {
        inherit hostname;
        ip = extractIP firstAddress;
        scrapeTargets = monConfig.scrapeTargets or [ ];
      };

  # Generate node-exporter targets from all flake hosts
  generateNodeExporterTargets = self: externalTargets:
    let
      nixosConfigs = self.nixosConfigurations or { };
      hostList = lib.filter (x: x != null) (
        lib.mapAttrsToList extractHostMonitoring nixosConfigs
      );
      flakeTargets = map (host: "${host.hostname}.home.2rjus.net:9100") hostList;
    in
    flakeTargets ++ (externalTargets.nodeExporter or [ ]);

  # Generate scrape configs from all flake hosts and external targets
  generateScrapeConfigs = self: externalTargets:
    let
      nixosConfigs = self.nixosConfigurations or { };
      hostList = lib.filter (x: x != null) (
        lib.mapAttrsToList extractHostMonitoring nixosConfigs
      );

      # Collect all scrapeTargets from all hosts, grouped by job_name
      allTargets = lib.flatten (map
        (host:
          map
            (target: {
              inherit (target) job_name port metrics_path scheme scrape_interval honor_labels;
              hostname = host.hostname;
            })
            host.scrapeTargets
        )
        hostList
      );

      # Group targets by job_name
      grouped = lib.groupBy (t: t.job_name) allTargets;

      # Generate a scrape config for each job
      flakeScrapeConfigs = lib.mapAttrsToList
        (jobName: targets:
          let
            first = builtins.head targets;
            targetAddrs = map
              (t:
                let
                  portStr = toString t.port;
                in
                "${t.hostname}.home.2rjus.net:${portStr}")
              targets;
            config = {
              job_name = jobName;
              static_configs = [{
                targets = targetAddrs;
              }];
            }
            // (lib.optionalAttrs (first.metrics_path != "/metrics") {
              metrics_path = first.metrics_path;
            })
            // (lib.optionalAttrs (first.scheme != "http") {
              scheme = first.scheme;
            })
            // (lib.optionalAttrs (first.scrape_interval != null) {
              scrape_interval = first.scrape_interval;
            })
            // (lib.optionalAttrs first.honor_labels {
              honor_labels = true;
            });
          in
          config
        )
        grouped;

      # External scrape configs
      externalScrapeConfigs = map
        (ext: {
          job_name = ext.job_name;
          static_configs = [{
            targets = ext.targets;
          }];
        } // (lib.optionalAttrs (ext ? metrics_path) {
          metrics_path = ext.metrics_path;
        }) // (lib.optionalAttrs (ext ? scheme) {
          scheme = ext.scheme;
        }) // (lib.optionalAttrs (ext ? scrape_interval) {
          scrape_interval = ext.scrape_interval;
        }))
        (externalTargets.scrapeConfigs or [ ]);
    in
    flakeScrapeConfigs ++ externalScrapeConfigs;

in
{
  inherit extractHostMonitoring generateNodeExporterTargets generateScrapeConfigs;
}
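And a sketch of how the monitoring host's Prometheus module might consume these helpers. The function names and the externalTargets keys (nodeExporter, scrapeConfigs) match the definitions above; passing self in via specialArgs, the relative import path, and the concrete external targets are assumptions for illustration only.

# hosts/monitoring/prometheus.nix (hypothetical wiring)
{ self, lib, ... }:
let
  monitoringLib = import ../../lib/monitoring.nix { inherit lib; };
  externalTargets = {
    # non-NixOS machines that still run node-exporter (example entry)
    nodeExporter = [ "nas.home.2rjus.net:9100" ];
    # hand-written scrape configs for targets outside the flake (example entry)
    scrapeConfigs = [
      { job_name = "pve"; targets = [ "pve.home.2rjus.net:9221" ]; metrics_path = "/pve"; }
    ];
  };
in
{
  services.prometheus.scrapeConfigs =
    [
      {
        job_name = "node-exporter";
        static_configs = [{ targets = monitoringLib.generateNodeExporterTargets self externalTargets; }];
      }
    ]
    ++ monitoringLib.generateScrapeConfigs self externalTargets;
}

Because the helpers only take lib and self, the monitoring host can evaluate every other host's homelab.monitoring settings without those hosts needing to know anything about Prometheus.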