Files
nixos-servers/lib/monitoring.nix
Torjus Håkestad dd1b64de27
Some checks failed
Run nix flake check / flake-check (pull_request) Successful in 2m49s
Run nix flake check / flake-check (push) Has been cancelled
monitoring: auto-generate Prometheus scrape targets from host configs
Add homelab.monitoring NixOS options (enable, scrapeTargets) following
the same pattern as homelab.dns. Prometheus scrape configs are now
auto-generated from flake host configurations and external targets,
replacing hardcoded target lists.

Also cleans up alert rules: snake_case naming, fix zigbee2mqtt typo,
remove duplicate pushgateway alert, add `for` clauses to monitoring_rules,
remove hardcoded WireGuard public key, and add new alerts for
certificates, proxmox, caddy, smartctl temperature, filesystem
prediction, systemd state, file descriptors, and host reboots.

Fixes grafana scrape target port from 3100 to 3000.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-05 00:49:07 +01:00

146 lines
4.7 KiB
Nix

{ lib }:
let
# Drop the prefix length from a CIDR string and keep only the address part
# (e.g., "10.69.13.5/24" -> "10.69.13.5"). Everything before the first "/"
# is returned, so a string without a "/" comes back unchanged.
extractIP = address: builtins.head (lib.splitString "/" address);
# Report whether an interface name starts with one of the prefixes treated
# here as VPN/tunnel devices: "wg", "tun", "tap" or "vti".
isVpnInterface = ifaceName:
  lib.any (prefix: lib.hasPrefix prefix ifaceName) [ "wg" "tun" "tap" "vti" ];
# Derive the monitoring record for a single flake host.
#
# Arguments:
#   name       - attribute name of the host; not used in the body, but present
#                so the function can be handed straight to lib.mapAttrsToList
#   hostConfig - host entry; only its `config` attribute is read
#
# Returns { hostname, ip, scrapeTargets }, or null when monitoring is disabled,
# DNS is disabled, or no non-VPN network carries a static address.
extractHostMonitoring = name: hostConfig:
  let
    cfg = hostConfig.config;
    homelab = cfg.homelab or { };
    # Hosts that do not define these options are treated as enabled
    monConfig = homelab.monitoring or { enable = true; scrapeTargets = [ ]; };
    dnsConfig = homelab.dns or { enable = true; };

    # Keep a network only if it is not a VPN/tunnel and has static addresses
    isUsable = netCfg:
      !(isVpnInterface (netCfg.matchConfig.Name or ""))
      && (netCfg.address or [ ]) != [ ];

    usableNetworks = lib.filter isUsable
      (lib.attrValues (cfg.systemd.network.networks or { }));

    # All static addresses of the usable networks, in attribute-name order
    candidateAddresses = lib.flatten
      (map (netCfg: netCfg.address or [ ]) usableNetworks);

    firstAddress =
      if candidateAddresses == [ ] then null else builtins.head candidateAddresses;
  in
  if (monConfig.enable or true) && (dnsConfig.enable or true) && firstAddress != null then
    {
      hostname = cfg.networking.hostName;
      ip = extractIP firstAddress;
      scrapeTargets = monConfig.scrapeTargets or [ ];
    }
  else
    null;
# Build the node-exporter target list.
#
# Every host in self.nixosConfigurations for which extractHostMonitoring
# returns a record contributes "<hostname>.home.2rjus.net:9100"; the entries
# of externalTargets.nodeExporter (if any) are appended after them.
generateNodeExporterTargets = self: externalTargets:
  let
    monitoredHosts = lib.filter (host: host != null)
      (lib.mapAttrsToList extractHostMonitoring (self.nixosConfigurations or { }));
    toTarget = host: "${host.hostname}.home.2rjus.net:9100";
  in
  (map toTarget monitoredHosts) ++ (externalTargets.nodeExporter or [ ]);
# Generate Prometheus scrape configs from all flake hosts and external targets.
#
# Arguments:
#   self            - flake whose nixosConfigurations (default: { }) are scanned
#   externalTargets - attrset whose scrapeConfigs list (default: [ ]) holds
#                     entries with job_name and targets, plus optional
#                     metrics_path, scheme, scrape_interval and honor_labels
#
# Returns a list of scrape config attrsets: one per job_name found in the
# hosts' scrapeTargets, followed by one per external entry.
generateScrapeConfigs = self: externalTargets:
  let
    nixosConfigs = self.nixosConfigurations or { };
    hostList = lib.filter (x: x != null) (
      lib.mapAttrsToList extractHostMonitoring nixosConfigs
    );

    # Collect all scrapeTargets from all hosts, tagging each with its hostname
    allTargets = lib.flatten (map
      (host:
        map
          (target: {
            inherit (target) job_name port metrics_path scheme scrape_interval honor_labels;
            hostname = host.hostname;
          })
          host.scrapeTargets
      )
      hostList
    );

    # Group targets by job_name
    grouped = lib.groupBy (t: t.job_name) allTargets;

    # Generate a scrape config for each job
    flakeScrapeConfigs = lib.mapAttrsToList
      (jobName: targets:
        let
          # Job-level settings are taken from the first target in the group;
          # other targets sharing the job_name contribute only their address.
          first = builtins.head targets;
          targetAddrs = map
            (t: "${t.hostname}.home.2rjus.net:${toString t.port}")
            targets;
        in
        {
          job_name = jobName;
          static_configs = [{
            targets = targetAddrs;
          }];
        }
        # Only emit settings that differ from the values compared against here
        // (lib.optionalAttrs (first.metrics_path != "/metrics") {
          metrics_path = first.metrics_path;
        })
        // (lib.optionalAttrs (first.scheme != "http") {
          scheme = first.scheme;
        })
        // (lib.optionalAttrs (first.scrape_interval != null) {
          scrape_interval = first.scrape_interval;
        })
        // (lib.optionalAttrs first.honor_labels {
          honor_labels = true;
        })
      )
      grouped;

    # External scrape configs: optional keys are passed through when present
    externalScrapeConfigs = map
      (ext: {
        job_name = ext.job_name;
        static_configs = [{
          targets = ext.targets;
        }];
      } // (lib.optionalAttrs (ext ? metrics_path) {
        metrics_path = ext.metrics_path;
      }) // (lib.optionalAttrs (ext ? scheme) {
        scheme = ext.scheme;
      }) // (lib.optionalAttrs (ext ? scrape_interval) {
        scrape_interval = ext.scrape_interval;
      }) // (lib.optionalAttrs (ext.honor_labels or false) {
        # Mirror the flake-host handling: emit honor_labels only when true,
        # so external jobs no longer lose the setting silently.
        honor_labels = true;
      }))
      (externalTargets.scrapeConfigs or [ ]);
  in
  flakeScrapeConfigs ++ externalScrapeConfigs;
# Exported functions. extractIP and isVpnInterface are not inherited here,
# so they remain private helpers of this file.
in
{
inherit extractHostMonitoring generateNodeExporterTargets generateScrapeConfigs;
}