{ self, lib, ... }:
let
  monitoring = import ../../lib/monitoring.nix { inherit lib; };
  extraTargets = import ./external-targets.nix;

  # Targets and scrape jobs produced by the shared monitoring helpers from the
  # flake (`self`) together with the external target list.
  nodeTargets = monitoring.generateNodeExporterTargets self extraTargets;
  generatedJobs = monitoring.generateScrapeConfigs self extraTargets;

  # Build a scrape job that has exactly one static_configs entry.
  staticJob = job_name: targets: {
    inherit job_name;
    static_configs = [ { inherit targets; } ];
  };

  # Hand-written scrape jobs. List order is kept as-is; the generated jobs are
  # appended after these where scrapeConfigs is assembled.
  manualJobs = [
    # node-exporter on every generated target (flake hosts + external)
    (staticJob "node-exporter" nodeTargets)

    # systemd-exporter: same target list with the port swapped 9100 -> 9558
    (staticJob "systemd-exporter" (
      map (builtins.replaceStrings [ ":9100" ] [ ":9558" ]) nodeTargets
    ))

    # Monitoring services that are listed by hand (not auto-generated)
    (staticJob "prometheus" [ "localhost:9090" ])
    (staticJob "loki" [ "localhost:3100" ])
    (staticJob "grafana" [ "localhost:3000" ])
    (staticJob "alertmanager" [ "localhost:9093" ])
    (staticJob "pushgateway" [ "localhost:9091" ] // { honor_labels = true; })
    (staticJob "labmon" [ "monitoring01.home.2rjus.net:9969" ])

    # TODO: nix-cache_caddy can't be auto-generated because the cert is issued
    # for nix-cache.home.2rjus.net (service CNAME), not nix-cache01 (hostname).
    # Consider adding a target override to homelab.monitoring.scrapeTargets.
    (staticJob "nix-cache_caddy" [ "nix-cache.home.2rjus.net" ] // { scheme = "https"; })

    # pve-exporter: the relabel rules copy the static target into the `target`
    # URL parameter and the `instance` label, then point the actual scrape
    # address at 127.0.0.1:9221.
    (
      staticJob "pve-exporter" [ "10.69.12.75" ]
      // {
        metrics_path = "/pve";
        params = {
          module = [ "default" ];
          cluster = [ "1" ];
          node = [ "1" ];
        };
        relabel_configs = [
          {
            source_labels = [ "__address__" ];
            target_label = "__param_target";
          }
          {
            source_labels = [ "__param_target" ];
            target_label = "instance";
          }
          {
            target_label = "__address__";
            replacement = "127.0.0.1:9221";
          }
        ];
      }
    )

    # OpenBao metrics, authenticated with a bearer token read from a file
    (
      staticJob "openbao" [ "vault01.home.2rjus.net:8200" ]
      // {
        scheme = "https";
        metrics_path = "/v1/sys/metrics";
        params.format = [ "prometheus" ];
        authorization = {
          type = "Bearer";
          credentials_file = "/run/secrets/prometheus/openbao-token";
        };
      }
    )
  ];
in
{
  # Token used by the "openbao" scrape job
  vault.secrets.openbao-token = {
    secretPath = "hosts/monitoring01/openbao-token";
    extractKey = "token";
    outputDir = "/run/secrets/prometheus";
    owner = "prometheus";
    mode = "0400";
    services = [ "prometheus" ];
  };

  services.prometheus = {
    enable = true;

    # Only a syntax check: the config refers to external credential files
    # (e.g., openbao-token).
    checkConfig = "syntax-only";

    retentionTime = "30d";
    globalConfig.scrape_interval = "15s";

    rules = [ (builtins.readFile ./rules.yml) ];

    scrapeConfigs = manualJobs ++ generatedJobs;

    # Alertmanager endpoint this Prometheus is configured with
    alertmanagers = [
      { static_configs = [ { targets = [ "localhost:9093" ]; } ]; }
    ];

    # Alertmanager instance: a single route to one webhook receiver
    alertmanager = {
      enable = true;
      configuration = {
        global = { };
        route = {
          receiver = "webhook_natstonotify";
          group_by = [ "alertname" ];
          group_wait = "30s";
          group_interval = "5m";
          repeat_interval = "1h";
        };
        receivers = [
          {
            name = "webhook_natstonotify";
            webhook_configs = [ { url = "http://localhost:5001/alert"; } ];
          }
        ];
      };
    };

    pushgateway = {
      enable = true;
      web.external-url = "https://pushgw.home.2rjus.net";
    };
  };
}