Some checks failed
Run nix flake check / flake-check (push) Failing after 3m7s
Add nodeExporterOnly list to external-targets.nix for hosts that have node-exporter but not systemd-exporter (e.g. pve1). This prevents a down target in the systemd-exporter scrape job. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
268 lines
7.9 KiB
Nix
268 lines
7.9 KiB
Nix
{ self, config, lib, pkgs, ... }:
|
|
let
|
|
# Targets that are declared by hand in a separate file rather than in this module.
externalTargets = import ../monitoring/external-targets.nix;

# Project-local monitoring helpers; only `lib` is handed to them.
monLib = import ../../lib/monitoring.nix { inherit lib; };

# Each generator receives the flake (`self`) and the external target set.
# The results are consumed by `scrapeConfigs` below.
autoScrapeConfigs =
  monLib.generateScrapeConfigs self externalTargets;
nodeExporterTargets =
  monLib.generateNodeExporterTargets self externalTargets;
systemdExporterTargets =
  monLib.generateSystemdExporterTargets self externalTargets;
|
|
|
|
# HTTPS endpoints whose certificates are probed for expiry through the
# blackbox exporter. Entries are host[:port]; the scheme is prepended here so
# every resulting target is a full https:// URL.
tlsTargets = map (endpoint: "https://${endpoint}") [
  "vault.home.2rjus.net:8200"
  "auth.home.2rjus.net"
  "testvm01.home.2rjus.net"
  "nzbget.home.2rjus.net"
  "radarr.home.2rjus.net"
  "sonarr.home.2rjus.net"
  "ha.home.2rjus.net"
  "z2m.home.2rjus.net"
  "metrics.home.2rjus.net"
  "alertmanager.home.2rjus.net"
  "grafana.home.2rjus.net"
  "jelly.home.2rjus.net"
  "nix-cache.home.2rjus.net"
  "grafana-test.home.2rjus.net"
];
|
|
|
|
# Script that logs in to OpenBao with the AppRole credentials stored on this
# host and writes the resulting client token to the file used as
# `credentials_file` by the "openbao" scrape job.
#
# Exits non-zero when the credentials are missing, the login request fails, or
# the response carries no client token.
fetchOpenbaoToken = pkgs.writeShellApplication {
  name = "fetch-openbao-token-vm";
  # coreutils is listed explicitly so cat/mkdir/mktemp/chown/chmod/mv do not
  # depend on the PATH of whichever unit invokes the script.
  runtimeInputs = [ pkgs.coreutils pkgs.curl pkgs.jq ];
  text = ''
    VAULT_ADDR="https://vault01.home.2rjus.net:8200"
    APPROLE_DIR="/var/lib/vault/approle"
    OUTPUT_FILE="/run/secrets/victoriametrics/openbao-token"

    # Read AppRole credentials
    if [ ! -f "$APPROLE_DIR/role-id" ] || [ ! -f "$APPROLE_DIR/secret-id" ]; then
      echo "AppRole credentials not found at $APPROLE_DIR" >&2
      exit 1
    fi

    ROLE_ID=$(cat "$APPROLE_DIR/role-id")
    SECRET_ID=$(cat "$APPROLE_DIR/secret-id")

    # Authenticate to Vault.
    # The request body is built by jq (correct JSON escaping) and handed to
    # curl on stdin, so the secret-id never shows up in curl's argument list
    # where other local users could read it from the process table.
    # NOTE(review): -k disables TLS certificate verification for a request
    # that carries the secret-id; drop it if this host trusts the issuing CA.
    AUTH_RESPONSE=$(
      jq -cn --arg role_id "$ROLE_ID" --arg secret_id "$SECRET_ID" \
        '{role_id: $role_id, secret_id: $secret_id}' \
        | curl -sf -k -X POST --data @- "$VAULT_ADDR/v1/auth/approle/login"
    )

    # Extract token
    VAULT_TOKEN=$(echo "$AUTH_RESPONSE" | jq -r '.auth.client_token')
    if [ -z "$VAULT_TOKEN" ] || [ "$VAULT_TOKEN" = "null" ]; then
      echo "Failed to extract Vault token from response" >&2
      exit 1
    fi

    # Write token to file.
    # mktemp creates the file owner-only from the start, and the rename at the
    # end swaps the new token in atomically: readers see either the old or the
    # new token, never an empty, partial, or world-readable file.
    mkdir -p "$(dirname "$OUTPUT_FILE")"
    TMP_FILE=$(mktemp "$OUTPUT_FILE.XXXXXX")
    trap 'rm -f "$TMP_FILE"' EXIT
    printf '%s' "$VAULT_TOKEN" > "$TMP_FILE"
    chown victoriametrics:victoriametrics "$TMP_FILE"
    chmod 0400 "$TMP_FILE"
    mv -f "$TMP_FILE" "$OUTPUT_FILE"

    echo "Successfully fetched OpenBao token"
  '';
};
|
|
|
|
# Complete scrape job list: the hand-written jobs below followed by the jobs
# produced by `monLib.generateScrapeConfigs`.
scrapeConfigs =
  let
    # Job with a single static target group and no other settings.
    plainJob = job_name: targets: {
      inherit job_name;
      static_configs = [{ inherit targets; }];
    };

    # Job scraping one port on this machine.
    localJob = job_name: port:
      plainJob job_name [ "localhost:${toString port}" ];

    # Bearer-token authorization read from a file at scrape time.
    bearerFrom = credentials_file: {
      type = "Bearer";
      inherit credentials_file;
    };

    manualJobs = [
      # Auto-generated node-exporter targets from flake hosts + external
      { job_name = "node-exporter"; static_configs = nodeExporterTargets; }

      # Systemd exporter on hosts that have it (excludes nodeExporterOnly hosts)
      { job_name = "systemd-exporter"; static_configs = systemdExporterTargets; }

      # Local monitoring services
      (localJob "victoriametrics" 8428)
      (localJob "loki" 3100)
      (localJob "grafana" 3000)
      (localJob "alertmanager" 9093)

      # Caddy metrics from nix-cache02
      (plainJob "nix-cache_caddy" [ "nix-cache.home.2rjus.net" ] // {
        scheme = "https";
      })

      # OpenBao metrics with bearer token auth
      (plainJob "openbao" [ "vault01.home.2rjus.net:8200" ] // {
        scheme = "https";
        metrics_path = "/v1/sys/metrics";
        params.format = [ "prometheus" ];
        authorization = bearerFrom "/run/secrets/victoriametrics/openbao-token";
      })

      # Apiary external service
      (plainJob "apiary" [ "apiary.t-juice.club" ] // {
        scheme = "https";
        scrape_interval = "60s";
        authorization = bearerFrom "/run/secrets/victoriametrics-apiary-token";
      })

      # Blackbox TLS certificate monitoring: the listed URL is moved into the
      # `target` query parameter and the `instance` label, then the scrape
      # address is pointed at 127.0.0.1:9115.
      (plainJob "blackbox_tls" tlsTargets // {
        metrics_path = "/probe";
        params.module = [ "https_cert" ];
        relabel_configs = [
          { source_labels = [ "__address__" ]; target_label = "__param_target"; }
          { source_labels = [ "__param_target" ]; target_label = "instance"; }
          { target_label = "__address__"; replacement = "127.0.0.1:9115"; }
        ];
      })

      # Sonarr exporter
      (localJob "sonarr" 9709)

      # Proxmox VE exporter
      (localJob "pve" 9221)
    ];
  in
  manualJobs ++ autoScrapeConfigs;
|
|
in
|
|
{
|
|
# VictoriaMetrics gets a fixed system account instead of a DynamicUser so that
# vault.secrets entries and credential files can be owned by it.
users = {
  groups.victoriametrics = { };
  users.victoriametrics = {
    group = "victoriametrics";
    isSystemUser = true;
  };
};

# Run the service as that account; DynamicUser has to be forced off for the
# static User/Group to apply.
systemd.services.victoriametrics.serviceConfig = {
  User = "victoriametrics";
  Group = "victoriametrics";
  DynamicUser = lib.mkForce false;
};
|
|
|
|
# Oneshot unit that runs the token fetch script. It is ordered after the
# network is online and before VictoriaMetrics, which requires it.
systemd.services.victoriametrics-openbao-token =
  let
    networkOnline = [ "network-online.target" ];
    victoriaMetricsUnit = [ "victoriametrics.service" ];
  in
  {
    description = "Fetch OpenBao token for VictoriaMetrics metrics scraping";

    wants = networkOnline;
    after = networkOnline;

    requiredBy = victoriaMetricsUnit;
    before = victoriaMetricsUnit;

    serviceConfig.Type = "oneshot";
    serviceConfig.ExecStart = lib.getExe fetchOpenbaoToken;
  };

# Re-run the fetch on a timer (AppRole tokens have 1-hour TTL): first run
# 5 minutes after boot, then every 30 minutes, each with up to 5 minutes of
# random delay.
systemd.timers.victoriametrics-openbao-token = {
  description = "Refresh OpenBao token for VictoriaMetrics";
  wantedBy = [ "timers.target" ];
  timerConfig.OnBootSec = "5min";
  timerConfig.OnUnitActiveSec = "30min";
  timerConfig.RandomizedDelaySec = "5min";
};
|
|
|
|
# Bearer token for the "apiary" scrape job, pulled from Vault and handed to
# the VictoriaMetrics account and service.
vault.secrets.victoriametrics-apiary-token =
  let
    vmAccount = "victoriametrics";
  in
  {
    secretPath = "hosts/monitoring02/apiary-token";
    extractKey = "password";

    owner = vmAccount;
    group = vmAccount;
    services = [ vmAccount ];
  };
|
|
|
|
# VictoriaMetrics server with the scrape jobs assembled in `scrapeConfigs`.
services.victoriametrics = {
  enable = true;

  # 3 months
  retentionPeriod = "3";

  # The config references credential files that live outside the build, so
  # the build-time config check is switched off.
  checkConfig = false;

  prometheusConfig = {
    scrape_configs = scrapeConfigs;
    global = {
      scrape_interval = "15s";
    };
  };
};
|
|
|
|
# vmalert evaluates the alerting rules, querying localhost:8428 and sending
# notifications to localhost:9093.
services.vmalert.instances.default = {
  enable = true;

  settings = {
    rule = [ ../monitoring/rules.yml ];
    "notifier.url" = [ "http://localhost:9093" ];
    "datasource.url" = "http://localhost:8428";
  };
};
|
|
|
|
# Caddy reverse proxies for VictoriaMetrics, vmalert and Alertmanager.
# Each virtual host forwards to the given port on 127.0.0.1.
services.caddy.virtualHosts =
  lib.mapAttrs
    (_vhost: backendPort: {
      extraConfig = ''
        reverse_proxy http://127.0.0.1:${toString backendPort}
      '';
    })
    {
      "metrics.home.2rjus.net" = 8428;
      "vmalert.home.2rjus.net" = 8880;
      "alertmanager.home.2rjus.net" = 9093;
    };
|
|
|
|
# Alertmanager with a single webhook receiver that every alert is routed to.
services.prometheus.alertmanager =
  let
    # Shared between the route and the receiver definition so the two
    # cannot drift apart.
    receiverName = "webhook_natstonotify";
  in
  {
    enable = true;

    configuration = {
      global = { };

      route = {
        receiver = receiverName;
        group_by = [ "alertname" ];
        group_wait = "30s";
        group_interval = "5m";
        repeat_interval = "1h";
      };

      receivers = [
        {
          name = receiverName;
          webhook_configs = [{ url = "http://localhost:5001/alert"; }];
        }
      ];
    };
  };
|
|
}
|