Remove monitoring01 host configuration and unused service modules (prometheus, grafana, loki, tempo, pyroscope). Migrate blackbox, exportarr, and pve exporters to monitoring02 with scrape configs moved to VictoriaMetrics. Update alert rules, terraform vault policies/secrets, http-proxy entries, and documentation to reflect the monitoring02 migration. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
271 lines
8.0 KiB
Nix
271 lines
8.0 KiB
Nix
{ self, config, lib, pkgs, ... }:
|
|
let
|
|
# Manually listed scrape targets kept next to the monitoring service config.
externalTargets = import ../monitoring/external-targets.nix;

# Project helper library used below to build scrape targets/configs from the
# flake (`self`) together with the external targets.
monLib = import ../../lib/monitoring.nix { inherit lib; };

# Static target groups for the node-exporter job (reused for systemd-exporter).
nodeExporterTargets = monLib.generateNodeExporterTargets self externalTargets;

# Additional scrape jobs produced by the helper library; appended to the
# hand-written jobs in `scrapeConfigs`.
autoScrapeConfigs = monLib.generateScrapeConfigs self externalTargets;
|
|
|
|
# HTTPS endpoints probed through the blackbox exporter so certificate expiry
# can be monitored. Entries are host[:port]; the scheme is added below.
tlsTargets = map (endpoint: "https://${endpoint}") [
  "vault.home.2rjus.net:8200"
  "auth.home.2rjus.net"
  "testvm01.home.2rjus.net"
  "nzbget.home.2rjus.net"
  "radarr.home.2rjus.net"
  "sonarr.home.2rjus.net"
  "ha.home.2rjus.net"
  "z2m.home.2rjus.net"
  "metrics.home.2rjus.net"
  "alertmanager.home.2rjus.net"
  "grafana.home.2rjus.net"
  "jelly.home.2rjus.net"
  "nix-cache.home.2rjus.net"
  "grafana-test.home.2rjus.net"
];
|
|
|
|
# Script that logs in to OpenBao with the AppRole credentials stored on this
# host and writes the resulting client token to the file the "openbao" scrape
# job uses as its bearer credential.
#
# Exits non-zero (without touching the existing token file) when the AppRole
# files are missing, the login request fails, or the response has no token.
fetchOpenbaoToken = pkgs.writeShellApplication {
  name = "fetch-openbao-token-vm";
  runtimeInputs = [ pkgs.curl pkgs.jq ];
  text = ''
    VAULT_ADDR="https://vault01.home.2rjus.net:8200"
    APPROLE_DIR="/var/lib/vault/approle"
    OUTPUT_FILE="/run/secrets/victoriametrics/openbao-token"

    # Read AppRole credentials
    if [ ! -f "$APPROLE_DIR/role-id" ] || [ ! -f "$APPROLE_DIR/secret-id" ]; then
      echo "AppRole credentials not found at $APPROLE_DIR" >&2
      exit 1
    fi

    ROLE_ID=$(cat "$APPROLE_DIR/role-id")
    SECRET_ID=$(cat "$APPROLE_DIR/secret-id")

    # Authenticate to Vault. The login payload is fed to curl on stdin
    # (printf is a shell builtin) so the secret-id never appears in the
    # argument list of a process, where other local users could read it.
    # NOTE(review): -k disables TLS certificate verification for a request
    # that carries credentials; drop it once the issuing CA is trusted here.
    AUTH_RESPONSE=$(
      printf '{"role_id":"%s","secret_id":"%s"}' "$ROLE_ID" "$SECRET_ID" \
        | curl -sf -k -X POST --data @- "$VAULT_ADDR/v1/auth/approle/login"
    )

    # Extract token
    VAULT_TOKEN=$(echo "$AUTH_RESPONSE" | jq -r '.auth.client_token')
    if [ -z "$VAULT_TOKEN" ] || [ "$VAULT_TOKEN" = "null" ]; then
      echo "Failed to extract Vault token from response" >&2
      exit 1
    fi

    # Write the token to a private temporary file (mktemp creates it with
    # mode 0600), fix ownership and mode, then rename it into place. The
    # token is therefore never readable by other users, and a reader never
    # observes a truncated or half-written file during a refresh.
    OUTPUT_DIR=$(dirname "$OUTPUT_FILE")
    mkdir -p "$OUTPUT_DIR"
    TMP_FILE=$(mktemp "$OUTPUT_DIR/.openbao-token.XXXXXX")
    trap 'rm -f "$TMP_FILE"' EXIT
    printf '%s' "$VAULT_TOKEN" > "$TMP_FILE"
    chown victoriametrics:victoriametrics "$TMP_FILE"
    chmod 0400 "$TMP_FILE"
    mv -f "$TMP_FILE" "$OUTPUT_FILE"

    echo "Successfully fetched OpenBao token"
  '';
};
|
|
|
|
# Hand-written scrape jobs for VictoriaMetrics, followed by the jobs generated
# by the monitoring helper library (`autoScrapeConfigs`).
scrapeConfigs =
  let
    # Wrap a list of addresses in a single static target group.
    staticGroup = targets: [{ inherit targets; }];

    # Job for a service or exporter reachable on localhost at `port`.
    localJob = name: port: {
      job_name = name;
      static_configs = staticGroup [ "localhost:${toString port}" ];
    };

    # Bearer authorization whose token is read from the given file.
    bearerFromFile = path: {
      type = "Bearer";
      credentials_file = path;
    };

    # Rewrites a node-exporter address (:9100) to the systemd-exporter
    # port (:9558) on the same host.
    toSystemdExporter = builtins.replaceStrings [ ":9100" ] [ ":9558" ];
  in
  [
    # Auto-generated node-exporter targets from flake hosts + external
    {
      job_name = "node-exporter";
      static_configs = nodeExporterTargets;
    }

    # Systemd exporter on all hosts (same targets, different port)
    {
      job_name = "systemd-exporter";
      static_configs = map
        (group: group // { targets = map toSystemdExporter group.targets; })
        nodeExporterTargets;
    }

    # Local monitoring services
    (localJob "victoriametrics" 8428)
    (localJob "loki" 3100)
    (localJob "grafana" 3000)
    (localJob "alertmanager" 9093)

    # Caddy metrics from nix-cache02
    {
      job_name = "nix-cache_caddy";
      scheme = "https";
      static_configs = staticGroup [ "nix-cache.home.2rjus.net" ];
    }

    # OpenBao metrics with bearer token auth
    {
      job_name = "openbao";
      scheme = "https";
      metrics_path = "/v1/sys/metrics";
      params.format = [ "prometheus" ];
      static_configs = staticGroup [ "vault01.home.2rjus.net:8200" ];
      authorization = bearerFromFile "/run/secrets/victoriametrics/openbao-token";
    }

    # Apiary external service
    {
      job_name = "apiary";
      scheme = "https";
      scrape_interval = "60s";
      static_configs = staticGroup [ "apiary.t-juice.club" ];
      authorization = bearerFromFile "/run/secrets/victoriametrics-apiary-token";
    }

    # Blackbox TLS certificate monitoring
    {
      job_name = "blackbox_tls";
      metrics_path = "/probe";
      params.module = [ "https_cert" ];
      static_configs = staticGroup tlsTargets;
      relabel_configs = [
        # The listed URL becomes the probe's `target` query parameter ...
        {
          source_labels = [ "__address__" ];
          target_label = "__param_target";
        }
        # ... and is also kept as the `instance` label ...
        {
          source_labels = [ "__param_target" ];
          target_label = "instance";
        }
        # ... while the address actually scraped is replaced with
        # 127.0.0.1:9115.
        {
          target_label = "__address__";
          replacement = "127.0.0.1:9115";
        }
      ];
    }

    # Sonarr exporter
    (localJob "sonarr" 9709)

    # Proxmox VE exporter
    (localJob "pve" 9221)
  ]
  ++ autoScrapeConfigs;
|
|
in
|
|
{
|
|
# VictoriaMetrics runs as a fixed system user/group instead of a systemd
# DynamicUser, so that vault.secrets and the credential files can be owned
# by that user.
users = {
  groups.victoriametrics = { };
  users.victoriametrics = {
    group = "victoriametrics";
    isSystemUser = true;
  };
};

# Force DynamicUser off and run the service as the static account above.
systemd.services.victoriametrics.serviceConfig = {
  User = "victoriametrics";
  Group = "victoriametrics";
  DynamicUser = lib.mkForce false;
};
|
|
|
|
# One-shot unit that runs the token fetch script. It is ordered before
# victoriametrics.service and pulled in by it (requiredBy), and waits for
# network-online.target.
systemd.services.victoriametrics-openbao-token = {
  description = "Fetch OpenBao token for VictoriaMetrics metrics scraping";

  wants = [ "network-online.target" ];
  after = [ "network-online.target" ];

  requiredBy = [ "victoriametrics.service" ];
  before = [ "victoriametrics.service" ];

  serviceConfig.Type = "oneshot";
  serviceConfig.ExecStart = lib.getExe fetchOpenbaoToken;
};

# Timer that re-runs the fetch unit periodically (AppRole tokens have a
# 1-hour TTL, so refreshing every 30 minutes keeps the token valid).
systemd.timers.victoriametrics-openbao-token = {
  description = "Refresh OpenBao token for VictoriaMetrics";
  wantedBy = [ "timers.target" ];
  timerConfig.OnBootSec = "5min";
  timerConfig.OnUnitActiveSec = "30min";
  timerConfig.RandomizedDelaySec = "5min";
};
|
|
|
|
# Bearer token for the "apiary" scrape job, fetched from Vault and owned by
# the victoriametrics user.
vault.secrets.victoriametrics-apiary-token = {
  secretPath = "hosts/monitoring02/apiary-token";
  extractKey = "password";

  owner = "victoriametrics";
  group = "victoriametrics";

  # Units associated with this secret.
  # NOTE(review): exact semantics of `services` are defined by the vault
  # module, which is not visible in this file.
  services = [ "victoriametrics" ];
};
|
|
|
|
# VictoriaMetrics itself: storage plus the built-in Prometheus-style scraper.
services.victoriametrics = {
  enable = true;

  # 3 months
  retentionPeriod = "3";

  # Disable config check since we reference external credential files
  checkConfig = false;

  prometheusConfig.global.scrape_interval = "15s";
  prometheusConfig.scrape_configs = scrapeConfigs;
};
|
|
|
|
# vmalert evaluates the alerting rules against the local VictoriaMetrics
# instance and sends notifications to the local Alertmanager.
services.vmalert.instances.default = {
  enable = true;
  settings = {
    # Rule file shared with the rest of the monitoring service config.
    "rule" = [ ../monitoring/rules.yml ];
    # Where rule expressions are evaluated.
    "datasource.url" = "http://localhost:8428";
    # Where firing alerts are delivered.
    "notifier.url" = [ "http://localhost:9093" ];
  };
};
|
|
|
|
# Caddy reverse proxies for VictoriaMetrics, vmalert and Alertmanager; each
# virtual host forwards to a service on 127.0.0.1.
services.caddy.virtualHosts =
  let
    # Virtual host body that proxies everything to the given local port.
    proxyToLocalPort = port: {
      extraConfig = ''
        reverse_proxy http://127.0.0.1:${toString port}
      '';
    };
  in
  {
    "metrics.home.2rjus.net" = proxyToLocalPort 8428;
    "vmalert.home.2rjus.net" = proxyToLocalPort 8880;
    "alertmanager.home.2rjus.net" = proxyToLocalPort 9093;
  };
|
|
|
|
# Alertmanager with a single route: every alert is grouped by alert name and
# delivered to one webhook receiver on localhost.
services.prometheus.alertmanager =
  let
    # Name shared by the route and the receiver definition so they cannot
    # drift apart.
    receiverName = "webhook_natstonotify";
  in
  {
    enable = true;
    configuration = {
      global = { };

      route = {
        receiver = receiverName;
        group_by = [ "alertname" ];
        group_wait = "30s";
        group_interval = "5m";
        repeat_interval = "1h";
      };

      receivers = [
        {
          name = receiverName;
          webhook_configs = [{ url = "http://localhost:5001/alert"; }];
        }
      ];
    };
  };
|
|
}
|