monitoring: use AppRole token for OpenBao metrics scraping
All checks were successful
Run nix flake check / flake-check (push) Successful in 2m12s
Run nix flake check / flake-check (pull_request) Successful in 2m19s

Instead of creating a long-lived Vault token in Terraform (which gets
invalidated when Terraform recreates it), monitoring01 now uses its
existing AppRole credentials to fetch a fresh token for Prometheus.

Changes:
- Add prometheus-metrics policy to monitoring01's AppRole
- Remove vault_token.prometheus_metrics resource from Terraform
- Remove openbao-token KV secret from Terraform
- Add systemd service to fetch AppRole token on boot
- Add systemd timer to refresh token every 30 minutes

This ensures Prometheus always has a valid token without depending on
Terraform state or manual intervention.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-05 23:51:11 +01:00
parent 88e9036cb4
commit e9857afc11
4 changed files with 74 additions and 31 deletions

View File

@@ -1,21 +1,77 @@
{ self, lib, ... }: { self, lib, pkgs, ... }:
let let
monLib = import ../../lib/monitoring.nix { inherit lib; }; monLib = import ../../lib/monitoring.nix { inherit lib; };
externalTargets = import ./external-targets.nix; externalTargets = import ./external-targets.nix;
nodeExporterTargets = monLib.generateNodeExporterTargets self externalTargets; nodeExporterTargets = monLib.generateNodeExporterTargets self externalTargets;
autoScrapeConfigs = monLib.generateScrapeConfigs self externalTargets; autoScrapeConfigs = monLib.generateScrapeConfigs self externalTargets;
# Script run by the prometheus-openbao-token service: logs in to OpenBao with
# this host's AppRole credentials and stores the returned client token in the
# file Prometheus uses when scraping OpenBao metrics.
#
# Exits non-zero if the credentials are missing, the login request fails or
# times out, or the response carries no client token. In all of those cases
# an already existing token file is left untouched.
fetchOpenbaoToken = pkgs.writeShellScript "fetch-openbao-token" ''
  set -euo pipefail

  VAULT_ADDR="https://vault01.home.2rjus.net:8200"
  APPROLE_DIR="/var/lib/vault/approle"
  OUTPUT_FILE="/run/secrets/prometheus/openbao-token"

  # Read AppRole credentials
  if [ ! -f "$APPROLE_DIR/role-id" ] || [ ! -f "$APPROLE_DIR/secret-id" ]; then
    echo "AppRole credentials not found at $APPROLE_DIR" >&2
    exit 1
  fi
  ROLE_ID=$(cat "$APPROLE_DIR/role-id")
  SECRET_ID=$(cat "$APPROLE_DIR/secret-id")

  # Authenticate to Vault.
  # The request body is produced by jq (correct JSON escaping) from environment
  # variables and piped to curl on stdin, so the secret-id never shows up in a
  # process command line. The timeouts keep a hung login from blocking this
  # unit, and everything ordered after it, forever.
  # NOTE(review): -k disables TLS certificate verification while the AppRole
  # secret is being sent; replace it with --cacert <internal CA> once the CA
  # bundle location on this host is confirmed.
  AUTH_RESPONSE=$(
    ROLE_ID="$ROLE_ID" SECRET_ID="$SECRET_ID" \
      ${pkgs.jq}/bin/jq -cn '{role_id: env.ROLE_ID, secret_id: env.SECRET_ID}' \
      | ${pkgs.curl}/bin/curl -sf -k -X POST \
          --connect-timeout 10 --max-time 30 \
          --data @- \
          "$VAULT_ADDR/v1/auth/approle/login"
  )

  # Extract token ("// empty" turns a missing/null field into an empty string)
  VAULT_TOKEN=$(${pkgs.jq}/bin/jq -r '.auth.client_token // empty' <<< "$AUTH_RESPONSE")
  if [ -z "$VAULT_TOKEN" ]; then
    echo "Failed to extract Vault token from response" >&2
    exit 1
  fi

  # Write token to file.
  # mktemp creates the temp file with owner-only permissions, so the token is
  # never readable by other users, and the final rename swaps it into place
  # atomically so a concurrent reader sees either the old or the new token,
  # never a truncated one.
  OUTPUT_DIR=$(dirname "$OUTPUT_FILE")
  mkdir -p "$OUTPUT_DIR"
  TMP_FILE=$(mktemp "$OUTPUT_DIR/.openbao-token.XXXXXX")
  trap 'rm -f "$TMP_FILE"' EXIT
  printf '%s' "$VAULT_TOKEN" > "$TMP_FILE"
  chown prometheus:prometheus "$TMP_FILE"
  chmod 0400 "$TMP_FILE"
  # -T: fail loudly instead of moving the file *into* OUTPUT_FILE should that
  # path exist as a directory.
  mv -fT "$TMP_FILE" "$OUTPUT_FILE"

  echo "Successfully fetched OpenBao token"
'';
in in
{ {
# OpenBao token for scraping metrics # Systemd service to fetch AppRole token for Prometheus OpenBao scraping
vault.secrets.openbao-token = { # The token is used to authenticate when scraping /v1/sys/metrics
secretPath = "hosts/monitoring01/openbao-token"; systemd.services.prometheus-openbao-token = {
extractKey = "token"; description = "Fetch OpenBao token for Prometheus metrics scraping";
outputDir = "/run/secrets/prometheus/openbao-token"; after = [ "network-online.target" ];
mode = "0400"; wants = [ "network-online.target" ];
owner = "prometheus"; before = [ "prometheus.service" ];
services = [ "prometheus" ]; requiredBy = [ "prometheus.service" ];
serviceConfig = {
  Type = "oneshot";
  ExecStart = fetchOpenbaoToken;
  # Deliberately no RemainAfterExit: a systemd timer does not re-activate a
  # unit that is still active, so a oneshot that stays "active (exited)" would
  # run exactly once and the refresh timer would never fetch a new token.
}; };
};
# Re-runs the token fetch on a schedule: first 5 minutes after boot, then
# every 30 minutes after the last activation, each with up to 5 minutes of
# random delay (AppRole tokens have 1-hour TTL).
# NOTE(review): a timer only re-runs its service if that service has gone
# inactive again, so the service it triggers must not use RemainAfterExit.
systemd.timers.prometheus-openbao-token = {
  wantedBy = [ "timers.target" ];
  description = "Refresh OpenBao token for Prometheus";
  timerConfig.OnBootSec = "5min";
  timerConfig.OnUnitActiveSec = "30min";
  timerConfig.RandomizedDelaySec = "5min";
};
services.prometheus = { services.prometheus = {
enable = true; enable = true;
# syntax-only check because we use external credential files (e.g., openbao-token) # syntax-only check because we use external credential files (e.g., openbao-token)

View File

@@ -15,6 +15,7 @@ locals {
# "secret/data/services/grafana/*", # "secret/data/services/grafana/*",
# "secret/data/shared/smtp/*" # "secret/data/shared/smtp/*"
# ] # ]
# extra_policies = ["some-other-policy"] # Optional: additional policies
# } # }
# Example: ha1 host # Example: ha1 host
@@ -38,6 +39,7 @@ locals {
"secret/data/shared/backup/*", "secret/data/shared/backup/*",
"secret/data/shared/nats/*", "secret/data/shared/nats/*",
] ]
extra_policies = ["prometheus-metrics"]
} }
# Wave 1: hosts with no service secrets (only need vault.enable for future use) # Wave 1: hosts with no service secrets (only need vault.enable for future use)
@@ -111,7 +113,10 @@ resource "vault_approle_auth_backend_role" "hosts" {
backend = vault_auth_backend.approle.path backend = vault_auth_backend.approle.path
role_name = each.key role_name = each.key
token_policies = ["${each.key}-policy"] token_policies = concat(
["${each.key}-policy"],
lookup(each.value, "extra_policies", [])
)
# Token configuration # Token configuration
token_ttl = 3600 # 1 hour token_ttl = 3600 # 1 hour

View File

@@ -8,14 +8,3 @@ path "sys/metrics" {
} }
EOT EOT
} }
# Token Prometheus presents when scraping OpenBao metrics. It carries only the
# prometheus_metrics policy, is renewable, and is issued with a one-year TTL.
resource "vault_token" "prometheus_metrics" {
  renewable = true
  ttl       = "8760h" # 1 year
  policies  = [vault_policy.prometheus_metrics.name]

  metadata = {
    purpose = "prometheus-metrics-scraping"
  }
}

View File

@@ -92,13 +92,6 @@ locals {
auto_generate = false auto_generate = false
data = { token = var.actions_token_1 } data = { token = var.actions_token_1 }
} }
# Prometheus OpenBao token for scraping metrics
# Token is created by vault_token.prometheus_metrics in policies.tf
"hosts/monitoring01/openbao-token" = {
auto_generate = false
data = { token = vault_token.prometheus_metrics.client_token }
}
} }
} }