diff --git a/services/monitoring/prometheus.nix b/services/monitoring/prometheus.nix
index 56d44c6..e0fe2c2 100644
--- a/services/monitoring/prometheus.nix
+++ b/services/monitoring/prometheus.nix
@@ -1,21 +1,89 @@
-{ self, lib, ... }:
+{ self, lib, pkgs, ... }:
 let
   monLib = import ../../lib/monitoring.nix { inherit lib; };
   externalTargets = import ./external-targets.nix;
 
   nodeExporterTargets = monLib.generateNodeExporterTargets self externalTargets;
   autoScrapeConfigs = monLib.generateScrapeConfigs self externalTargets;
+
+  # Script to fetch AppRole token for Prometheus to use when scraping OpenBao metrics.
+  # It logs in with the AppRole credentials under /var/lib/vault/approle and writes
+  # the returned client token to /run/secrets/prometheus/openbao-token.
+  fetchOpenbaoToken = pkgs.writeShellScript "fetch-openbao-token" ''
+    set -euo pipefail
+
+    VAULT_ADDR="https://vault01.home.2rjus.net:8200"
+    APPROLE_DIR="/var/lib/vault/approle"
+    OUTPUT_FILE="/run/secrets/prometheus/openbao-token"
+
+    # Read AppRole credentials
+    if [ ! -f "$APPROLE_DIR/role-id" ] || [ ! -f "$APPROLE_DIR/secret-id" ]; then
+      echo "AppRole credentials not found at $APPROLE_DIR" >&2
+      exit 1
+    fi
+
+    ROLE_ID=$(cat "$APPROLE_DIR/role-id")
+    SECRET_ID=$(cat "$APPROLE_DIR/secret-id")
+
+    # Authenticate to Vault. The login payload is passed to curl on stdin so the
+    # secret-id does not appear in the process argument list.
+    # NOTE(review): -k turns off TLS certificate verification while the AppRole
+    # credentials are being sent; prefer --cacert with the CA that signed vault01.
+    AUTH_RESPONSE=$(printf '{"role_id":"%s","secret_id":"%s"}' "$ROLE_ID" "$SECRET_ID" \
+      | ${pkgs.curl}/bin/curl -sf -k -X POST --data @- \
+        "$VAULT_ADDR/v1/auth/approle/login")
+
+    # Extract token
+    VAULT_TOKEN=$(echo "$AUTH_RESPONSE" | ${pkgs.jq}/bin/jq -r '.auth.client_token')
+    if [ -z "$VAULT_TOKEN" ] || [ "$VAULT_TOKEN" = "null" ]; then
+      echo "Failed to extract Vault token from response" >&2
+      exit 1
+    fi
+
+    # Write the token to a private temp file (mktemp creates it 0600) and rename it
+    # into place, so it is never world-readable and never observed half-written.
+    TOKEN_DIR=$(dirname "$OUTPUT_FILE")
+    mkdir -p "$TOKEN_DIR"
+    TMP_FILE=$(mktemp "$TOKEN_DIR/.openbao-token.XXXXXX")
+    trap 'rm -f "$TMP_FILE"' EXIT
+    printf '%s' "$VAULT_TOKEN" > "$TMP_FILE"
+    chown prometheus:prometheus "$TMP_FILE"
+    chmod 0400 "$TMP_FILE"
+    mv -f "$TMP_FILE" "$OUTPUT_FILE"
+
+    echo "Successfully fetched OpenBao token"
+  '';
 in
 {
-  # OpenBao token for scraping metrics
-  vault.secrets.openbao-token = {
-    secretPath = "hosts/monitoring01/openbao-token";
-    extractKey = "token";
-    outputDir = "/run/secrets/prometheus/openbao-token";
-    mode = "0400";
-    owner = "prometheus";
-    services = [ "prometheus" ];
+  # Systemd service to fetch AppRole token for Prometheus OpenBao scraping
+  # The token is used to authenticate when scraping /v1/sys/metrics
+  systemd.services.prometheus-openbao-token = {
+    description = "Fetch OpenBao token for Prometheus metrics scraping";
+    after = [ "network-online.target" ];
+    wants = [ "network-online.target" ];
+    before = [ "prometheus.service" ];
+    requiredBy = [ "prometheus.service" ];
+
+    serviceConfig = {
+      Type = "oneshot";
+      ExecStart = fetchOpenbaoToken;
+      # Deliberately no RemainAfterExit: a timer does not restart a unit that is
+      # still active, so a oneshot that stays "active (exited)" would run only
+      # once and the token would expire without ever being refreshed.
+    };
   };
+
+  # Timer to periodically refresh the token (AppRole tokens have 1-hour TTL)
+  systemd.timers.prometheus-openbao-token = {
+    description = "Refresh OpenBao token for Prometheus";
+    wantedBy = [ "timers.target" ];
+    timerConfig = {
+      OnBootSec = "5min";
+      OnUnitActiveSec = "30min";
+      RandomizedDelaySec = "5min";
+    };
+  };
+
   services.prometheus = {
     enable = true;
     # syntax-only check because we use external credential files (e.g., openbao-token)
diff --git a/terraform/vault/approle.tf b/terraform/vault/approle.tf
index b1c31f8..86269e6 100644
--- a/terraform/vault/approle.tf
+++ b/terraform/vault/approle.tf
@@ -15,6 +15,7 @@ locals {
     #     "secret/data/services/grafana/*",
     #     "secret/data/shared/smtp/*"
     #   ]
+    #   extra_policies = ["some-other-policy"]  # Optional: additional policies
     # }
 
     # Example: ha1 host
@@ -38,6 +39,7 @@ locals {
         "secret/data/shared/backup/*",
         "secret/data/shared/nats/*",
       ]
+      extra_policies = ["prometheus-metrics"]
     }
 
     # Wave 1: hosts with no service secrets (only need vault.enable for future use)
@@ -109,9 +111,12 @@ EOT
 resource "vault_approle_auth_backend_role" "hosts" {
   for_each = local.host_policies
 
-  backend        = vault_auth_backend.approle.path
-  role_name      = each.key
-  token_policies = ["${each.key}-policy"]
+  backend   = vault_auth_backend.approle.path
+  role_name = each.key
+  token_policies = concat(
+    ["${each.key}-policy"],
+    lookup(each.value, "extra_policies", [])
+  )
 
   # 
Token configuration token_ttl = 3600 # 1 hour diff --git a/terraform/vault/policies.tf b/terraform/vault/policies.tf index e0f90e4..35c5657 100644 --- a/terraform/vault/policies.tf +++ b/terraform/vault/policies.tf @@ -1,21 +1,10 @@ # Generic policies for services (not host-specific) resource "vault_policy" "prometheus_metrics" { - name = "prometheus-metrics" + name = "prometheus-metrics" policy = <