Files
nixos-servers/services/vault/default.nix
Torjus Håkestad 3cccfc0487
Some checks failed
Run nix flake check / flake-check (push) Failing after 7m36s
monitoring: implement monitoring gaps coverage
Add exporters and scrape targets for services lacking monitoring:
- PostgreSQL: postgres-exporter on pgdb1
- Authelia: native telemetry metrics on auth01
- Unbound: unbound-exporter with remote-control on ns1/ns2
- NATS: HTTP monitoring endpoint on nats1
- OpenBao: telemetry config and Prometheus scrape with token auth
- Systemd: systemd-exporter on all hosts for per-service metrics

Add alert rules for postgres, auth (authelia + lldap), jellyfin,
vault (openbao), plus extend existing nats and unbound rules.

Add Terraform config for Prometheus metrics policy and token. The
token is created via vault_token resource and stored in KV, so no
manual token creation is needed.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-05 21:44:13 +01:00

212 lines
6.6 KiB
Nix

{ pkgs, ... }:
let
# Shell application that tries to unseal OpenBao over its Unix socket.
#
# Flow: wait (up to ~30s each) for the socket to appear and for `bao status`
# to answer, exit early if OpenBao is already unsealed, otherwise feed it the
# unseal keys found in $CREDENTIALS_DIRECTORY/unseal-key (one key per line).
# A missing credential or a still-sealed server only prints a warning and
# exits 0; a failing `bao operator unseal` call still aborts the script.
unsealScript = pkgs.writeShellApplication {
  name = "openbao-unseal";
  runtimeInputs = with pkgs; [
    openbao
    coreutils
    gnugrep
    getent
  ];
  text = ''
    # Set environment to use Unix socket
    export BAO_ADDR='unix:///run/openbao/openbao.sock'
    SOCKET_PATH="/run/openbao/openbao.sock"
    CREDS_DIR="''${CREDENTIALS_DIRECTORY:-}"

    # Print whatever `bao status` reports (stdout and stderr). Never fails:
    # a sealed or unreachable server makes bao exit non-zero, which is an
    # expected state here rather than an error.
    bao_status() {
      timeout 2 bao status 2>&1 || true
    }

    # Succeed when the status text passed as $1 reports "Sealed ... false".
    # A here-string is used instead of `echo | grep -q` so that an early
    # grep exit cannot fail the pipeline under pipefail.
    reports_unsealed() {
      grep -q "Sealed.*false" <<<"$1"
    }

    # Wait for socket to exist
    echo "Waiting for OpenBao socket..."
    for _ in {1..30}; do
      if [ -S "$SOCKET_PATH" ]; then
        echo "Socket exists"
        break
      fi
      sleep 1
    done

    # Wait for OpenBao to accept connections
    echo "Waiting for OpenBao to be ready..."
    for _ in {1..30}; do
      output=$(bao_status)
      if reports_unsealed "$output"; then
        # Already unsealed
        echo "OpenBao is already unsealed"
        exit 0
      elif grep -qE "(Sealed|Initialized)" <<<"$output"; then
        # Got a valid response, OpenBao is ready (sealed)
        echo "OpenBao is ready"
        break
      fi
      sleep 1
    done

    # Check once more in case the wait loop above ran out of attempts
    if reports_unsealed "$(bao_status)"; then
      echo "OpenBao is already unsealed"
      exit 0
    fi

    # Unseal using the TPM-decrypted keys (one per line)
    if [ -n "$CREDS_DIR" ] && [ -f "$CREDS_DIR/unseal-key" ]; then
      echo "Unsealing OpenBao..."
      # `read` returns non-zero at EOF even when it filled $key, so without
      # the `|| [ -n "$key" ]` a final key lacking a trailing newline would
      # be silently dropped.
      while IFS= read -r key || [ -n "$key" ]; do
        # Skip empty lines
        [ -z "$key" ] && continue
        echo "Applying unseal key..."
        bao operator unseal "$key"
        # Check if unsealed after each key
        if reports_unsealed "$(bao_status)"; then
          echo "OpenBao unsealed successfully"
          exit 0
        fi
      done < "$CREDS_DIR/unseal-key"
      echo "WARNING: Applied all keys but OpenBao is still sealed"
      exit 0
    else
      echo "WARNING: Unseal key credential not found, OpenBao remains sealed"
      exit 0
    fi
  '';
};
# Shell application that issues a certificate for vault01 from the
# pki_int/issue/homelab role (talking to OpenBao over its Unix socket) and
# installs it into the ACME certificate directory as cert.pem, chain.pem,
# fullchain.pem and key.pem, owned by acme:acme.
#
# The issued material is staged in a private temporary directory rather than
# fixed paths under /tmp, so the private key is never readable by other local
# users and cannot be redirected through a pre-created file or symlink.
bootstrapCertScript = pkgs.writeShellApplication {
  name = "bootstrap-vault-cert";
  runtimeInputs = with pkgs; [
    openbao
    jq
    openssl
    coreutils
  ];
  text = ''
    # Bootstrap vault01 with a proper certificate from its own PKI
    # This solves the chicken-and-egg problem where ACME clients can't trust
    # vault01's self-signed certificate.
    echo "=== Bootstrapping vault01 certificate ==="

    # Use Unix socket to avoid TLS issues
    export BAO_ADDR='unix:///run/openbao/openbao.sock'

    # ACME certificate directory
    CERT_DIR="/var/lib/acme/vault01.home.2rjus.net"

    # Private staging area: mktemp -d creates the directory with mode 0700,
    # and the trap removes it (including the key) on any exit path.
    WORK_DIR=$(mktemp -d)
    trap 'rm -rf -- "$WORK_DIR"' EXIT

    # Issue certificate for vault01 with vault as SAN
    echo "Issuing certificate for vault01.home.2rjus.net (with SAN: vault.home.2rjus.net)..."
    OUTPUT=$(bao write -format=json pki_int/issue/homelab \
      common_name="vault01.home.2rjus.net" \
      alt_names="vault.home.2rjus.net" \
      ttl="720h")

    # Create ACME directory structure
    echo "Creating ACME certificate directory..."
    mkdir -p "$CERT_DIR"

    # Extract certificate components into the staging directory.
    # -e makes jq exit non-zero when a field is missing or null, so the
    # script aborts before touching the live certificate instead of
    # installing a file that contains the text "null".
    jq -er '.data.certificate' <<<"$OUTPUT" > "$WORK_DIR/cert.pem"
    jq -er '.data.private_key' <<<"$OUTPUT" > "$WORK_DIR/key.pem"
    jq -er '.data.issuing_ca' <<<"$OUTPUT" > "$WORK_DIR/chain.pem"

    # Create fullchain (cert + CA)
    cat "$WORK_DIR/cert.pem" "$WORK_DIR/chain.pem" > "$WORK_DIR/fullchain.pem"

    # Tighten modes before the files become visible in the cert directory
    chmod 640 "$WORK_DIR"/*.pem

    # Backup old certificates if they exist
    if [ -f "$CERT_DIR/fullchain.pem" ]; then
      echo "Backing up old certificate..."
      cp "$CERT_DIR/fullchain.pem" "$CERT_DIR/fullchain.pem.backup"
      # The key may be absent from a partially populated directory; that
      # must not abort the bootstrap.
      if [ -f "$CERT_DIR/key.pem" ]; then
        cp "$CERT_DIR/key.pem" "$CERT_DIR/key.pem.backup"
      fi
    fi

    # Install new certificates
    echo "Installing new certificate..."
    mv "$WORK_DIR/fullchain.pem" "$CERT_DIR/fullchain.pem"
    mv "$WORK_DIR/cert.pem" "$CERT_DIR/cert.pem"
    mv "$WORK_DIR/chain.pem" "$CERT_DIR/chain.pem"
    mv "$WORK_DIR/key.pem" "$CERT_DIR/key.pem"

    # Set proper ownership and permissions (ACME-style)
    chown -R acme:acme "$CERT_DIR"
    chmod 750 "$CERT_DIR"
    chmod 640 "$CERT_DIR"/*.pem

    echo "Certificate installed successfully!"
    echo ""
    echo "Certificate details:"
    openssl x509 -in "$CERT_DIR/cert.pem" -noout -subject -issuer -dates
    echo ""
    echo "Subject Alternative Names:"
    openssl x509 -in "$CERT_DIR/cert.pem" -noout -ext subjectAltName
    echo ""
    echo "Now restart openbao service:"
    echo " systemctl restart openbao"
    echo ""
    echo "After restart, verify ACME endpoint is accessible:"
    echo " curl https://vault01.home.2rjus.net:8200/v1/pki_int/acme/directory"
    echo ""
    echo "Once working, ACME will automatically manage certificate renewals."
  '';
};
in
{
# Put the certificate bootstrap script into the system package set so it can
# be invoked by name from a shell.
environment = {
  systemPackages = [ bootstrapCertScript ];
};
# OpenBao server: web UI on, file storage under /var/lib/openbao, Prometheus
# telemetry, and two listeners (TLS on TCP port 8200, plus a local Unix socket).
services.openbao = {
  enable = true;
  settings = {
    ui = true;

    storage.file.path = "/var/lib/openbao";

    listener = {
      # TCP listener on all interfaces; certificate and key are read from
      # the service's systemd credentials directory.
      default = {
        type = "tcp";
        address = "0.0.0.0:8200";
        tls_key_file = "/run/credentials/openbao.service/key.pem";
        tls_cert_file = "/run/credentials/openbao.service/cert.pem";
      };

      # Unix socket listener (no TLS settings configured on it)
      socket = {
        type = "unix";
        address = "/run/openbao/openbao.sock";
      };
    };

    # Metrics settings for Prometheus scraping
    telemetry.prometheus_retention_time = "60s";
    telemetry.disable_hostname = true;
  };
};
# Extra unit settings for openbao.service: TLS and unseal credentials,
# the auto-unseal hook, and acme group membership.
systemd.services.openbao = {
  serviceConfig = {
    # Give the service the acme group so it can read the certificate files
    SupplementaryGroups = [ "acme" ];

    # Run the unseal helper each time the service has been started
    ExecStartPost = "${unsealScript}/bin/openbao-unseal";

    # TLS key and full chain from the ACME directory, passed in as the
    # credentials "key.pem" and "cert.pem".
    # NOTE(review): confirm that a renewed certificate reaches the running
    # service when ACME reloads it, since these are passed as credentials.
    LoadCredential = [
      "key.pem:/var/lib/acme/vault01.home.2rjus.net/key.pem"
      "cert.pem:/var/lib/acme/vault01.home.2rjus.net/fullchain.pem"
    ];

    # Encrypted (TPM2) unseal key; the .cred file is created by hand,
    # see the setup instructions.
    LoadCredentialEncrypted = [
      "unseal-key:/var/lib/openbao/unseal-key.cred"
    ];
  };
};
# Certificate for vault01, renewed through ACME against the pki_int ACME
# directory on vault01 itself. The first certificate comes from
# bootstrap-vault-cert; ACME manages it from then on.
security.acme.certs = {
  "vault01.home.2rjus.net" = {
    extraDomainNames = [ "vault.home.2rjus.net" ];
    server = "https://vault01.home.2rjus.net:8200/v1/pki_int/acme/directory";
    # Answer the HTTP challenge on port 80
    listenHTTP = ":80";
    # Reload openbao after the certificate changes
    reloadServices = [ "openbao" ];
  };
};
}