diff --git a/.gitignore b/.gitignore index 8068363..fa65636 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,12 @@ terraform/terraform.tfvars terraform/*.auto.tfvars terraform/crash.log terraform/crash.*.log + +terraform/vault/.terraform/ +terraform/vault/.terraform.lock.hcl +terraform/vault/*.tfstate +terraform/vault/*.tfstate.* +terraform/vault/terraform.tfvars +terraform/vault/*.auto.tfvars +terraform/vault/crash.log +terraform/vault/crash.*.log diff --git a/TODO.md b/TODO.md index 46f314f..553ed40 100644 --- a/TODO.md +++ b/TODO.md @@ -153,7 +153,9 @@ create-host \ --- -### Phase 4: Secrets Management with HashiCorp Vault +### Phase 4: Secrets Management with OpenBao (Vault) + +**Status:** 🚧 Phases 4a & 4b Complete, 4c & 4d In Progress **Challenge:** Current sops-nix approach has chicken-and-egg problem with age keys @@ -164,161 +166,225 @@ create-host \ 4. User commits, pushes 5. VM can now decrypt secrets -**Selected approach:** Migrate to HashiCorp Vault for centralized secrets management +**Selected approach:** Migrate to OpenBao (Vault fork) for centralized secrets management + +**Why OpenBao instead of HashiCorp Vault:** +- HashiCorp Vault switched to BSL (Business Source License), unavailable in NixOS cache +- OpenBao is the community fork maintaining the pre-BSL MPL 2.0 license +- API-compatible with Vault, uses same Terraform provider +- Maintains all Vault features we need **Benefits:** -- Industry-standard secrets management (Vault experience transferable to work) +- Industry-standard secrets management (Vault-compatible experience) - Eliminates manual age key distribution step - Secrets-as-code via OpenTofu (infrastructure-as-code aligned) -- Centralized PKI management (replaces step-ca, consolidates TLS + SSH CA) +- Centralized PKI management with ACME support (ready to replace step-ca) - Automatic secret rotation capabilities -- Audit logging for all secret access +- Audit logging for all secret access (not yet enabled) - AppRole authentication enables automated bootstrap -**Architecture:** +**Current Architecture:** ``` -vault.home.2rjus.net - ├─ KV Secrets Engine (replaces sops-nix) - ├─ PKI Engine (replaces step-ca for TLS) - ├─ SSH CA Engine (replaces step-ca SSH CA) - └─ AppRole Auth (per-host authentication) +vault.home.2rjus.net (10.69.13.19) + ├─ KV Secrets Engine (ready to replace sops-nix) + │ ├─ secret/hosts/{hostname}/* + │ ├─ secret/services/{service}/* + │ └─ secret/shared/{category}/* + ├─ PKI Engine (ready to replace step-ca for TLS) + │ ├─ Root CA (EC P-384, 10 year) + │ ├─ Intermediate CA (EC P-384, 5 year) + │ └─ ACME endpoint enabled + ├─ SSH CA Engine (TODO: Phase 4c) + └─ AppRole Auth (per-host authentication configured) ↓ - New hosts authenticate on first boot - Fetch secrets via Vault API + [Phase 4d] New hosts authenticate on first boot + [Phase 4d] Fetch secrets via Vault API No manual key distribution needed ``` +**Completed:** +- ✅ Phase 4a: OpenBao server with TPM2 auto-unseal +- ✅ Phase 4b: Infrastructure-as-code (secrets, policies, AppRoles, PKI) + +**Next Steps:** +- Phase 4c: Migrate from step-ca to OpenBao PKI +- Phase 4d: Bootstrap integration for automated secrets access + --- -#### Phase 4a: Vault Server Setup +#### Phase 4a: Vault Server Setup ✅ COMPLETED + +**Status:** ✅ Fully implemented and tested +**Completed:** 2026-02-02 **Goal:** Deploy and configure Vault server with auto-unseal -**Tasks:** -- [ ] Create `hosts/vault01/` configuration - - [ ] Basic NixOS configuration (hostname, networking, etc.) - - [ ] Vault service configuration - - [ ] Firewall rules (8200 for API, 8201 for cluster) - - [ ] Add to flake.nix and terraform -- [ ] Implement auto-unseal mechanism - - [ ] **Preferred:** TPM-based auto-unseal if hardware supports it - - [ ] Use tpm2-tools to seal/unseal Vault keys - - [ ] Systemd service to unseal on boot - - [ ] **Fallback:** Shamir secret sharing with systemd automation - - [ ] Generate 3 keys, threshold 2 - - [ ] Store 2 keys on disk (encrypted), keep 1 offline - - [ ] Systemd service auto-unseals using 2 keys -- [ ] Initial Vault setup - - [ ] Initialize Vault - - [ ] Configure storage backend (integrated raft or file) - - [ ] Set up root token management - - [ ] Enable audit logging -- [ ] Deploy to infrastructure - - [ ] Add DNS entry for vault.home.2rjus.net - - [ ] Deploy VM via terraform - - [ ] Bootstrap and verify Vault is running +**Implementation:** +- Used **OpenBao** (Vault fork) instead of HashiCorp Vault due to BSL licensing concerns +- TPM2-based auto-unseal using systemd's native `LoadCredentialEncrypted` +- Self-signed bootstrap TLS certificates (avoiding circular dependency with step-ca) +- File-based storage backend at `/var/lib/openbao` +- Unix socket + TCP listener (0.0.0.0:8200) configuration -**Deliverable:** Running Vault server that auto-unseals on boot +**Tasks:** +- [x] Create `hosts/vault01/` configuration + - [x] Basic NixOS configuration (hostname: vault01, IP: 10.69.13.19/24) + - [x] Created reusable `services/vault` module + - [x] Firewall not needed (trusted network) + - [x] Already in flake.nix, deployed via terraform +- [x] Implement auto-unseal mechanism + - [x] **TPM2-based auto-unseal** (preferred option) + - [x] systemd `LoadCredentialEncrypted` with TPM2 binding + - [x] `writeShellApplication` script with proper runtime dependencies + - [x] Reads multiple unseal keys (one per line) until unsealed + - [x] Auto-unseals on service start via `ExecStartPost` +- [x] Initial Vault setup + - [x] Initialized OpenBao with Shamir secret sharing (5 keys, threshold 3) + - [x] File storage backend + - [x] Self-signed TLS certificates via LoadCredential +- [x] Deploy to infrastructure + - [x] DNS entry added for vault.home.2rjus.net + - [x] VM deployed via terraform + - [x] Verified OpenBao running and auto-unsealing + +**Changes from Original Plan:** +- Used OpenBao instead of HashiCorp Vault (licensing) +- Used systemd's native TPM2 support instead of tpm2-tools directly +- Skipped audit logging (can be enabled later) +- Used self-signed certs initially (will migrate to OpenBao PKI later) + +**Deliverable:** ✅ Running OpenBao server that auto-unseals on boot using TPM2 + +**Documentation:** +- `/services/vault/README.md` - Service module overview +- `/docs/vault/auto-unseal.md` - Complete TPM2 auto-unseal setup guide --- -#### Phase 4b: Vault-as-Code with OpenTofu +#### Phase 4b: Vault-as-Code with OpenTofu ✅ COMPLETED + +**Status:** ✅ Fully implemented and tested +**Completed:** 2026-02-02 **Goal:** Manage all Vault configuration (secrets structure, policies, roles) as code +**Implementation:** +- Complete Terraform/OpenTofu configuration in `terraform/vault/` +- Locals-based pattern (similar to `vms.tf`) for declaring secrets and policies +- Auto-generation of secrets using `random_password` provider +- Three-tier secrets path hierarchy: `hosts/`, `services/`, `shared/` +- PKI infrastructure with **Elliptic Curve certificates** (P-384 for CAs, P-256 for leaf certs) +- ACME support enabled on intermediate CA + **Tasks:** -- [ ] Set up Vault Terraform provider - - [ ] Create `terraform/vault/` directory - - [ ] Configure Vault provider (address, auth) - - [ ] Store Vault token securely (terraform.tfvars, gitignored) -- [ ] Enable and configure secrets engines - - [ ] Enable KV v2 secrets engine at `secret/` - - [ ] Define secret path structure (per-service, per-host) - - [ ] Example: `secret/monitoring/grafana`, `secret/postgres/ha1` -- [ ] Define policies as code - - [ ] Create policies for different service tiers - - [ ] Principle of least privilege (hosts only read their secrets) - - [ ] Example: monitoring-policy allows read on `secret/monitoring/*` -- [ ] Set up AppRole authentication - - [ ] Enable AppRole auth backend - - [ ] Create role per host type (monitoring, dns, database, etc.) - - [ ] Bind policies to roles - - [ ] Configure TTL and token policies -- [ ] Migrate existing secrets from sops-nix - - [ ] Create migration script/playbook - - [ ] Decrypt sops secrets and load into Vault KV - - [ ] Verify all secrets migrated successfully - - [ ] Keep sops as backup during transition -- [ ] Implement secrets-as-code patterns - - [ ] Secret values in gitignored terraform.tfvars - - [ ] Or use random_password for auto-generated secrets - - [ ] Secret structure/paths in version-controlled .tf files +- [x] Set up Vault Terraform provider + - [x] Created `terraform/vault/` directory + - [x] Configured Vault provider (uses HashiCorp provider, compatible with OpenBao) + - [x] Credentials in terraform.tfvars (gitignored) + - [x] terraform.tfvars.example for reference +- [x] Enable and configure secrets engines + - [x] KV v2 engine at `secret/` + - [x] Three-tier path structure: + - `secret/hosts/{hostname}/*` - Host-specific secrets + - `secret/services/{service}/*` - Service-wide secrets + - `secret/shared/{category}/*` - Shared secrets (SMTP, backups, etc.) +- [x] Define policies as code + - [x] Policies auto-generated from `locals.host_policies` + - [x] Per-host policies with read/list on designated paths + - [x] Principle of least privilege enforced +- [x] Set up AppRole authentication + - [x] AppRole backend enabled at `approle/` + - [x] Roles auto-generated per host from `locals.host_policies` + - [x] Token TTL: 1 hour, max 24 hours + - [x] Policies bound to roles +- [x] Implement secrets-as-code patterns + - [x] Auto-generated secrets using `random_password` provider + - [x] Manual secrets supported via variables in terraform.tfvars + - [x] Secret structure versioned in .tf files + - [x] Secret values excluded from git +- [x] Set up PKI infrastructure + - [x] Root CA (10 year TTL, EC P-384) + - [x] Intermediate CA (5 year TTL, EC P-384) + - [x] PKI role for `*.home.2rjus.net` (30 day max TTL, EC P-256) + - [x] ACME enabled on intermediate CA + - [x] Support for static certificate issuance via Terraform + - [x] CRL, OCSP, and issuing certificate URLs configured -**Example OpenTofu:** -```hcl -resource "vault_kv_secret_v2" "monitoring_grafana" { - mount = "secret" - name = "monitoring/grafana" - data_json = jsonencode({ - admin_password = var.grafana_admin_password - smtp_password = var.smtp_password - }) -} +**Changes from Original Plan:** +- Used Elliptic Curve instead of RSA for all certificates (better performance, smaller keys) +- Implemented PKI infrastructure in Phase 4b instead of Phase 4c (more logical grouping) +- ACME support configured immediately (ready for migration from step-ca) +- Did not migrate existing sops-nix secrets yet (deferred to gradual migration) -resource "vault_policy" "monitoring" { - name = "monitoring-policy" - policy = < homelab-root-ca.crt` + - [ ] Add to NixOS trust store on all hosts via `security.pki.certificateFiles` + - [ ] Deploy via auto-upgrade +- [ ] Test certificate issuance + - [ ] Issue test certificate using ACME client (lego/certbot) + - [ ] Or issue static certificate via OpenBao CLI + - [ ] Verify certificate chain and trust +- [ ] Migrate vault01's own certificate + - [ ] Issue new certificate from OpenBao PKI (self-issued) + - [ ] Replace self-signed bootstrap certificate + - [ ] Update service configuration +- [ ] Migrate hosts from step-ca to OpenBao + - [ ] Update `system/acme.nix` to use OpenBao ACME endpoint + - [ ] Change server to `https://vault.home.2rjus.net:8200/v1/pki_int/acme/directory` + - [ ] Test on one host (non-critical service) + - [ ] Roll out to all hosts via auto-upgrade +- [ ] Configure SSH CA in OpenBao (optional, future work) - [ ] Enable SSH secrets engine (`ssh/` mount) - [ ] Generate SSH signing keys - [ ] Create roles for host and user certificates - [ ] Configure TTLs and allowed principals -- [ ] Migrate hosts from step-ca to Vault - - [ ] Update system/acme.nix to use Vault ACME endpoint - - [ ] Change server to `https://vault.home.2rjus.net:8200/v1/pki_int/acme/directory` - - [ ] Test certificate issuance on one host - - [ ] Roll out to all hosts via auto-upgrade -- [ ] Migrate SSH CA trust - - [ ] Distribute Vault SSH CA public key to all hosts - - [ ] Update sshd_config to trust Vault CA - - [ ] Test SSH certificate authentication + - [ ] Distribute SSH CA public key to all hosts + - [ ] Update sshd_config to trust OpenBao CA - [ ] Decommission step-ca - - [ ] Verify all services migrated + - [ ] Verify all ACME services migrated and working - [ ] Stop step-ca service on ca host - [ ] Archive step-ca configuration for backup + - [ ] Update documentation -**Deliverable:** All TLS and SSH certificates issued by Vault, step-ca retired +**Deliverable:** All TLS certificates issued by OpenBao PKI, step-ca retired --- diff --git a/docs/vault/auto-unseal.md b/docs/vault/auto-unseal.md new file mode 100644 index 0000000..eeed239 --- /dev/null +++ b/docs/vault/auto-unseal.md @@ -0,0 +1,178 @@ +# OpenBao TPM2 Auto-Unseal Setup + +This document describes the one-time setup process for enabling TPM2-based auto-unsealing on vault01. + +## Overview + +The auto-unseal feature uses systemd's `LoadCredentialEncrypted` with TPM2 to securely store and retrieve an unseal key. On service start, systemd automatically decrypts the credential using the VM's TPM, and the service unseals OpenBao. + +## Prerequisites + +- OpenBao must be initialized (`bao operator init` completed) +- You must have at least one unseal key from the initialization +- vault01 must have a TPM2 device (virtual TPM for Proxmox VMs) + +## Initial Setup + +Perform these steps on vault01 after deploying the service configuration: + +### 1. Save Unseal Key + +```bash +# Create temporary file with one of your unseal keys +echo "paste-your-unseal-key-here" > /tmp/unseal-key.txt +``` + +### 2. Encrypt with TPM2 + +```bash +# Encrypt the key using TPM2 binding +systemd-creds encrypt \ + --with-key=tpm2 \ + --name=unseal-key \ + /tmp/unseal-key.txt \ + /var/lib/openbao/unseal-key.cred + +# Set proper ownership and permissions +chown openbao:openbao /var/lib/openbao/unseal-key.cred +chmod 600 /var/lib/openbao/unseal-key.cred +``` + +### 3. Cleanup + +```bash +# Securely delete the plaintext key +shred -u /tmp/unseal-key.txt +``` + +### 4. Test Auto-Unseal + +```bash +# Restart the service - it should auto-unseal +systemctl restart openbao + +# Verify it's unsealed +bao status +# Should show: Sealed = false +``` + +## TPM PCR Binding + +The default `--with-key=tpm2` binds the credential to PCR 7 (Secure Boot state). For stricter binding that includes firmware and boot state: + +```bash +systemd-creds encrypt \ + --with-key=tpm2 \ + --tpm2-pcrs=0+7+14 \ + --name=unseal-key \ + /tmp/unseal-key.txt \ + /var/lib/openbao/unseal-key.cred +``` + +PCR meanings: +- **PCR 0**: BIOS/UEFI firmware measurements +- **PCR 7**: Secure Boot state (UEFI variables) +- **PCR 14**: MOK (Machine Owner Key) state + +**Trade-off**: Stricter PCR binding improves security but may require re-encrypting the credential after firmware updates or kernel changes. + +## Re-provisioning + +If you need to reprovision vault01 from scratch: + +1. **Before destroying**: Back up your root token and all unseal keys (stored securely offline) +2. **After recreating the VM**: + - Initialize OpenBao: `bao operator init` + - Follow the setup steps above to encrypt a new unseal key with TPM2 +3. **Restore data** (if migrating): Copy `/var/lib/openbao` from backup + +## Handling System Changes + +**After firmware updates, kernel updates, or boot configuration changes**, PCR values may change, causing TPM decryption to fail. + +### Symptoms +- Service fails to start +- Logs show: `Failed to decrypt credentials` +- OpenBao remains sealed after reboot + +### Fix +1. Unseal manually with one of your offline unseal keys: + ```bash + bao operator unseal + ``` + +2. Re-encrypt the credential with updated PCR values: + ```bash + echo "your-unseal-key" > /tmp/unseal-key.txt + systemd-creds encrypt \ + --with-key=tpm2 \ + --name=unseal-key \ + /tmp/unseal-key.txt \ + /var/lib/openbao/unseal-key.cred + chown openbao:openbao /var/lib/openbao/unseal-key.cred + chmod 600 /var/lib/openbao/unseal-key.cred + shred -u /tmp/unseal-key.txt + ``` + +3. Restart the service: + ```bash + systemctl restart openbao + ``` + +## Security Considerations + +### What This Protects Against +- **Data at rest**: Vault data is encrypted and cannot be accessed without unsealing +- **VM snapshot theft**: An attacker with a VM snapshot cannot decrypt the unseal key without the TPM state +- **TPM binding**: The key can only be decrypted by the same VM with matching PCR values + +### What This Does NOT Protect Against +- **Compromised host**: If an attacker gains root access to vault01 while running, they can access unsealed data +- **Boot-time attacks**: If an attacker can modify the boot process to match PCR values, they may retrieve the key +- **VM console access**: An attacker with VM console access during boot could potentially access the unsealed vault + +### Recommendations +- **Keep offline backups** of root token and all unseal keys in a secure location (password manager, encrypted USB, etc.) +- **Use Shamir secret sharing**: The default 5-key threshold means even if the TPM key is compromised, an attacker needs the other keys +- **Monitor access**: Use OpenBao's audit logging to detect unauthorized access +- **Consider stricter PCR binding** (PCR 0+7+14) for production, accepting the maintenance overhead + +## Troubleshooting + +### Check if credential exists +```bash +ls -la /var/lib/openbao/unseal-key.cred +``` + +### Test credential decryption manually +```bash +# Should output your unseal key if TPM decryption works +systemd-creds decrypt /var/lib/openbao/unseal-key.cred - +``` + +### View service logs +```bash +journalctl -u openbao -n 50 +``` + +### Manual unseal +```bash +bao operator unseal +# Enter one of your offline unseal keys when prompted +``` + +### Check TPM status +```bash +# Check if TPM2 is available +ls /dev/tpm* + +# View TPM PCR values +tpm2_pcrread +``` + +## References + +- [systemd.exec - Credentials](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Credentials) +- [systemd-creds man page](https://www.freedesktop.org/software/systemd/man/systemd-creds.html) +- [TPM2 PCR Documentation](https://uapi-group.org/specifications/specs/linux_tpm_pcr_registry/) +- [OpenBao Documentation](https://openbao.org/docs/) diff --git a/flake.nix b/flake.nix index 4bb29e9..26523b3 100644 --- a/flake.nix +++ b/flake.nix @@ -334,38 +334,38 @@ sops-nix.nixosModules.sops ]; }; - testvm01 = nixpkgs.lib.nixosSystem { - inherit system; - specialArgs = { - inherit inputs self sops-nix; + testvm01 = nixpkgs.lib.nixosSystem { + inherit system; + specialArgs = { + inherit inputs self sops-nix; + }; + modules = [ + ( + { config, pkgs, ... }: + { + nixpkgs.overlays = commonOverlays; + } + ) + ./hosts/testvm01 + sops-nix.nixosModules.sops + ]; }; - modules = [ - ( - { config, pkgs, ... }: - { - nixpkgs.overlays = commonOverlays; - } - ) - ./hosts/testvm01 - sops-nix.nixosModules.sops - ]; - }; - vault01 = nixpkgs.lib.nixosSystem { - inherit system; - specialArgs = { - inherit inputs self sops-nix; + vault01 = nixpkgs.lib.nixosSystem { + inherit system; + specialArgs = { + inherit inputs self sops-nix; + }; + modules = [ + ( + { config, pkgs, ... }: + { + nixpkgs.overlays = commonOverlays; + } + ) + ./hosts/vault01 + sops-nix.nixosModules.sops + ]; }; - modules = [ - ( - { config, pkgs, ... }: - { - nixpkgs.overlays = commonOverlays; - } - ) - ./hosts/vault01 - sops-nix.nixosModules.sops - ]; - }; }; packages = forAllSystems ( { pkgs }: @@ -380,6 +380,7 @@ packages = with pkgs; [ ansible opentofu + openbao (pkgs.callPackage ./scripts/create-host { }) ]; }; diff --git a/services/vault/README.md b/services/vault/README.md new file mode 100644 index 0000000..e1b7d93 --- /dev/null +++ b/services/vault/README.md @@ -0,0 +1,38 @@ +# OpenBao Service Module + +NixOS service module for OpenBao (open-source Vault fork) with TPM2-based auto-unsealing. + +## Features + +- TLS-enabled TCP listener on `0.0.0.0:8200` +- Unix socket listener at `/run/openbao/openbao.sock` +- File-based storage at `/var/lib/openbao` +- TPM2 auto-unseal on service start + +## Configuration + +The module expects: +- TLS certificate: `/var/lib/openbao/cert.pem` +- TLS private key: `/var/lib/openbao/key.pem` +- TPM2-encrypted unseal key: `/var/lib/openbao/unseal-key.cred` + +Certificates are loaded via systemd `LoadCredential`, and the unseal key via `LoadCredentialEncrypted`. + +## Setup + +For initial setup and configuration instructions, see: +- **Auto-unseal setup**: `/docs/vault/auto-unseal.md` +- **Terraform configuration**: `/terraform/vault/README.md` + +## Usage + +```bash +# Check seal status +bao status + +# Manually seal (for maintenance) +bao operator seal + +# Service will auto-unseal on restart +systemctl restart openbao +``` diff --git a/services/vault/default.nix b/services/vault/default.nix index 41b2ab6..bb30d60 100644 --- a/services/vault/default.nix +++ b/services/vault/default.nix @@ -1,8 +1,114 @@ -{ ... }: +{ pkgs, ... }: +let + unsealScript = pkgs.writeShellApplication { + name = "openbao-unseal"; + runtimeInputs = with pkgs; [ + openbao + coreutils + gnugrep + getent + ]; + text = '' + # Set environment to use Unix socket + export BAO_ADDR='unix:///run/openbao/openbao.sock' + SOCKET_PATH="/run/openbao/openbao.sock" + CREDS_DIR="''${CREDENTIALS_DIRECTORY:-}" + + # Wait for socket to exist + echo "Waiting for OpenBao socket..." + for _ in {1..30}; do + if [ -S "$SOCKET_PATH" ]; then + echo "Socket exists" + break + fi + sleep 1 + done + + # Wait for OpenBao to accept connections + echo "Waiting for OpenBao to be ready..." + for _ in {1..30}; do + output=$(timeout 2 bao status 2>&1 || true) + + if echo "$output" | grep -q "Sealed.*false"; then + # Already unsealed + echo "OpenBao is already unsealed" + exit 0 + elif echo "$output" | grep -qE "(Sealed|Initialized)"; then + # Got a valid response, OpenBao is ready (sealed) + echo "OpenBao is ready" + break + fi + + sleep 1 + done + + # Check if already unsealed + if output=$(timeout 2 bao status 2>&1 || true); then + if echo "$output" | grep -q "Sealed.*false"; then + echo "OpenBao is already unsealed" + exit 0 + fi + fi + + # Unseal using the TPM-decrypted keys (one per line) + if [ -n "$CREDS_DIR" ] && [ -f "$CREDS_DIR/unseal-key" ]; then + echo "Unsealing OpenBao..." + while IFS= read -r key; do + # Skip empty lines + [ -z "$key" ] && continue + + echo "Applying unseal key..." + bao operator unseal "$key" + + # Check if unsealed after each key + if output=$(timeout 2 bao status 2>&1 || true); then + if echo "$output" | grep -q "Sealed.*false"; then + echo "OpenBao unsealed successfully" + exit 0 + fi + fi + done < "$CREDS_DIR/unseal-key" + + echo "WARNING: Applied all keys but OpenBao is still sealed" + exit 0 + else + echo "WARNING: Unseal key credential not found, OpenBao remains sealed" + exit 0 + fi + ''; + }; +in { - services.vault = { + services.openbao = { enable = true; - storageBackend = "file"; + settings = { + ui = true; + + storage.file.path = "/var/lib/openbao"; + listener.default = { + type = "tcp"; + address = "0.0.0.0:8200"; + tls_cert_file = "/run/credentials/openbao.service/cert.pem"; + tls_key_file = "/run/credentials/openbao.service/key.pem"; + }; + listener.socket = { + type = "unix"; + address = "/run/openbao/openbao.sock"; + }; + }; + }; + + systemd.services.openbao.serviceConfig = { + LoadCredential = [ + "key.pem:/var/lib/openbao/key.pem" + "cert.pem:/var/lib/openbao/cert.pem" + ]; + # TPM2-encrypted unseal key (created manually, see setup instructions) + LoadCredentialEncrypted = [ + "unseal-key:/var/lib/openbao/unseal-key.cred" + ]; + # Auto-unseal on service start + ExecStartPost = "${unsealScript}/bin/openbao-unseal"; }; } diff --git a/terraform/cloud-init.tf b/terraform/cloud-init.tf index 5ca7fe7..2155f72 100644 --- a/terraform/cloud-init.tf +++ b/terraform/cloud-init.tf @@ -8,7 +8,7 @@ resource "proxmox_cloud_init_disk" "ci" { name = each.key pve_node = each.value.target_node - storage = "local" # Cloud-init disks must be on storage that supports ISO/snippets + storage = "local" # Cloud-init disks must be on storage that supports ISO/snippets # User data includes SSH keys and optionally NIXOS_FLAKE_BRANCH user_data = <<-EOT @@ -25,34 +25,34 @@ resource "proxmox_cloud_init_disk" "ci" { : ""} EOT - # Network configuration - static IP or DHCP - network_config = each.value.ip != null ? yamlencode({ - version = 1 - config = [{ - type = "physical" - name = "ens18" - subnets = [{ - type = "static" - address = each.value.ip - gateway = each.value.gateway - dns_nameservers = split(" ", each.value.nameservers) - dns_search = [each.value.search_domain] - }] +# Network configuration - static IP or DHCP +network_config = each.value.ip != null ? yamlencode({ + version = 1 + config = [{ + type = "physical" + name = "ens18" + subnets = [{ + type = "static" + address = each.value.ip + gateway = each.value.gateway + dns_nameservers = split(" ", each.value.nameservers) + dns_search = [each.value.search_domain] }] - }) : yamlencode({ - version = 1 - config = [{ - type = "physical" - name = "ens18" - subnets = [{ - type = "dhcp" - }] + }] + }) : yamlencode({ + version = 1 + config = [{ + type = "physical" + name = "ens18" + subnets = [{ + type = "dhcp" }] - }) + }] +}) - # Instance metadata - meta_data = yamlencode({ - instance_id = sha1(each.key) - local-hostname = each.key - }) +# Instance metadata +meta_data = yamlencode({ + instance_id = sha1(each.key) + local-hostname = each.key +}) } diff --git a/terraform/vault/.terraform.lock.hcl b/terraform/vault/.terraform.lock.hcl new file mode 100644 index 0000000..dc102ca --- /dev/null +++ b/terraform/vault/.terraform.lock.hcl @@ -0,0 +1,37 @@ +# This file is maintained automatically by "tofu init". +# Manual edits may be lost in future updates. + +provider "registry.opentofu.org/hashicorp/random" { + version = "3.8.1" + constraints = "~> 3.6" + hashes = [ + "h1:EHn3jsqOKhWjbg0X+psk0Ww96yz3N7ASqEKKuFvDFwo=", + "zh:25c458c7c676f15705e872202dad7dcd0982e4a48e7ea1800afa5fc64e77f4c8", + "zh:2edeaf6f1b20435b2f81855ad98a2e70956d473be9e52a5fdf57ccd0098ba476", + "zh:44becb9d5f75d55e36dfed0c5beabaf4c92e0a2bc61a3814d698271c646d48e7", + "zh:7699032612c3b16cc69928add8973de47b10ce81b1141f30644a0e8a895b5cd3", + "zh:86d07aa98d17703de9fbf402c89590dc1e01dbe5671dd6bc5e487eb8fe87eee0", + "zh:8c411c77b8390a49a8a1bc9f176529e6b32369dd33a723606c8533e5ca4d68c1", + "zh:a5ecc8255a612652a56b28149994985e2c4dc046e5d34d416d47fa7767f5c28f", + "zh:aea3fe1a5669b932eda9c5c72e5f327db8da707fe514aaca0d0ef60cb24892f9", + "zh:f56e26e6977f755d7ae56fa6320af96ecf4bb09580d47cb481efbf27f1c5afff", + ] +} + +provider "registry.opentofu.org/hashicorp/vault" { + version = "4.8.0" + constraints = "~> 4.0" + hashes = [ + "h1:SQkjClJDo6SETUnq912GO8BdEExhU1ko8IG2mr4X/2A=", + "zh:0c07ef884c03083b08a54c2cf782f3ff7e124b05e7a4438a0b90a86e60c8d080", + "zh:13dcf2ed494c79e893b447249716d96b665616a868ffaf8f2c5abef07c7eee6f", + "zh:6f15a29fae3a6178e5904e3c95ba22b20f362d8ee491da816048c89f30e6b2de", + "zh:94b92a4bf7a2d250d9698a021f1ab60d1957d01b5bab81f7d9c00c2d6a9b3747", + "zh:a9e207540ef12cd2402e37b3b7567e08de14061a0a2635fd2f4fd09e0a3382aa", + "zh:b41667938ba541e8492036415b3f51fbd1758e456f6d5f0b63e26f4ad5728b21", + "zh:df0b73aff5f4b51e08fc0c273db7f677994db29a81deda66d91acfcfe3f1a370", + "zh:df904b217dc79b71a8b5f5f3ab2e52316d0f890810383721349cc10a72f7265b", + "zh:f0e0b3e6782e0126c40f05cf87ec80978c7291d90f52d7741300b5de1d9c01ba", + "zh:f8e599718b0ea22658eaa3e590671d3873aa723e7ce7d00daf3460ab41d3af14", + ] +} diff --git a/terraform/vault/README.md b/terraform/vault/README.md new file mode 100644 index 0000000..eb4d3b7 --- /dev/null +++ b/terraform/vault/README.md @@ -0,0 +1,280 @@ +# OpenBao Terraform Configuration + +This directory contains Terraform/OpenTofu configuration for managing OpenBao (Vault) infrastructure as code. + +## Overview + +Manages the following OpenBao resources: +- **AppRole Authentication**: For host-based authentication +- **PKI Infrastructure**: Root CA + Intermediate CA for TLS certificates +- **KV Secrets Engine**: Key-value secret storage (v2) +- **Policies**: Access control policies + +## Setup + +1. **Copy the example tfvars file:** + ```bash + cp terraform.tfvars.example terraform.tfvars + ``` + +2. **Edit `terraform.tfvars` with your OpenBao credentials:** + ```hcl + vault_address = "https://vault.home.2rjus.net:8200" + vault_token = "hvs.your-root-token-here" + vault_skip_tls_verify = true + ``` + +3. **Initialize Terraform:** + ```bash + tofu init + ``` + +4. **Review the plan:** + ```bash + tofu plan + ``` + +5. **Apply the configuration:** + ```bash + tofu apply + ``` + +## Files + +- `main.tf` - Provider configuration +- `variables.tf` - Variable definitions +- `approle.tf` - AppRole authentication backend and roles +- `pki.tf` - PKI engines (root CA and intermediate CA) +- `secrets.tf` - KV secrets engine and test secrets +- `terraform.tfvars` - Credentials (gitignored) +- `terraform.tfvars.example` - Example configuration + +## Resources Created + +### AppRole Authentication +- AppRole backend at `approle/` +- Host-based roles and policies (defined in `locals.host_policies`) + +### PKI Infrastructure +- Root CA at `pki/` (10 year TTL) +- Intermediate CA at `pki_int/` (5 year TTL) +- Role `homelab` for issuing certificates to `*.home.2rjus.net` +- Certificate max TTL: 30 days + +### Secrets +- KV v2 engine at `secret/` +- Secrets and policies defined in `locals.secrets` and `locals.host_policies` + +## Usage Examples + +### Adding a New Host + +1. **Define the host policy in `approle.tf`:** +```hcl +locals { + host_policies = { + "monitoring01" = { + paths = [ + "secret/data/hosts/monitoring01/*", + "secret/data/services/prometheus/*", + ] + } + } +} +``` + +2. **Add secrets in `secrets.tf`:** +```hcl +locals { + secrets = { + "hosts/monitoring01/grafana-admin" = { + auto_generate = true + password_length = 32 + } + } +} +``` + +3. **Apply changes:** +```bash +tofu apply +``` + +4. **Get AppRole credentials:** +```bash +# Get role_id +bao read auth/approle/role/monitoring01/role-id + +# Generate secret_id +bao write -f auth/approle/role/monitoring01/secret-id +``` + +### Issue Certificates from PKI + +**Method 1: ACME (Recommended for automated services)** + +First, enable ACME support: +```bash +bao write pki_int/config/acme enabled=true +``` + +ACME directory endpoint: +``` +https://vault.home.2rjus.net:8200/v1/pki_int/acme/directory +``` + +Use with ACME clients (lego, certbot, cert-manager, etc.): +```bash +# Example with lego +lego --email admin@home.2rjus.net \ + --dns manual \ + --server https://vault.home.2rjus.net:8200/v1/pki_int/acme/directory \ + --accept-tos \ + run -d test.home.2rjus.net +``` + +**Method 2: Static certificates via Terraform** + +Define in `pki.tf`: +```hcl +locals { + static_certificates = { + "monitoring" = { + common_name = "monitoring.home.2rjus.net" + alt_names = ["grafana.home.2rjus.net", "prometheus.home.2rjus.net"] + ttl = "720h" + } + } +} +``` + +Terraform will auto-issue and auto-renew these certificates. + +**Method 3: Manual CLI issuance** + +```bash +# Issue certificate for a host +bao write pki_int/issue/homelab \ + common_name="test.home.2rjus.net" \ + ttl="720h" +``` + +### Read a secret + +```bash +# Authenticate with AppRole first +bao write auth/approle/login \ + role_id="..." \ + secret_id="..." + +# Read the test secret +bao kv get secret/test/example +``` + +## Managing Secrets + +Secrets are defined in the `locals.secrets` block in `secrets.tf` using a declarative pattern: + +### Auto-Generated Secrets (Recommended) + +Most secrets can be auto-generated using the `random_password` provider: + +```hcl +locals { + secrets = { + "hosts/monitoring01/grafana-admin" = { + auto_generate = true + password_length = 32 + } + } +} +``` + +### Manual Secrets + +For secrets that must have specific values (external services, etc.): + +```hcl +# In variables.tf +variable "smtp_password" { + type = string + sensitive = true +} + +# In secrets.tf locals block +locals { + secrets = { + "shared/smtp/credentials" = { + auto_generate = false + data = { + username = "notifications@2rjus.net" + password = var.smtp_password + server = "smtp.gmail.com" + } + } + } +} + +# In terraform.tfvars +smtp_password = "super-secret-password" +``` + +### Path Structure + +Secrets follow a three-tier hierarchy: +- `hosts/{hostname}/*` - Host-specific secrets +- `services/{service}/*` - Service-wide secrets (any host running the service) +- `shared/{category}/*` - Shared secrets (SMTP, backup, etc.) + +## Security Notes + +- `terraform.tfvars` is gitignored to prevent credential leakage +- Root token should be stored securely (consider using a limited admin token instead) +- `skip_tls_verify = true` is acceptable for self-signed certs in homelab +- AppRole secret_ids can be scoped to specific CIDR ranges for additional security + +## Initial Setup Steps + +After deploying this configuration, perform these one-time setup tasks: + +### 1. Enable ACME +```bash +export BAO_ADDR='https://vault.home.2rjus.net:8200' +export BAO_TOKEN='your-root-token' +export BAO_SKIP_VERIFY=1 + +# Configure cluster path (required for ACME) +bao write pki_int/config/cluster path=https://vault.home.2rjus.net:8200/v1/pki_int + +# Enable ACME on intermediate CA +bao write pki_int/config/acme enabled=true + +# Verify ACME is enabled +curl -k https://vault.home.2rjus.net:8200/v1/pki_int/acme/directory +``` + +### 2. Download Root CA Certificate + +For trusting the internal CA on clients: +```bash +# Download root CA certificate +bao read -field=certificate pki/cert/ca > homelab-root-ca.crt + +# Install on NixOS hosts (add to system/default.nix or similar) +security.pki.certificateFiles = [ ./homelab-root-ca.crt ]; +``` + +### 3. Test Certificate Issuance + +```bash +# Manual test +bao write pki_int/issue/homelab common_name="test.home.2rjus.net" ttl="24h" +``` + +## Next Steps + +1. Replace step-ca ACME endpoint with OpenBao in `system/acme.nix` +2. Add more AppRoles for different host types +3. Migrate existing sops-nix secrets to OpenBao KV +4. Set up SSH CA for host and user certificates +5. Configure auto-unseal for vault01 diff --git a/terraform/vault/approle.tf b/terraform/vault/approle.tf new file mode 100644 index 0000000..cb9aac1 --- /dev/null +++ b/terraform/vault/approle.tf @@ -0,0 +1,74 @@ +# Enable AppRole auth backend +resource "vault_auth_backend" "approle" { + type = "approle" + path = "approle" +} + +# Define host access policies +locals { + host_policies = { + # Example: monitoring01 host + # "monitoring01" = { + # paths = [ + # "secret/data/hosts/monitoring01/*", + # "secret/data/services/prometheus/*", + # "secret/data/services/grafana/*", + # "secret/data/shared/smtp/*" + # ] + # } + + # Example: ha1 host + # "ha1" = { + # paths = [ + # "secret/data/hosts/ha1/*", + # "secret/data/shared/mqtt/*" + # ] + # } + + # TODO: actually use this policy + "ha1" = { + paths = [ + "secret/data/hosts/ha1/*", + ] + } + + # TODO: actually use this policy + "monitoring01" = { + paths = [ + "secret/data/hosts/monitoring01/*", + ] + } + } +} + +# Generate policies for each host +resource "vault_policy" "host_policies" { + for_each = local.host_policies + + name = "${each.key}-policy" + + policy = < { + common_name = v.common_name + serial = v.serial_number + expiration = v.expiration + issuing_ca = v.issuing_ca + certificate = v.certificate + private_key = v.private_key + } + } + sensitive = true +} diff --git a/terraform/vault/secrets.tf b/terraform/vault/secrets.tf new file mode 100644 index 0000000..1f65d82 --- /dev/null +++ b/terraform/vault/secrets.tf @@ -0,0 +1,76 @@ +# Enable KV v2 secrets engine +resource "vault_mount" "kv" { + path = "secret" + type = "kv" + options = { version = "2" } + description = "KV Version 2 secret store" +} + +# Define all secrets with auto-generation support +locals { + secrets = { + # Example host-specific secrets + # "hosts/monitoring01/grafana-admin" = { + # auto_generate = true + # password_length = 32 + # } + # "hosts/ha1/mqtt-password" = { + # auto_generate = true + # password_length = 24 + # } + + # Example service secrets + # "services/prometheus/remote-write" = { + # auto_generate = true + # password_length = 40 + # } + + # Example shared secrets with manual values + # "shared/smtp/credentials" = { + # auto_generate = false + # data = { + # username = "notifications@2rjus.net" + # password = var.smtp_password # Define in variables.tf and set in terraform.tfvars + # server = "smtp.gmail.com" + # } + # } + + # TODO: actually use the secret + "hosts/monitoring01/grafana-admin" = { + auto_generate = true + password_length = 32 + } + + # TODO: actually use the secret + "hosts/ha1/mqtt-password" = { + auto_generate = true + password_length = 24 + } + + } +} + +# Auto-generate passwords for secrets with auto_generate = true +resource "random_password" "auto_secrets" { + for_each = { + for k, v in local.secrets : k => v + if lookup(v, "auto_generate", false) + } + + length = each.value.password_length + special = true +} + +# Create all secrets in Vault +resource "vault_kv_secret_v2" "secrets" { + for_each = local.secrets + + mount = vault_mount.kv.path + name = each.key + + data_json = jsonencode( + lookup(each.value, "auto_generate", false) + ? { password = random_password.auto_secrets[each.key].result } + : each.value.data + ) +} diff --git a/terraform/vault/terraform.tfvars.example b/terraform/vault/terraform.tfvars.example new file mode 100644 index 0000000..23f9f87 --- /dev/null +++ b/terraform/vault/terraform.tfvars.example @@ -0,0 +1,6 @@ +# Copy this file to terraform.tfvars and fill in your values +# terraform.tfvars is gitignored to keep credentials safe + +vault_address = "https://vault.home.2rjus.net:8200" +vault_token = "hvs.XXXXXXXXXXXXXXXXXXXX" +vault_skip_tls_verify = true diff --git a/terraform/vault/variables.tf b/terraform/vault/variables.tf new file mode 100644 index 0000000..be0a9d1 --- /dev/null +++ b/terraform/vault/variables.tf @@ -0,0 +1,26 @@ +variable "vault_address" { + description = "OpenBao server address" + type = string + default = "https://vault.home.2rjus.net:8200" +} + +variable "vault_token" { + description = "OpenBao root or admin token" + type = string + sensitive = true +} + +variable "vault_skip_tls_verify" { + description = "Skip TLS verification (for self-signed certs)" + type = bool + default = true +} + +# Example variables for manual secrets +# Uncomment and add to terraform.tfvars as needed + +# variable "smtp_password" { +# description = "SMTP password for notifications" +# type = string +# sensitive = true +# } diff --git a/terraform/vms.tf b/terraform/vms.tf index 73ba143..01fb272 100644 --- a/terraform/vms.tf +++ b/terraform/vms.tf @@ -39,10 +39,11 @@ locals { flake_branch = "pipeline-testing-improvements" } "vault01" = { - ip = "10.69.13.19/24" - cpu_cores = 2 - memory = 2048 - disk_size = "20G" + ip = "10.69.13.19/24" + cpu_cores = 2 + memory = 2048 + disk_size = "20G" + flake_branch = "vault-setup" # Bootstrap from this branch instead of master } }