From 74e7c9faa40ca8c04a12e7fc52c10ca80642f84f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= Date: Tue, 17 Feb 2026 19:42:19 +0100 Subject: [PATCH 1/7] monitoring02: add Loki service Add standalone Loki service module (services/loki/) with same config as monitoring01 and import it on monitoring02. Update Grafana Loki datasource to localhost. Defer Tempo and Pyroscope migration (not actively used). Co-Authored-By: Claude Opus 4.6 --- .../monitoring-migration-victoriametrics.md | 26 +++------ hosts/monitoring02/default.nix | 1 + services/grafana/default.nix | 2 +- services/loki/default.nix | 58 +++++++++++++++++++ 4 files changed, 69 insertions(+), 18 deletions(-) create mode 100644 services/loki/default.nix diff --git a/docs/plans/monitoring-migration-victoriametrics.md b/docs/plans/monitoring-migration-victoriametrics.md index 7c6c349..d562c41 100644 --- a/docs/plans/monitoring-migration-victoriametrics.md +++ b/docs/plans/monitoring-migration-victoriametrics.md @@ -14,8 +14,8 @@ a `monitoring` CNAME for seamless transition. - Alertmanager (routes to alerttonotify webhook) - Grafana (dashboards, datasources) - Loki (log aggregation from all hosts via Promtail) -- Tempo (distributed tracing) -- Pyroscope (continuous profiling) +- Tempo (distributed tracing) - not actively used +- Pyroscope (continuous profiling) - not actively used **Hardcoded References to monitoring01:** - `system/monitoring/logs.nix` - Promtail sends logs to `http://monitoring01.home.2rjus.net:3100` @@ -44,9 +44,7 @@ If multi-year retention with downsampling becomes necessary later, Thanos can be │ VictoriaMetrics│ │ + Grafana │ monitoring │ + Loki │ - CNAME ──────────│ + Tempo │ - │ + Pyroscope │ - │ + Alertmanager │ + CNAME ──────────│ + Alertmanager │ │ (vmalert) │ └─────────────────┘ ▲ @@ -94,16 +92,11 @@ Imported by monitoring02 alongside the existing Grafana service. 4. **Grafana** (port 3000): [DONE] - VictoriaMetrics datasource (localhost:8428) as default - monitoring01 Prometheus datasource kept for comparison during parallel operation - - Loki datasource pointing to monitoring01 (until Loki migrated) + - Loki datasource pointing to localhost (after Loki migrated to monitoring02) -5. **Loki** (port 3100): - - TODO: Same configuration as current - -6. **Tempo** (ports 3200, 3201): - - TODO: Same configuration - -7. **Pyroscope** (port 4040): - - TODO: Same Docker-based deployment +5. **Loki** (port 3100): [DONE] + - Same configuration as monitoring01 in standalone `services/loki/` module + - Grafana datasource updated to localhost:3100 **Note:** pve-exporter and pushgateway scrape targets are not included on monitoring02. pve-exporter requires a local exporter instance; pushgateway is replaced by VictoriaMetrics @@ -147,7 +140,6 @@ Update hardcoded references to use the CNAME: - prometheus.home.2rjus.net -> monitoring.home.2rjus.net:8428 - alertmanager.home.2rjus.net -> monitoring.home.2rjus.net:9093 - grafana.home.2rjus.net -> monitoring.home.2rjus.net:3000 - - pyroscope.home.2rjus.net -> monitoring.home.2rjus.net:4040 Note: `hosts/template2/bootstrap.nix` stays pointed at monitoring01 until decommission. @@ -172,8 +164,8 @@ Once ready to cut over: ## Current Progress - **Phase 1** complete (2026-02-08): monitoring02 host created, Grafana with Kanidm OIDC validated -- **Phase 2** in progress (2026-02-17): VictoriaMetrics, vmalert, Alertmanager, Grafana datasources configured - - Remaining: Loki, Tempo, Pyroscope migration +- **Phase 2** complete (2026-02-17): VictoriaMetrics, vmalert, Alertmanager, Loki, Grafana datasources configured + - Tempo and Pyroscope deferred (not actively used; can be added later if needed) ## Open Questions diff --git a/hosts/monitoring02/default.nix b/hosts/monitoring02/default.nix index ea273a4..a8ef155 100644 --- a/hosts/monitoring02/default.nix +++ b/hosts/monitoring02/default.nix @@ -3,5 +3,6 @@ ./configuration.nix ../../services/grafana ../../services/victoriametrics + ../../services/loki ]; } \ No newline at end of file diff --git a/services/grafana/default.nix b/services/grafana/default.nix index a0dc7b4..ed5aece 100644 --- a/services/grafana/default.nix +++ b/services/grafana/default.nix @@ -54,7 +54,7 @@ { name = "Loki"; type = "loki"; - url = "http://monitoring01.home.2rjus.net:3100"; + url = "http://localhost:3100"; uid = "loki"; } ]; diff --git a/services/loki/default.nix b/services/loki/default.nix new file mode 100644 index 0000000..87ee06f --- /dev/null +++ b/services/loki/default.nix @@ -0,0 +1,58 @@ +{ ... }: +{ + services.loki = { + enable = true; + configuration = { + auth_enabled = false; + + server = { + http_listen_port = 3100; + }; + common = { + ring = { + instance_addr = "127.0.0.1"; + kvstore = { + store = "inmemory"; + }; + }; + replication_factor = 1; + path_prefix = "/var/lib/loki"; + }; + schema_config = { + configs = [ + { + from = "2024-01-01"; + store = "tsdb"; + object_store = "filesystem"; + schema = "v13"; + index = { + prefix = "loki_index_"; + period = "24h"; + }; + } + ]; + }; + storage_config = { + filesystem = { + directory = "/var/lib/loki/chunks"; + }; + }; + compactor = { + working_directory = "/var/lib/loki/compactor"; + compaction_interval = "10m"; + retention_enabled = true; + retention_delete_delay = "2h"; + retention_delete_worker_count = 150; + delete_request_store = "filesystem"; + }; + limits_config = { + retention_period = "30d"; + ingestion_rate_mb = 10; + ingestion_burst_size_mb = 20; + max_streams_per_user = 10000; + max_query_series = 500; + max_query_parallelism = 8; + }; + }; + }; +} From 2903873d52e145467c8314fb47fe3f51b1bc54f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= Date: Tue, 17 Feb 2026 19:48:06 +0100 Subject: [PATCH 2/7] monitoring02: add loki CNAME and Caddy reverse proxy Co-Authored-By: Claude Opus 4.6 --- hosts/monitoring02/configuration.nix | 2 +- services/loki/default.nix | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/hosts/monitoring02/configuration.nix b/hosts/monitoring02/configuration.nix index 3cf2f8d..2616555 100644 --- a/hosts/monitoring02/configuration.nix +++ b/hosts/monitoring02/configuration.nix @@ -18,7 +18,7 @@ role = "monitoring"; }; - homelab.dns.cnames = [ "grafana-test" "metrics" "vmalert" ]; + homelab.dns.cnames = [ "grafana-test" "metrics" "vmalert" "loki" ]; # Enable Vault integration vault.enable = true; diff --git a/services/loki/default.nix b/services/loki/default.nix index 87ee06f..b73456e 100644 --- a/services/loki/default.nix +++ b/services/loki/default.nix @@ -1,5 +1,10 @@ { ... }: { + # Caddy reverse proxy for Loki + services.caddy.virtualHosts."loki.home.2rjus.net".extraConfig = '' + reverse_proxy http://127.0.0.1:3100 + ''; + services.loki = { enable = true; configuration = { From c13921d302d4b044129bc5fbcf912abca3aea449 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= Date: Tue, 17 Feb 2026 20:00:08 +0100 Subject: [PATCH 3/7] loki: add basic auth for log push and dual-ship promtail - Loki bound to localhost, Caddy reverse proxy with basic_auth - Vault secret (shared/loki/push-auth) for password, bcrypt hash generated at boot for Caddy environment - Promtail dual-ships to monitoring01 (direct) and loki.home.2rjus.net (with basic auth), conditional on vault.enable - Terraform: new shared loki-push policy added to all AppRoles Co-Authored-By: Claude Opus 4.6 --- services/loki/default.nix | 45 ++++++++++++++++++++++++++++++++++++-- system/monitoring/logs.nix | 16 ++++++++++++++ terraform/vault/approle.tf | 13 ++++++++++- terraform/vault/secrets.tf | 6 +++++ 4 files changed, 77 insertions(+), 3 deletions(-) diff --git a/services/loki/default.nix b/services/loki/default.nix index b73456e..f50a6e0 100644 --- a/services/loki/default.nix +++ b/services/loki/default.nix @@ -1,7 +1,47 @@ -{ ... }: +{ config, lib, pkgs, ... }: +let + # Script to generate bcrypt hash from Vault password for Caddy basic_auth + generateCaddyAuth = pkgs.writeShellApplication { + name = "generate-caddy-loki-auth"; + runtimeInputs = [ config.services.caddy.package ]; + text = '' + PASSWORD=$(cat /run/secrets/loki-push-auth) + HASH=$(caddy hash-password --plaintext "$PASSWORD") + echo "LOKI_PUSH_HASH=$HASH" > /run/secrets/caddy-loki-auth.env + chmod 0400 /run/secrets/caddy-loki-auth.env + ''; + }; +in { - # Caddy reverse proxy for Loki + # Fetch Loki push password from Vault + vault.secrets.loki-push-auth = { + secretPath = "shared/loki/push-auth"; + extractKey = "password"; + services = [ "caddy" ]; + }; + + # Generate bcrypt hash for Caddy before it starts + systemd.services.caddy-loki-auth = { + description = "Generate Caddy basic auth hash for Loki"; + after = [ "vault-secret-loki-push-auth.service" ]; + requires = [ "vault-secret-loki-push-auth.service" ]; + before = [ "caddy.service" ]; + requiredBy = [ "caddy.service" ]; + serviceConfig = { + Type = "oneshot"; + RemainAfterExit = true; + ExecStart = lib.getExe generateCaddyAuth; + }; + }; + + # Load the bcrypt hash as environment variable for Caddy + services.caddy.environmentFile = "/run/secrets/caddy-loki-auth.env"; + + # Caddy reverse proxy for Loki with basic auth services.caddy.virtualHosts."loki.home.2rjus.net".extraConfig = '' + basic_auth { + promtail {env.LOKI_PUSH_HASH} + } reverse_proxy http://127.0.0.1:3100 ''; @@ -11,6 +51,7 @@ auth_enabled = false; server = { + http_listen_address = "127.0.0.1"; http_listen_port = 3100; }; common = { diff --git a/system/monitoring/logs.nix b/system/monitoring/logs.nix index 68d9cac..d3fad59 100644 --- a/system/monitoring/logs.nix +++ b/system/monitoring/logs.nix @@ -16,6 +16,14 @@ in SystemKeepFree=1G ''; }; + + # Fetch Loki push password from Vault (only on hosts with Vault enabled) + vault.secrets.promtail-loki-auth = lib.mkIf config.vault.enable { + secretPath = "shared/loki/push-auth"; + extractKey = "password"; + services = [ "promtail" ]; + }; + # Configure promtail services.promtail = { enable = true; @@ -31,6 +39,14 @@ in { url = "http://monitoring01.home.2rjus.net:3100/loki/api/v1/push"; } + ] ++ lib.optionals config.vault.enable [ + { + url = "https://loki.home.2rjus.net/loki/api/v1/push"; + basic_auth = { + username = "promtail"; + password_file = "/run/secrets/promtail-loki-auth"; + }; + } ]; scrape_configs = [ diff --git a/terraform/vault/approle.tf b/terraform/vault/approle.tf index 8542812..f262f49 100644 --- a/terraform/vault/approle.tf +++ b/terraform/vault/approle.tf @@ -26,6 +26,17 @@ path "secret/data/shared/nixos-exporter/*" { EOT } +# Shared policy for Loki push authentication (all hosts push logs) +resource "vault_policy" "loki_push" { + name = "loki-push" + + policy = < Date: Tue, 17 Feb 2026 20:10:37 +0100 Subject: [PATCH 4/7] terraform: add ns1 and ns2 to AppRole policies They were missing from the host_policies map, so they didn't get shared policies like loki-push. Co-Authored-By: Claude Opus 4.6 --- terraform/vault/approle.tf | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/terraform/vault/approle.tf b/terraform/vault/approle.tf index f262f49..451b823 100644 --- a/terraform/vault/approle.tf +++ b/terraform/vault/approle.tf @@ -90,6 +90,19 @@ locals { } # Wave 3: DNS servers + "ns1" = { + paths = [ + "secret/data/hosts/ns1/*", + "secret/data/shared/dns/*", + ] + } + + "ns2" = { + paths = [ + "secret/data/hosts/ns2/*", + "secret/data/shared/dns/*", + ] + } # Wave 4: http-proxy "http-proxy" = { From 43c81f6688c34fa32aee41d8e1275cc661bc4525 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= Date: Tue, 17 Feb 2026 20:13:22 +0100 Subject: [PATCH 5/7] terraform: fix loki-push policy for generated hosts Revert ns1/ns2 from approle.tf (they're in hosts-generated.tf) and add loki-push policy to generated AppRoles instead. Co-Authored-By: Claude Opus 4.6 --- terraform/vault/approle.tf | 15 +-------------- terraform/vault/hosts-generated.tf | 2 +- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/terraform/vault/approle.tf b/terraform/vault/approle.tf index 451b823..5f76056 100644 --- a/terraform/vault/approle.tf +++ b/terraform/vault/approle.tf @@ -89,20 +89,7 @@ locals { ] } - # Wave 3: DNS servers - "ns1" = { - paths = [ - "secret/data/hosts/ns1/*", - "secret/data/shared/dns/*", - ] - } - - "ns2" = { - paths = [ - "secret/data/hosts/ns2/*", - "secret/data/shared/dns/*", - ] - } + # Wave 3: DNS servers (managed in hosts-generated.tf) # Wave 4: http-proxy "http-proxy" = { diff --git a/terraform/vault/hosts-generated.tf b/terraform/vault/hosts-generated.tf index 7172d20..4854b70 100644 --- a/terraform/vault/hosts-generated.tf +++ b/terraform/vault/hosts-generated.tf @@ -74,7 +74,7 @@ resource "vault_approle_auth_backend_role" "generated_hosts" { backend = vault_auth_backend.approle.path role_name = each.key - token_policies = ["host-${each.key}", "homelab-deploy", "nixos-exporter"] + token_policies = ["host-${each.key}", "homelab-deploy", "nixos-exporter", "loki-push"] secret_id_ttl = 0 # Never expire (wrapped tokens provide time limit) token_ttl = 3600 token_max_ttl = 3600 From 87d8571d626c0380851524ad5ce5c80b65c5bf93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= Date: Tue, 17 Feb 2026 20:17:02 +0100 Subject: [PATCH 6/7] promtail: fix vault secret ownership for loki auth The secret file needs to be owned by promtail since Promtail runs as a dedicated user and can't read root-owned files. Co-Authored-By: Claude Opus 4.6 --- system/monitoring/logs.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/system/monitoring/logs.nix b/system/monitoring/logs.nix index d3fad59..6a21a62 100644 --- a/system/monitoring/logs.nix +++ b/system/monitoring/logs.nix @@ -21,6 +21,8 @@ in vault.secrets.promtail-loki-auth = lib.mkIf config.vault.enable { secretPath = "shared/loki/push-auth"; extractKey = "password"; + owner = "promtail"; + group = "promtail"; services = [ "promtail" ]; }; From 35924c7b01024cbee5954a4d318e187bd322a8e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= Date: Tue, 17 Feb 2026 20:35:14 +0100 Subject: [PATCH 7/7] mcp: move config to .mcp.json.example, gitignore real config The real .mcp.json now contains Loki credentials for basic auth, so it should not be committed. The example file has placeholders. Co-Authored-By: Claude Opus 4.6 --- .gitignore | 3 +++ .mcp.json => .mcp.json.example | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) rename .mcp.json => .mcp.json.example (88%) diff --git a/.gitignore b/.gitignore index 567ee61..18fbe70 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,9 @@ result result-* +# MCP config (contains secrets) +.mcp.json + # Terraform/OpenTofu terraform/.terraform/ terraform/.terraform.lock.hcl diff --git a/.mcp.json b/.mcp.json.example similarity index 88% rename from .mcp.json rename to .mcp.json.example index f5d61f6..ff4fb40 100644 --- a/.mcp.json +++ b/.mcp.json.example @@ -20,7 +20,9 @@ "env": { "PROMETHEUS_URL": "https://prometheus.home.2rjus.net", "ALERTMANAGER_URL": "https://alertmanager.home.2rjus.net", - "LOKI_URL": "http://monitoring01.home.2rjus.net:3100" + "LOKI_URL": "https://loki.home.2rjus.net", + "LOKI_USERNAME": "promtail", + "LOKI_PASSWORD": "" } }, "homelab-deploy": { @@ -44,4 +46,3 @@ } } } -