From 74e7c9faa40ca8c04a12e7fc52c10ca80642f84f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= Date: Tue, 17 Feb 2026 19:42:19 +0100 Subject: [PATCH] monitoring02: add Loki service Add standalone Loki service module (services/loki/) with same config as monitoring01 and import it on monitoring02. Update Grafana Loki datasource to localhost. Defer Tempo and Pyroscope migration (not actively used). Co-Authored-By: Claude Opus 4.6 --- .../monitoring-migration-victoriametrics.md | 26 +++------ hosts/monitoring02/default.nix | 1 + services/grafana/default.nix | 2 +- services/loki/default.nix | 58 +++++++++++++++++++ 4 files changed, 69 insertions(+), 18 deletions(-) create mode 100644 services/loki/default.nix diff --git a/docs/plans/monitoring-migration-victoriametrics.md b/docs/plans/monitoring-migration-victoriametrics.md index 7c6c349..d562c41 100644 --- a/docs/plans/monitoring-migration-victoriametrics.md +++ b/docs/plans/monitoring-migration-victoriametrics.md @@ -14,8 +14,8 @@ a `monitoring` CNAME for seamless transition. - Alertmanager (routes to alerttonotify webhook) - Grafana (dashboards, datasources) - Loki (log aggregation from all hosts via Promtail) -- Tempo (distributed tracing) -- Pyroscope (continuous profiling) +- Tempo (distributed tracing) - not actively used +- Pyroscope (continuous profiling) - not actively used **Hardcoded References to monitoring01:** - `system/monitoring/logs.nix` - Promtail sends logs to `http://monitoring01.home.2rjus.net:3100` @@ -44,9 +44,7 @@ If multi-year retention with downsampling becomes necessary later, Thanos can be │ VictoriaMetrics│ │ + Grafana │ monitoring │ + Loki │ - CNAME ──────────│ + Tempo │ - │ + Pyroscope │ - │ + Alertmanager │ + CNAME ──────────│ + Alertmanager │ │ (vmalert) │ └─────────────────┘ ▲ @@ -94,16 +92,11 @@ Imported by monitoring02 alongside the existing Grafana service. 4. 
**Grafana** (port 3000): [DONE] - VictoriaMetrics datasource (localhost:8428) as default - monitoring01 Prometheus datasource kept for comparison during parallel operation - - Loki datasource pointing to monitoring01 (until Loki migrated) + - Loki datasource pointing to localhost (Loki now runs on monitoring02) -5. **Loki** (port 3100): - - TODO: Same configuration as current - -6. **Tempo** (ports 3200, 3201): - - TODO: Same configuration - -7. **Pyroscope** (port 4040): - - TODO: Same Docker-based deployment +5. **Loki** (port 3100): [DONE] + - Same configuration as monitoring01 in standalone `services/loki/` module + - Grafana datasource updated to localhost:3100 **Note:** pve-exporter and pushgateway scrape targets are not included on monitoring02. pve-exporter requires a local exporter instance; pushgateway is replaced by VictoriaMetrics @@ -147,7 +140,6 @@ Update hardcoded references to use the CNAME: - prometheus.home.2rjus.net -> monitoring.home.2rjus.net:8428 - alertmanager.home.2rjus.net -> monitoring.home.2rjus.net:9093 - grafana.home.2rjus.net -> monitoring.home.2rjus.net:3000 - - pyroscope.home.2rjus.net -> monitoring.home.2rjus.net:4040 Note: `hosts/template2/bootstrap.nix` stays pointed at monitoring01 until decommission. 
@@ -172,8 +164,8 @@ Once ready to cut over: ## Current Progress - **Phase 1** complete (2026-02-08): monitoring02 host created, Grafana with Kanidm OIDC validated -- **Phase 2** in progress (2026-02-17): VictoriaMetrics, vmalert, Alertmanager, Grafana datasources configured - - Remaining: Loki, Tempo, Pyroscope migration +- **Phase 2** complete (2026-02-17): VictoriaMetrics, vmalert, Alertmanager, Loki, Grafana datasources configured + - Tempo and Pyroscope deferred (not actively used; can be added later if needed) ## Open Questions diff --git a/hosts/monitoring02/default.nix b/hosts/monitoring02/default.nix index ea273a4..a8ef155 100644 --- a/hosts/monitoring02/default.nix +++ b/hosts/monitoring02/default.nix @@ -3,5 +3,6 @@ ./configuration.nix ../../services/grafana ../../services/victoriametrics + ../../services/loki ]; } \ No newline at end of file diff --git a/services/grafana/default.nix b/services/grafana/default.nix index a0dc7b4..ed5aece 100644 --- a/services/grafana/default.nix +++ b/services/grafana/default.nix @@ -54,7 +54,7 @@ { name = "Loki"; type = "loki"; - url = "http://monitoring01.home.2rjus.net:3100"; + url = "http://localhost:3100"; uid = "loki"; } ]; diff --git a/services/loki/default.nix b/services/loki/default.nix new file mode 100644 index 0000000..87ee06f --- /dev/null +++ b/services/loki/default.nix @@ -0,0 +1,58 @@ +{ ... 
}: +{ + services.loki = { + enable = true; + configuration = { + auth_enabled = false; + + server = { + http_listen_port = 3100; + }; + common = { + ring = { + instance_addr = "127.0.0.1"; + kvstore = { + store = "inmemory"; + }; + }; + replication_factor = 1; + path_prefix = "/var/lib/loki"; + }; + schema_config = { + configs = [ + { + from = "2024-01-01"; + store = "tsdb"; + object_store = "filesystem"; + schema = "v13"; + index = { + prefix = "loki_index_"; + period = "24h"; + }; + } + ]; + }; + storage_config = { + filesystem = { + directory = "/var/lib/loki/chunks"; + }; + }; + compactor = { + working_directory = "/var/lib/loki/compactor"; + compaction_interval = "10m"; + retention_enabled = true; + retention_delete_delay = "2h"; + retention_delete_worker_count = 150; + delete_request_store = "filesystem"; + }; + limits_config = { + retention_period = "30d"; + ingestion_rate_mb = 10; + ingestion_burst_size_mb = 20; + max_streams_per_user = 10000; + max_query_series = 500; + max_query_parallelism = 8; + }; + }; + }; +}