loki-monitoring02 #41
@@ -14,8 +14,8 @@ a `monitoring` CNAME for seamless transition.
|
|||||||
- Alertmanager (routes to alerttonotify webhook)
|
- Alertmanager (routes to alerttonotify webhook)
|
||||||
- Grafana (dashboards, datasources)
|
- Grafana (dashboards, datasources)
|
||||||
- Loki (log aggregation from all hosts via Promtail)
|
- Loki (log aggregation from all hosts via Promtail)
|
||||||
- Tempo (distributed tracing)
|
- Tempo (distributed tracing) - not actively used
|
||||||
- Pyroscope (continuous profiling)
|
- Pyroscope (continuous profiling) - not actively used
|
||||||
|
|
||||||
**Hardcoded References to monitoring01:**
|
**Hardcoded References to monitoring01:**
|
||||||
- `system/monitoring/logs.nix` - Promtail sends logs to `http://monitoring01.home.2rjus.net:3100`
|
- `system/monitoring/logs.nix` - Promtail sends logs to `http://monitoring01.home.2rjus.net:3100`
|
||||||
@@ -44,9 +44,7 @@ If multi-year retention with downsampling becomes necessary later, Thanos can be
|
|||||||
│ VictoriaMetrics│
|
│ VictoriaMetrics│
|
||||||
│ + Grafana │
|
│ + Grafana │
|
||||||
monitoring │ + Loki │
|
monitoring │ + Loki │
|
||||||
CNAME ──────────│ + Tempo │
|
CNAME ──────────│ + Alertmanager │
|
||||||
│ + Pyroscope │
|
|
||||||
│ + Alertmanager │
|
|
||||||
│ (vmalert) │
|
│ (vmalert) │
|
||||||
└─────────────────┘
|
└─────────────────┘
|
||||||
▲
|
▲
|
||||||
@@ -94,16 +92,11 @@ Imported by monitoring02 alongside the existing Grafana service.
|
|||||||
4. **Grafana** (port 3000): [DONE]
|
4. **Grafana** (port 3000): [DONE]
|
||||||
- VictoriaMetrics datasource (localhost:8428) as default
|
- VictoriaMetrics datasource (localhost:8428) as default
|
||||||
- monitoring01 Prometheus datasource kept for comparison during parallel operation
|
- monitoring01 Prometheus datasource kept for comparison during parallel operation
|
||||||
- Loki datasource pointing to monitoring01 (until Loki migrated)
|
- Loki datasource pointing to localhost:3100 (Loki now runs on monitoring02)
|
||||||
|
|
||||||
5. **Loki** (port 3100):
|
5. **Loki** (port 3100): [DONE]
|
||||||
- TODO: Same configuration as current
|
- Same configuration as on monitoring01, packaged as a standalone `services/loki/` module
|
||||||
|
- Grafana datasource updated to localhost:3100
|
||||||
6. **Tempo** (ports 3200, 3201):
|
|
||||||
- TODO: Same configuration
|
|
||||||
|
|
||||||
7. **Pyroscope** (port 4040):
|
|
||||||
- TODO: Same Docker-based deployment
|
|
||||||
|
|
||||||
**Note:** pve-exporter and pushgateway scrape targets are not included on monitoring02.
|
**Note:** pve-exporter and pushgateway scrape targets are not included on monitoring02.
|
||||||
pve-exporter requires a local exporter instance; pushgateway is replaced by VictoriaMetrics
|
pve-exporter requires a local exporter instance; pushgateway is replaced by VictoriaMetrics
|
||||||
@@ -147,7 +140,6 @@ Update hardcoded references to use the CNAME:
|
|||||||
- prometheus.home.2rjus.net -> monitoring.home.2rjus.net:8428
|
- prometheus.home.2rjus.net -> monitoring.home.2rjus.net:8428
|
||||||
- alertmanager.home.2rjus.net -> monitoring.home.2rjus.net:9093
|
- alertmanager.home.2rjus.net -> monitoring.home.2rjus.net:9093
|
||||||
- grafana.home.2rjus.net -> monitoring.home.2rjus.net:3000
|
- grafana.home.2rjus.net -> monitoring.home.2rjus.net:3000
|
||||||
- pyroscope.home.2rjus.net -> monitoring.home.2rjus.net:4040
|
|
||||||
|
|
||||||
Note: `hosts/template2/bootstrap.nix` stays pointed at monitoring01 until decommission.
|
Note: `hosts/template2/bootstrap.nix` stays pointed at monitoring01 until decommission.
|
||||||
|
|
||||||
@@ -172,8 +164,8 @@ Once ready to cut over:
|
|||||||
## Current Progress
|
## Current Progress
|
||||||
|
|
||||||
- **Phase 1** complete (2026-02-08): monitoring02 host created, Grafana with Kanidm OIDC validated
|
- **Phase 1** complete (2026-02-08): monitoring02 host created, Grafana with Kanidm OIDC validated
|
||||||
- **Phase 2** in progress (2026-02-17): VictoriaMetrics, vmalert, Alertmanager, Grafana datasources configured
|
- **Phase 2** complete (2026-02-17): VictoriaMetrics, vmalert, Alertmanager, Loki, Grafana datasources configured
|
||||||
- Remaining: Loki, Tempo, Pyroscope migration
|
- Tempo and Pyroscope deferred (not actively used; can be added later if needed)
|
||||||
|
|
||||||
## Open Questions
|
## Open Questions
|
||||||
|
|
||||||
|
|||||||
@@ -3,5 +3,6 @@
|
|||||||
./configuration.nix
|
./configuration.nix
|
||||||
../../services/grafana
|
../../services/grafana
|
||||||
../../services/victoriametrics
|
../../services/victoriametrics
|
||||||
|
../../services/loki
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
@@ -54,7 +54,7 @@
|
|||||||
{
|
{
|
||||||
name = "Loki";
|
name = "Loki";
|
||||||
type = "loki";
|
type = "loki";
|
||||||
url = "http://monitoring01.home.2rjus.net:3100";
|
url = "http://localhost:3100";
|
||||||
uid = "loki";
|
uid = "loki";
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
|||||||
58
services/loki/default.nix
Normal file
58
services/loki/default.nix
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
{ ... }:
|
||||||
|
{
|
||||||
|
services.loki = {
|
||||||
|
enable = true;
|
||||||
|
configuration = {
|
||||||
|
auth_enabled = false;
|
||||||
|
|
||||||
|
server = {
|
||||||
|
http_listen_port = 3100;
|
||||||
|
};
|
||||||
|
common = {
|
||||||
|
ring = {
|
||||||
|
instance_addr = "127.0.0.1";
|
||||||
|
kvstore = {
|
||||||
|
store = "inmemory";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
replication_factor = 1;
|
||||||
|
path_prefix = "/var/lib/loki";
|
||||||
|
};
|
||||||
|
schema_config = {
|
||||||
|
configs = [
|
||||||
|
{
|
||||||
|
from = "2024-01-01";
|
||||||
|
store = "tsdb";
|
||||||
|
object_store = "filesystem";
|
||||||
|
schema = "v13";
|
||||||
|
index = {
|
||||||
|
prefix = "loki_index_";
|
||||||
|
period = "24h";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
];
|
||||||
|
};
|
||||||
|
storage_config = {
|
||||||
|
filesystem = {
|
||||||
|
directory = "/var/lib/loki/chunks";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
compactor = {
|
||||||
|
working_directory = "/var/lib/loki/compactor";
|
||||||
|
compaction_interval = "10m";
|
||||||
|
retention_enabled = true;
|
||||||
|
retention_delete_delay = "2h";
|
||||||
|
retention_delete_worker_count = 150;
|
||||||
|
delete_request_store = "filesystem";
|
||||||
|
};
|
||||||
|
limits_config = {
|
||||||
|
retention_period = "30d";
|
||||||
|
ingestion_rate_mb = 10;
|
||||||
|
ingestion_burst_size_mb = 20;
|
||||||
|
max_streams_per_user = 10000;
|
||||||
|
max_query_series = 500;
|
||||||
|
max_query_parallelism = 8;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user