Compare commits
6 Commits
loki-monit
...
ef850d91a4
| Author | SHA1 | Date | |
|---|---|---|---|
|
ef850d91a4
|
|||
|
a99fb5b959
|
|||
|
d385f02c89
|
|||
|
8dfd04b406
|
|||
|
63cf690598
|
|||
|
ef8eeaa2f5
|
3
.gitignore
vendored
3
.gitignore
vendored
@@ -2,9 +2,6 @@
|
|||||||
result
|
result
|
||||||
result-*
|
result-*
|
||||||
|
|
||||||
# MCP config (contains secrets)
|
|
||||||
.mcp.json
|
|
||||||
|
|
||||||
# Terraform/OpenTofu
|
# Terraform/OpenTofu
|
||||||
terraform/.terraform/
|
terraform/.terraform/
|
||||||
terraform/.terraform.lock.hcl
|
terraform/.terraform.lock.hcl
|
||||||
|
|||||||
@@ -20,9 +20,7 @@
|
|||||||
"env": {
|
"env": {
|
||||||
"PROMETHEUS_URL": "https://prometheus.home.2rjus.net",
|
"PROMETHEUS_URL": "https://prometheus.home.2rjus.net",
|
||||||
"ALERTMANAGER_URL": "https://alertmanager.home.2rjus.net",
|
"ALERTMANAGER_URL": "https://alertmanager.home.2rjus.net",
|
||||||
"LOKI_URL": "https://loki.home.2rjus.net",
|
"LOKI_URL": "http://monitoring01.home.2rjus.net:3100"
|
||||||
"LOKI_USERNAME": "promtail",
|
|
||||||
"LOKI_PASSWORD": "<password from: bao kv get -field=password secret/shared/loki/push-auth>"
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"homelab-deploy": {
|
"homelab-deploy": {
|
||||||
@@ -46,3 +44,4 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -14,8 +14,8 @@ a `monitoring` CNAME for seamless transition.
|
|||||||
- Alertmanager (routes to alerttonotify webhook)
|
- Alertmanager (routes to alerttonotify webhook)
|
||||||
- Grafana (dashboards, datasources)
|
- Grafana (dashboards, datasources)
|
||||||
- Loki (log aggregation from all hosts via Promtail)
|
- Loki (log aggregation from all hosts via Promtail)
|
||||||
- Tempo (distributed tracing) - not actively used
|
- Tempo (distributed tracing)
|
||||||
- Pyroscope (continuous profiling) - not actively used
|
- Pyroscope (continuous profiling)
|
||||||
|
|
||||||
**Hardcoded References to monitoring01:**
|
**Hardcoded References to monitoring01:**
|
||||||
- `system/monitoring/logs.nix` - Promtail sends logs to `http://monitoring01.home.2rjus.net:3100`
|
- `system/monitoring/logs.nix` - Promtail sends logs to `http://monitoring01.home.2rjus.net:3100`
|
||||||
@@ -44,7 +44,9 @@ If multi-year retention with downsampling becomes necessary later, Thanos can be
|
|||||||
│ VictoriaMetrics│
|
│ VictoriaMetrics│
|
||||||
│ + Grafana │
|
│ + Grafana │
|
||||||
monitoring │ + Loki │
|
monitoring │ + Loki │
|
||||||
CNAME ──────────│ + Alertmanager │
|
CNAME ──────────│ + Tempo │
|
||||||
|
│ + Pyroscope │
|
||||||
|
│ + Alertmanager │
|
||||||
│ (vmalert) │
|
│ (vmalert) │
|
||||||
└─────────────────┘
|
└─────────────────┘
|
||||||
▲
|
▲
|
||||||
@@ -92,11 +94,16 @@ Imported by monitoring02 alongside the existing Grafana service.
|
|||||||
4. **Grafana** (port 3000): [DONE]
|
4. **Grafana** (port 3000): [DONE]
|
||||||
- VictoriaMetrics datasource (localhost:8428) as default
|
- VictoriaMetrics datasource (localhost:8428) as default
|
||||||
- monitoring01 Prometheus datasource kept for comparison during parallel operation
|
- monitoring01 Prometheus datasource kept for comparison during parallel operation
|
||||||
- Loki datasource pointing to localhost (after Loki migrated to monitoring02)
|
- Loki datasource pointing to monitoring01 (until Loki migrated)
|
||||||
|
|
||||||
5. **Loki** (port 3100): [DONE]
|
5. **Loki** (port 3100):
|
||||||
- Same configuration as monitoring01 in standalone `services/loki/` module
|
- TODO: Same configuration as current
|
||||||
- Grafana datasource updated to localhost:3100
|
|
||||||
|
6. **Tempo** (ports 3200, 3201):
|
||||||
|
- TODO: Same configuration
|
||||||
|
|
||||||
|
7. **Pyroscope** (port 4040):
|
||||||
|
- TODO: Same Docker-based deployment
|
||||||
|
|
||||||
**Note:** pve-exporter and pushgateway scrape targets are not included on monitoring02.
|
**Note:** pve-exporter and pushgateway scrape targets are not included on monitoring02.
|
||||||
pve-exporter requires a local exporter instance; pushgateway is replaced by VictoriaMetrics
|
pve-exporter requires a local exporter instance; pushgateway is replaced by VictoriaMetrics
|
||||||
@@ -140,6 +147,7 @@ Update hardcoded references to use the CNAME:
|
|||||||
- prometheus.home.2rjus.net -> monitoring.home.2rjus.net:8428
|
- prometheus.home.2rjus.net -> monitoring.home.2rjus.net:8428
|
||||||
- alertmanager.home.2rjus.net -> monitoring.home.2rjus.net:9093
|
- alertmanager.home.2rjus.net -> monitoring.home.2rjus.net:9093
|
||||||
- grafana.home.2rjus.net -> monitoring.home.2rjus.net:3000
|
- grafana.home.2rjus.net -> monitoring.home.2rjus.net:3000
|
||||||
|
- pyroscope.home.2rjus.net -> monitoring.home.2rjus.net:4040
|
||||||
|
|
||||||
Note: `hosts/template2/bootstrap.nix` stays pointed at monitoring01 until decommission.
|
Note: `hosts/template2/bootstrap.nix` stays pointed at monitoring01 until decommission.
|
||||||
|
|
||||||
@@ -164,8 +172,8 @@ Once ready to cut over:
|
|||||||
## Current Progress
|
## Current Progress
|
||||||
|
|
||||||
- **Phase 1** complete (2026-02-08): monitoring02 host created, Grafana with Kanidm OIDC validated
|
- **Phase 1** complete (2026-02-08): monitoring02 host created, Grafana with Kanidm OIDC validated
|
||||||
- **Phase 2** complete (2026-02-17): VictoriaMetrics, vmalert, Alertmanager, Loki, Grafana datasources configured
|
- **Phase 2** in progress (2026-02-17): VictoriaMetrics, vmalert, Alertmanager, Grafana datasources configured
|
||||||
- Tempo and Pyroscope deferred (not actively used; can be added later if needed)
|
- Remaining: Loki, Tempo, Pyroscope migration
|
||||||
|
|
||||||
## Open Questions
|
## Open Questions
|
||||||
|
|
||||||
|
|||||||
@@ -18,7 +18,7 @@
|
|||||||
role = "monitoring";
|
role = "monitoring";
|
||||||
};
|
};
|
||||||
|
|
||||||
homelab.dns.cnames = [ "grafana-test" "metrics" "vmalert" "loki" ];
|
homelab.dns.cnames = [ "grafana-test" "metrics" "vmalert" ];
|
||||||
|
|
||||||
# Enable Vault integration
|
# Enable Vault integration
|
||||||
vault.enable = true;
|
vault.enable = true;
|
||||||
|
|||||||
@@ -3,6 +3,5 @@
|
|||||||
./configuration.nix
|
./configuration.nix
|
||||||
../../services/grafana
|
../../services/grafana
|
||||||
../../services/victoriametrics
|
../../services/victoriametrics
|
||||||
../../services/loki
|
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
@@ -54,7 +54,7 @@
|
|||||||
{
|
{
|
||||||
name = "Loki";
|
name = "Loki";
|
||||||
type = "loki";
|
type = "loki";
|
||||||
url = "http://localhost:3100";
|
url = "http://monitoring01.home.2rjus.net:3100";
|
||||||
uid = "loki";
|
uid = "loki";
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
|||||||
@@ -1,104 +0,0 @@
|
|||||||
{ config, lib, pkgs, ... }:
|
|
||||||
let
|
|
||||||
# Script to generate bcrypt hash from Vault password for Caddy basic_auth
|
|
||||||
generateCaddyAuth = pkgs.writeShellApplication {
|
|
||||||
name = "generate-caddy-loki-auth";
|
|
||||||
runtimeInputs = [ config.services.caddy.package ];
|
|
||||||
text = ''
|
|
||||||
PASSWORD=$(cat /run/secrets/loki-push-auth)
|
|
||||||
HASH=$(caddy hash-password --plaintext "$PASSWORD")
|
|
||||||
echo "LOKI_PUSH_HASH=$HASH" > /run/secrets/caddy-loki-auth.env
|
|
||||||
chmod 0400 /run/secrets/caddy-loki-auth.env
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
in
|
|
||||||
{
|
|
||||||
# Fetch Loki push password from Vault
|
|
||||||
vault.secrets.loki-push-auth = {
|
|
||||||
secretPath = "shared/loki/push-auth";
|
|
||||||
extractKey = "password";
|
|
||||||
services = [ "caddy" ];
|
|
||||||
};
|
|
||||||
|
|
||||||
# Generate bcrypt hash for Caddy before it starts
|
|
||||||
systemd.services.caddy-loki-auth = {
|
|
||||||
description = "Generate Caddy basic auth hash for Loki";
|
|
||||||
after = [ "vault-secret-loki-push-auth.service" ];
|
|
||||||
requires = [ "vault-secret-loki-push-auth.service" ];
|
|
||||||
before = [ "caddy.service" ];
|
|
||||||
requiredBy = [ "caddy.service" ];
|
|
||||||
serviceConfig = {
|
|
||||||
Type = "oneshot";
|
|
||||||
RemainAfterExit = true;
|
|
||||||
ExecStart = lib.getExe generateCaddyAuth;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
# Load the bcrypt hash as environment variable for Caddy
|
|
||||||
services.caddy.environmentFile = "/run/secrets/caddy-loki-auth.env";
|
|
||||||
|
|
||||||
# Caddy reverse proxy for Loki with basic auth
|
|
||||||
services.caddy.virtualHosts."loki.home.2rjus.net".extraConfig = ''
|
|
||||||
basic_auth {
|
|
||||||
promtail {env.LOKI_PUSH_HASH}
|
|
||||||
}
|
|
||||||
reverse_proxy http://127.0.0.1:3100
|
|
||||||
'';
|
|
||||||
|
|
||||||
services.loki = {
|
|
||||||
enable = true;
|
|
||||||
configuration = {
|
|
||||||
auth_enabled = false;
|
|
||||||
|
|
||||||
server = {
|
|
||||||
http_listen_address = "127.0.0.1";
|
|
||||||
http_listen_port = 3100;
|
|
||||||
};
|
|
||||||
common = {
|
|
||||||
ring = {
|
|
||||||
instance_addr = "127.0.0.1";
|
|
||||||
kvstore = {
|
|
||||||
store = "inmemory";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
replication_factor = 1;
|
|
||||||
path_prefix = "/var/lib/loki";
|
|
||||||
};
|
|
||||||
schema_config = {
|
|
||||||
configs = [
|
|
||||||
{
|
|
||||||
from = "2024-01-01";
|
|
||||||
store = "tsdb";
|
|
||||||
object_store = "filesystem";
|
|
||||||
schema = "v13";
|
|
||||||
index = {
|
|
||||||
prefix = "loki_index_";
|
|
||||||
period = "24h";
|
|
||||||
};
|
|
||||||
}
|
|
||||||
];
|
|
||||||
};
|
|
||||||
storage_config = {
|
|
||||||
filesystem = {
|
|
||||||
directory = "/var/lib/loki/chunks";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
compactor = {
|
|
||||||
working_directory = "/var/lib/loki/compactor";
|
|
||||||
compaction_interval = "10m";
|
|
||||||
retention_enabled = true;
|
|
||||||
retention_delete_delay = "2h";
|
|
||||||
retention_delete_worker_count = 150;
|
|
||||||
delete_request_store = "filesystem";
|
|
||||||
};
|
|
||||||
limits_config = {
|
|
||||||
retention_period = "30d";
|
|
||||||
ingestion_rate_mb = 10;
|
|
||||||
ingestion_burst_size_mb = 20;
|
|
||||||
max_streams_per_user = 10000;
|
|
||||||
max_query_series = 500;
|
|
||||||
max_query_parallelism = 8;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
||||||
@@ -16,16 +16,6 @@ in
|
|||||||
SystemKeepFree=1G
|
SystemKeepFree=1G
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
# Fetch Loki push password from Vault (only on hosts with Vault enabled)
|
|
||||||
vault.secrets.promtail-loki-auth = lib.mkIf config.vault.enable {
|
|
||||||
secretPath = "shared/loki/push-auth";
|
|
||||||
extractKey = "password";
|
|
||||||
owner = "promtail";
|
|
||||||
group = "promtail";
|
|
||||||
services = [ "promtail" ];
|
|
||||||
};
|
|
||||||
|
|
||||||
# Configure promtail
|
# Configure promtail
|
||||||
services.promtail = {
|
services.promtail = {
|
||||||
enable = true;
|
enable = true;
|
||||||
@@ -41,14 +31,6 @@ in
|
|||||||
{
|
{
|
||||||
url = "http://monitoring01.home.2rjus.net:3100/loki/api/v1/push";
|
url = "http://monitoring01.home.2rjus.net:3100/loki/api/v1/push";
|
||||||
}
|
}
|
||||||
] ++ lib.optionals config.vault.enable [
|
|
||||||
{
|
|
||||||
url = "https://loki.home.2rjus.net/loki/api/v1/push";
|
|
||||||
basic_auth = {
|
|
||||||
username = "promtail";
|
|
||||||
password_file = "/run/secrets/promtail-loki-auth";
|
|
||||||
};
|
|
||||||
}
|
|
||||||
];
|
];
|
||||||
|
|
||||||
scrape_configs = [
|
scrape_configs = [
|
||||||
|
|||||||
@@ -26,17 +26,6 @@ path "secret/data/shared/nixos-exporter/*" {
|
|||||||
EOT
|
EOT
|
||||||
}
|
}
|
||||||
|
|
||||||
# Shared policy for Loki push authentication (all hosts push logs)
|
|
||||||
resource "vault_policy" "loki_push" {
|
|
||||||
name = "loki-push"
|
|
||||||
|
|
||||||
policy = <<EOT
|
|
||||||
path "secret/data/shared/loki/*" {
|
|
||||||
capabilities = ["read", "list"]
|
|
||||||
}
|
|
||||||
EOT
|
|
||||||
}
|
|
||||||
|
|
||||||
# Define host access policies
|
# Define host access policies
|
||||||
locals {
|
locals {
|
||||||
host_policies = {
|
host_policies = {
|
||||||
@@ -89,7 +78,7 @@ locals {
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
# Wave 3: DNS servers (managed in hosts-generated.tf)
|
# Wave 3: DNS servers
|
||||||
|
|
||||||
# Wave 4: http-proxy
|
# Wave 4: http-proxy
|
||||||
"http-proxy" = {
|
"http-proxy" = {
|
||||||
@@ -149,7 +138,7 @@ resource "vault_approle_auth_backend_role" "hosts" {
|
|||||||
backend = vault_auth_backend.approle.path
|
backend = vault_auth_backend.approle.path
|
||||||
role_name = each.key
|
role_name = each.key
|
||||||
token_policies = concat(
|
token_policies = concat(
|
||||||
["${each.key}-policy", "homelab-deploy", "nixos-exporter", "loki-push"],
|
["${each.key}-policy", "homelab-deploy", "nixos-exporter"],
|
||||||
lookup(each.value, "extra_policies", [])
|
lookup(each.value, "extra_policies", [])
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ resource "vault_approle_auth_backend_role" "generated_hosts" {
|
|||||||
|
|
||||||
backend = vault_auth_backend.approle.path
|
backend = vault_auth_backend.approle.path
|
||||||
role_name = each.key
|
role_name = each.key
|
||||||
token_policies = ["host-${each.key}", "homelab-deploy", "nixos-exporter", "loki-push"]
|
token_policies = ["host-${each.key}", "homelab-deploy", "nixos-exporter"]
|
||||||
secret_id_ttl = 0 # Never expire (wrapped tokens provide time limit)
|
secret_id_ttl = 0 # Never expire (wrapped tokens provide time limit)
|
||||||
token_ttl = 3600
|
token_ttl = 3600
|
||||||
token_max_ttl = 3600
|
token_max_ttl = 3600
|
||||||
|
|||||||
@@ -153,12 +153,6 @@ locals {
|
|||||||
auto_generate = true
|
auto_generate = true
|
||||||
password_length = 64
|
password_length = 64
|
||||||
}
|
}
|
||||||
|
|
||||||
# Loki push authentication (used by Promtail on all hosts)
|
|
||||||
"shared/loki/push-auth" = {
|
|
||||||
auto_generate = true
|
|
||||||
password_length = 32
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user