monitoring01: remove host and migrate services to monitoring02 #43
@@ -130,7 +130,7 @@ get_commit_info(<hash>) # Get full details of a specific change
|
||||
```
|
||||
|
||||
**Example workflow for a service-related alert:**
|
||||
1. Query `nixos_flake_info{hostname="monitoring01"}` → `current_rev: 8959829`
|
||||
1. Query `nixos_flake_info{hostname="monitoring02"}` → `current_rev: 8959829`
|
||||
2. `resolve_ref("master")` → `4633421`
|
||||
3. `is_ancestor("8959829", "4633421")` → Yes, host is behind
|
||||
4. `commits_between("8959829", "4633421")` → 7 commits missing
|
||||
|
||||
@@ -30,7 +30,7 @@ Use the `lab-monitoring` MCP server tools:
|
||||
### Label Reference
|
||||
|
||||
Available labels for log queries:
|
||||
- `hostname` - Hostname (e.g., `ns1`, `monitoring01`, `ha1`) - matches the Prometheus `hostname` label
|
||||
- `hostname` - Hostname (e.g., `ns1`, `monitoring02`, `ha1`) - matches the Prometheus `hostname` label
|
||||
- `systemd_unit` - Systemd unit name (e.g., `nsd.service`, `nixos-upgrade.service`)
|
||||
- `job` - Either `systemd-journal` (most logs), `varlog` (file-based logs), or `bootstrap` (VM bootstrap logs)
|
||||
- `filename` - For `varlog` job, the log file path
|
||||
@@ -54,7 +54,7 @@ Journal logs are JSON-formatted. Key fields:
|
||||
|
||||
**All logs from a host:**
|
||||
```logql
|
||||
{hostname="monitoring01"}
|
||||
{hostname="monitoring02"}
|
||||
```
|
||||
|
||||
**Logs from a service across all hosts:**
|
||||
@@ -74,7 +74,7 @@ Journal logs are JSON-formatted. Key fields:
|
||||
|
||||
**Regex matching:**
|
||||
```logql
|
||||
{systemd_unit="prometheus.service"} |~ "scrape.*failed"
|
||||
{systemd_unit="victoriametrics.service"} |~ "scrape.*failed"
|
||||
```
|
||||
|
||||
**Filter by level (journal scrape only):**
|
||||
@@ -109,7 +109,7 @@ Default lookback is 1 hour. Use `start` parameter for older logs:
|
||||
Useful systemd units for troubleshooting:
|
||||
- `nixos-upgrade.service` - Daily auto-upgrade logs
|
||||
- `nsd.service` - DNS server (ns1/ns2)
|
||||
- `prometheus.service` - Metrics collection
|
||||
- `victoriametrics.service` - Metrics collection
|
||||
- `loki.service` - Log aggregation
|
||||
- `caddy.service` - Reverse proxy
|
||||
- `home-assistant.service` - Home automation
|
||||
@@ -152,7 +152,7 @@ VMs provisioned from template2 send bootstrap progress directly to Loki via curl
|
||||
|
||||
Parse JSON and filter on fields:
|
||||
```logql
|
||||
{systemd_unit="prometheus.service"} | json | PRIORITY="3"
|
||||
{systemd_unit="victoriametrics.service"} | json | PRIORITY="3"
|
||||
```
|
||||
|
||||
---
|
||||
@@ -242,12 +242,11 @@ All available Prometheus job names:
|
||||
- `unbound` - DNS resolver metrics (ns1, ns2)
|
||||
- `wireguard` - VPN tunnel metrics (http-proxy)
|
||||
|
||||
**Monitoring stack (localhost on monitoring01):**
|
||||
- `prometheus` - Prometheus self-metrics
|
||||
**Monitoring stack (localhost on monitoring02):**
|
||||
- `victoriametrics` - VictoriaMetrics self-metrics
|
||||
- `loki` - Loki self-metrics
|
||||
- `grafana` - Grafana self-metrics
|
||||
- `alertmanager` - Alertmanager metrics
|
||||
- `pushgateway` - Push-based metrics gateway
|
||||
|
||||
**External/infrastructure:**
|
||||
- `pve-exporter` - Proxmox hypervisor metrics
|
||||
@@ -262,7 +261,7 @@ All scrape targets have these labels:
|
||||
**Standard labels:**
|
||||
- `instance` - Full target address (`<hostname>.home.2rjus.net:<port>`)
|
||||
- `job` - Job name (e.g., `node-exporter`, `unbound`, `nixos-exporter`)
|
||||
- `hostname` - Short hostname (e.g., `ns1`, `monitoring01`) - use this for host filtering
|
||||
- `hostname` - Short hostname (e.g., `ns1`, `monitoring02`) - use this for host filtering
|
||||
|
||||
**Host metadata labels** (when configured in `homelab.host`):
|
||||
- `role` - Host role (e.g., `dns`, `build-host`, `vault`)
|
||||
@@ -275,7 +274,7 @@ Use the `hostname` label for easy host filtering across all jobs:
|
||||
|
||||
```promql
|
||||
{hostname="ns1"} # All metrics from ns1
|
||||
node_load1{hostname="monitoring01"} # Specific metric by hostname
|
||||
node_load1{hostname="monitoring02"} # Specific metric by hostname
|
||||
up{hostname="ha1"} # Check if ha1 is up
|
||||
```
|
||||
|
||||
@@ -283,10 +282,10 @@ This is simpler than wildcarding the `instance` label:
|
||||
|
||||
```promql
|
||||
# Old way (still works but verbose)
|
||||
up{instance=~"monitoring01.*"}
|
||||
up{instance=~"monitoring02.*"}
|
||||
|
||||
# New way (preferred)
|
||||
up{hostname="monitoring01"}
|
||||
up{hostname="monitoring02"}
|
||||
```
|
||||
|
||||
### Filtering by Role/Tier
|
||||
|
||||
20
CLAUDE.md
20
CLAUDE.md
@@ -247,7 +247,7 @@ nix develop -c homelab-deploy -- deploy \
|
||||
deploy.prod.<hostname>
|
||||
```
|
||||
|
||||
Subject format: `deploy.<tier>.<hostname>` (e.g., `deploy.prod.monitoring01`, `deploy.test.testvm01`)
|
||||
Subject format: `deploy.<tier>.<hostname>` (e.g., `deploy.prod.monitoring02`, `deploy.test.testvm01`)
|
||||
|
||||
**Verifying Deployments:**
|
||||
|
||||
@@ -309,7 +309,7 @@ All hosts automatically get:
|
||||
- OpenBao (Vault) secrets management via AppRole
|
||||
- Internal ACME CA integration (OpenBao PKI at vault.home.2rjus.net)
|
||||
- Daily auto-upgrades with auto-reboot
|
||||
- Prometheus node-exporter + Promtail (logs to monitoring01)
|
||||
- Prometheus node-exporter + Promtail (logs to monitoring02)
|
||||
- Monitoring scrape target auto-registration via `homelab.monitoring` options
|
||||
- Custom root CA trust
|
||||
- DNS zone auto-registration via `homelab.dns` options
|
||||
@@ -335,7 +335,7 @@ Use `nix flake show` or `nix develop -c ansible-inventory --graph` to list all h
|
||||
- Infrastructure subnet: `10.69.13.x`
|
||||
- DNS: ns1/ns2 provide authoritative DNS with primary-secondary setup
|
||||
- Internal CA for ACME certificates (no Let's Encrypt)
|
||||
- Centralized monitoring at monitoring01
|
||||
- Centralized monitoring at monitoring02
|
||||
- Static networking via systemd-networkd
|
||||
|
||||
### Secrets Management
|
||||
@@ -480,23 +480,21 @@ See [docs/host-creation.md](docs/host-creation.md) for the complete host creatio
|
||||
|
||||
### Monitoring Stack
|
||||
|
||||
All hosts ship metrics and logs to `monitoring01`:
|
||||
- **Metrics**: Prometheus scrapes node-exporter from all hosts
|
||||
- **Logs**: Promtail ships logs to Loki on monitoring01
|
||||
- **Access**: Grafana at monitoring01 for visualization
|
||||
- **Tracing**: Tempo for distributed tracing
|
||||
- **Profiling**: Pyroscope for continuous profiling
|
||||
All hosts ship metrics and logs to `monitoring02`:
|
||||
- **Metrics**: VictoriaMetrics scrapes node-exporter from all hosts
|
||||
- **Logs**: Promtail ships logs to Loki on monitoring02
|
||||
- **Access**: Grafana at monitoring02 for visualization
|
||||
|
||||
**Scrape Target Auto-Generation:**
|
||||
|
||||
Prometheus scrape targets are automatically generated from host configurations, following the same pattern as DNS zone generation:
|
||||
VictoriaMetrics scrape targets are automatically generated from host configurations, following the same pattern as DNS zone generation:
|
||||
|
||||
- **Node-exporter**: All flake hosts with static IPs are automatically added as node-exporter targets
|
||||
- **Service targets**: Defined via `homelab.monitoring.scrapeTargets` in service modules
|
||||
- **External targets**: Non-flake hosts defined in `/services/monitoring/external-targets.nix`
|
||||
- **Library**: `lib/monitoring.nix` provides `generateNodeExporterTargets` and `generateScrapeConfigs`
|
||||
|
||||
Service modules declare their scrape targets directly via `homelab.monitoring.scrapeTargets`. The Prometheus config on monitoring01 auto-generates scrape configs from all hosts. See "Homelab Module Options" section for available options.
|
||||
Service modules declare their scrape targets directly via `homelab.monitoring.scrapeTargets`. The VictoriaMetrics config on monitoring02 auto-generates scrape configs from all hosts. See "Homelab Module Options" section for available options.
|
||||
|
||||
To add monitoring targets for non-NixOS hosts, edit `/services/monitoring/external-targets.nix`.
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ NixOS Flake-based configuration repository for a homelab infrastructure. All hos
|
||||
| `ca` | Internal Certificate Authority |
|
||||
| `ha1` | Home Assistant + Zigbee2MQTT + Mosquitto |
|
||||
| `http-proxy` | Reverse proxy |
|
||||
| `monitoring01` | Prometheus, Grafana, Loki, Tempo, Pyroscope |
|
||||
| `monitoring02` | VictoriaMetrics, Grafana, Loki, Alertmanager |
|
||||
| `jelly01` | Jellyfin media server |
|
||||
| `nix-cache02` | Nix binary cache + NATS-based build service |
|
||||
| `nats1` | NATS messaging |
|
||||
@@ -121,4 +121,4 @@ No manual intervention is required after `tofu apply`.
|
||||
- Infrastructure subnet: `10.69.13.0/24`
|
||||
- DNS: ns1/ns2 authoritative with primary-secondary AXFR
|
||||
- Internal CA for TLS certificates (migrating from step-ca to OpenBao PKI)
|
||||
- Centralized monitoring at monitoring01
|
||||
- Centralized monitoring at monitoring02
|
||||
|
||||
@@ -92,15 +92,6 @@
|
||||
./hosts/http-proxy
|
||||
];
|
||||
};
|
||||
monitoring01 = nixpkgs.lib.nixosSystem {
|
||||
inherit system;
|
||||
specialArgs = {
|
||||
inherit inputs self;
|
||||
};
|
||||
modules = commonModules ++ [
|
||||
./hosts/monitoring01
|
||||
];
|
||||
};
|
||||
jelly01 = nixpkgs.lib.nixosSystem {
|
||||
inherit system;
|
||||
specialArgs = {
|
||||
|
||||
@@ -19,8 +19,6 @@
|
||||
"ha"
|
||||
"z2m"
|
||||
"jelly"
|
||||
"pyroscope"
|
||||
"pushgw"
|
||||
];
|
||||
|
||||
nixpkgs.config.allowUnfree = true;
|
||||
|
||||
@@ -1,114 +0,0 @@
|
||||
{
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
|
||||
{
|
||||
imports = [
|
||||
./hardware-configuration.nix
|
||||
|
||||
../../system
|
||||
../../common/vm
|
||||
];
|
||||
|
||||
homelab.host.role = "monitoring";
|
||||
|
||||
nixpkgs.config.allowUnfree = true;
|
||||
# Use the GRUB boot loader, installed to /dev/sda.
|
||||
boot.loader.grub = {
|
||||
enable = true;
|
||||
device = "/dev/sda";
|
||||
configurationLimit = 3;
|
||||
};
|
||||
|
||||
networking.hostName = "monitoring01";
|
||||
networking.domain = "home.2rjus.net";
|
||||
networking.useNetworkd = true;
|
||||
networking.useDHCP = false;
|
||||
services.resolved.enable = true;
|
||||
networking.nameservers = [
|
||||
"10.69.13.5"
|
||||
"10.69.13.6"
|
||||
];
|
||||
|
||||
systemd.network.enable = true;
|
||||
systemd.network.networks."ens18" = {
|
||||
matchConfig.Name = "ens18";
|
||||
address = [
|
||||
"10.69.13.13/24"
|
||||
];
|
||||
routes = [
|
||||
{ Gateway = "10.69.13.1"; }
|
||||
];
|
||||
linkConfig.RequiredForOnline = "routable";
|
||||
};
|
||||
time.timeZone = "Europe/Oslo";
|
||||
|
||||
nix.settings.experimental-features = [
|
||||
"nix-command"
|
||||
"flakes"
|
||||
];
|
||||
nix.settings.tarball-ttl = 0;
|
||||
environment.systemPackages = with pkgs; [
|
||||
vim
|
||||
wget
|
||||
git
|
||||
sqlite
|
||||
];
|
||||
|
||||
services.qemuGuest.enable = true;
|
||||
|
||||
# Vault secrets management
|
||||
vault.enable = true;
|
||||
homelab.deploy.enable = true;
|
||||
vault.secrets.backup-helper = {
|
||||
secretPath = "shared/backup/password";
|
||||
extractKey = "password";
|
||||
outputDir = "/run/secrets/backup_helper_secret";
|
||||
services = [ "restic-backups-grafana" "restic-backups-grafana-db" ];
|
||||
};
|
||||
|
||||
services.restic.backups.grafana = {
|
||||
repository = "rest:http://10.69.12.52:8000/backup-nix";
|
||||
passwordFile = "/run/secrets/backup_helper_secret";
|
||||
paths = [ "/var/lib/grafana/plugins" ];
|
||||
timerConfig = {
|
||||
OnCalendar = "daily";
|
||||
Persistent = true;
|
||||
RandomizedDelaySec = "2h";
|
||||
};
|
||||
pruneOpts = [
|
||||
"--keep-daily 7"
|
||||
"--keep-weekly 4"
|
||||
"--keep-monthly 6"
|
||||
"--keep-within 1d"
|
||||
];
|
||||
extraOptions = [ "--retry-lock=5m" ];
|
||||
};
|
||||
|
||||
services.restic.backups.grafana-db = {
|
||||
repository = "rest:http://10.69.12.52:8000/backup-nix";
|
||||
passwordFile = "/run/secrets/backup_helper_secret";
|
||||
command = [ "${pkgs.sqlite}/bin/sqlite3" "/var/lib/grafana/data/grafana.db" ".dump" ];
|
||||
timerConfig = {
|
||||
OnCalendar = "daily";
|
||||
Persistent = true;
|
||||
RandomizedDelaySec = "2h";
|
||||
};
|
||||
pruneOpts = [
|
||||
"--keep-daily 7"
|
||||
"--keep-weekly 4"
|
||||
"--keep-monthly 6"
|
||||
"--keep-within 1d"
|
||||
];
|
||||
extraOptions = [ "--retry-lock=5m" ];
|
||||
};
|
||||
|
||||
# Open ports in the firewall.
|
||||
# networking.firewall.allowedTCPPorts = [ ... ];
|
||||
# networking.firewall.allowedUDPPorts = [ ... ];
|
||||
# Or disable the firewall altogether.
|
||||
networking.firewall.enable = false;
|
||||
|
||||
system.stateVersion = "23.11"; # Did you read the comment?
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
{ ... }:
|
||||
{
|
||||
imports = [
|
||||
./configuration.nix
|
||||
../../services/monitoring
|
||||
];
|
||||
}
|
||||
@@ -1,42 +0,0 @@
|
||||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
modulesPath,
|
||||
...
|
||||
}:
|
||||
|
||||
{
|
||||
imports = [
|
||||
(modulesPath + "/profiles/qemu-guest.nix")
|
||||
];
|
||||
boot.initrd.availableKernelModules = [
|
||||
"ata_piix"
|
||||
"uhci_hcd"
|
||||
"virtio_pci"
|
||||
"virtio_scsi"
|
||||
"sd_mod"
|
||||
"sr_mod"
|
||||
];
|
||||
boot.initrd.kernelModules = [ "dm-snapshot" ];
|
||||
boot.kernelModules = [
|
||||
"ptp_kvm"
|
||||
];
|
||||
boot.extraModulePackages = [ ];
|
||||
|
||||
fileSystems."/" = {
|
||||
device = "/dev/disk/by-label/root";
|
||||
fsType = "xfs";
|
||||
};
|
||||
|
||||
swapDevices = [ { device = "/dev/disk/by-label/swap"; } ];
|
||||
|
||||
# Enables DHCP on each ethernet and wireless interface. In case of scripted networking
|
||||
# (the default) this is the recommended approach. When using systemd-networkd it's
|
||||
# still possible to use this option, but it's recommended to use it in conjunction
|
||||
# with explicit per-interface declarations with `networking.interfaces.<interface>.useDHCP`.
|
||||
networking.useDHCP = lib.mkDefault true;
|
||||
# networking.interfaces.ens18.useDHCP = lib.mkDefault true;
|
||||
|
||||
nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux";
|
||||
}
|
||||
@@ -5,5 +5,8 @@
|
||||
../../services/victoriametrics
|
||||
../../services/loki
|
||||
../../services/monitoring/alerttonotify.nix
|
||||
../../services/monitoring/blackbox.nix
|
||||
../../services/monitoring/exportarr.nix
|
||||
../../services/monitoring/pve.nix
|
||||
];
|
||||
}
|
||||
@@ -20,10 +20,10 @@ vault-fetch <secret-path> <output-directory> [cache-directory]
|
||||
|
||||
```bash
|
||||
# Fetch a secret using an explicit cache directory
|
||||
vault-fetch hosts/monitoring01/grafana-admin /run/secrets/grafana /var/lib/vault/cache/grafana
|
||||
vault-fetch hosts/ha1/mqtt-password /run/secrets/mqtt-password /var/lib/vault/cache/mqtt-password
|
||||
|
||||
# Use default cache location
|
||||
vault-fetch hosts/monitoring01/grafana-admin /run/secrets/grafana
|
||||
vault-fetch hosts/ha1/mqtt-password /run/secrets/mqtt-password
|
||||
```
|
||||
|
||||
## How It Works
|
||||
@@ -53,13 +53,13 @@ If Vault is unreachable or authentication fails:
|
||||
This tool is designed to be called from systemd service `ExecStartPre` hooks via the `vault.secrets` NixOS module:
|
||||
|
||||
```nix
|
||||
vault.secrets.grafana-admin = {
|
||||
secretPath = "hosts/monitoring01/grafana-admin";
|
||||
vault.secrets.mqtt-password = {
|
||||
secretPath = "hosts/ha1/mqtt-password";
|
||||
};
|
||||
|
||||
# Service automatically gets secrets fetched before start
|
||||
systemd.services.grafana.serviceConfig = {
|
||||
EnvironmentFile = "/run/secrets/grafana-admin/password";
|
||||
systemd.services.mosquitto.serviceConfig = {
|
||||
EnvironmentFile = "/run/secrets/mqtt-password/password";
|
||||
};
|
||||
```
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ set -euo pipefail
|
||||
#
|
||||
# Usage: vault-fetch <secret-path> <output-directory> [cache-directory]
|
||||
#
|
||||
# Example: vault-fetch hosts/monitoring01/grafana-admin /run/secrets/grafana /var/lib/vault/cache/grafana
|
||||
# Example: vault-fetch hosts/ha1/mqtt-password /run/secrets/grafana /var/lib/vault/cache/grafana
|
||||
#
|
||||
# This script:
|
||||
# 1. Authenticates to Vault using AppRole credentials from /var/lib/vault/approle/
|
||||
@@ -17,7 +17,7 @@ set -euo pipefail
|
||||
# Parse arguments
|
||||
if [ $# -lt 2 ]; then
|
||||
echo "Usage: vault-fetch <secret-path> <output-directory> [cache-directory]" >&2
|
||||
echo "Example: vault-fetch hosts/monitoring01/grafana /run/secrets/grafana /var/lib/vault/cache/grafana" >&2
|
||||
echo "Example: vault-fetch hosts/ha1/mqtt-password /run/secrets/grafana /var/lib/vault/cache/grafana" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
@@ -45,12 +45,6 @@
|
||||
isDefault = true;
|
||||
uid = "victoriametrics";
|
||||
}
|
||||
{
|
||||
name = "Prometheus (monitoring01)";
|
||||
type = "prometheus";
|
||||
url = "http://monitoring01.home.2rjus.net:9090";
|
||||
uid = "prometheus";
|
||||
}
|
||||
{
|
||||
name = "Loki";
|
||||
type = "loki";
|
||||
|
||||
@@ -63,22 +63,6 @@
|
||||
}
|
||||
reverse_proxy http://jelly01.home.2rjus.net:8096
|
||||
}
|
||||
pyroscope.home.2rjus.net {
|
||||
log {
|
||||
output file /var/log/caddy/pyroscope.log {
|
||||
mode 644
|
||||
}
|
||||
}
|
||||
reverse_proxy http://monitoring01.home.2rjus.net:4040
|
||||
}
|
||||
pushgw.home.2rjus.net {
|
||||
log {
|
||||
output file /var/log/caddy/pushgw.log {
|
||||
mode 644
|
||||
}
|
||||
}
|
||||
reverse_proxy http://monitoring01.home.2rjus.net:9091
|
||||
}
|
||||
http://http-proxy.home.2rjus.net/metrics {
|
||||
log {
|
||||
output file /var/log/caddy/caddy-metrics.log {
|
||||
|
||||
@@ -1,33 +1,4 @@
|
||||
{ pkgs, ... }:
|
||||
let
|
||||
# TLS endpoints to monitor for certificate expiration
|
||||
# These are all services using ACME certificates from OpenBao PKI
|
||||
tlsTargets = [
|
||||
# Direct ACME certs (security.acme.certs)
|
||||
"https://vault.home.2rjus.net:8200"
|
||||
"https://auth.home.2rjus.net"
|
||||
"https://testvm01.home.2rjus.net"
|
||||
|
||||
# Caddy auto-TLS on http-proxy
|
||||
"https://nzbget.home.2rjus.net"
|
||||
"https://radarr.home.2rjus.net"
|
||||
"https://sonarr.home.2rjus.net"
|
||||
"https://ha.home.2rjus.net"
|
||||
"https://z2m.home.2rjus.net"
|
||||
"https://prometheus.home.2rjus.net"
|
||||
"https://alertmanager.home.2rjus.net"
|
||||
"https://grafana.home.2rjus.net"
|
||||
"https://jelly.home.2rjus.net"
|
||||
"https://pyroscope.home.2rjus.net"
|
||||
"https://pushgw.home.2rjus.net"
|
||||
|
||||
# Caddy auto-TLS on nix-cache02
|
||||
"https://nix-cache.home.2rjus.net"
|
||||
|
||||
# Caddy auto-TLS on grafana01
|
||||
"https://grafana-test.home.2rjus.net"
|
||||
];
|
||||
in
|
||||
{
|
||||
services.prometheus.exporters.blackbox = {
|
||||
enable = true;
|
||||
@@ -57,36 +28,4 @@ in
|
||||
- 503
|
||||
'';
|
||||
};
|
||||
|
||||
# Add blackbox scrape config to Prometheus
|
||||
# Alert rules are in rules.yml (certificate_rules group)
|
||||
services.prometheus.scrapeConfigs = [
|
||||
{
|
||||
job_name = "blackbox_tls";
|
||||
metrics_path = "/probe";
|
||||
params = {
|
||||
module = [ "https_cert" ];
|
||||
};
|
||||
static_configs = [{
|
||||
targets = tlsTargets;
|
||||
}];
|
||||
relabel_configs = [
|
||||
# Pass the target URL to blackbox as a parameter
|
||||
{
|
||||
source_labels = [ "__address__" ];
|
||||
target_label = "__param_target";
|
||||
}
|
||||
# Use the target URL as the instance label
|
||||
{
|
||||
source_labels = [ "__param_target" ];
|
||||
target_label = "instance";
|
||||
}
|
||||
# Point the actual scrape at the local blackbox exporter
|
||||
{
|
||||
target_label = "__address__";
|
||||
replacement = "127.0.0.1:9115";
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
{ ... }:
|
||||
{
|
||||
imports = [
|
||||
./loki.nix
|
||||
./grafana.nix
|
||||
./prometheus.nix
|
||||
./blackbox.nix
|
||||
./exportarr.nix
|
||||
./pve.nix
|
||||
./alerttonotify.nix
|
||||
./pyroscope.nix
|
||||
./tempo.nix
|
||||
];
|
||||
}
|
||||
@@ -14,14 +14,4 @@
|
||||
apiKeyFile = config.vault.secrets.sonarr-api-key.outputDir;
|
||||
port = 9709;
|
||||
};
|
||||
|
||||
# Scrape config
|
||||
services.prometheus.scrapeConfigs = [
|
||||
{
|
||||
job_name = "sonarr";
|
||||
static_configs = [{
|
||||
targets = [ "localhost:9709" ];
|
||||
}];
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
{ pkgs, ... }:
|
||||
{
|
||||
services.grafana = {
|
||||
enable = true;
|
||||
settings = {
|
||||
server = {
|
||||
http_addr = "";
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -1,58 +0,0 @@
|
||||
{ ... }:
|
||||
{
|
||||
services.loki = {
|
||||
enable = true;
|
||||
configuration = {
|
||||
auth_enabled = false;
|
||||
|
||||
server = {
|
||||
http_listen_port = 3100;
|
||||
};
|
||||
common = {
|
||||
ring = {
|
||||
instance_addr = "127.0.0.1";
|
||||
kvstore = {
|
||||
store = "inmemory";
|
||||
};
|
||||
};
|
||||
replication_factor = 1;
|
||||
path_prefix = "/var/lib/loki";
|
||||
};
|
||||
schema_config = {
|
||||
configs = [
|
||||
{
|
||||
from = "2024-01-01";
|
||||
store = "tsdb";
|
||||
object_store = "filesystem";
|
||||
schema = "v13";
|
||||
index = {
|
||||
prefix = "loki_index_";
|
||||
period = "24h";
|
||||
};
|
||||
}
|
||||
];
|
||||
};
|
||||
storage_config = {
|
||||
filesystem = {
|
||||
directory = "/var/lib/loki/chunks";
|
||||
};
|
||||
};
|
||||
compactor = {
|
||||
working_directory = "/var/lib/loki/compactor";
|
||||
compaction_interval = "10m";
|
||||
retention_enabled = true;
|
||||
retention_delete_delay = "2h";
|
||||
retention_delete_worker_count = 150;
|
||||
delete_request_store = "filesystem";
|
||||
};
|
||||
limits_config = {
|
||||
retention_period = "30d";
|
||||
ingestion_rate_mb = 10;
|
||||
ingestion_burst_size_mb = 20;
|
||||
max_streams_per_user = 10000;
|
||||
max_query_series = 500;
|
||||
max_query_parallelism = 8;
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -1,267 +0,0 @@
|
||||
{ self, lib, pkgs, ... }:
|
||||
let
|
||||
monLib = import ../../lib/monitoring.nix { inherit lib; };
|
||||
externalTargets = import ./external-targets.nix;
|
||||
|
||||
nodeExporterTargets = monLib.generateNodeExporterTargets self externalTargets;
|
||||
autoScrapeConfigs = monLib.generateScrapeConfigs self externalTargets;
|
||||
|
||||
# Script to fetch AppRole token for Prometheus to use when scraping OpenBao metrics
|
||||
fetchOpenbaoToken = pkgs.writeShellApplication {
|
||||
name = "fetch-openbao-token";
|
||||
runtimeInputs = [ pkgs.curl pkgs.jq ];
|
||||
text = ''
|
||||
VAULT_ADDR="https://vault01.home.2rjus.net:8200"
|
||||
APPROLE_DIR="/var/lib/vault/approle"
|
||||
OUTPUT_FILE="/run/secrets/prometheus/openbao-token"
|
||||
|
||||
# Read AppRole credentials
|
||||
if [ ! -f "$APPROLE_DIR/role-id" ] || [ ! -f "$APPROLE_DIR/secret-id" ]; then
|
||||
echo "AppRole credentials not found at $APPROLE_DIR" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
ROLE_ID=$(cat "$APPROLE_DIR/role-id")
|
||||
SECRET_ID=$(cat "$APPROLE_DIR/secret-id")
|
||||
|
||||
# Authenticate to Vault
|
||||
AUTH_RESPONSE=$(curl -sf -k -X POST \
|
||||
-d "{\"role_id\":\"$ROLE_ID\",\"secret_id\":\"$SECRET_ID\"}" \
|
||||
"$VAULT_ADDR/v1/auth/approle/login")
|
||||
|
||||
# Extract token
|
||||
VAULT_TOKEN=$(echo "$AUTH_RESPONSE" | jq -r '.auth.client_token')
|
||||
if [ -z "$VAULT_TOKEN" ] || [ "$VAULT_TOKEN" = "null" ]; then
|
||||
echo "Failed to extract Vault token from response" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Write token to file
|
||||
mkdir -p "$(dirname "$OUTPUT_FILE")"
|
||||
echo -n "$VAULT_TOKEN" > "$OUTPUT_FILE"
|
||||
chown prometheus:prometheus "$OUTPUT_FILE"
|
||||
chmod 0400 "$OUTPUT_FILE"
|
||||
|
||||
echo "Successfully fetched OpenBao token"
|
||||
'';
|
||||
};
|
||||
in
|
||||
{
|
||||
# Systemd service to fetch AppRole token for Prometheus OpenBao scraping
|
||||
# The token is used to authenticate when scraping /v1/sys/metrics
|
||||
systemd.services.prometheus-openbao-token = {
|
||||
description = "Fetch OpenBao token for Prometheus metrics scraping";
|
||||
after = [ "network-online.target" ];
|
||||
wants = [ "network-online.target" ];
|
||||
before = [ "prometheus.service" ];
|
||||
requiredBy = [ "prometheus.service" ];
|
||||
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
ExecStart = lib.getExe fetchOpenbaoToken;
|
||||
};
|
||||
};
|
||||
|
||||
# Timer to periodically refresh the token (AppRole tokens have 1-hour TTL)
|
||||
systemd.timers.prometheus-openbao-token = {
|
||||
description = "Refresh OpenBao token for Prometheus";
|
||||
wantedBy = [ "timers.target" ];
|
||||
timerConfig = {
|
||||
OnBootSec = "5min";
|
||||
OnUnitActiveSec = "30min";
|
||||
RandomizedDelaySec = "5min";
|
||||
};
|
||||
};
|
||||
|
||||
# Fetch apiary bearer token from Vault
|
||||
vault.secrets.prometheus-apiary-token = {
|
||||
secretPath = "hosts/monitoring01/apiary-token";
|
||||
extractKey = "password";
|
||||
owner = "prometheus";
|
||||
group = "prometheus";
|
||||
services = [ "prometheus" ];
|
||||
};
|
||||
|
||||
services.prometheus = {
|
||||
enable = true;
|
||||
# syntax-only check because we use external credential files (e.g., openbao-token)
|
||||
checkConfig = "syntax-only";
|
||||
alertmanager = {
|
||||
enable = true;
|
||||
configuration = {
|
||||
global = {
|
||||
};
|
||||
route = {
|
||||
receiver = "webhook_natstonotify";
|
||||
group_wait = "30s";
|
||||
group_interval = "5m";
|
||||
repeat_interval = "1h";
|
||||
group_by = [ "alertname" ];
|
||||
};
|
||||
receivers = [
|
||||
{
|
||||
name = "webhook_natstonotify";
|
||||
webhook_configs = [
|
||||
{
|
||||
url = "http://localhost:5001/alert";
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
alertmanagers = [
|
||||
{
|
||||
static_configs = [
|
||||
{
|
||||
targets = [ "localhost:9093" ];
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
|
||||
retentionTime = "30d";
|
||||
globalConfig = {
|
||||
scrape_interval = "15s";
|
||||
};
|
||||
rules = [
|
||||
(builtins.readFile ./rules.yml)
|
||||
];
|
||||
|
||||
scrapeConfigs = [
|
||||
# Auto-generated node-exporter targets from flake hosts + external
|
||||
# Each static_config entry may have labels from homelab.host metadata
|
||||
{
|
||||
job_name = "node-exporter";
|
||||
static_configs = nodeExporterTargets;
|
||||
}
|
||||
# Systemd exporter on all hosts (same targets, different port)
|
||||
# Preserves the same label grouping as node-exporter
|
||||
{
|
||||
job_name = "systemd-exporter";
|
||||
static_configs = map
|
||||
(cfg: cfg // {
|
||||
targets = map (t: builtins.replaceStrings [ ":9100" ] [ ":9558" ] t) cfg.targets;
|
||||
})
|
||||
nodeExporterTargets;
|
||||
}
|
||||
# Local monitoring services (not auto-generated)
|
||||
{
|
||||
job_name = "prometheus";
|
||||
static_configs = [
|
||||
{
|
||||
targets = [ "localhost:9090" ];
|
||||
}
|
||||
];
|
||||
}
|
||||
{
|
||||
job_name = "loki";
|
||||
static_configs = [
|
||||
{
|
||||
targets = [ "localhost:3100" ];
|
||||
}
|
||||
];
|
||||
}
|
||||
{
|
||||
job_name = "grafana";
|
||||
static_configs = [
|
||||
{
|
||||
targets = [ "localhost:3000" ];
|
||||
}
|
||||
];
|
||||
}
|
||||
{
|
||||
job_name = "alertmanager";
|
||||
static_configs = [
|
||||
{
|
||||
targets = [ "localhost:9093" ];
|
||||
}
|
||||
];
|
||||
}
|
||||
{
|
||||
job_name = "pushgateway";
|
||||
honor_labels = true;
|
||||
static_configs = [
|
||||
{
|
||||
targets = [ "localhost:9091" ];
|
||||
}
|
||||
];
|
||||
}
|
||||
# Caddy metrics from nix-cache02 (serves nix-cache.home.2rjus.net)
|
||||
{
|
||||
job_name = "nix-cache_caddy";
|
||||
scheme = "https";
|
||||
static_configs = [
|
||||
{
|
||||
targets = [ "nix-cache.home.2rjus.net" ];
|
||||
}
|
||||
];
|
||||
}
|
||||
# pve-exporter with complex relabel config
|
||||
{
|
||||
job_name = "pve-exporter";
|
||||
static_configs = [
|
||||
{
|
||||
targets = [ "10.69.12.75" ];
|
||||
}
|
||||
];
|
||||
metrics_path = "/pve";
|
||||
params = {
|
||||
module = [ "default" ];
|
||||
cluster = [ "1" ];
|
||||
node = [ "1" ];
|
||||
};
|
||||
relabel_configs = [
|
||||
{
|
||||
source_labels = [ "__address__" ];
|
||||
target_label = "__param_target";
|
||||
}
|
||||
{
|
||||
source_labels = [ "__param_target" ];
|
||||
target_label = "instance";
|
||||
}
|
||||
{
|
||||
target_label = "__address__";
|
||||
replacement = "127.0.0.1:9221";
|
||||
}
|
||||
];
|
||||
}
|
||||
# OpenBao metrics with bearer token auth
|
||||
{
|
||||
job_name = "openbao";
|
||||
scheme = "https";
|
||||
metrics_path = "/v1/sys/metrics";
|
||||
params = {
|
||||
format = [ "prometheus" ];
|
||||
};
|
||||
static_configs = [{
|
||||
targets = [ "vault01.home.2rjus.net:8200" ];
|
||||
}];
|
||||
authorization = {
|
||||
type = "Bearer";
|
||||
credentials_file = "/run/secrets/prometheus/openbao-token";
|
||||
};
|
||||
}
|
||||
# Apiary external service
|
||||
{
|
||||
job_name = "apiary";
|
||||
scheme = "https";
|
||||
scrape_interval = "60s";
|
||||
static_configs = [{
|
||||
targets = [ "apiary.t-juice.club" ];
|
||||
}];
|
||||
authorization = {
|
||||
type = "Bearer";
|
||||
credentials_file = "/run/secrets/prometheus-apiary-token";
|
||||
};
|
||||
}
|
||||
] ++ autoScrapeConfigs;
|
||||
|
||||
pushgateway = {
|
||||
enable = true;
|
||||
web = {
|
||||
external-url = "https://pushgw.home.2rjus.net";
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -1,7 +1,7 @@
|
||||
{ config, ... }:
|
||||
{
|
||||
vault.secrets.pve-exporter = {
|
||||
secretPath = "hosts/monitoring01/pve-exporter";
|
||||
secretPath = "hosts/monitoring02/pve-exporter";
|
||||
extractKey = "config";
|
||||
outputDir = "/run/secrets/pve_exporter";
|
||||
mode = "0444";
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
{ ... }:
|
||||
{
|
||||
virtualisation.oci-containers.containers.pyroscope = {
|
||||
pull = "missing";
|
||||
image = "grafana/pyroscope:latest";
|
||||
ports = [ "4040:4040" ];
|
||||
};
|
||||
}
|
||||
@@ -259,32 +259,32 @@ groups:
|
||||
description: "Wireguard handshake timeout on {{ $labels.instance }} for peer {{ $labels.public_key }}."
|
||||
- name: monitoring_rules
|
||||
rules:
|
||||
- alert: prometheus_not_running
|
||||
expr: node_systemd_unit_state{instance="monitoring01.home.2rjus.net:9100", name="prometheus.service", state="active"} == 0
|
||||
- alert: victoriametrics_not_running
|
||||
expr: node_systemd_unit_state{instance="monitoring02.home.2rjus.net:9100", name="victoriametrics.service", state="active"} == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Prometheus service not running on {{ $labels.instance }}"
|
||||
description: "Prometheus service not running on {{ $labels.instance }}"
|
||||
summary: "VictoriaMetrics service not running on {{ $labels.instance }}"
|
||||
description: "VictoriaMetrics service not running on {{ $labels.instance }}"
|
||||
- alert: vmalert_not_running
|
||||
expr: node_systemd_unit_state{instance="monitoring02.home.2rjus.net:9100", name="vmalert.service", state="active"} == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "vmalert service not running on {{ $labels.instance }}"
|
||||
description: "vmalert service not running on {{ $labels.instance }}"
|
||||
- alert: alertmanager_not_running
|
||||
expr: node_systemd_unit_state{instance="monitoring01.home.2rjus.net:9100", name="alertmanager.service", state="active"} == 0
|
||||
expr: node_systemd_unit_state{instance="monitoring02.home.2rjus.net:9100", name="alertmanager.service", state="active"} == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Alertmanager service not running on {{ $labels.instance }}"
|
||||
description: "Alertmanager service not running on {{ $labels.instance }}"
|
||||
- alert: pushgateway_not_running
|
||||
expr: node_systemd_unit_state{instance="monitoring01.home.2rjus.net:9100", name="pushgateway.service", state="active"} == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Pushgateway service not running on {{ $labels.instance }}"
|
||||
description: "Pushgateway service not running on {{ $labels.instance }}"
|
||||
- alert: loki_not_running
|
||||
expr: node_systemd_unit_state{instance="monitoring01.home.2rjus.net:9100", name="loki.service", state="active"} == 0
|
||||
expr: node_systemd_unit_state{instance="monitoring02.home.2rjus.net:9100", name="loki.service", state="active"} == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
@@ -292,29 +292,13 @@ groups:
|
||||
summary: "Loki service not running on {{ $labels.instance }}"
|
||||
description: "Loki service not running on {{ $labels.instance }}"
|
||||
- alert: grafana_not_running
|
||||
expr: node_systemd_unit_state{instance="monitoring01.home.2rjus.net:9100", name="grafana.service", state="active"} == 0
|
||||
expr: node_systemd_unit_state{instance="monitoring02.home.2rjus.net:9100", name="grafana.service", state="active"} == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Grafana service not running on {{ $labels.instance }}"
|
||||
description: "Grafana service not running on {{ $labels.instance }}"
|
||||
- alert: tempo_not_running
|
||||
expr: node_systemd_unit_state{instance="monitoring01.home.2rjus.net:9100", name="tempo.service", state="active"} == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Tempo service not running on {{ $labels.instance }}"
|
||||
description: "Tempo service not running on {{ $labels.instance }}"
|
||||
- alert: pyroscope_not_running
|
||||
expr: node_systemd_unit_state{instance="monitoring01.home.2rjus.net:9100", name="podman-pyroscope.service", state="active"} == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Pyroscope service not running on {{ $labels.instance }}"
|
||||
description: "Pyroscope service not running on {{ $labels.instance }}"
|
||||
- name: proxmox_rules
|
||||
rules:
|
||||
- alert: pve_node_down
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
{ ... }:
|
||||
{
|
||||
services.tempo = {
|
||||
enable = true;
|
||||
settings = {
|
||||
server = {
|
||||
http_listen_port = 3200;
|
||||
grpc_listen_port = 3201;
|
||||
};
|
||||
distributor = {
|
||||
receivers = {
|
||||
otlp = {
|
||||
protocols = {
|
||||
http = {
|
||||
endpoint = ":4318";
|
||||
cors = {
|
||||
allowed_origins = [ "*.home.2rjus.net" ];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
storage = {
|
||||
trace = {
|
||||
backend = "local";
|
||||
local = {
|
||||
path = "/var/lib/tempo";
|
||||
};
|
||||
wal = {
|
||||
path = "/var/lib/tempo/wal";
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -6,6 +6,24 @@ let
|
||||
nodeExporterTargets = monLib.generateNodeExporterTargets self externalTargets;
|
||||
autoScrapeConfigs = monLib.generateScrapeConfigs self externalTargets;
|
||||
|
||||
# TLS endpoints to monitor for certificate expiration via blackbox exporter
|
||||
tlsTargets = [
|
||||
"https://vault.home.2rjus.net:8200"
|
||||
"https://auth.home.2rjus.net"
|
||||
"https://testvm01.home.2rjus.net"
|
||||
"https://nzbget.home.2rjus.net"
|
||||
"https://radarr.home.2rjus.net"
|
||||
"https://sonarr.home.2rjus.net"
|
||||
"https://ha.home.2rjus.net"
|
||||
"https://z2m.home.2rjus.net"
|
||||
"https://metrics.home.2rjus.net"
|
||||
"https://alertmanager.home.2rjus.net"
|
||||
"https://grafana.home.2rjus.net"
|
||||
"https://jelly.home.2rjus.net"
|
||||
"https://nix-cache.home.2rjus.net"
|
||||
"https://grafana-test.home.2rjus.net"
|
||||
];
|
||||
|
||||
# Script to fetch AppRole token for VictoriaMetrics to use when scraping OpenBao metrics
|
||||
fetchOpenbaoToken = pkgs.writeShellApplication {
|
||||
name = "fetch-openbao-token-vm";
|
||||
@@ -107,6 +125,39 @@ let
|
||||
credentials_file = "/run/secrets/victoriametrics-apiary-token";
|
||||
};
|
||||
}
|
||||
# Blackbox TLS certificate monitoring
|
||||
{
|
||||
job_name = "blackbox_tls";
|
||||
metrics_path = "/probe";
|
||||
params = {
|
||||
module = [ "https_cert" ];
|
||||
};
|
||||
static_configs = [{ targets = tlsTargets; }];
|
||||
relabel_configs = [
|
||||
{
|
||||
source_labels = [ "__address__" ];
|
||||
target_label = "__param_target";
|
||||
}
|
||||
{
|
||||
source_labels = [ "__param_target" ];
|
||||
target_label = "instance";
|
||||
}
|
||||
{
|
||||
target_label = "__address__";
|
||||
replacement = "127.0.0.1:9115";
|
||||
}
|
||||
];
|
||||
}
|
||||
# Sonarr exporter
|
||||
{
|
||||
job_name = "sonarr";
|
||||
static_configs = [{ targets = [ "localhost:9709" ]; }];
|
||||
}
|
||||
# Proxmox VE exporter
|
||||
{
|
||||
job_name = "pve";
|
||||
static_configs = [{ targets = [ "localhost:9221" ]; }];
|
||||
}
|
||||
] ++ autoScrapeConfigs;
|
||||
in
|
||||
{
|
||||
@@ -152,7 +203,7 @@ in
|
||||
|
||||
# Fetch apiary bearer token from Vault
|
||||
vault.secrets.victoriametrics-apiary-token = {
|
||||
secretPath = "hosts/monitoring01/apiary-token";
|
||||
secretPath = "hosts/monitoring02/apiary-token";
|
||||
extractKey = "password";
|
||||
owner = "victoriametrics";
|
||||
group = "victoriametrics";
|
||||
|
||||
@@ -57,7 +57,7 @@ let
|
||||
type = types.str;
|
||||
description = ''
|
||||
Path to the secret in Vault (without /v1/secret/data/ prefix).
|
||||
Example: "hosts/monitoring01/grafana-admin"
|
||||
Example: "hosts/ha1/mqtt-password"
|
||||
'';
|
||||
};
|
||||
|
||||
@@ -152,13 +152,11 @@ in
|
||||
'';
|
||||
example = literalExpression ''
|
||||
{
|
||||
grafana-admin = {
|
||||
secretPath = "hosts/monitoring01/grafana-admin";
|
||||
owner = "grafana";
|
||||
group = "grafana";
|
||||
restartTrigger = true;
|
||||
restartInterval = "daily";
|
||||
services = [ "grafana" ];
|
||||
mqtt-password = {
|
||||
secretPath = "hosts/ha1/mqtt-password";
|
||||
owner = "mosquitto";
|
||||
group = "mosquitto";
|
||||
services = [ "mosquitto" ];
|
||||
};
|
||||
}
|
||||
'';
|
||||
|
||||
@@ -40,23 +40,13 @@ EOT
|
||||
# Define host access policies
|
||||
locals {
|
||||
host_policies = {
|
||||
# Example: monitoring01 host
|
||||
# "monitoring01" = {
|
||||
# paths = [
|
||||
# "secret/data/hosts/monitoring01/*",
|
||||
# "secret/data/services/prometheus/*",
|
||||
# "secret/data/services/grafana/*",
|
||||
# "secret/data/shared/smtp/*"
|
||||
# ]
|
||||
# extra_policies = ["some-other-policy"] # Optional: additional policies
|
||||
# }
|
||||
|
||||
# Example: ha1 host
|
||||
# Example:
|
||||
# "ha1" = {
|
||||
# paths = [
|
||||
# "secret/data/hosts/ha1/*",
|
||||
# "secret/data/shared/mqtt/*"
|
||||
# ]
|
||||
# extra_policies = ["some-other-policy"] # Optional: additional policies
|
||||
# }
|
||||
|
||||
"ha1" = {
|
||||
@@ -66,16 +56,6 @@ locals {
|
||||
]
|
||||
}
|
||||
|
||||
"monitoring01" = {
|
||||
paths = [
|
||||
"secret/data/hosts/monitoring01/*",
|
||||
"secret/data/shared/backup/*",
|
||||
"secret/data/shared/nats/*",
|
||||
"secret/data/services/exportarr/*",
|
||||
]
|
||||
extra_policies = ["prometheus-metrics"]
|
||||
}
|
||||
|
||||
# Wave 1: hosts with no service secrets (only need vault.enable for future use)
|
||||
"nats1" = {
|
||||
paths = [
|
||||
|
||||
@@ -47,8 +47,8 @@ locals {
|
||||
"monitoring02" = {
|
||||
paths = [
|
||||
"secret/data/hosts/monitoring02/*",
|
||||
"secret/data/hosts/monitoring01/apiary-token",
|
||||
"secret/data/services/grafana/*",
|
||||
"secret/data/services/exportarr/*",
|
||||
"secret/data/shared/nats/nkey",
|
||||
]
|
||||
extra_policies = ["prometheus-metrics"]
|
||||
|
||||
@@ -10,10 +10,6 @@ resource "vault_mount" "kv" {
|
||||
locals {
|
||||
secrets = {
|
||||
# Example host-specific secrets
|
||||
# "hosts/monitoring01/grafana-admin" = {
|
||||
# auto_generate = true
|
||||
# password_length = 32
|
||||
# }
|
||||
# "hosts/ha1/mqtt-password" = {
|
||||
# auto_generate = true
|
||||
# password_length = 24
|
||||
@@ -35,11 +31,6 @@ locals {
|
||||
# }
|
||||
# }
|
||||
|
||||
"hosts/monitoring01/grafana-admin" = {
|
||||
auto_generate = true
|
||||
password_length = 32
|
||||
}
|
||||
|
||||
"hosts/ha1/mqtt-password" = {
|
||||
auto_generate = true
|
||||
password_length = 24
|
||||
@@ -57,8 +48,8 @@ locals {
|
||||
data = { nkey = var.nats_nkey }
|
||||
}
|
||||
|
||||
# PVE exporter config for monitoring01
|
||||
"hosts/monitoring01/pve-exporter" = {
|
||||
# PVE exporter config for monitoring02
|
||||
"hosts/monitoring02/pve-exporter" = {
|
||||
auto_generate = false
|
||||
data = { config = var.pve_exporter_config }
|
||||
}
|
||||
@@ -149,7 +140,7 @@ locals {
|
||||
}
|
||||
|
||||
# Bearer token for scraping apiary metrics
|
||||
"hosts/monitoring01/apiary-token" = {
|
||||
"hosts/monitoring02/apiary-token" = {
|
||||
auto_generate = true
|
||||
password_length = 64
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user