From 881e70df2788fb0d8c6054ccff25bf5ec904bbab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torjus=20H=C3=A5kestad?=
Date: Sat, 7 Feb 2026 01:22:29 +0100
Subject: [PATCH] monitoring: relax systemd_not_running alert threshold

Increase duration from 5m to 10m and demote severity from critical to
warning. Brief degraded states during nixos-rebuild are normal and were
causing false positive alerts.

Co-Authored-By: Claude Opus 4.5
---
NOTE(review): this patch was recovered from a copy whose line breaks and
leading whitespace had been collapsed. The YAML indentation in the hunk
below is assumed to follow the usual Prometheus rule-file layout (alerts
at 6 spaces, rule keys at 8, label/annotation entries at 10) -- confirm
against services/monitoring/rules.yml, or apply with
`git apply --ignore-whitespace`.

 services/monitoring/rules.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/services/monitoring/rules.yml b/services/monitoring/rules.yml
index 4707787..9e612eb 100644
--- a/services/monitoring/rules.yml
+++ b/services/monitoring/rules.yml
@@ -75,12 +75,12 @@ groups:
           description: "Based on the last 6h trend, the root filesystem on {{ $labels.instance }} is predicted to run out of space within 24 hours."
       - alert: systemd_not_running
         expr: node_systemd_system_running == 0
-        for: 5m
+        for: 10m
         labels:
-          severity: critical
+          severity: warning
         annotations:
           summary: "Systemd not in running state on {{ $labels.instance }}"
-          description: "Systemd is not in running state on {{ $labels.instance }}. The system may be in a degraded state."
+          description: "Systemd is not in running state on {{ $labels.instance }}. The system may be in a degraded state. Note: brief degraded states during nixos-rebuild are normal."
       - alert: high_file_descriptors
         expr: node_filefd_allocated / node_filefd_maximum > 0.8
         for: 5m