From 881e70df2788fb0d8c6054ccff25bf5ec904bbab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torjus=20H=C3=A5kestad?= <torjus@usit.uio.no>
Date: Sat, 7 Feb 2026 01:22:29 +0100
Subject: [PATCH] monitoring: relax systemd_not_running alert duration and severity

Increase the `for` duration from 5m to 10m and demote severity from
critical to warning. Brief degraded states during nixos-rebuild are
normal and were causing false-positive alerts. The alert description
now notes this as well.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 services/monitoring/rules.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/services/monitoring/rules.yml b/services/monitoring/rules.yml
index 4707787..9e612eb 100644
--- a/services/monitoring/rules.yml
+++ b/services/monitoring/rules.yml
@@ -75,12 +75,12 @@ groups:
           description: "Based on the last 6h trend, the root filesystem on {{ $labels.instance }} is predicted to run out of space within 24 hours."
       - alert: systemd_not_running
         expr: node_systemd_system_running == 0
-        for: 5m
+        for: 10m
         labels:
-          severity: critical
+          severity: warning
         annotations:
           summary: "Systemd not in running state on {{ $labels.instance }}"
-          description: "Systemd is not in running state on {{ $labels.instance }}. The system may be in a degraded state."
+          description: "Systemd is not in running state on {{ $labels.instance }}. The system may be in a degraded state. Note: brief degraded states during nixos-rebuild are normal."
       - alert: high_file_descriptors
         expr: node_filefd_allocated / node_filefd_maximum > 0.8
         for: 5m