monitoring: increase high_cpu_load duration for nix-cache01 to 2h
nix-cache01 regularly hits high CPU during nix builds, causing flappy alerts. Keep the 15m threshold for all other hosts. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -18,13 +18,21 @@ groups:
|
||||
summary: "Disk space low on {{ $labels.instance }}"
|
||||
description: "Disk space is low on {{ $labels.instance }}. Please check."
|
||||
- alert: high_cpu_load
|
||||
expr: max(node_load5{}) by (instance) > (count by (instance)(node_cpu_seconds_total{mode="idle"}) * 0.7)
|
||||
expr: max(node_load5{instance!="nix-cache01.home.2rjus.net:9100"}) by (instance) > (count by (instance)(node_cpu_seconds_total{instance!="nix-cache01.home.2rjus.net:9100", mode="idle"}) * 0.7)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "High CPU load on {{ $labels.instance }}"
|
||||
description: "CPU load is high on {{ $labels.instance }}. Please check."
|
||||
- alert: high_cpu_load
|
||||
expr: max(node_load5{instance="nix-cache01.home.2rjus.net:9100"}) by (instance) > (count by (instance)(node_cpu_seconds_total{instance="nix-cache01.home.2rjus.net:9100", mode="idle"}) * 0.7)
|
||||
for: 2h
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "High CPU load on {{ $labels.instance }}"
|
||||
description: "CPU load is high on {{ $labels.instance }}. Please check."
|
||||
- alert: low_memory
|
||||
expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
|
||||
for: 2m
|
||||
|
||||
Reference in New Issue
Block a user