monitoring: increase high_cpu_load duration for nix-cache01 to 2h
nix-cache01 regularly hits high CPU load during nix builds, which causes the high_cpu_load alert to flap. Keep the 15m threshold for all other hosts.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -18,13 +18,21 @@ groups:
|
|||||||
summary: "Disk space low on {{ $labels.instance }}"
|
summary: "Disk space low on {{ $labels.instance }}"
|
||||||
description: "Disk space is low on {{ $labels.instance }}. Please check."
|
description: "Disk space is low on {{ $labels.instance }}. Please check."
|
||||||
- alert: high_cpu_load
|
- alert: high_cpu_load
|
||||||
expr: max(node_load5{}) by (instance) > (count by (instance)(node_cpu_seconds_total{mode="idle"}) * 0.7)
|
expr: max(node_load5{instance!="nix-cache01.home.2rjus.net:9100"}) by (instance) > (count by (instance)(node_cpu_seconds_total{instance!="nix-cache01.home.2rjus.net:9100", mode="idle"}) * 0.7)
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
summary: "High CPU load on {{ $labels.instance }}"
|
summary: "High CPU load on {{ $labels.instance }}"
|
||||||
description: "CPU load is high on {{ $labels.instance }}. Please check."
|
description: "CPU load is high on {{ $labels.instance }}. Please check."
|
||||||
|
- alert: high_cpu_load
|
||||||
|
expr: max(node_load5{instance="nix-cache01.home.2rjus.net:9100"}) by (instance) > (count by (instance)(node_cpu_seconds_total{instance="nix-cache01.home.2rjus.net:9100", mode="idle"}) * 0.7)
|
||||||
|
for: 2h
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "High CPU load on {{ $labels.instance }}"
|
||||||
|
description: "CPU load is high on {{ $labels.instance }}. Please check."
|
||||||
- alert: low_memory
|
- alert: low_memory
|
||||||
expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
|
expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
|
||||||
for: 2m
|
for: 2m
|
||||||
|
|||||||
Reference in New Issue
Block a user