Add some alerting rules
Some checks failed
Run nix flake check / flake-check (push) Failing after 3m23s
Periodic flake update / flake-update (push) Successful in 1m32s

This commit is contained in:
Torjus Håkestad 2025-01-21 22:47:44 +01:00
parent 4154fdb9de
commit 43dfc0ec28
Signed by: torjus
SSH Key Fingerprint: SHA256:KjAds8wHfD2mBYK2H815s/+ABcSdcIHUndwHEdSxml4

View File

@ -51,6 +51,22 @@
annotations:
summary: "Instance {{ $labels.instance }} down"
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
# Warn when the root filesystem ("/") has had less than 10% usable space
# for 5 minutes.
# The ratio uses node_filesystem_avail_bytes (space available to non-root
# users) instead of node_filesystem_free_bytes: "free" also counts blocks
# reserved for root, so it overstates the space services can actually use.
- alert: low_disk_space
  expr: node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"} * 100 < 10
  for: 5m
  labels:
    severity: warning
  annotations:
    summary: "Disk space low on {{ $labels.instance }}"
    description: "Disk space is low on {{ $labels.instance }}. Please check."
# Warn when the 1-minute load average has stayed above the host's CPU count
# for 5 minutes.
# A fixed threshold of 1 fires on any multi-core host under ordinary load, so
# the load is compared against the number of CPUs instead. The CPU count is
# derived by counting the per-CPU "idle" series of node_cpu_seconds_total;
# aggregating "without (cpu, mode)" keeps the remaining target labels so the
# two sides match one-to-one. On a single-CPU host this is equivalent to the
# previous "node_load1 > 1".
- alert: high_cpu_load
  expr: node_load1 > count without (cpu, mode) (node_cpu_seconds_total{mode="idle"})
  for: 5m
  labels:
    severity: warning
  annotations:
    summary: "High CPU load on {{ $labels.instance }}"
    description: "CPU load is high on {{ $labels.instance }}. Please check."
- name: nameserver_rules
rules:
- alert: unbound_down