Add some alerting rules
Some checks failed
Run nix flake check / flake-check (push) Failing after 14m34s
Some checks failed
Run nix flake check / flake-check (push) Failing after 14m34s
This commit is contained in:
parent
f0bc29ac5e
commit
f1ca20a387
@ -67,6 +67,14 @@
|
||||
annotations:
|
||||
summary: "High CPU load on {{ $labels.instance }}"
|
||||
description: "CPU load is high on {{ $labels.instance }}. Please check."
|
||||
# Fires once the nixos-upgrade.service systemd unit has been reported in the
# "failed" state continuously for one minute.
- alert: nixos_upgrade_failed
  expr: >-
    node_systemd_unit_state{name="nixos-upgrade.service", state="failed"} == 1
  for: 1m
  labels:
    severity: critical
  annotations:
    # {{ $labels.instance }} is expanded by the alert templating at fire time.
    summary: 'NixOS upgrade failed on {{ $labels.instance }}'
    description: 'NixOS upgrade failed on {{ $labels.instance }}'
|
||||
- name: nameserver_rules
|
||||
rules:
|
||||
- alert: unbound_down
|
||||
@ -105,6 +113,24 @@
|
||||
annotations:
|
||||
summary: "NATS not running on {{ $labels.instance }}"
|
||||
description: "NATS has been down on {{ $labels.instance }} more than 5 minutes."
|
||||
# Rules for the Nix cache host. Both expressions pin the instance label to
# nix-cache01.home.2rjus.net:9100, so they never fire for any other target.
- name: nix_cache_rules
  rules:
    # build-flakes.service has been in the systemd "failed" state for 5 minutes.
    # The alert name is snake_case (no spaces or hyphens) to match every other
    # alert in this file and to keep alertname matchers simple.
    - alert: build_flakes_service_failed
      expr: >-
        node_systemd_unit_state{instance="nix-cache01.home.2rjus.net:9100",
        name="build-flakes.service", state="failed"} == 1
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: "The build-flakes service on {{ $labels.instance }} has failed"
        description: "The build-flakes service on {{ $labels.instance }} has failed"

    # Less than 10% of /nix is usable for 5 minutes.
    # Uses node_filesystem_avail_bytes rather than node_filesystem_free_bytes:
    # "free" also counts blocks reserved for root, so it overstates the space
    # unprivileged processes can actually write to.
    - alert: low_disk_space_nix
      expr: >-
        node_filesystem_avail_bytes{instance="nix-cache01.home.2rjus.net:9100", mountpoint="/nix"}
        / node_filesystem_size_bytes{instance="nix-cache01.home.2rjus.net:9100", mountpoint="/nix"}
        * 100 < 10
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: "Disk space low on /nix for {{ $labels.instance }}"
        description: "Disk space is low on /nix for host {{ $labels.instance }}. Please check."
|
||||
- name: home_assistant_rules
|
||||
rules:
|
||||
- alert: home_assistant_down
|
||||
|
Loading…
x
Reference in New Issue
Block a user