Rework build-flakes alert rules
Some checks failed
Run nix flake check / flake-check (push) Has been cancelled

This commit is contained in:
Torjus Håkestad 2025-05-28 21:26:04 +02:00
parent 87c98581c2
commit fb1a36a846
Signed by: torjus
SSH Key Fingerprint: SHA256:KjAds8wHfD2mBYK2H815s/+ABcSdcIHUndwHEdSxml4

View File

@@ -97,15 +97,21 @@ groups:
description: "NATS has been down on {{ $labels.instance }} more than 5 minutes." description: "NATS has been down on {{ $labels.instance }} more than 5 minutes."
- name: nix_cache_rules - name: nix_cache_rules
rules: rules:
# NOTE(review): this is a two-column diff rendering (old text, then new text, on each line).
# On the new side, count_over_time() counts samples regardless of their value, so "< 1" cannot
# detect a unit that was never active. max_over_time() < 1 matches when the state="active"
# series never reached 1 within the last hour (assumes a 0/1 gauge, as the old "== 1" suggests).
- alert: build-flakes_service_failed - alert: build-flakes_service_not_active_recently
expr: node_systemd_unit_state{instance="nix-cache01.home.2rjus.net:9100", name="build-flakes.service", state="failed"} == 1 expr: max_over_time(node_systemd_unit_state{instance="nix-cache01.home.2rjus.net:9100", name="build-flakes.service", state="active"}[1h]) < 1
for: 0m for: 0m
keep_firing_for: 10m
labels: labels:
severity: critical severity: critical
annotations: annotations:
summary: "The build-flakes service on {{ $labels.instance }} has failed" summary: "The build-flakes service on {{ $labels.instance }} has not run recently"
description: "The build-flakes service on {{ $labels.instance }} has failed" description: "The build-flakes service on {{ $labels.instance }} has not run recently"
# Fires when the build_flakes_error metric equals 1; the annotations report
# the series' `host` label.
# An alerting rule needs a non-empty name: a bare `alert:` parses as null and
# the rule fails validation, so the name is set explicitly here.
- alert: build-flakes_error
  expr: build_flakes_error == 1
  labels:
    severity: warning
  annotations:
    summary: "The build-flakes job has failed for host {{ $labels.host }}."
    description: "The build-flakes job has failed for host {{ $labels.host }}."
- alert: harmonia_down - alert: harmonia_down
expr: node_systemd_unit_state {instance="nix-cache01.home.2rjus.net:9100", name = "harmonia.service", state = "active"} == 0 expr: node_systemd_unit_state {instance="nix-cache01.home.2rjus.net:9100", name = "harmonia.service", state = "active"} == 0
for: 5m for: 5m