From b03e2e8ee43889d0a4539ed52565c013615735b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torjus=20H=C3=A5kestad?=
Date: Wed, 11 Feb 2026 00:45:07 +0100
Subject: [PATCH] monitoring: add alerts for homelab-deploy build failures

Co-Authored-By: Claude Opus 4.5
---
NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch. Confirm that the context lines
match services/monitoring/rules.yml exactly before applying.

 services/monitoring/rules.yml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/services/monitoring/rules.yml b/services/monitoring/rules.yml
index d405441..cec0b55 100644
--- a/services/monitoring/rules.yml
+++ b/services/monitoring/rules.yml
@@ -395,3 +395,21 @@ groups:
         annotations:
           summary: "TLS probe failed for {{ $labels.instance }}"
           description: "Cannot connect to {{ $labels.instance }} to check TLS certificate. The service may be down or unreachable."
+  - name: homelab_deploy_rules  # alerts on homelab-deploy build results and builder scrape health
+    rules:
+      - alert: homelab_deploy_build_failed  # any increase of the status="failure" series in the trailing 1h
+        expr: increase(homelab_deploy_build_host_total{status="failure"}[1h]) > 0
+        for: 0m  # no pending period: fires on the first evaluation where expr holds
+        labels:
+          severity: warning
+        annotations:
+          summary: "Build failed for {{ $labels.host }} in repo {{ $labels.repo }}"
+          description: "Host {{ $labels.host }} failed to build from {{ $labels.repo }} repository."
+      - alert: homelab_deploy_builder_down  # the builder job's scrape target reports up == 0
+        expr: up{job="homelab-deploy-builder"} == 0
+        for: 5m  # must hold for 5m before firing; keep in sync with "5 minutes" in the description
+        labels:
+          severity: warning
+        annotations:
+          summary: "Homelab deploy builder not responding on {{ $labels.instance }}"
+          description: "Cannot scrape homelab-deploy-builder metrics from {{ $labels.instance }} for 5 minutes."