monitoring: add alerts for homelab-deploy build failures
Some checks failed
Run nix flake check / flake-check (push) Has been cancelled
Some checks failed
Run nix flake check / flake-check (push) Has been cancelled
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -395,3 +395,21 @@ groups:
|
||||
annotations:
|
||||
summary: "TLS probe failed for {{ $labels.instance }}"
|
||||
description: "Cannot connect to {{ $labels.instance }} to check TLS certificate. The service may be down or unreachable."
|
||||
# Alerts for the homelab-deploy build pipeline: one rule for failed host
# builds and one for the builder's metrics endpoint being unscrapable.
- name: homelab_deploy_rules
  rules:
    # Fires when the per-host build counter with status="failure" has
    # increased within the last hour. `for: 0m` means there is no pending
    # period, and the alert stays active until the increase leaves the
    # 1h window.
    # NOTE(review): increase() needs at least two samples, so a failure
    # series that first appears already at 1 may not trigger this rule.
    # Confirm whether the exporter pre-initialises the counter.
    - alert: homelab_deploy_build_failed
      expr: increase(homelab_deploy_build_host_total{status="failure"}[1h]) > 0
      for: 0m
      labels:
        severity: warning
      annotations:
        summary: "Build failed for {{ $labels.host }} in repo {{ $labels.repo }}"
        description: "Host {{ $labels.host }} failed to build from {{ $labels.repo }} repository."

    # Fires when the homelab-deploy-builder scrape job has reported
    # `up == 0` continuously for 5 minutes.
    - alert: homelab_deploy_builder_down
      expr: up{job="homelab-deploy-builder"} == 0
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: "Homelab deploy builder not responding on {{ $labels.instance }}"
        description: "Cannot scrape homelab-deploy-builder metrics from {{ $labels.instance }} for 5 minutes."
|
||||
|
||||
Reference in New Issue
Block a user