monitoring: fix blackbox rules and add force-push policy
Some checks failed
Run nix flake check / flake-check (push) Has been cancelled

Move certificate alert rules to rules.yml instead of adding them as a
separate rules string in blackbox.nix. The previous approach caused a
YAML parse error due to duplicate 'groups' keys.

Also add policy to CLAUDE.md: never force push to master.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-09 22:26:05 +01:00
parent 75e4fb61a5
commit 8e1753c2c8
2 changed files with 28 additions and 0 deletions

View File

@@ -392,3 +392,29 @@ groups:
annotations:
summary: "Cannot scrape OpenBao metrics from {{ $labels.instance }}"
description: "OpenBao metrics endpoint is not responding on {{ $labels.instance }}."
- name: certificate_rules
rules:
- alert: tls_certificate_expiring_soon
expr: (probe_ssl_earliest_cert_expiry - time()) < 86400 * 7
for: 1h
labels:
severity: warning
annotations:
summary: "TLS certificate expiring soon on {{ $labels.instance }}"
description: "The TLS certificate for {{ $labels.instance }} expires in less than 7 days."
- alert: tls_certificate_expiring_critical
expr: (probe_ssl_earliest_cert_expiry - time()) < 86400
for: 0m
labels:
severity: critical
annotations:
summary: "TLS certificate expiring within 24h on {{ $labels.instance }}"
description: "The TLS certificate for {{ $labels.instance }} expires in less than 24 hours. Immediate action required."
- alert: tls_probe_failed
expr: probe_success{job="blackbox_tls"} == 0
for: 5m
labels:
severity: warning
annotations:
summary: "TLS probe failed for {{ $labels.instance }}"
description: "Cannot connect to {{ $labels.instance }} to check TLS certificate. The service may be down or unreachable."