monitoring: exclude step-ca serving cert from general expiry alert #17

Merged
torjus merged 1 commits from monitoring-cleanup into master 2026-02-05 00:22:16 +00:00

View File

@@ -307,13 +307,21 @@ groups:
- name: certificate_rules
rules:
- alert: certificate_expiring_soon
expr: labmon_tlsconmon_certificate_seconds_left < 86400
expr: labmon_tlsconmon_certificate_seconds_left{address!="ca.home.2rjus.net:443"} < 86400
for: 5m
labels:
severity: warning
annotations:
summary: "TLS certificate expiring soon for {{ $labels.instance }}"
description: "TLS certificate for {{ $labels.address }} is expiring within 24 hours."
- alert: step_ca_serving_cert_expiring
expr: labmon_tlsconmon_certificate_seconds_left{address="ca.home.2rjus.net:443"} < 3600
for: 5m
labels:
severity: critical
annotations:
summary: "Step-CA serving certificate expiring"
description: "The step-ca serving certificate (24h auto-renewed) has less than 1 hour of validity left. Renewal may have failed."
- alert: certificate_check_error
expr: labmon_tlsconmon_certificate_check_error == 1
for: 5m