diff --git a/CLAUDE.md b/CLAUDE.md
index af57dec..2bbff94 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -132,6 +132,8 @@ Terraform manages the secrets and AppRole policies in `terraform/vault/`.
 
 **Important:** Never amend commits to `master` unless the user explicitly asks for it. Amending rewrites history and causes issues for deployed configurations.
 
+**Important:** Never force push to `master`. If a commit on master has an error, fix it with a new commit rather than rewriting history.
+
 **Important:** Do not use `gh pr create` to create pull requests. The git server does not support GitHub CLI for PR creation. Instead, push the branch and let the user create the PR manually via the web interface.
 
 When starting a new plan or task, the first step should typically be to create and checkout a new branch with an appropriate name (e.g., `git checkout -b dns-automation` or `git checkout -b fix-nginx-config`).
diff --git a/services/monitoring/rules.yml b/services/monitoring/rules.yml
index 31d99c1..6822955 100644
--- a/services/monitoring/rules.yml
+++ b/services/monitoring/rules.yml
@@ -392,3 +392,29 @@ groups:
         annotations:
           summary: "Cannot scrape OpenBao metrics from {{ $labels.instance }}"
           description: "OpenBao metrics endpoint is not responding on {{ $labels.instance }}."
+  - name: certificate_rules
+    rules:
+      - alert: tls_certificate_expiring_soon
+        expr: (probe_ssl_earliest_cert_expiry - time()) < 86400 * 7
+        for: 1h
+        labels:
+          severity: warning
+        annotations:
+          summary: "TLS certificate expiring soon on {{ $labels.instance }}"
+          description: "The TLS certificate for {{ $labels.instance }} expires in less than 7 days."
+      - alert: tls_certificate_expiring_critical
+        expr: (probe_ssl_earliest_cert_expiry - time()) < 86400
+        for: 0m
+        labels:
+          severity: critical
+        annotations:
+          summary: "TLS certificate expiring within 24h on {{ $labels.instance }}"
+          description: "The TLS certificate for {{ $labels.instance }} expires in less than 24 hours. Immediate action required."
+      - alert: tls_probe_failed
+        expr: probe_success{job="blackbox_tls"} == 0
+        for: 5m
+        labels:
+          severity: warning
+        annotations:
+          summary: "TLS probe failed for {{ $labels.instance }}"
+          description: "Cannot connect to {{ $labels.instance }} to check TLS certificate. The service may be down or unreachable."