monitoring: lower unbound cache hit ratio alert threshold to 20%
Some checks failed
Run nix flake check / flake-check (push) Failing after 9m2s

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-13 18:55:03 +01:00
parent 0d9f49a3b4
commit ae823e439d

View File

@@ -118,13 +118,13 @@ groups:
description: "NSD has been down on {{ $labels.instance }} more than 5 minutes."
# Warning-level alert on the Unbound cache hit ratio, restricted to series
# labelled dns_role="primary".
# Only alert on primary DNS (secondary has cold cache after failover)
- alert: unbound_low_cache_hit_ratio
# Ratio = hits / (hits + misses), each taken as a 5m rate() of the counter.
# NOTE(review): the two `expr` lines below look like the before/after sides of
# a diff hunk (threshold 0.5 -> 0.2, matching the commit title). Presumably only
# the `< 0.2` line exists in the real rules file; a literal duplicate key would
# not be valid YAML. Confirm against the checked-out file.
expr: (rate(unbound_cache_hits_total{dns_role="primary"}[5m]) / (rate(unbound_cache_hits_total{dns_role="primary"}[5m]) + rate(unbound_cache_misses_total{dns_role="primary"}[5m]))) < 0.5
expr: (rate(unbound_cache_hits_total{dns_role="primary"}[5m]) / (rate(unbound_cache_hits_total{dns_role="primary"}[5m]) + rate(unbound_cache_misses_total{dns_role="primary"}[5m]))) < 0.2
# The expression must stay true for 15 minutes before the alert fires.
for: 15m
labels:
severity: warning
annotations:
summary: "Low DNS cache hit ratio on {{ $labels.instance }}"
# NOTE(review): as with `expr`, the 50% description appears to be the
# pre-change line and the 20% description the post-change line. Keep the
# percentage in the text in sync with the threshold in `expr`.
description: "Unbound cache hit ratio is below 50% on {{ $labels.instance }}."
description: "Unbound cache hit ratio is below 20% on {{ $labels.instance }}."
- name: http_proxy_rules
rules:
- alert: caddy_down