From ae823e439d1c24babc4ed7089d3e5ca02bd198f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torjus=20H=C3=A5kestad?=
Date: Fri, 13 Feb 2026 18:55:03 +0100
Subject: [PATCH] monitoring: lower unbound cache hit ratio alert threshold to 20%

Co-Authored-By: Claude Opus 4.6
---
 services/monitoring/rules.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/services/monitoring/rules.yml b/services/monitoring/rules.yml
index cec0b55..a5f8abf 100644
--- a/services/monitoring/rules.yml
+++ b/services/monitoring/rules.yml
@@ -118,13 +118,13 @@ groups:
           description: "NSD has been down on {{ $labels.instance }} more than 5 minutes."
       # Only alert on primary DNS (secondary has cold cache after failover)
       - alert: unbound_low_cache_hit_ratio
-        expr: (rate(unbound_cache_hits_total{dns_role="primary"}[5m]) / (rate(unbound_cache_hits_total{dns_role="primary"}[5m]) + rate(unbound_cache_misses_total{dns_role="primary"}[5m]))) < 0.5
+        expr: (rate(unbound_cache_hits_total{dns_role="primary"}[5m]) / (rate(unbound_cache_hits_total{dns_role="primary"}[5m]) + rate(unbound_cache_misses_total{dns_role="primary"}[5m]))) < 0.2
         for: 15m
         labels:
           severity: warning
         annotations:
           summary: "Low DNS cache hit ratio on {{ $labels.instance }}"
-          description: "Unbound cache hit ratio is below 50% on {{ $labels.instance }}."
+          description: "Unbound cache hit ratio is below 20% on {{ $labels.instance }}."
   - name: http_proxy_rules
     rules:
       - alert: caddy_down