monitoring: lower unbound cache hit ratio alert threshold to 20%
Some checks failed
Run nix flake check / flake-check (push) Failing after 9m2s
Some checks failed
Run nix flake check / flake-check (push) Failing after 9m2s
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -118,13 +118,13 @@ groups:
|
||||
description: "NSD has been down on {{ $labels.instance }} more than 5 minutes."
|
||||
# Only alert on primary DNS (secondary has cold cache after failover)
|
||||
- alert: unbound_low_cache_hit_ratio
|
||||
expr: (rate(unbound_cache_hits_total{dns_role="primary"}[5m]) / (rate(unbound_cache_hits_total{dns_role="primary"}[5m]) + rate(unbound_cache_misses_total{dns_role="primary"}[5m]))) < 0.5
|
||||
expr: (rate(unbound_cache_hits_total{dns_role="primary"}[5m]) / (rate(unbound_cache_hits_total{dns_role="primary"}[5m]) + rate(unbound_cache_misses_total{dns_role="primary"}[5m]))) < 0.2
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Low DNS cache hit ratio on {{ $labels.instance }}"
|
||||
description: "Unbound cache hit ratio is below 50% on {{ $labels.instance }}."
|
||||
description: "Unbound cache hit ratio is below 20% on {{ $labels.instance }}."
|
||||
- name: http_proxy_rules
|
||||
rules:
|
||||
- alert: caddy_down
|
||||
|
||||
Reference in New Issue
Block a user