monitoring: lower unbound cache hit ratio alert threshold to 20%
Some checks failed
Run nix flake check / flake-check (push) Failing after 9m2s
Some checks failed
Run nix flake check / flake-check (push) Failing after 9m2s
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -118,13 +118,13 @@ groups:
|
|||||||
description: "NSD has been down on {{ $labels.instance }} more than 5 minutes."
|
description: "NSD has been down on {{ $labels.instance }} more than 5 minutes."
|
||||||
# Only alert on primary DNS (secondary has cold cache after failover)
|
# Only alert on primary DNS (secondary has cold cache after failover)
|
||||||
- alert: unbound_low_cache_hit_ratio
|
- alert: unbound_low_cache_hit_ratio
|
||||||
expr: (rate(unbound_cache_hits_total{dns_role="primary"}[5m]) / (rate(unbound_cache_hits_total{dns_role="primary"}[5m]) + rate(unbound_cache_misses_total{dns_role="primary"}[5m]))) < 0.5
|
expr: (rate(unbound_cache_hits_total{dns_role="primary"}[5m]) / (rate(unbound_cache_hits_total{dns_role="primary"}[5m]) + rate(unbound_cache_misses_total{dns_role="primary"}[5m]))) < 0.2
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
summary: "Low DNS cache hit ratio on {{ $labels.instance }}"
|
summary: "Low DNS cache hit ratio on {{ $labels.instance }}"
|
||||||
description: "Unbound cache hit ratio is below 50% on {{ $labels.instance }}."
|
description: "Unbound cache hit ratio is below 20% on {{ $labels.instance }}."
|
||||||
- name: http_proxy_rules
|
- name: http_proxy_rules
|
||||||
rules:
|
rules:
|
||||||
- alert: caddy_down
|
- alert: caddy_down
|
||||||
|
|||||||
Reference in New Issue
Block a user