From ffad2dd20502f1899e076674416cb634c3b4a3fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torjus=20H=C3=A5kestad?=
Date: Mon, 9 Feb 2026 16:10:54 +0100
Subject: [PATCH] monitoring: increase zigbee_sensor_stale threshold to 4 hours

The 2-hour threshold was too aggressive for temperature sensors in
stable environments. Historical data shows gaps up to 2.75 hours when
temperature hasn't changed (Home Assistant only updates last_updated
when values change). Increasing to 4 hours avoids false positives while
still catching genuine failures.

Co-Authored-By: Claude Opus 4.5
---
Note: line breaks and context-line indentation were restored from a
whitespace-collapsed copy of this patch; check them against
services/monitoring/rules.yml before applying.

 services/monitoring/rules.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/services/monitoring/rules.yml b/services/monitoring/rules.yml
index 5724d8c..31d99c1 100644
--- a/services/monitoring/rules.yml
+++ b/services/monitoring/rules.yml
@@ -229,13 +229,13 @@ groups:
           summary: "Mosquitto not running on {{ $labels.instance }}"
           description: "Mosquitto has been down on {{ $labels.instance }} more than 5 minutes."
       - alert: zigbee_sensor_stale
-        expr: (time() - hass_last_updated_time_seconds{entity=~"sensor\\.(0x[0-9a-f]+|temp_server)_temperature"}) > 7200
+        expr: (time() - hass_last_updated_time_seconds{entity=~"sensor\\.(0x[0-9a-f]+|temp_server)_temperature"}) > 14400
         for: 5m
         labels:
           severity: warning
         annotations:
           summary: "Zigbee sensor {{ $labels.friendly_name }} is stale"
-          description: "Zigbee temperature sensor {{ $labels.entity }} has not reported data for over 2 hours. The sensor may have a dead battery or connectivity issues."
+          description: "Zigbee temperature sensor {{ $labels.entity }} has not reported data for over 4 hours. The sensor may have a dead battery or connectivity issues."
   - name: smartctl_rules
     rules:
       - alert: smart_critical_warning