Add some alerting rules for smartctl
Some checks failed
Run nix flake check / flake-check (push) Has been cancelled
Some checks failed
Run nix flake check / flake-check (push) Has been cancelled
This commit is contained in:
parent
afa3cc3a57
commit
3797526000
@ -192,6 +192,32 @@
|
||||
annotations:
|
||||
summary: "Mosquitto not running on {{ $labels.instance }}"
|
||||
description: "Mosquitto has been down on {{ $labels.instance }} more than 5 minutes."
|
||||
# Disk-health alerts built on the smartctl_device_* metrics.
# NOTE(review): presumably exported by smartctl_exporter — confirm the scrape job.
# Every rule uses `for: 0m`, so an alert fires on the first evaluation where
# its expression returns a result, with no hold period.
- name: smartctl_rules
  rules:
    # Fires for any device whose critical-warning metric is non-zero.
    - alert: SmartCriticalWarning
      expr: smartctl_device_critical_warning > 0
      for: 0m
      labels:
        severity: critical
      annotations:
        summary: "SMART critical warning (instance {{ $labels.instance }})"
        description: "Disk controller has critical warning on {{ $labels.instance }} drive {{ $labels.device }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
    # Fires for any device reporting at least one media error.
    - alert: SmartMediaErrors
      expr: smartctl_device_media_errors > 0
      for: 0m
      labels:
        severity: critical
      annotations:
        summary: "SMART media errors (instance {{ $labels.instance }})"
        description: "Disk controller detected media errors on {{ $labels.instance }} drive {{ $labels.device }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
    # Fires when a device's available spare drops below its own reported
    # threshold; the comparison pairs series with identical label sets.
    - alert: SmartWearoutIndicator
      expr: smartctl_device_available_spare < smartctl_device_available_spare_threshold
      for: 0m
      labels:
        severity: critical
      annotations:
        summary: "SMART Wearout Indicator (instance {{ $labels.instance }})"
        description: "Device is wearing out on {{ $labels.instance }} drive {{ $labels.device }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
''
|
||||
];
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user