monitoring: remove duplicate rules from blackbox.nix
All checks were successful
Run nix flake check / flake-check (push) Successful in 2m7s
All checks were successful
Run nix flake check / flake-check (push) Successful in 2m7s
The alert rules had already been added to rules.yml, but blackbox.nix still defined them as well, causing duplicate 'groups' key errors.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -42,68 +42,35 @@ in
|
|||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
# Add blackbox scrape config and alert rules to Prometheus
|
# Add blackbox scrape config to Prometheus
|
||||||
services.prometheus = {
|
# Alert rules are in rules.yml (certificate_rules group)
|
||||||
scrapeConfigs = [
|
services.prometheus.scrapeConfigs = [
|
||||||
{
|
{
|
||||||
job_name = "blackbox_tls";
|
job_name = "blackbox_tls";
|
||||||
metrics_path = "/probe";
|
metrics_path = "/probe";
|
||||||
params = {
|
params = {
|
||||||
module = [ "https_cert" ];
|
module = [ "https_cert" ];
|
||||||
};
|
};
|
||||||
static_configs = [{
|
static_configs = [{
|
||||||
targets = tlsTargets;
|
targets = tlsTargets;
|
||||||
}];
|
}];
|
||||||
relabel_configs = [
|
relabel_configs = [
|
||||||
# Pass the target URL to blackbox as a parameter
|
# Pass the target URL to blackbox as a parameter
|
||||||
{
|
{
|
||||||
source_labels = [ "__address__" ];
|
source_labels = [ "__address__" ];
|
||||||
target_label = "__param_target";
|
target_label = "__param_target";
|
||||||
}
|
}
|
||||||
# Use the target URL as the instance label
|
# Use the target URL as the instance label
|
||||||
{
|
{
|
||||||
source_labels = [ "__param_target" ];
|
source_labels = [ "__param_target" ];
|
||||||
target_label = "instance";
|
target_label = "instance";
|
||||||
}
|
}
|
||||||
# Point the actual scrape at the local blackbox exporter
|
# Point the actual scrape at the local blackbox exporter
|
||||||
{
|
{
|
||||||
target_label = "__address__";
|
target_label = "__address__";
|
||||||
replacement = "127.0.0.1:9115";
|
replacement = "127.0.0.1:9115";
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
|
||||||
rules = [
|
|
||||||
''
|
|
||||||
groups:
|
|
||||||
- name: certificate_rules
|
|
||||||
rules:
|
|
||||||
- alert: tls_certificate_expiring_soon
|
|
||||||
expr: (probe_ssl_earliest_cert_expiry - time()) < 86400 * 7
|
|
||||||
for: 1h
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: "TLS certificate expiring soon on {{ $labels.instance }}"
|
|
||||||
description: "The TLS certificate for {{ $labels.instance }} expires in less than 7 days."
|
|
||||||
- alert: tls_certificate_expiring_critical
|
|
||||||
expr: (probe_ssl_earliest_cert_expiry - time()) < 86400
|
|
||||||
for: 0m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: "TLS certificate expiring within 24h on {{ $labels.instance }}"
|
|
||||||
description: "The TLS certificate for {{ $labels.instance }} expires in less than 24 hours. Immediate action required."
|
|
||||||
- alert: tls_probe_failed
|
|
||||||
expr: probe_success{job="blackbox_tls"} == 0
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: "TLS probe failed for {{ $labels.instance }}"
|
|
||||||
description: "Cannot connect to {{ $labels.instance }} to check TLS certificate. The service may be down or unreachable."
|
|
||||||
''
|
|
||||||
];
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user