From 98c4f54f94b0ca7b6b9b85f9e31dfe32b099cd03 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torjus=20H=C3=A5kestad?=
Date: Mon, 9 Feb 2026 22:35:44 +0100
Subject: [PATCH] grafana: add TLS certificates dashboard

Dashboard includes:
- Stat panels for endpoints monitored, probe failures, expiring certs
- Gauge showing minimum days until any cert expires
- Table of all endpoints sorted by expiry (color-coded)
- Probe status table with HTTP status and duration
- Time series graphs for expiry trends and probe success rate

Co-Authored-By: Claude Opus 4.5
---
 services/grafana/dashboards/certificates.json | 442 ++++++++++++++++++
 1 file changed, 442 insertions(+)
 create mode 100644 services/grafana/dashboards/certificates.json

diff --git a/services/grafana/dashboards/certificates.json b/services/grafana/dashboards/certificates.json
new file mode 100644
index 0000000..7951fe4
--- /dev/null
+++ b/services/grafana/dashboards/certificates.json
@@ -0,0 +1,442 @@
+{
+  "uid": "certificates-homelab",
+  "title": "TLS Certificates",
+  "tags": ["certificates", "tls", "security", "homelab"],
+  "timezone": "browser",
+  "schemaVersion": 39,
+  "version": 1,
+  "refresh": "5m",
+  "time": {
+    "from": "now-7d",
+    "to": "now"
+  },
+  "panels": [
+    {
+      "id": 1,
+      "title": "Endpoints Monitored",
+      "type": "stat",
+      "gridPos": {"h": 4, "w": 4, "x": 0, "y": 0},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "count(probe_ssl_earliest_cert_expiry{job=\"blackbox_tls\"})",
+          "legendFormat": "Total",
+          "refId": "A",
+          "instant": true
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {"color": "blue", "value": null}
+            ]
+          }
+        }
+      },
+      "options": {
+        "reduceOptions": {"calcs": ["lastNotNull"]},
+        "colorMode": "value",
+        "graphMode": "none",
+        "textMode": "auto"
+      },
+      "description": "Total number of TLS endpoints being monitored"
+    },
+    {
+      "id": 2,
+      "title": "Probe Failures",
+      "type": "stat",
+      "gridPos": {"h": 4, "w": 4, "x": 4, "y": 0},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "count(probe_success{job=\"blackbox_tls\"} == 0) or vector(0)",
+          "legendFormat": "Failing",
+          "refId": "A",
+          "instant": true
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {"color": "green", "value": null},
+              {"color": "red", "value": 1}
+            ]
+          },
+          "noValue": "0"
+        }
+      },
+      "options": {
+        "reduceOptions": {"calcs": ["lastNotNull"]},
+        "colorMode": "value",
+        "graphMode": "none",
+        "textMode": "auto"
+      },
+      "description": "Number of endpoints where TLS probe is failing"
+    },
+    {
+      "id": 3,
+      "title": "Expiring Soon (< 7d)",
+      "type": "stat",
+      "gridPos": {"h": 4, "w": 4, "x": 8, "y": 0},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "count((probe_ssl_earliest_cert_expiry{job=\"blackbox_tls\"} - time()) < 86400 * 7) or vector(0)",
+          "legendFormat": "Warning",
+          "refId": "A",
+          "instant": true
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {"color": "green", "value": null},
+              {"color": "yellow", "value": 1}
+            ]
+          },
+          "noValue": "0"
+        }
+      },
+      "options": {
+        "reduceOptions": {"calcs": ["lastNotNull"]},
+        "colorMode": "value",
+        "graphMode": "none",
+        "textMode": "auto"
+      },
+      "description": "Certificates expiring within 7 days"
+    },
+    {
+      "id": 4,
+      "title": "Expiring Critical (< 24h)",
+      "type": "stat",
+      "gridPos": {"h": 4, "w": 4, "x": 12, "y": 0},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "count((probe_ssl_earliest_cert_expiry{job=\"blackbox_tls\"} - time()) < 86400) or vector(0)",
+          "legendFormat": "Critical",
+          "refId": "A",
+          "instant": true
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {"color": "green", "value": null},
+              {"color": "red", "value": 1}
+            ]
+          },
+          "noValue": "0"
+        }
+      },
+      "options": {
+        "reduceOptions": {"calcs": ["lastNotNull"]},
+        "colorMode": "value",
+        "graphMode": "none",
+        "textMode": "auto"
+      },
+      "description": "Certificates expiring within 24 hours"
+    },
+    {
+      "id": 5,
+      "title": "Minimum Days Remaining",
+      "type": "gauge",
+      "gridPos": {"h": 4, "w": 8, "x": 16, "y": 0},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "min((probe_ssl_earliest_cert_expiry{job=\"blackbox_tls\"} - time()) / 86400)",
+          "legendFormat": "Days",
+          "refId": "A",
+          "instant": true
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "d",
+          "min": 0,
+          "max": 90,
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {"color": "red", "value": null},
+              {"color": "orange", "value": 7},
+              {"color": "yellow", "value": 14},
+              {"color": "green", "value": 30}
+            ]
+          }
+        }
+      },
+      "options": {
+        "reduceOptions": {"calcs": ["lastNotNull"]},
+        "showThresholdLabels": false,
+        "showThresholdMarkers": true
+      },
+      "description": "Shortest time until any certificate expires"
+    },
+    {
+      "id": 6,
+      "title": "Certificate Expiry by Endpoint",
+      "type": "table",
+      "gridPos": {"h": 12, "w": 12, "x": 0, "y": 4},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "(probe_ssl_earliest_cert_expiry{job=\"blackbox_tls\"} - time()) / 86400",
+          "legendFormat": "{{instance}}",
+          "refId": "A",
+          "instant": true,
+          "format": "table"
+        }
+      ],
+      "transformations": [
+        {
+          "id": "organize",
+          "options": {
+            "excludeByName": {"Time": true, "job": true, "__name__": true},
+            "renameByName": {"instance": "Endpoint", "Value": "Days Until Expiry"}
+          }
+        },
+        {
+          "id": "sortBy",
+          "options": {
+            "sort": [{"field": "Days Until Expiry", "desc": false}]
+          }
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "custom": {
+            "align": "left"
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {"id": "byName", "options": "Days Until Expiry"},
+            "properties": [
+              {"id": "unit", "value": "d"},
+              {"id": "decimals", "value": 1},
+              {"id": "custom.width", "value": 150},
+              {
+                "id": "thresholds",
+                "value": {
+                  "mode": "absolute",
+                  "steps": [
+                    {"color": "red", "value": null},
+                    {"color": "orange", "value": 7},
+                    {"color": "yellow", "value": 14},
+                    {"color": "green", "value": 30}
+                  ]
+                }
+              },
+              {"id": "custom.cellOptions", "value": {"type": "color-background"}}
+            ]
+          }
+        ]
+      },
+      "options": {
+        "showHeader": true,
+        "sortBy": [{"displayName": "Days Until Expiry", "desc": false}]
+      },
+      "description": "All monitored endpoints sorted by days until certificate expiry"
+    },
+    {
+      "id": 7,
+      "title": "Probe Status",
+      "type": "table",
+      "gridPos": {"h": 12, "w": 12, "x": 12, "y": 4},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "max by (instance) (probe_success{job=\"blackbox_tls\"})",
+          "legendFormat": "{{instance}}",
+          "refId": "A",
+          "instant": true,
+          "format": "table"
+        },
+        {
+          "expr": "max by (instance) (probe_http_status_code{job=\"blackbox_tls\"})",
+          "legendFormat": "{{instance}}",
+          "refId": "B",
+          "instant": true,
+          "format": "table"
+        },
+        {
+          "expr": "max by (instance) (probe_duration_seconds{job=\"blackbox_tls\"})",
+          "legendFormat": "{{instance}}",
+          "refId": "C",
+          "instant": true,
+          "format": "table"
+        }
+      ],
+      "transformations": [
+        {
+          "id": "merge"
+        },
+        {
+          "id": "organize",
+          "options": {
+            "excludeByName": {"Time": true, "job": true, "__name__": true},
+            "renameByName": {
+              "instance": "Endpoint",
+              "Value #A": "Success",
+              "Value #B": "HTTP Status",
+              "Value #C": "Duration"
+            }
+          }
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "custom": {"align": "left"}
+        },
+        "overrides": [
+          {
+            "matcher": {"id": "byName", "options": "Success"},
+            "properties": [
+              {"id": "custom.width", "value": 80},
+              {"id": "mappings", "value": [
+                {"type": "value", "options": {"0": {"text": "FAIL", "color": "red"}}},
+                {"type": "value", "options": {"1": {"text": "OK", "color": "green"}}}
+              ]},
+              {"id": "custom.cellOptions", "value": {"type": "color-text"}}
+            ]
+          },
+          {
+            "matcher": {"id": "byName", "options": "HTTP Status"},
+            "properties": [
+              {"id": "custom.width", "value": 100}
+            ]
+          },
+          {
+            "matcher": {"id": "byName", "options": "Duration"},
+            "properties": [
+              {"id": "unit", "value": "s"},
+              {"id": "decimals", "value": 3},
+              {"id": "custom.width", "value": 100}
+            ]
+          }
+        ]
+      },
+      "options": {
+        "showHeader": true
+      },
+      "description": "Probe success status, HTTP response code, and probe duration"
+    },
+    {
+      "id": 8,
+      "title": "Certificate Expiry Over Time",
+      "type": "timeseries",
+      "gridPos": {"h": 8, "w": 24, "x": 0, "y": 16},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "(probe_ssl_earliest_cert_expiry{job=\"blackbox_tls\"} - time()) / 86400",
+          "legendFormat": "{{instance}}",
+          "refId": "A"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "d",
+          "custom": {
+            "lineWidth": 2,
+            "fillOpacity": 10,
+            "showPoints": "never"
+          },
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {"color": "red", "value": null},
+              {"color": "orange", "value": 7},
+              {"color": "yellow", "value": 14},
+              {"color": "green", "value": 30}
+            ]
+          }
+        }
+      },
+      "options": {
+        "legend": {"displayMode": "table", "placement": "right", "calcs": ["lastNotNull"]},
+        "tooltip": {"mode": "multi", "sort": "desc"}
+      },
+      "description": "Days until certificate expiry over time - useful for spotting renewal patterns"
+    },
+    {
+      "id": 9,
+      "title": "Probe Success Rate",
+      "type": "timeseries",
+      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 24},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "avg(probe_success{job=\"blackbox_tls\"}) * 100",
+          "legendFormat": "Success Rate",
+          "refId": "A"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percent",
+          "min": 0,
+          "max": 100,
+          "custom": {
+            "lineWidth": 2,
+            "fillOpacity": 20,
+            "showPoints": "never"
+          },
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {"color": "red", "value": null},
+              {"color": "yellow", "value": 90},
+              {"color": "green", "value": 100}
+            ]
+          },
+          "color": {"mode": "thresholds"}
+        }
+      },
+      "options": {
+        "legend": {"displayMode": "list", "placement": "bottom"},
+        "tooltip": {"mode": "single"}
+      },
+      "description": "Overall probe success rate across all endpoints"
+    },
+    {
+      "id": 10,
+      "title": "Probe Duration",
+      "type": "timeseries",
+      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 24},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "probe_duration_seconds{job=\"blackbox_tls\"}",
+          "legendFormat": "{{instance}}",
+          "refId": "A"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "s",
+          "custom": {
+            "lineWidth": 1,
+            "fillOpacity": 0,
+            "showPoints": "never"
+          }
+        }
+      },
+      "options": {
+        "legend": {"displayMode": "table", "placement": "right", "calcs": ["mean", "max"]},
+        "tooltip": {"mode": "multi", "sort": "desc"}
+      },
+      "description": "Time taken to complete TLS probe for each endpoint"
+    }
+  ]
+}