{
  "uid": "systemd-homelab",
  "title": "Systemd Services - Homelab",
  "tags": ["systemd", "services", "homelab"],
  "timezone": "browser",
  "schemaVersion": 39,
  "version": 1,
  "refresh": "1m",
  "time": {"from": "now-24h", "to": "now"},
  "templating": {
    "list": [
      {
        "name": "hostname",
        "type": "query",
        "datasource": {"type": "prometheus", "uid": "prometheus"},
        "query": "label_values(systemd_unit_state, hostname)",
        "refresh": 2,
        "includeAll": true,
        "multi": true,
        "current": {"text": "All", "value": "$__all"}
      }
    ]
  },
  "panels": [
    {
      "id": 1,
      "title": "Failed Units",
      "type": "stat",
      "gridPos": {"h": 4, "w": 4, "x": 0, "y": 0},
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "targets": [
        {
          "expr": "count(systemd_unit_state{state=\"failed\", hostname=~\"$hostname\"} == 1) or vector(0)",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {"color": "green", "value": null},
              {"color": "red", "value": 1}
            ]
          },
          "noValue": "0"
        }
      },
      "options": {
        "reduceOptions": {"calcs": ["lastNotNull"]},
        "colorMode": "value",
        "graphMode": "none"
      }
    },
    {
      "id": 2,
      "title": "Active Units",
      "type": "stat",
      "gridPos": {"h": 4, "w": 4, "x": 4, "y": 0},
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "targets": [
        {
          "expr": "count(systemd_unit_state{state=\"active\", hostname=~\"$hostname\"} == 1)",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {"color": "green", "value": null}
            ]
          }
        }
      },
      "options": {
        "reduceOptions": {"calcs": ["lastNotNull"]},
        "colorMode": "value",
        "graphMode": "none"
      }
    },
    {
      "id": 3,
      "title": "Hosts Monitored",
      "type": "stat",
      "gridPos": {"h": 4, "w": 4, "x": 8, "y": 0},
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "targets": [
        {
          "expr": "count(count by (hostname) (systemd_unit_state{hostname=~\"$hostname\"}))",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {"color": "blue", "value": null}
            ]
          }
        }
      },
      "options": {
        "reduceOptions": {"calcs": ["lastNotNull"]},
        "colorMode": "value",
        "graphMode": "none"
      }
    },
    {
      "id": 4,
      "title": "Total Service Restarts",
      "type": "stat",
      "gridPos": {"h": 4, "w": 4, "x": 12, "y": 0},
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "targets": [
        {
          "expr": "sum(systemd_service_restart_total{hostname=~\"$hostname\"})",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {"color": "green", "value": null},
              {"color": "yellow", "value": 10},
              {"color": "orange", "value": 50}
            ]
          },
          "noValue": "0"
        }
      },
      "options": {
        "reduceOptions": {"calcs": ["lastNotNull"]},
        "colorMode": "value",
        "graphMode": "none"
      }
    },
    {
      "id": 5,
      "title": "Inactive Units",
      "type": "stat",
      "gridPos": {"h": 4, "w": 4, "x": 16, "y": 0},
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "targets": [
        {
          "expr": "count(systemd_unit_state{state=\"inactive\", hostname=~\"$hostname\"} == 1)",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {"color": "purple", "value": null}
            ]
          }
        }
      },
      "options": {
        "reduceOptions": {"calcs": ["lastNotNull"]},
        "colorMode": "value",
        "graphMode": "none"
      }
    },
    {
      "id": 6,
      "title": "Timers",
      "type": "stat",
      "gridPos": {"h": 4, "w": 4, "x": 20, "y": 0},
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "targets": [
        {
          "expr": "count(systemd_timer_last_trigger_seconds{hostname=~\"$hostname\"})",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {"color": "blue", "value": null}
            ]
          }
        }
      },
      "options": {
        "reduceOptions": {"calcs": ["lastNotNull"]},
        "colorMode": "value",
        "graphMode": "none"
      }
    },
    {
      "id": 7,
      "title": "Failed Units",
      "type": "table",
      "gridPos": {"h": 6, "w": 12, "x": 0, "y": 4},
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "targets": [
        {
          "expr": "systemd_unit_state{state=\"failed\", hostname=~\"$hostname\"} == 1",
          "format": "table",
          "instant": true,
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {},
        "overrides": [
          {
            "matcher": {"id": "byName", "options": "Host"},
            "properties": [
              {"id": "custom.width", "value": 120}
            ]
          },
          {
            "matcher": {"id": "byName", "options": "Unit"},
            "properties": [
              {"id": "custom.width", "value": 300}
            ]
          }
        ]
      },
      "options": {
        "showHeader": true,
        "sortBy": [
          {"displayName": "Host", "desc": false}
        ]
      },
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "Time": true,
              "Value": true,
              "__name__": true,
              "dns_role": true,
              "instance": true,
              "job": true,
              "role": true,
              "state": true,
              "tier": true,
              "type": true
            },
            "renameByName": {
              "hostname": "Host",
              "name": "Unit"
            }
          }
        }
      ],
      "description": "Units currently in failed state"
    },
    {
      "id": 8,
      "title": "Service Restarts (Top 15)",
      "type": "table",
      "gridPos": {"h": 6, "w": 12, "x": 12, "y": 4},
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "targets": [
        {
          "expr": "topk(15, systemd_service_restart_total{hostname=~\"$hostname\"} > 0)",
          "format": "table",
          "instant": true,
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {},
        "overrides": [
          {
            "matcher": {"id": "byName", "options": "Host"},
            "properties": [
              {"id": "custom.width", "value": 120}
            ]
          },
          {
            "matcher": {"id": "byName", "options": "Service"},
            "properties": [
              {"id": "custom.width", "value": 280}
            ]
          },
          {
            "matcher": {"id": "byName", "options": "Restarts"},
            "properties": [
              {"id": "custom.width", "value": 80}
            ]
          }
        ]
      },
      "options": {
        "showHeader": true,
        "sortBy": [
          {"displayName": "Restarts", "desc": true}
        ]
      },
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "Time": true,
              "__name__": true,
              "dns_role": true,
              "instance": true,
              "job": true,
              "role": true,
              "tier": true
            },
            "renameByName": {
              "hostname": "Host",
              "name": "Service",
              "Value": "Restarts"
            }
          }
        }
      ],
      "description": "Services that have been restarted (since host boot)"
    },
    {
      "id": 9,
      "title": "Active Units per Host",
      "type": "bargauge",
      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 10},
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "targets": [
        {
          "expr": "sort_desc(count by (hostname) (systemd_unit_state{state=\"active\", hostname=~\"$hostname\"} == 1))",
          "legendFormat": "{{hostname}}",
          "refId": "A",
          "instant": true
        }
      ],
      "fieldConfig": {
        "defaults": {
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {"color": "green", "value": null}
            ]
          },
          "min": 0
        }
      },
      "options": {
        "reduceOptions": {"calcs": ["lastNotNull"]},
        "orientation": "horizontal",
        "displayMode": "gradient",
        "showUnfilled": true
      }
    },
    {
      "id": 10,
      "title": "NixOS Upgrade Timers",
      "type": "table",
      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 10},
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "targets": [
        {
          "expr": "systemd_timer_last_trigger_seconds{name=\"nixos-upgrade.timer\", hostname=~\"$hostname\"} * 1000",
          "format": "table",
          "instant": true,
          "refId": "last"
        },
        {
          "expr": "time() - systemd_timer_last_trigger_seconds{name=\"nixos-upgrade.timer\", hostname=~\"$hostname\"}",
          "format": "table",
          "instant": true,
          "refId": "ago"
        }
      ],
      "fieldConfig": {
        "defaults": {},
        "overrides": [
          {
            "matcher": {"id": "byName", "options": "Host"},
            "properties": [
              {"id": "custom.width", "value": 130}
            ]
          },
          {
            "matcher": {"id": "byName", "options": "Last Trigger"},
            "properties": [
              {"id": "unit", "value": "dateTimeAsLocalNoDateIfToday"},
              {"id": "custom.width", "value": 180}
            ]
          },
          {
            "matcher": {"id": "byName", "options": "Time Ago"},
            "properties": [
              {"id": "unit", "value": "s"},
              {"id": "custom.width", "value": 120},
              {
                "id": "thresholds",
                "value": {
                  "mode": "absolute",
                  "steps": [
                    {"color": "green", "value": null},
                    {"color": "yellow", "value": 86400},
                    {"color": "red", "value": 172800}
                  ]
                }
              },
              {"id": "custom.cellOptions", "value": {"type": "color-text"}}
            ]
          }
        ]
      },
      "options": {
        "showHeader": true,
        "sortBy": [
          {"displayName": "Time Ago", "desc": true}
        ]
      },
      "transformations": [
        {
          "id": "joinByField",
          "options": {"byField": "hostname", "mode": "outer"}
        },
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "Time": true,
              "Time 1": true,
              "__name__": true,
              "__name__ 1": true,
              "dns_role": true,
              "dns_role 1": true,
              "instance": true,
              "instance 1": true,
              "job": true,
              "job 1": true,
              "name": true,
              "name 1": true,
              "role": true,
              "role 1": true,
              "tier": true,
              "tier 1": true
            },
            "indexByName": {
              "hostname": 0,
              "Value #last": 1,
              "Value #ago": 2
            },
            "renameByName": {
              "hostname": "Host",
              "Value #last": "Last Trigger",
              "Value #ago": "Time Ago"
            }
          }
        }
      ],
      "description": "When nixos-upgrade.timer last ran on each host. Yellow >24h, Red >48h."
    },
    {
      "id": 11,
      "title": "Backup Timers",
      "type": "table",
      "gridPos": {"h": 6, "w": 12, "x": 0, "y": 18},
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "targets": [
        {
          "expr": "systemd_timer_last_trigger_seconds{name=~\"restic.*\", hostname=~\"$hostname\"} * 1000",
          "format": "table",
          "instant": true,
          "refId": "last"
        },
        {
          "expr": "time() - systemd_timer_last_trigger_seconds{name=~\"restic.*\", hostname=~\"$hostname\"}",
          "format": "table",
          "instant": true,
          "refId": "ago"
        }
      ],
      "fieldConfig": {
        "defaults": {},
        "overrides": [
          {
            "matcher": {"id": "byName", "options": "Host"},
            "properties": [
              {"id": "custom.width", "value": 120}
            ]
          },
          {
            "matcher": {"id": "byName", "options": "Timer"},
            "properties": [
              {"id": "custom.width", "value": 220}
            ]
          },
          {
            "matcher": {"id": "byName", "options": "Last Trigger"},
            "properties": [
              {"id": "unit", "value": "dateTimeAsLocalNoDateIfToday"},
              {"id": "custom.width", "value": 180}
            ]
          },
          {
            "matcher": {"id": "byName", "options": "Time Ago"},
            "properties": [
              {"id": "unit", "value": "s"},
              {"id": "custom.width", "value": 100},
              {
                "id": "thresholds",
                "value": {
                  "mode": "absolute",
                  "steps": [
                    {"color": "green", "value": null},
                    {"color": "yellow", "value": 86400},
                    {"color": "red", "value": 172800}
                  ]
                }
              },
              {"id": "custom.cellOptions", "value": {"type": "color-text"}}
            ]
          }
        ]
      },
      "options": {
        "showHeader": true,
        "sortBy": [
          {"displayName": "Time Ago", "desc": true}
        ]
      },
      "transformations": [
        {
          "id": "merge",
          "options": {}
        },
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "Time": true,
              "__name__": true,
              "dns_role": true,
              "instance": true,
              "job": true,
              "role": true,
              "tier": true
            },
            "indexByName": {
              "hostname": 0,
              "name": 1,
              "Value #last": 2,
              "Value #ago": 3
            },
            "renameByName": {
              "hostname": "Host",
              "name": "Timer",
              "Value #last": "Last Trigger",
              "Value #ago": "Time Ago"
            }
          }
        }
      ],
      "description": "Restic backup timers"
    },
    {
      "id": 12,
      "title": "Service Restarts Over Time",
      "type": "timeseries",
      "gridPos": {"h": 6, "w": 12, "x": 12, "y": 18},
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "targets": [
        {
          "expr": "sum by (hostname) (increase(systemd_service_restart_total{hostname=~\"$hostname\"}[1h]))",
          "legendFormat": "{{hostname}}",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "short",
          "custom": {
            "lineWidth": 1,
            "fillOpacity": 20,
            "showPoints": "never",
            "stacking": {"mode": "normal"}
          }
        }
      },
      "options": {
        "legend": {"displayMode": "list", "placement": "bottom"},
        "tooltip": {"mode": "multi", "sort": "desc"}
      },
      "description": "Service restart rate per hour"
    }
  ]
}