diff --git a/services/grafana/dashboards/nixos-fleet.json b/services/grafana/dashboards/nixos-fleet.json
index f7ff960..722d5dd 100644
--- a/services/grafana/dashboards/nixos-fleet.json
+++ b/services/grafana/dashboards/nixos-fleet.json
@@ -628,6 +628,322 @@
         }
       ],
       "description": "Distribution of hosts by tier (test vs prod)"
+    },
+    {
+      "id": 15,
+      "title": "Build Service",
+      "type": "row",
+      "gridPos": {"h": 1, "w": 24, "x": 0, "y": 36},
+      "collapsed": false
+    },
+    {
+      "id": 16,
+      "title": "Builds (24h)",
+      "type": "stat",
+      "gridPos": {"h": 4, "w": 4, "x": 0, "y": 37},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "sum(increase(homelab_deploy_build_host_total{status=\"success\"}[24h]))",
+          "legendFormat": "Builds",
+          "refId": "A"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [{"color": "green", "value": null}]
+          },
+          "noValue": "0",
+          "decimals": 0
+        }
+      },
+      "options": {
+        "reduceOptions": {"calcs": ["lastNotNull"]},
+        "colorMode": "value",
+        "graphMode": "none"
+      },
+      "description": "Successful host builds in the last 24 hours"
+    },
+    {
+      "id": 17,
+      "title": "Failed Builds (24h)",
+      "type": "stat",
+      "gridPos": {"h": 4, "w": 4, "x": 4, "y": 37},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "sum(increase(homelab_deploy_build_host_total{status=\"failure\"}[24h])) or vector(0)",
+          "legendFormat": "Failed",
+          "refId": "A"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {"color": "green", "value": null},
+              {"color": "yellow", "value": 1},
+              {"color": "red", "value": 5}
+            ]
+          },
+          "noValue": "0",
+          "decimals": 0
+        }
+      },
+      "options": {
+        "reduceOptions": {"calcs": ["lastNotNull"]},
+        "colorMode": "value",
+        "graphMode": "none"
+      },
+      "description": "Failed host builds in the last 24 hours"
+    },
+    {
+      "id": 18,
+      "title": "Last Build",
+      "type": "stat",
+      "gridPos": {"h": 4, "w": 4, "x": 8, "y": 37},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "time() - max(homelab_deploy_build_last_timestamp)",
+          "legendFormat": "Last Build",
+          "refId": "A"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "s",
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {"color": "green", "value": null},
+              {"color": "yellow", "value": 86400},
+              {"color": "red", "value": 604800}
+            ]
+          },
+          "noValue": "-"
+        }
+      },
+      "options": {
+        "reduceOptions": {"calcs": ["lastNotNull"]},
+        "colorMode": "value",
+        "graphMode": "none"
+      },
+      "description": "Time since last build attempt (yellow >1d, red >7d)"
+    },
+    {
+      "id": 19,
+      "title": "Avg Build Time",
+      "type": "stat",
+      "gridPos": {"h": 4, "w": 4, "x": 12, "y": 37},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "sum(increase(homelab_deploy_build_duration_seconds_sum[24h])) / (sum(increase(homelab_deploy_build_duration_seconds_count[24h])) > 0)",
+          "legendFormat": "Avg Time",
+          "refId": "A"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "s",
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {"color": "green", "value": null},
+              {"color": "yellow", "value": 30},
+              {"color": "red", "value": 60}
+            ]
+          },
+          "noValue": "-"
+        }
+      },
+      "options": {
+        "reduceOptions": {"calcs": ["lastNotNull"]},
+        "colorMode": "value",
+        "graphMode": "none"
+      },
+      "description": "Average build duration per host over the last 24 hours"
+    },
+    {
+      "id": 20,
+      "title": "Total Hosts Built",
+      "type": "stat",
+      "gridPos": {"h": 4, "w": 4, "x": 16, "y": 37},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "count(count by (host) (homelab_deploy_build_duration_seconds_count))",
+          "legendFormat": "Hosts",
+          "refId": "A"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [{"color": "blue", "value": null}]
+          },
+          "noValue": "0"
+        }
+      },
+      "options": {
+        "reduceOptions": {"calcs": ["lastNotNull"]},
+        "colorMode": "value",
+        "graphMode": "none"
+      },
+      "description": "Total number of unique hosts that have been built"
+    },
+    {
+      "id": 21,
+      "title": "Build Jobs (24h)",
+      "type": "stat",
+      "gridPos": {"h": 4, "w": 4, "x": 20, "y": 37},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "sum(increase(homelab_deploy_builds_total[24h]))",
+          "legendFormat": "Jobs",
+          "refId": "A"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [{"color": "purple", "value": null}]
+          },
+          "noValue": "0",
+          "decimals": 0
+        }
+      },
+      "options": {
+        "reduceOptions": {"calcs": ["lastNotNull"]},
+        "colorMode": "value",
+        "graphMode": "none"
+      },
+      "description": "Total build jobs (each job may build multiple hosts) in the last 24 hours"
+    },
+    {
+      "id": 22,
+      "title": "Build Time by Host",
+      "type": "bargauge",
+      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 41},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "sort_desc(homelab_deploy_build_duration_seconds_sum / homelab_deploy_build_duration_seconds_count)",
+          "legendFormat": "{{host}}",
+          "refId": "A",
+          "instant": true
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "s",
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {"color": "green", "value": null},
+              {"color": "yellow", "value": 15},
+              {"color": "orange", "value": 25},
+              {"color": "red", "value": 45}
+            ]
+          },
+          "min": 0
+        }
+      },
+      "options": {
+        "reduceOptions": {"calcs": ["lastNotNull"]},
+        "orientation": "horizontal",
+        "displayMode": "gradient",
+        "showUnfilled": true
+      },
+      "description": "Average build time per host (green <15s, yellow <25s, orange <45s, red >45s)"
+    },
+    {
+      "id": 23,
+      "title": "Build Count by Host",
+      "type": "bargauge",
+      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 41},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "sort_desc(sum by (host) (homelab_deploy_build_host_total))",
+          "legendFormat": "{{host}}",
+          "refId": "A",
+          "instant": true
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {"color": "blue", "value": null},
+              {"color": "purple", "value": 10}
+            ]
+          },
+          "min": 0
+        }
+      },
+      "options": {
+        "reduceOptions": {"calcs": ["lastNotNull"]},
+        "orientation": "horizontal",
+        "displayMode": "gradient",
+        "showUnfilled": true
+      },
+      "description": "Total build count per host (all time)"
+    },
+    {
+      "id": 24,
+      "title": "Build Activity",
+      "type": "timeseries",
+      "gridPos": {"h": 8, "w": 24, "x": 0, "y": 49},
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "targets": [
+        {
+          "expr": "sum(increase(homelab_deploy_build_host_total{status=\"success\"}[1h]))",
+          "legendFormat": "Successful",
+          "refId": "A"
+        },
+        {
+          "expr": "sum(increase(homelab_deploy_build_host_total{status=\"failure\"}[1h]))",
+          "legendFormat": "Failed",
+          "refId": "B"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "custom": {
+            "lineWidth": 1,
+            "fillOpacity": 30,
+            "showPoints": "never",
+            "stacking": {"mode": "none"}
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {"id": "byName", "options": "Successful"},
+            "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "green"}}]
+          },
+          {
+            "matcher": {"id": "byName", "options": "Failed"},
+            "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}]
+          }
+        ]
+      },
+      "options": {
+        "legend": {
+          "displayMode": "list",
+          "placement": "bottom"
+        },
+        "tooltip": {"mode": "multi", "sort": "desc"}
+      },
+      "description": "Build activity over time (successful vs failed builds per hour)"
     }
   ]
 }