diff --git a/README.md b/README.md index 836514ba..8ed78118 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,41 @@ # GitHub runner operators - ![WIP](https://img.shields.io/badge/status-WIP-yellow) A monorepo containing charms to operate Self-Hosted GitHub Action Runners. -At the moment, it contains initial code for the `webhook-gateway` -application, that receives and forwards GitHub webhooks to an AMQP queue. +## Repository layout + +``` +charms/ + planner-operator/ # Juju charm: GitHub runner planner + cos_custom/ + grafana_dashboards/ # Grafana dashboards for the planner charm + # (served via cos-configuration-k8s, path: charms/planner-operator/cos_custom/grafana_dashboards) + webhook-gateway-operator/ # Juju charm: GitHub webhook gateway + +runner_grafana_dashboards/ # Grafana dashboards for runner VM host metrics + # (served via cos-configuration-k8s, path: runner_grafana_dashboards) +``` + +## Observability: Grafana dashboards + +Dashboards in this repo are delivered to Grafana through +[`cos-configuration-k8s`](https://charmhub.io/cos-configuration-k8s), which syncs +JSON files from this Git repository and provisions them via the `grafana-dashboard` +relation. Provisioned dashboards are **immutable** in Grafana regardless of user +role — they cannot be edited or deleted through the UI. + +### Conventions + +| Directory | Purpose | `grafana_dashboards_path` config value | +|---|---|---| +| `charms/<charm-name>/cos_custom/grafana_dashboards/` | Dashboards for a specific charm's workload metrics | `charms/<charm-name>/cos_custom/grafana_dashboards` | +| `runner_grafana_dashboards/` | Dashboards for runner VM host-level metrics (CPU, memory, disk, network) | `runner_grafana_dashboards` | + +Dashboard JSON files should use `__inputs` to declare the datasource (type `prometheus`). 
+Setting `"editable": false` is recommended for clarity, but is not strictly required: +dashboards delivered through `cos-configuration-k8s` are filesystem-provisioned and +therefore read-only in Grafana regardless of the JSON flag. Metric names follow the +[OpenTelemetry hostmetrics receiver](https://opentelemetry.io/docs/collector/components/#receiver) +Prometheus naming convention (e.g. `system_cpu_time_seconds_total`). diff --git a/runner_grafana_dashboards/runner_vm_hostmetrics.json b/runner_grafana_dashboards/runner_vm_hostmetrics.json new file mode 100644 index 00000000..043ca229 --- /dev/null +++ b/runner_grafana_dashboards/runner_vm_hostmetrics.json @@ -0,0 +1,1100 @@ +{ + "__inputs": [ + { + "name": "prometheusds", + "label": "Prometheus / Mimir", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "bargauge", + "name": "Bar gauge", + "version": "" + }, + { + "type": "panel", + "id": "gauge", + "name": "Gauge", + "version": "" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Host-level resource metrics (CPU, memory, disk, filesystem, network) for GitHub Actions runner VMs, based on the OpenTelemetry hostmetrics receiver. 
Filter by repository, workflow, job and runner to inspect a specific run, or pick \"All\" on a variable to widen the scope.", + "editable": false, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, + "id": 100, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "#EAB839", "value": 0.6}, + {"color": "red", "value": 0.8} + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": {"h": 4, "w": 4, "x": 0, "y": 1}, + "id": 1, + "options": { + "orientation": "auto", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "title": "CPU", + "type": "gauge", + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "1 - avg(rate(system_cpu_time_seconds_total{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",state=\"idle\"}[$__rate_interval]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ] + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "#EAB839", "value": 0.6}, + {"color": "red", "value": 0.8} + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": {"h": 4, "w": 4, "x": 4, "y": 1}, + "id": 2, + "options": { + "orientation": "auto", + 
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "title": "Memory", + "type": "gauge", + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "1 - (sum(system_memory_usage_bytes{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",state=\"free\"}) / sum(system_memory_usage_bytes{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\"}))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ] + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "#EAB839", "value": 0.7}, + {"color": "red", "value": 0.9} + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": {"h": 4, "w": 4, "x": 8, "y": 1}, + "id": 3, + "options": { + "orientation": "auto", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "title": "Root FS", + "type": "gauge", + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "sum(system_filesystem_usage_bytes{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",mountpoint=\"/\",state=\"used\"}) / sum(system_filesystem_usage_bytes{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",mountpoint=\"/\",state=~\"used|free\"})", + 
"legendFormat": "__auto", + "range": true, + "refId": "A" + } + ] + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 4, "w": 4, "x": 12, "y": 1}, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, + "textMode": "auto" + }, + "title": "Load 1m", + "type": "stat", + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "system_cpu_load_average_1m{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\"}", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ] + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 4, "w": 4, "x": 16, "y": 1}, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, + "textMode": "auto" + }, + "title": "CPU Cores", + "type": "stat", + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "count(count by (cpu) (system_cpu_time_seconds_total{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\"}))", + "legendFormat": 
"__auto", + "range": true, + "refId": "A" + } + ] + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": {"h": 4, "w": 4, "x": 20, "y": 1}, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, + "textMode": "auto" + }, + "title": "Total Memory", + "type": "stat", + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "sum(system_memory_usage_bytes{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 5}, + "id": 110, + "panels": [], + "title": "CPU", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"} + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 6}, + "id": 11, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "title": 
"CPU Utilization", + "type": "timeseries", + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "1 - avg(rate(system_cpu_time_seconds_total{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",state=\"idle\"}[$__rate_interval]))", + "legendFormat": "CPU Utilization", + "range": true, + "refId": "A" + } + ] + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"} + }, + "mappings": [], + "min": 0, + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [ + { + "matcher": {"id": "byName", "options": "cores"}, + "properties": [ + {"id": "color", "value": {"fixedColor": "red", "mode": "fixed"}}, + {"id": "custom.lineStyle", "value": {"dash": [10, 10], "fill": "dash"}} + ] + } + ] + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 6}, + "id": 12, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "title": "System Load", + "type": "timeseries", + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "system_cpu_load_average_1m{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\"}", + "legendFormat": "1m", + "range": true, + "refId": "A" + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": 
"system_cpu_load_average_5m{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\"}", + "legendFormat": "5m", + "range": true, + "refId": "B" + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "system_cpu_load_average_15m{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\"}", + "legendFormat": "15m", + "range": true, + "refId": "C" + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "count(count by (cpu) (system_cpu_time_seconds_total{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\"}))", + "legendFormat": "cores", + "range": true, + "refId": "D" + } + ] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 14}, + "id": 120, + "panels": [], + "title": "Memory", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", + "fillOpacity": 50, + "gradientMode": "none", + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "normal"} + }, + "mappings": [], + "min": 0, + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 15}, + "id": 21, + "options": { + "legend": {"calcs": ["mean", "lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "title": "Memory Usage", + "type": "timeseries", + "targets": [ + { + "datasource": {"type": 
"prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "system_memory_usage_bytes{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",state=\"used\"}", + "legendFormat": "used", + "range": true, + "refId": "A" + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "system_memory_usage_bytes{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",state=\"cached\"}", + "legendFormat": "cached", + "range": true, + "refId": "B" + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "system_memory_usage_bytes{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",state=\"buffered\"}", + "legendFormat": "buffered", + "range": true, + "refId": "C" + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "system_memory_usage_bytes{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",state=\"free\"}", + "legendFormat": "free", + "range": true, + "refId": "D" + } + ] + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"} + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": {"h": 
8, "w": 12, "x": 12, "y": 15}, + "id": 22, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "title": "Memory Utilization", + "type": "timeseries", + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "1 - (sum(system_memory_usage_bytes{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",state=\"free\"}) / sum(system_memory_usage_bytes{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\"}))", + "legendFormat": "Memory Utilization", + "range": true, + "refId": "A" + } + ] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 23}, + "id": 130, + "panels": [], + "title": "Disk I/O", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisCenteredZero": true, + "axisLabel": "read (-) / write (+)", + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "Bps" + }, + "overrides": [ + { + "matcher": {"id": "byRegexp", "options": "/.*read/"}, + "properties": [{"id": "custom.transform", "value": "negative-Y"}] + } + ] + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 24}, + "id": 31, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "title": "Disk I/O 
Throughput", + "type": "timeseries", + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "rate(system_disk_io_bytes_total{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",direction=\"read\"}[$__rate_interval])", + "legendFormat": "{{device}} read", + "range": true, + "refId": "A" + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "rate(system_disk_io_bytes_total{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",direction=\"write\"}[$__rate_interval])", + "legendFormat": "{{device}} write", + "range": true, + "refId": "B" + } + ] + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisCenteredZero": true, + "axisLabel": "read (-) / write (+)", + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "iops" + }, + "overrides": [ + { + "matcher": {"id": "byRegexp", "options": "/.*read/"}, + "properties": [{"id": "custom.transform", "value": "negative-Y"}] + } + ] + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 24}, + "id": 32, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "title": "Disk IOPS", + "type": "timeseries", + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": 
"rate(system_disk_operations_total{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",direction=\"read\"}[$__rate_interval])", + "legendFormat": "{{device}} read", + "range": true, + "refId": "A" + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "rate(system_disk_operations_total{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",direction=\"write\"}[$__rate_interval])", + "legendFormat": "{{device}} write", + "range": true, + "refId": "B" + } + ] + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"} + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 24, "x": 0, "y": 32}, + "id": 33, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "title": "Disk Busy %", + "type": "timeseries", + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "rate(system_disk_io_time_seconds_total{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\"}[$__rate_interval])", + "legendFormat": "{{device}}", + "range": true, + "refId": "A" + } + ] + }, + { + "collapsed": false, + "gridPos": 
{"h": 1, "w": 24, "x": 0, "y": 40}, + "id": 140, + "panels": [], + "title": "Filesystem", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "#EAB839", "value": 0.7}, + {"color": "red", "value": 0.9} + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 41}, + "id": 41, + "options": { + "displayMode": "lcd", + "orientation": "horizontal", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, + "showUnfilled": true, + "valueMode": "color" + }, + "title": "Filesystem Utilization", + "type": "bargauge", + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "sum by (mountpoint) (system_filesystem_usage_bytes{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",state=\"used\"}) / sum by (mountpoint) (system_filesystem_usage_bytes{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",state=~\"used|free\"})", + "legendFormat": "{{mountpoint}}", + "range": true, + "refId": "A" + } + ] + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", + "fillOpacity": 50, + "gradientMode": "none", + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"} + }, + "mappings": [], + "min": 0, + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "bytes" + }, + 
"overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 41}, + "id": 42, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "title": "Filesystem Usage", + "type": "timeseries", + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "system_filesystem_usage_bytes{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",state=\"used\"}", + "legendFormat": "{{mountpoint}} used", + "range": true, + "refId": "A" + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "system_filesystem_usage_bytes{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",state=\"free\"}", + "legendFormat": "{{mountpoint}} free", + "range": true, + "refId": "B" + } + ] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 49}, + "id": 150, + "panels": [], + "title": "Network", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisCenteredZero": true, + "axisLabel": "rx (-) / tx (+)", + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "bps" + }, + "overrides": [ + { + "matcher": {"id": "byRegexp", "options": "/.*receive/"}, + "properties": [{"id": "custom.transform", "value": "negative-Y"}] + } + ] + }, + "gridPos": {"h": 8, "w": 12, 
"x": 0, "y": 50}, + "id": 51, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "title": "Network Throughput", + "type": "timeseries", + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "rate(system_network_io_bytes_total{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",direction=\"receive\"}[$__rate_interval]) * 8", + "legendFormat": "{{device}} receive", + "range": true, + "refId": "A" + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "rate(system_network_io_bytes_total{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",direction=\"transmit\"}[$__rate_interval]) * 8", + "legendFormat": "{{device}} transmit", + "range": true, + "refId": "B" + } + ] + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisCenteredZero": true, + "axisLabel": "rx (-) / tx (+)", + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "pps" + }, + "overrides": [ + { + "matcher": {"id": "byRegexp", "options": "/.*receive/"}, + "properties": [{"id": "custom.transform", "value": "negative-Y"}] + } + ] + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 50}, + "id": 52, + "options": { + "legend": {"calcs": ["mean", "max"], "displayMode": "table", "placement": 
"bottom", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "title": "Network Packets", + "type": "timeseries", + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "rate(system_network_packets_total{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",direction=\"receive\"}[$__rate_interval])", + "legendFormat": "{{device}} receive", + "range": true, + "refId": "A" + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "rate(system_network_packets_total{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\",direction=\"transmit\"}[$__rate_interval])", + "legendFormat": "{{device}} transmit", + "range": true, + "refId": "B" + } + ] + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"} + }, + "mappings": [], + "min": 0, + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 58}, + "id": 53, + "options": { + "legend": {"calcs": ["sum"], "displayMode": "table", "placement": "bottom", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "title": "Network Errors", + "type": "timeseries", + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": 
"rate(system_network_errors_total{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\"}[$__rate_interval])", + "legendFormat": "{{device}} {{direction}} errors", + "range": true, + "refId": "A" + } + ] + }, + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"} + }, + "mappings": [], + "min": 0, + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 58}, + "id": 54, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "title": "TCP Connections", + "type": "timeseries", + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "editorMode": "code", + "expr": "sum by (state) (system_network_connections{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\",github_runner=~\"$github_runner\"})", + "legendFormat": "{{state}}", + "range": true, + "refId": "A" + } + ] + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": [ + "github-runner", + "hostmetrics", + "opentelemetry" + ], + "templating": { + "list": [ + { + "name": "prometheusds", + "type": "datasource", + "pluginId": "prometheus", + "query": "prometheus", + "label": "Prometheus", + "hide": 0, + "refresh": 1, + "includeAll": false, + "multi": false + }, + { + "name": "github_repository", + "type": "query", + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, 
+ "label": "Repository", + "query": "label_values(system_cpu_load_average_1m, github_repository)", + "refresh": 2, + "sort": 1, + "includeAll": true, + "multi": false, + "allValue": ".*", + "hide": 0 + }, + { + "name": "github_workflow", + "type": "query", + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "label": "Workflow", + "query": "label_values(system_cpu_load_average_1m{github_repository=~\"$github_repository\"}, github_workflow)", + "refresh": 2, + "sort": 1, + "includeAll": true, + "multi": false, + "allValue": ".*", + "hide": 0 + }, + { + "name": "github_job", + "type": "query", + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "label": "Job", + "query": "label_values(system_cpu_load_average_1m{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\"}, github_job)", + "refresh": 2, + "sort": 1, + "includeAll": true, + "multi": false, + "allValue": ".*", + "hide": 0 + }, + { + "name": "github_runner", + "type": "query", + "datasource": {"type": "prometheus", "uid": "${prometheusds}"}, + "label": "Runner", + "query": "label_values(system_cpu_load_average_1m{github_repository=~\"$github_repository\",github_workflow=~\"$github_workflow\",github_job=~\"$github_job\"}, github_runner)", + "refresh": 2, + "sort": 1, + "includeAll": true, + "multi": false, + "allValue": ".*", + "hide": 0 + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "GitHub Runner VM Hostmetrics", + "uid": "github-runner-vm-hostmetrics", + "version": 1 +}