2026-01-21 13:37:36 -03:00
# services/monitoring/grafana-dashboard-jobs.yaml
2026-01-18 02:50:07 -03:00
apiVersion : v1
kind : ConfigMap
metadata :
2026-01-21 13:37:36 -03:00
name : grafana-dashboard-jobs
2026-01-18 02:50:07 -03:00
labels :
grafana_dashboard : "1"
data :
2026-01-21 13:37:36 -03:00
atlas-jobs.json : |
2026-01-18 02:50:07 -03:00
{
2026-01-21 13:37:36 -03:00
"uid": "atlas-jobs" ,
"title": "Atlas Jobs" ,
2026-01-18 02:50:07 -03:00
"folderUid": "atlas-internal" ,
"editable": true ,
"panels": [
{
"id": 1 ,
2026-01-21 13:37:36 -03:00
"type": "bargauge" ,
2026-01-21 14:30:55 -03:00
"title": "Ariadne Task Errors (range)" ,
2026-01-21 13:37:36 -03:00
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
},
"gridPos": {
"h": 7 ,
2026-01-21 14:30:55 -03:00
"w": 8 ,
2026-01-21 13:37:36 -03:00
"x": 0 ,
"y": 0
},
"targets": [
{
2026-01-21 15:12:53 -03:00
"expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[$__range])))" ,
2026-01-21 13:37:36 -03:00
"refId": "A" ,
"legendFormat": "{{task}}" ,
"instant": true
}
] ,
"fieldConfig": {
"defaults": {
"unit": "none" ,
"min": 0 ,
"max": null ,
"thresholds": {
"mode": "absolute" ,
"steps": [
{
"color": "green" ,
"value": null
},
{
"color": "yellow" ,
"value": 1
},
{
"color": "orange" ,
"value": 3
},
{
"color": "red" ,
"value": 5
}
]
}
},
"overrides": [ ]
},
"options": {
"displayMode": "gradient" ,
"orientation": "horizontal" ,
"reduceOptions": {
"calcs": [
"lastNotNull"
] ,
"fields": "" ,
"values": false
}
},
"transformations": [
{
"id": "sortBy" ,
"options": {
"fields": [
"Value"
] ,
"order": "desc"
}
}
]
},
{
"id": 2 ,
"type": "timeseries" ,
2026-01-21 15:12:53 -03:00
"title": "Ariadne Attempts / Failures" ,
2026-01-21 13:37:36 -03:00
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
},
"gridPos": {
"h": 7 ,
2026-01-21 14:30:55 -03:00
"w": 8 ,
"x": 8 ,
2026-01-21 13:37:36 -03:00
"y": 0
},
"targets": [
{
2026-01-21 14:30:55 -03:00
"expr": "sum(increase(ariadne_task_runs_total[$__interval]))" ,
2026-01-21 13:37:36 -03:00
"refId": "A" ,
"legendFormat": "Attempts"
},
2026-01-21 14:30:55 -03:00
{
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))" ,
2026-01-21 15:12:53 -03:00
"refId": "B" ,
2026-01-21 13:37:36 -03:00
"legendFormat": "Failures"
}
] ,
"fieldConfig": {
"defaults": {
"unit": "none"
},
2026-01-21 14:30:55 -03:00
"overrides": [
{
"matcher": {
"id": "byName" ,
2026-01-21 15:12:53 -03:00
"options": "Attempts"
2026-01-21 14:30:55 -03:00
},
"properties": [
{
"id": "color" ,
"value": {
"mode": "fixed" ,
2026-01-21 15:12:53 -03:00
"fixedColor": "green"
2026-01-21 14:30:55 -03:00
}
}
]
},
{
"matcher": {
"id": "byName" ,
"options": "Failures"
},
"properties": [
{
"id": "color" ,
"value": {
"mode": "fixed" ,
"fixedColor": "red"
}
}
]
}
]
2026-01-21 13:37:36 -03:00
},
"options": {
"legend": {
"displayMode": "table" ,
"placement": "right"
},
"tooltip": {
"mode": "multi"
}
}
},
{
"id": 3 ,
"type": "bargauge" ,
"title": "One-off Job Pods (age hours)" ,
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
},
"gridPos": {
"h": 7 ,
2026-01-21 14:30:55 -03:00
"w": 8 ,
"x": 16 ,
2026-01-21 13:37:36 -03:00
"y": 0
},
"targets": [
{
2026-01-21 15:12:53 -03:00
"expr": "sort_desc(((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"}))" ,
2026-01-21 13:37:36 -03:00
"refId": "A" ,
"legendFormat": "{{namespace}}/{{pod}}" ,
"instant": true
}
] ,
"fieldConfig": {
"defaults": {
"unit": "h" ,
"min": 0 ,
"max": null ,
"thresholds": {
"mode": "absolute" ,
"steps": [
{
"color": "green" ,
"value": null
},
{
"color": "yellow" ,
"value": 6
},
{
"color": "orange" ,
"value": 24
},
{
"color": "red" ,
"value": 48
}
]
2026-01-21 14:30:55 -03:00
},
"decimals": 2
2026-01-21 13:37:36 -03:00
},
"overrides": [ ]
},
"options": {
"displayMode": "gradient" ,
"orientation": "horizontal" ,
"reduceOptions": {
"calcs": [
"lastNotNull"
] ,
"fields": "" ,
"values": false
}
},
"transformations": [
{
"id": "sortBy" ,
"options": {
"fields": [
"Value"
] ,
"order": "desc"
}
},
{
"id": "limit" ,
"options": {
"limit": 12
}
}
]
},
{
"id": 4 ,
2026-01-18 02:50:07 -03:00
"type": "stat" ,
"title": "Glue Jobs Stale (>36h)" ,
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
},
"gridPos": {
"h": 4 ,
2026-01-21 13:37:36 -03:00
"w": 4 ,
2026-01-18 02:50:07 -03:00
"x": 0 ,
2026-01-21 13:37:36 -03:00
"y": 7
2026-01-18 02:50:07 -03:00
},
"targets": [
{
2026-01-21 14:30:55 -03:00
"expr": "(sum((((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})) > bool 129600) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) or on() vector(0)) + (count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) or on() vector(0))" ,
2026-01-18 02:50:07 -03:00
"refId": "A"
}
] ,
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [ ] ,
"thresholds": {
"mode": "absolute" ,
"steps": [
{
"color": "green" ,
"value": null
},
{
"color": "yellow" ,
"value": 1
},
{
"color": "orange" ,
"value": 2
},
{
"color": "red" ,
"value": 3
}
]
},
"unit": "none" ,
"custom": {
"displayMode": "auto"
}
},
"overrides": [ ]
},
"options": {
"colorMode": "value" ,
"graphMode": "area" ,
"justifyMode": "center" ,
"reduceOptions": {
"calcs": [
"lastNotNull"
] ,
"fields": "" ,
"values": false
},
"textMode": "value"
}
},
{
2026-01-21 13:37:36 -03:00
"id": 5 ,
2026-01-21 11:29:29 -03:00
"type": "stat" ,
2026-01-18 02:50:07 -03:00
"title": "Glue Jobs Missing Success" ,
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
},
"gridPos": {
"h": 4 ,
2026-01-21 11:29:29 -03:00
"w": 4 ,
"x": 4 ,
2026-01-21 13:37:36 -03:00
"y": 7
2026-01-18 02:50:07 -03:00
},
"targets": [
{
2026-01-21 14:30:55 -03:00
"expr": "count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) or on() vector(0)" ,
2026-01-21 11:29:29 -03:00
"refId": "A"
2026-01-18 02:50:07 -03:00
}
] ,
"fieldConfig": {
"defaults": {
2026-01-21 11:29:29 -03:00
"color": {
"mode": "thresholds"
},
"mappings": [ ] ,
"thresholds": {
"mode": "absolute" ,
"steps": [
{
"color": "rgba(115, 115, 115, 1)" ,
"value": null
},
{
"color": "yellow" ,
"value": 1
},
{
"color": "orange" ,
"value": 2
},
{
"color": "red" ,
"value": 3
}
]
},
2026-01-18 02:50:07 -03:00
"unit": "none" ,
"custom": {
2026-01-21 11:29:29 -03:00
"displayMode": "auto"
2026-01-18 02:50:07 -03:00
}
},
"overrides": [ ]
},
"options": {
2026-01-21 11:29:29 -03:00
"colorMode": "value" ,
"graphMode": "area" ,
"justifyMode": "center" ,
"reduceOptions": {
"calcs": [
"lastNotNull"
] ,
"fields": "" ,
"values": false
2026-01-18 02:50:07 -03:00
},
2026-01-21 11:29:29 -03:00
"textMode": "value"
}
2026-01-18 02:50:07 -03:00
},
{
2026-01-21 13:37:36 -03:00
"id": 6 ,
2026-01-21 11:29:29 -03:00
"type": "stat" ,
2026-01-18 02:50:07 -03:00
"title": "Glue Jobs Suspended" ,
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
},
"gridPos": {
"h": 4 ,
2026-01-21 11:29:29 -03:00
"w": 4 ,
"x": 8 ,
2026-01-21 13:37:36 -03:00
"y": 7
2026-01-18 02:50:07 -03:00
},
"targets": [
{
2026-01-21 14:30:55 -03:00
"expr": "sum((kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1) or on() vector(0)" ,
2026-01-21 11:29:29 -03:00
"refId": "A"
2026-01-18 02:50:07 -03:00
}
] ,
"fieldConfig": {
"defaults": {
2026-01-21 11:29:29 -03:00
"color": {
"mode": "thresholds"
},
"mappings": [ ] ,
"thresholds": {
"mode": "absolute" ,
"steps": [
{
"color": "rgba(115, 115, 115, 1)" ,
"value": null
},
{
"color": "green" ,
"value": 1
}
]
},
2026-01-18 02:50:07 -03:00
"unit": "none" ,
"custom": {
2026-01-21 11:29:29 -03:00
"displayMode": "auto"
2026-01-18 02:50:07 -03:00
}
},
"overrides": [ ]
},
"options": {
2026-01-21 11:29:29 -03:00
"colorMode": "value" ,
"graphMode": "area" ,
"justifyMode": "center" ,
"reduceOptions": {
"calcs": [
"lastNotNull"
] ,
"fields": "" ,
"values": false
},
"textMode": "value"
}
},
{
2026-01-21 13:37:36 -03:00
"id": 7 ,
2026-01-21 11:29:29 -03:00
"type": "stat" ,
"title": "Ariadne Task Errors (1h)" ,
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
2026-01-18 02:50:07 -03:00
},
2026-01-21 11:29:29 -03:00
"gridPos": {
"h": 4 ,
"w": 4 ,
"x": 12 ,
2026-01-21 13:37:36 -03:00
"y": 7
2026-01-21 11:29:29 -03:00
},
"targets": [
2026-01-18 02:50:07 -03:00
{
2026-01-21 11:29:29 -03:00
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[1h]))" ,
"refId": "A"
}
] ,
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [ ] ,
"thresholds": {
"mode": "absolute" ,
"steps": [
{
"color": "rgba(115, 115, 115, 1)" ,
"value": null
},
{
"color": "yellow" ,
"value": 1
},
{
"color": "orange" ,
"value": 3
},
{
"color": "red" ,
"value": 5
}
]
},
"unit": "none" ,
"custom": {
"displayMode": "auto"
}
2026-01-18 02:50:07 -03:00
},
2026-01-21 11:29:29 -03:00
"overrides": [ ]
},
"options": {
"colorMode": "value" ,
"graphMode": "area" ,
"justifyMode": "center" ,
"reduceOptions": {
"calcs": [
"lastNotNull"
] ,
"fields": "" ,
"values": false
},
"textMode": "value"
}
},
{
2026-01-21 13:37:36 -03:00
"id": 8 ,
2026-01-21 11:29:29 -03:00
"type": "stat" ,
"title": "Ariadne Task Errors (24h)" ,
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
},
"gridPos": {
"h": 4 ,
"w": 4 ,
"x": 16 ,
2026-01-21 13:37:36 -03:00
"y": 7
2026-01-21 11:29:29 -03:00
},
"targets": [
2026-01-18 02:50:07 -03:00
{
2026-01-21 11:29:29 -03:00
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[24h]))" ,
"refId": "A"
2026-01-18 02:50:07 -03:00
}
2026-01-21 11:29:29 -03:00
] ,
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [ ] ,
"thresholds": {
"mode": "absolute" ,
"steps": [
{
"color": "rgba(115, 115, 115, 1)" ,
"value": null
},
{
"color": "yellow" ,
"value": 1
},
{
"color": "orange" ,
"value": 3
},
{
"color": "red" ,
"value": 5
}
]
},
"unit": "none" ,
"custom": {
"displayMode": "auto"
}
},
"overrides": [ ]
},
"options": {
"colorMode": "value" ,
"graphMode": "area" ,
"justifyMode": "center" ,
"reduceOptions": {
"calcs": [
"lastNotNull"
] ,
"fields": "" ,
"values": false
},
"textMode": "value"
}
2026-01-18 02:50:07 -03:00
},
{
2026-01-21 13:37:36 -03:00
"id": 9 ,
2026-01-21 11:29:29 -03:00
"type": "stat" ,
"title": "Ariadne Task Runs (1h)" ,
2026-01-18 02:50:07 -03:00
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
},
"gridPos": {
"h": 4 ,
2026-01-21 11:29:29 -03:00
"w": 4 ,
"x": 20 ,
2026-01-21 13:37:36 -03:00
"y": 7
2026-01-18 02:50:07 -03:00
},
"targets": [
{
2026-01-21 11:29:29 -03:00
"expr": "sum(increase(ariadne_task_runs_total[1h]))" ,
"refId": "A"
2026-01-18 02:50:07 -03:00
}
] ,
"fieldConfig": {
"defaults": {
2026-01-21 11:29:29 -03:00
"color": {
"mode": "thresholds"
},
"mappings": [ ] ,
"thresholds": {
"mode": "absolute" ,
"steps": [
{
"color": "rgba(115, 115, 115, 1)" ,
"value": null
},
{
"color": "green" ,
"value": 1
}
]
},
2026-01-18 02:50:07 -03:00
"unit": "none" ,
"custom": {
2026-01-21 11:29:29 -03:00
"displayMode": "auto"
2026-01-18 02:50:07 -03:00
}
},
"overrides": [ ]
},
"options": {
2026-01-21 11:29:29 -03:00
"colorMode": "value" ,
"graphMode": "area" ,
"justifyMode": "center" ,
"reduceOptions": {
"calcs": [
"lastNotNull"
] ,
"fields": "" ,
"values": false
},
"textMode": "value"
}
},
{
2026-01-21 13:37:36 -03:00
"id": 10 ,
2026-01-21 11:29:29 -03:00
"type": "bargauge" ,
2026-01-21 13:37:36 -03:00
"title": "Ariadne Schedule Last Error (hours ago)" ,
2026-01-18 02:50:07 -03:00
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
},
"gridPos": {
2026-01-21 14:30:55 -03:00
"h": 6 ,
2026-01-18 02:50:07 -03:00
"w": 12 ,
"x": 0 ,
2026-01-21 14:30:55 -03:00
"y": 17
2026-01-18 02:50:07 -03:00
},
"targets": [
{
2026-01-21 15:12:53 -03:00
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600)" ,
2026-01-18 02:50:07 -03:00
"refId": "A" ,
2026-01-21 11:29:29 -03:00
"legendFormat": "{{task}}" ,
2026-01-18 02:50:07 -03:00
"instant": true
}
] ,
"fieldConfig": {
"defaults": {
2026-01-21 13:37:36 -03:00
"unit": "h" ,
2026-01-21 11:29:29 -03:00
"min": 0 ,
"max": null ,
"thresholds": {
"mode": "absolute" ,
"steps": [
{
2026-01-21 13:37:36 -03:00
"color": "red" ,
2026-01-21 11:29:29 -03:00
"value": null
},
{
2026-01-21 13:37:36 -03:00
"color": "orange" ,
2026-01-21 11:29:29 -03:00
"value": 1
},
{
2026-01-21 13:37:36 -03:00
"color": "yellow" ,
"value": 6
2026-01-21 11:29:29 -03:00
},
{
2026-01-21 13:37:36 -03:00
"color": "green" ,
"value": 24
2026-01-21 11:29:29 -03:00
}
]
2026-01-21 14:30:55 -03:00
},
"decimals": 2
2026-01-18 02:50:07 -03:00
},
"overrides": [ ]
},
"options": {
2026-01-21 11:29:29 -03:00
"displayMode": "gradient" ,
"orientation": "horizontal" ,
"reduceOptions": {
"calcs": [
"lastNotNull"
] ,
"fields": "" ,
"values": false
}
2026-01-18 02:50:07 -03:00
},
"transformations": [
{
"id": "sortBy" ,
"options": {
"fields": [
"Value"
] ,
2026-01-21 15:01:02 -03:00
"order": "desc"
2026-01-18 02:50:07 -03:00
}
}
]
2026-01-21 11:29:29 -03:00
},
{
2026-01-21 13:37:36 -03:00
"id": 11 ,
2026-01-21 11:29:29 -03:00
"type": "bargauge" ,
2026-01-21 13:37:36 -03:00
"title": "Ariadne Schedule Last Success (hours ago)" ,
2026-01-18 02:50:07 -03:00
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
},
"gridPos": {
2026-01-21 14:30:55 -03:00
"h": 6 ,
2026-01-18 02:50:07 -03:00
"w": 12 ,
"x": 12 ,
2026-01-21 14:30:55 -03:00
"y": 17
2026-01-18 02:50:07 -03:00
},
"targets": [
{
2026-01-21 15:12:53 -03:00
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600)" ,
2026-01-18 02:50:07 -03:00
"refId": "A" ,
2026-01-21 11:29:29 -03:00
"legendFormat": "{{task}}" ,
2026-01-18 02:50:07 -03:00
"instant": true
}
] ,
"fieldConfig": {
"defaults": {
2026-01-21 13:37:36 -03:00
"unit": "h" ,
2026-01-21 11:29:29 -03:00
"min": 0 ,
"max": null ,
"thresholds": {
"mode": "absolute" ,
"steps": [
{
2026-01-21 13:37:36 -03:00
"color": "green" ,
2026-01-21 11:29:29 -03:00
"value": null
},
{
2026-01-21 13:37:36 -03:00
"color": "yellow" ,
"value": 6
2026-01-21 11:29:29 -03:00
},
{
2026-01-21 13:37:36 -03:00
"color": "orange" ,
"value": 24
2026-01-21 11:29:29 -03:00
},
{
2026-01-21 13:37:36 -03:00
"color": "red" ,
"value": 48
2026-01-21 11:29:29 -03:00
}
]
2026-01-21 14:30:55 -03:00
},
"decimals": 2
2026-01-18 02:50:07 -03:00
},
"overrides": [ ]
},
"options": {
2026-01-21 11:29:29 -03:00
"displayMode": "gradient" ,
"orientation": "horizontal" ,
"reduceOptions": {
"calcs": [
"lastNotNull"
] ,
"fields": "" ,
"values": false
}
2026-01-18 02:50:07 -03:00
},
"transformations": [
{
"id": "sortBy" ,
"options": {
"fields": [
"Value"
] ,
"order": "desc"
}
}
]
2026-01-19 16:58:02 -03:00
},
2026-01-21 02:57:40 -03:00
{
2026-01-21 13:37:36 -03:00
"id": 12 ,
2026-01-21 11:29:29 -03:00
"type": "bargauge" ,
2026-01-21 13:37:36 -03:00
"title": "Glue Jobs Last Success (hours ago)" ,
2026-01-21 02:57:40 -03:00
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
},
"gridPos": {
2026-01-21 14:30:55 -03:00
"h": 6 ,
2026-01-21 11:29:29 -03:00
"w": 12 ,
2026-01-21 02:57:40 -03:00
"x": 0 ,
2026-01-21 14:30:55 -03:00
"y": 23
2026-01-21 02:57:40 -03:00
},
"targets": [
{
2026-01-21 15:12:53 -03:00
"expr": "sort_desc((time() - max_over_time((kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600)" ,
2026-01-21 02:57:40 -03:00
"refId": "A" ,
2026-01-21 13:37:36 -03:00
"legendFormat": "{{namespace}}/{{cronjob}}" ,
2026-01-21 11:29:29 -03:00
"instant": true
2026-01-21 02:57:40 -03:00
}
] ,
"fieldConfig": {
"defaults": {
2026-01-21 11:29:29 -03:00
"unit": "h" ,
"min": 0 ,
"max": null ,
"thresholds": {
"mode": "absolute" ,
"steps": [
{
2026-01-21 13:37:36 -03:00
"color": "green" ,
2026-01-21 11:29:29 -03:00
"value": null
},
{
"color": "yellow" ,
"value": 6
},
{
2026-01-21 13:37:36 -03:00
"color": "orange" ,
2026-01-21 11:29:29 -03:00
"value": 24
2026-01-21 13:37:36 -03:00
},
{
"color": "red" ,
"value": 48
2026-01-21 11:29:29 -03:00
}
]
2026-01-21 14:30:55 -03:00
},
"decimals": 2
2026-01-21 02:57:40 -03:00
},
"overrides": [ ]
},
"options": {
2026-01-21 11:29:29 -03:00
"displayMode": "gradient" ,
"orientation": "horizontal" ,
"reduceOptions": {
"calcs": [
"lastNotNull"
] ,
"fields": "" ,
"values": false
2026-01-21 02:57:40 -03:00
}
2026-01-21 11:29:29 -03:00
},
"transformations": [
{
"id": "sortBy" ,
"options": {
"fields": [
"Value"
] ,
"order": "desc"
}
}
]
2026-01-21 02:57:40 -03:00
},
2026-01-19 16:58:02 -03:00
{
2026-01-21 13:37:36 -03:00
"id": 13 ,
2026-01-21 11:29:29 -03:00
"type": "bargauge" ,
2026-01-21 13:37:36 -03:00
"title": "Glue Jobs Last Schedule (hours ago)" ,
2026-01-19 16:58:02 -03:00
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
},
"gridPos": {
2026-01-21 14:30:55 -03:00
"h": 6 ,
2026-01-19 16:58:02 -03:00
"w": 12 ,
2026-01-21 11:29:29 -03:00
"x": 12 ,
2026-01-21 14:30:55 -03:00
"y": 23
2026-01-19 16:58:02 -03:00
},
"targets": [
{
2026-01-21 15:12:53 -03:00
"expr": "sort_desc((time() - max_over_time((kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600)" ,
2026-01-19 16:58:02 -03:00
"refId": "A" ,
2026-01-21 13:37:36 -03:00
"legendFormat": "{{namespace}}/{{cronjob}}" ,
2026-01-19 16:58:02 -03:00
"instant": true
}
] ,
"fieldConfig": {
"defaults": {
2026-01-21 11:29:29 -03:00
"unit": "h" ,
"min": 0 ,
"max": null ,
"thresholds": {
"mode": "absolute" ,
"steps": [
{
"color": "green" ,
"value": null
},
{
"color": "yellow" ,
"value": 6
},
{
"color": "orange" ,
"value": 24
},
{
"color": "red" ,
"value": 48
}
]
2026-01-21 14:30:55 -03:00
},
"decimals": 2
2026-01-19 16:58:02 -03:00
},
"overrides": [ ]
},
"options": {
2026-01-21 11:29:29 -03:00
"displayMode": "gradient" ,
"orientation": "horizontal" ,
"reduceOptions": {
"calcs": [
"lastNotNull"
] ,
"fields": "" ,
"values": false
}
2026-01-19 16:58:02 -03:00
},
"transformations": [
{
"id": "sortBy" ,
"options": {
"fields": [
"Value"
] ,
"order": "desc"
}
}
]
},
{
2026-01-21 13:37:36 -03:00
"id": 14 ,
2026-01-21 11:29:29 -03:00
"type": "bargauge" ,
2026-01-21 13:37:36 -03:00
"title": "Ariadne Task Errors (1h)" ,
2026-01-19 16:58:02 -03:00
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
},
"gridPos": {
2026-01-21 14:30:55 -03:00
"h": 6 ,
2026-01-19 16:58:02 -03:00
"w": 12 ,
2026-01-21 11:29:29 -03:00
"x": 0 ,
2026-01-21 14:30:55 -03:00
"y": 29
2026-01-19 16:58:02 -03:00
},
"targets": [
{
2026-01-21 15:12:53 -03:00
"expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[1h])))" ,
2026-01-19 16:58:02 -03:00
"refId": "A" ,
2026-01-21 13:37:36 -03:00
"legendFormat": "{{task}}" ,
2026-01-19 16:58:02 -03:00
"instant": true
}
] ,
"fieldConfig": {
"defaults": {
2026-01-21 13:37:36 -03:00
"unit": "none" ,
2026-01-21 11:29:29 -03:00
"min": 0 ,
"max": null ,
"thresholds": {
"mode": "absolute" ,
"steps": [
{
"color": "green" ,
"value": null
},
{
"color": "yellow" ,
2026-01-21 13:37:36 -03:00
"value": 1
2026-01-21 11:29:29 -03:00
},
{
"color": "orange" ,
2026-01-21 13:37:36 -03:00
"value": 3
2026-01-21 11:29:29 -03:00
},
{
"color": "red" ,
2026-01-21 13:37:36 -03:00
"value": 5
2026-01-21 11:29:29 -03:00
}
]
2026-01-19 16:58:02 -03:00
}
},
"overrides": [ ]
},
"options": {
2026-01-21 11:29:29 -03:00
"displayMode": "gradient" ,
"orientation": "horizontal" ,
"reduceOptions": {
"calcs": [
"lastNotNull"
] ,
"fields": "" ,
"values": false
}
2026-01-19 16:58:02 -03:00
},
"transformations": [
{
"id": "sortBy" ,
"options": {
"fields": [
"Value"
] ,
"order": "desc"
}
}
]
},
{
2026-01-21 13:37:36 -03:00
"id": 15 ,
2026-01-21 11:29:29 -03:00
"type": "bargauge" ,
2026-01-21 13:37:36 -03:00
"title": "Ariadne Task Errors (30d)" ,
2026-01-19 16:58:02 -03:00
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
},
"gridPos": {
2026-01-21 14:30:55 -03:00
"h": 6 ,
2026-01-21 02:57:40 -03:00
"w": 12 ,
"x": 12 ,
2026-01-21 14:30:55 -03:00
"y": 29
2026-01-19 16:58:02 -03:00
},
"targets": [
{
2026-01-21 15:12:53 -03:00
"expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[30d])))" ,
2026-01-19 16:58:02 -03:00
"refId": "A" ,
2026-01-21 13:37:36 -03:00
"legendFormat": "{{task}}" ,
2026-01-19 16:58:02 -03:00
"instant": true
}
] ,
"fieldConfig": {
"defaults": {
2026-01-21 13:37:36 -03:00
"unit": "none" ,
2026-01-21 11:29:29 -03:00
"min": 0 ,
"max": null ,
"thresholds": {
"mode": "absolute" ,
"steps": [
{
"color": "green" ,
"value": null
},
{
"color": "yellow" ,
2026-01-21 13:37:36 -03:00
"value": 1
2026-01-21 11:29:29 -03:00
},
{
"color": "orange" ,
2026-01-21 13:37:36 -03:00
"value": 3
2026-01-21 11:29:29 -03:00
},
{
"color": "red" ,
2026-01-21 13:37:36 -03:00
"value": 5
2026-01-21 11:29:29 -03:00
}
]
2026-01-19 16:58:02 -03:00
}
},
"overrides": [ ]
},
"options": {
2026-01-21 11:29:29 -03:00
"displayMode": "gradient" ,
"orientation": "horizontal" ,
"reduceOptions": {
"calcs": [
"lastNotNull"
] ,
"fields": "" ,
"values": false
}
2026-01-19 16:58:02 -03:00
},
"transformations": [
{
"id": "sortBy" ,
"options": {
"fields": [
"Value"
] ,
"order": "desc"
}
}
]
2026-01-20 23:03:39 -03:00
},
2026-01-21 02:57:40 -03:00
{
2026-01-21 13:37:36 -03:00
"id": 16 ,
2026-01-21 11:29:29 -03:00
"type": "bargauge" ,
"title": "Ariadne Access Requests" ,
2026-01-21 02:57:40 -03:00
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
},
"gridPos": {
"h": 6 ,
2026-01-21 11:29:29 -03:00
"w": 8 ,
2026-01-21 02:57:40 -03:00
"x": 0 ,
2026-01-21 14:30:55 -03:00
"y": 11
2026-01-21 02:57:40 -03:00
},
"targets": [
{
2026-01-21 15:12:53 -03:00
"expr": "sort_desc(ariadne_access_requests_total)" ,
2026-01-21 02:57:40 -03:00
"refId": "A" ,
2026-01-21 11:29:29 -03:00
"legendFormat": "{{status}}" ,
2026-01-21 02:57:40 -03:00
"instant": true
}
] ,
"fieldConfig": {
"defaults": {
2026-01-21 11:29:29 -03:00
"unit": "none" ,
"min": 0 ,
"max": null ,
"thresholds": {
"mode": "absolute" ,
"steps": [
{
"color": "green" ,
"value": null
},
{
"color": "yellow" ,
"value": 50
},
{
"color": "orange" ,
"value": 70
},
{
"color": "red" ,
"value": 85
}
]
2026-01-21 02:57:40 -03:00
}
},
"overrides": [ ]
},
"options": {
2026-01-21 11:29:29 -03:00
"displayMode": "gradient" ,
"orientation": "horizontal" ,
"reduceOptions": {
"calcs": [
"lastNotNull"
] ,
"fields": "" ,
"values": false
}
2026-01-21 02:57:40 -03:00
},
"transformations": [
{
"id": "sortBy" ,
"options": {
"fields": [
"Value"
] ,
"order": "desc"
}
}
]
},
2026-01-20 23:03:39 -03:00
{
2026-01-21 13:37:36 -03:00
"id": 17 ,
2026-01-20 23:03:39 -03:00
"type": "stat" ,
2026-04-04 01:33:15 -03:00
"title": "Platform Test Success Rate (30d)" ,
2026-01-20 23:03:39 -03:00
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
},
"gridPos": {
2026-01-21 11:29:29 -03:00
"h": 6 ,
"w": 4 ,
"x": 8 ,
2026-01-21 14:30:55 -03:00
"y": 11
2026-01-20 23:03:39 -03:00
},
"targets": [
{
2026-04-09 01:41:02 -03:00
"expr": "100 * ((sum(increase(ariadne_task_runs_total{status=\"ok\"}[30d])) or on() vector(0)) + (sum(increase(metis_builds_total{status=\"ok\"}[30d])) or on() vector(0)) + (sum(increase(metis_flashes_total{status=\"ok\"}[30d])) or on() vector(0)) + (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(ariadne_task_runs_total[30d])) or on() vector(0)) + (sum(increase(metis_builds_total[30d])) or on() vector(0)) + (sum(increase(metis_flashes_total[30d])) or on() vector(0)) + (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[30d])) or on() vector(0))), 1)" ,
2026-01-20 23:03:39 -03:00
"refId": "A" ,
"instant": true
}
] ,
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [ ] ,
"thresholds": {
"mode": "absolute" ,
"steps": [
{
"color": "rgba(115, 115, 115, 1)" ,
"value": null
},
{
"color": "green" ,
"value": 1
}
]
},
"unit": "percent" ,
"custom": {
"displayMode": "auto"
},
2026-04-04 01:33:15 -03:00
"decimals": 2
2026-01-20 23:03:39 -03:00
},
"overrides": [ ]
},
"options": {
"colorMode": "value" ,
"graphMode": "area" ,
"justifyMode": "center" ,
"reduceOptions": {
"calcs": [
"lastNotNull"
] ,
"fields": "" ,
"values": false
},
"textMode": "value"
2026-03-31 14:51:49 -03:00
},
2026-04-04 01:33:15 -03:00
"description": "Internal rollup across Ariadne task runs, Metis build/flash outcomes, and Ananke quality-gate runs over the last 30 days."
2026-01-20 23:03:39 -03:00
},
{
2026-01-21 13:37:36 -03:00
"id": 18 ,
2026-01-20 23:03:39 -03:00
"type": "table" ,
2026-04-04 01:33:15 -03:00
"title": "Platform Test Activity (30d)" ,
2026-01-20 23:03:39 -03:00
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
},
"gridPos": {
"h": 6 ,
2026-01-21 11:29:29 -03:00
"w": 12 ,
"x": 12 ,
2026-01-21 14:30:55 -03:00
"y": 11
2026-01-20 23:03:39 -03:00
},
"targets": [
{
2026-04-09 01:41:02 -03:00
"expr": "label_replace(sum by (status) (increase(ariadne_task_runs_total[30d])), \"source\", \"ariadne\", \"__name__\", \".*\") or label_replace(sum by (status) (increase(metis_builds_total[30d])), \"source\", \"metis-build\", \"__name__\", \".*\") or label_replace(sum by (status) (increase(metis_flashes_total[30d])), \"source\", \"metis-flash\", \"__name__\", \".*\") or label_replace(sum by (status) (increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[30d])), \"source\", \"ananke-quality\", \"__name__\", \".*\")" ,
2026-01-20 23:03:39 -03:00
"refId": "A" ,
"instant": true
}
] ,
"fieldConfig": {
"defaults": {
"unit": "none" ,
"custom": {
"filterable": true
}
},
"overrides": [ ]
},
"options": {
"showHeader": true ,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields" ,
"options": {}
},
{
"id": "sortBy" ,
"options": {
"fields": [
"Value"
] ,
"order": "desc"
}
}
2026-03-31 14:51:49 -03:00
] ,
2026-04-04 01:33:15 -03:00
"description": "Atlas Overview test panels depend on this internal activity table, sourced from Ariadne, Metis, and Ananke quality-gate counters."
2026-04-08 23:33:17 -03:00
},
{
"id": 19 ,
"type": "timeseries" ,
"title": "Platform Test Success Rate by Suite" ,
"datasource": {
"type": "prometheus" ,
"uid": "atlas-vm"
},
"gridPos": {
"h": 6 ,
"w": 16 ,
"x": 8 ,
"y": 35
},
"targets": [
{
"refId": "A" ,
2026-04-09 16:14:26 -03:00
"expr": "100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[30d]))) / clamp_min((sum(increase(ariadne_task_runs_total[30d]))), 1)" ,
2026-04-09 15:21:59 -03:00
"legendFormat": "ariadne"
},
{
"refId": "B" ,
2026-04-09 16:14:26 -03:00
"expr": "100 * ((sum(increase(metis_builds_total{status=\"ok\"}[30d])) or on() vector(0)) + (sum(increase(metis_flashes_total{status=\"ok\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(metis_builds_total[30d])) or on() vector(0)) + (sum(increase(metis_flashes_total[30d])) or on() vector(0))), 1)" ,
2026-04-09 15:21:59 -03:00
"legendFormat": "metis"
},
{
"refId": "C" ,
2026-04-09 16:14:26 -03:00
"expr": "100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[30d]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[30d]))), 1)" ,
2026-04-09 15:21:59 -03:00
"legendFormat": "ananke"
2026-04-09 16:16:35 -03:00
},
{
"refId": "D" ,
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[30d]))), 1)" ,
"legendFormat": "atlasbot"
},
{
"refId": "E" ,
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[30d]))), 1)" ,
"legendFormat": "lesavka"
},
{
"refId": "F" ,
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[30d]))), 1)" ,
"legendFormat": "pegasus"
},
{
"refId": "G" ,
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[30d]))), 1)" ,
"legendFormat": "soteria"
},
{
"refId": "H" ,
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[30d]))), 1)" ,
"legendFormat": "titan-iac"
},
{
"refId": "I" ,
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[30d]))), 1)" ,
"legendFormat": "bstein-home"
},
{
"refId": "J" ,
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[30d]))), 1)" ,
"legendFormat": "arcanagon"
2026-04-08 23:33:17 -03:00
}
] ,
"fieldConfig": {
"defaults": {
2026-04-09 14:56:43 -03:00
"unit": "percent" ,
"min": 0 ,
"max": 100
2026-04-08 23:33:17 -03:00
},
"overrides": [ ]
},
"options": {
"legend": {
"displayMode": "list" ,
"placement": "bottom"
},
"tooltip": {
"mode": "multi"
}
},
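2026-04-09 16:16:35 -03:00
"description": "Application-level rolling pass percentage over the last 30 days. ariadne, metis, and ananke are computed from their own counters; atlasbot, lesavka, pegasus, soteria, titan-iac, bstein-home, and arcanagon are computed from platform_quality_gate_runs_total and show no data until that metric is published for the suite."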
2026-01-18 02:50:07 -03:00
}
] ,
"time": {
"from": "now-7d" ,
"to": "now"
},
"annotations": {
"list": [ ]
},
"schemaVersion": 39 ,
"style": "dark" ,
"tags": [
"atlas" ,
2026-01-21 13:37:36 -03:00
"jobs" ,
"glue"
2026-01-18 02:50:07 -03:00
]
}