From b6b1e533eda3723a7e8c2addf6b70d155079caac Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 12 Apr 2026 17:29:18 -0300 Subject: [PATCH] monitoring(jobs): add Ariadne schedule inventory signals --- scripts/dashboards_render_atlas.py | 78 +++++- .../monitoring/dashboards/atlas-jobs.json | 245 +++++++++++++++++- .../monitoring/grafana-dashboard-jobs.yaml | 245 +++++++++++++++++- 3 files changed, 541 insertions(+), 27 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index a6fe0ae0..7a42598e 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -415,13 +415,25 @@ ARIADNE_TASK_FAILURES_SERIES = 'sum(increase(ariadne_task_runs_total{status="err ARIADNE_TASK_WARNINGS_SERIES = ( 'sum(increase(ariadne_task_runs_total{status!~"ok|error"}[$__interval])) or on() vector(0)' ) -ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600" -ARIADNE_SCHEDULE_LAST_ERROR_HOURS = "(time() - ariadne_schedule_last_error_timestamp_seconds) / 3600" +ARIADNE_SCHEDULE_TASK_FILTER = 'task=~"^schedule\\..+$"' +ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = ( + f"(time() - ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}) / 3600" +) +ARIADNE_SCHEDULE_LAST_ERROR_HOURS = ( + f"(time() - ariadne_schedule_last_error_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}) / 3600" +) ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS = ( - "(time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600" + f"(time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}[$__range])) / 3600" ) ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS = ( - "(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600" + f"(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}[$__range])) / 3600" +) +ARIADNE_SCHEDULE_LAST_STATUS = f"ariadne_schedule_last_status{{{ARIADNE_SCHEDULE_TASK_FILTER}}}" +ARIADNE_SCHEDULE_RUNS_RANGE = ( + f'sum by (task) (increase(ariadne_task_runs_total{{{ARIADNE_SCHEDULE_TASK_FILTER}}}[$__range]))' +) +ARIADNE_SCHEDULE_ERRORS_RANGE = ( + f'sum by (task) (increase(ariadne_task_runs_total{{status="error",{ARIADNE_SCHEDULE_TASK_FILTER}}}[$__range]))' ) ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total" PLATFORM_TEST_SUITE_NAMES = [ @@ -2812,6 +2824,14 @@ def build_jobs_dashboard(): {"color": "red", "value": 5}, ], } + schedule_status_thresholds = { + "mode": "absolute", + "steps": [ + {"color": "red", "value": None}, + {"color": "yellow", "value": 0.5}, + {"color": "green", "value": 1}, + ], + } panels.append( bargauge_panel( @@ -2952,12 +2972,13 @@ def build_jobs_dashboard(): panels.append( bargauge_panel( 11, - "Ariadne Schedule Last Success (hours ago)", + "Ariadne Schedule Last Success (hours ago, newest first)", ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS, {"h": 6, "w": 12, "x": 12, "y": 17}, unit="h", instant=True, legend="{{task}}", + sort_order="asc", thresholds=age_thresholds, decimals=2, ) @@ -3072,6 +3093,53 @@ def build_jobs_dashboard(): "Per-run interval pass points (0-100) per suite. Existing suites: ariadne, metis, ananke; additional suites appear automatically when platform_quality_gate_runs_total is published." ) panels.append(suite_panel) + status_panel = bargauge_panel( + 20, + "Ariadne Schedule Last Status", + ARIADNE_SCHEDULE_LAST_STATUS, + {"h": 8, "w": 8, "x": 0, "y": 35}, + unit="none", + instant=True, + legend="{{task}}", + sort_order="asc", + thresholds=schedule_status_thresholds, + decimals=0, + ) + status_panel["description"] = "1 means the last run was ok. 0 means the last run ended in error." + status_panel["fieldConfig"]["defaults"]["mappings"] = [ + { + "type": "value", + "options": { + "0": {"text": "error"}, + "1": {"text": "ok"}, + }, + } + ] + panels.append(status_panel) + schedule_runs_panel = bargauge_panel( + 21, + "Ariadne Schedule Runs (range)", + ARIADNE_SCHEDULE_RUNS_RANGE, + {"h": 8, "w": 8, "x": 8, "y": 35}, + unit="none", + instant=True, + legend="{{task}}", + thresholds={"mode": "absolute", "steps": [{"color": "green", "value": None}]}, + ) + schedule_runs_panel["description"] = "Number of runs by schedule task over the selected dashboard time range." + panels.append(schedule_runs_panel) + schedule_errors_panel = bargauge_panel( + 22, + "Ariadne Schedule Errors (range)", + ARIADNE_SCHEDULE_ERRORS_RANGE, + {"h": 8, "w": 8, "x": 16, "y": 35}, + unit="none", + instant=True, + legend="{{task}}", + thresholds=task_error_thresholds, + ) + schedule_errors_panel["description"] = "Error run count by schedule task over the selected dashboard time range." + panels.append(schedule_errors_panel) return { "uid": "atlas-jobs", diff --git a/services/monitoring/dashboards/atlas-jobs.json b/services/monitoring/dashboards/atlas-jobs.json index eb2ab3bb..6db70077 100644 --- a/services/monitoring/dashboards/atlas-jobs.json +++ b/services/monitoring/dashboards/atlas-jobs.json @@ -616,7 +616,7 @@ }, "targets": [ { - "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600)", + "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600)", "refId": "A", "legendFormat": "{{task}}", "instant": true @@ -678,7 +678,7 @@ { "id": 11, "type": "bargauge", - "title": "Ariadne Schedule Last Success (hours ago)", + "title": "Ariadne Schedule Last Success (hours ago, newest first)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -691,7 +691,7 @@ }, "targets": [ { - "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600)", + "expr": "sort((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600)", "refId": "A", "legendFormat": "{{task}}", "instant": true @@ -745,7 +745,7 @@ "fields": [ "Value" ], - "order": "desc" + "order": "asc" } } ] @@ -1268,37 +1268,37 @@ }, { "refId": "D", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"atlasbot\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0)", "legendFormat": "atlasbot" }, { "refId": "E", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"lesavka\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0)", "legendFormat": "lesavka" }, { "refId": "F", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"pegasus\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0)", "legendFormat": "pegasus" }, { "refId": "G", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"soteria\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0)", "legendFormat": "soteria" }, { "refId": "H", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"titan-iac\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0)", "legendFormat": "titan-iac" }, { "refId": "I", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"bstein-home\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0)", "legendFormat": "bstein-home" }, { "refId": "J", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"arcanagon\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0)", "legendFormat": "arcanagon" }, { @@ -1334,6 +1334,229 @@ } }, "description": "Per-run interval pass points (0-100) per suite. Existing suites: ariadne, metis, ananke; additional suites appear automatically when platform_quality_gate_runs_total is published." + }, + { + "id": 20, + "type": "bargauge", + "title": "Ariadne Schedule Last Status", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 35 + }, + "targets": [ + { + "expr": "sort(ariadne_schedule_last_status{task=~\"^schedule\\..+$\"})", + "refId": "A", + "legendFormat": "{{task}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 0.5 + }, + { + "color": "green", + "value": 1 + } + ] + }, + "decimals": 0, + "mappings": [ + { + "type": "value", + "options": { + "0": { + "text": "error" + }, + "1": { + "text": "ok" + } + } + } + ] + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "asc" + } + } + ], + "description": "1 means the last run was ok. 0 means the last run ended in error." + }, + { + "id": 21, + "type": "bargauge", + "title": "Ariadne Schedule Runs (range)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 35 + }, + "targets": [ + { + "expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{task=~\"^schedule\\..+$\"}[$__range])))", + "refId": "A", + "legendFormat": "{{task}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ], + "description": "Number of runs by schedule task over the selected dashboard time range." + }, + { + "id": 22, + "type": "bargauge", + "title": "Ariadne Schedule Errors (range)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 35 + }, + "targets": [ + { + "expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\",task=~\"^schedule\\..+$\"}[$__range])))", + "refId": "A", + "legendFormat": "{{task}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + } + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ], + "description": "Error run count by schedule task over the selected dashboard time range." } ], "time": { diff --git a/services/monitoring/grafana-dashboard-jobs.yaml b/services/monitoring/grafana-dashboard-jobs.yaml index e47fcf6f..451fe8c8 100644 --- a/services/monitoring/grafana-dashboard-jobs.yaml +++ b/services/monitoring/grafana-dashboard-jobs.yaml @@ -625,7 +625,7 @@ data: }, "targets": [ { - "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600)", + "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600)", "refId": "A", "legendFormat": "{{task}}", "instant": true @@ -687,7 +687,7 @@ data: { "id": 11, "type": "bargauge", - "title": "Ariadne Schedule Last Success (hours ago)", + "title": "Ariadne Schedule Last Success (hours ago, newest first)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -700,7 +700,7 @@ data: }, "targets": [ { - "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600)", + "expr": "sort((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600)", "refId": "A", "legendFormat": "{{task}}", "instant": true @@ -754,7 +754,7 @@ data: "fields": [ "Value" ], - "order": "desc" + "order": "asc" } } ] @@ -1277,37 +1277,37 @@ data: }, { "refId": "D", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"atlasbot\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0)", "legendFormat": "atlasbot" }, { "refId": "E", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"lesavka\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0)", "legendFormat": "lesavka" }, { "refId": "F", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"pegasus\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0)", "legendFormat": "pegasus" }, { "refId": "G", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"soteria\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0)", "legendFormat": "soteria" }, { "refId": "H", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"titan-iac\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0)", "legendFormat": "titan-iac" }, { "refId": "I", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"bstein-home\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0)", "legendFormat": "bstein-home" }, { "refId": "J", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"arcanagon\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0)", "legendFormat": "arcanagon" }, { @@ -1343,6 +1343,229 @@ data: } }, "description": "Per-run interval pass points (0-100) per suite. Existing suites: ariadne, metis, ananke; additional suites appear automatically when platform_quality_gate_runs_total is published." + }, + { + "id": 20, + "type": "bargauge", + "title": "Ariadne Schedule Last Status", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 35 + }, + "targets": [ + { + "expr": "sort(ariadne_schedule_last_status{task=~\"^schedule\\..+$\"})", + "refId": "A", + "legendFormat": "{{task}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 0.5 + }, + { + "color": "green", + "value": 1 + } + ] + }, + "decimals": 0, + "mappings": [ + { + "type": "value", + "options": { + "0": { + "text": "error" + }, + "1": { + "text": "ok" + } + } + } + ] + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "asc" + } + } + ], + "description": "1 means the last run was ok. 0 means the last run ended in error." + }, + { + "id": 21, + "type": "bargauge", + "title": "Ariadne Schedule Runs (range)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 35 + }, + "targets": [ + { + "expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{task=~\"^schedule\\..+$\"}[$__range])))", + "refId": "A", + "legendFormat": "{{task}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ], + "description": "Number of runs by schedule task over the selected dashboard time range." + }, + { + "id": 22, + "type": "bargauge", + "title": "Ariadne Schedule Errors (range)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 35 + }, + "targets": [ + { + "expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\",task=~\"^schedule\\..+$\"}[$__range])))", + "refId": "A", + "legendFormat": "{{task}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + } + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ], + "description": "Error run count by schedule task over the selected dashboard time range." } ], "time": {