diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 4438fb4a..67765a0f 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -515,6 +515,31 @@ PLATFORM_TEST_SUCCESS_RATE_SUITE_TARGETS = [ {"refId": "B", "expr": PLATFORM_TEST_SUCCESS_RATE_METIS_SERIES, "legendFormat": "metis"}, {"refId": "C", "expr": PLATFORM_TEST_SUCCESS_RATE_ANANKE_SERIES, "legendFormat": "ananke"}, ] + PLATFORM_TEST_GENERIC_SUITE_TARGETS + +PLATFORM_TEST_SUCCESS_RATE_24H_NATIVE_BY_SUITE = ( + 'label_replace(' + '(100 * (sum(increase(ariadne_task_runs_total{status="ok"}[24h]))) / clamp_min((sum(increase(ariadne_task_runs_total[24h]))), 1)) ' + 'and on() ((sum(increase(ariadne_task_runs_total[24h]))) > 0), ' + '"suite", "ariadne", "__name__", ".*") ' + 'or label_replace(' + '(100 * ((sum(increase(metis_builds_total{status="ok"}[24h])) + sum(increase(metis_flashes_total{status="ok"}[24h])))) ' + '/ clamp_min(((sum(increase(metis_builds_total[24h])) + sum(increase(metis_flashes_total[24h])))), 1)) ' + 'and on() (((sum(increase(metis_builds_total[24h])) + sum(increase(metis_flashes_total[24h])))) > 0), ' + '"suite", "metis", "__name__", ".*") ' + 'or label_replace(' + '(100 * (sum(increase(ananke_quality_gate_runs_total{suite="ananke",status="ok"}[24h]))) ' + '/ clamp_min((sum(increase(ananke_quality_gate_runs_total{suite="ananke"}[24h]))), 1)) ' + 'and on() ((sum(increase(ananke_quality_gate_runs_total{suite="ananke"}[24h]))) > 0), ' + '"suite", "ananke", "__name__", ".*")' +) +PLATFORM_TEST_SUCCESS_RATE_24H_GENERIC_BY_SUITE = ( + '(100 * (sum by (suite) (increase(platform_quality_gate_runs_total{status=~"ok|passed|success"}[24h]))) ' + '/ clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total[24h]))), 1)) ' + 'and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total[24h]))) > 0)' +) +PLATFORM_TEST_SUCCESS_RATE_24H_BY_SUITE = ( + f'sort_desc(({PLATFORM_TEST_SUCCESS_RATE_24H_NATIVE_BY_SUITE}) or ({PLATFORM_TEST_SUCCESS_RATE_24H_GENERIC_BY_SUITE}))' +) ANANKE_SELECTOR = 'job="ananke-power"' ANANKE_UPS_DB_NAME = "Pyrphoros" ANANKE_UPS_DB_NODE = "titan-db" @@ -1583,24 +1608,29 @@ def build_overview(): ) panels.append(test_success) panels.append( - table_panel( + bargauge_panel( 47, - "Platform Test Failures (24h)", - PLATFORM_TEST_FAILURES_24H_BY_SUITE, + "Platform Suite Pass Rate (24h)", + PLATFORM_TEST_SUCCESS_RATE_24H_BY_SUITE, {"h": 5, "w": 6, "x": 18, "y": 7}, - unit="none", + unit="percent", instant=True, - transformations=[ - {"id": "labelsToFields", "options": {}}, - {"id": "organize", "options": {"excludeByName": {"Time": True}}}, - {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}, - ], - options={"showHeader": True, "cellHeight": "sm"}, - footer={"show": False}, + legend="{{suite}}", + sort_order="desc", + thresholds={ + "mode": "absolute", + "steps": [ + {"color": "red", "value": None}, + {"color": "yellow", "value": 80}, + {"color": "green", "value": 95}, + ], + }, ) ) panels[-1]["links"] = link_to("atlas-jobs") - panels[-1]["description"] = "Failures by suite in the last 24 hours. This is a per-suite breakdown, not a single opaque total." + panels[-1]["description"] = ( + "24-hour per-suite pass-rate snapshot. This complements the 7-day trend by showing each suite's current quality posture." + ) panels.append( stat_panel( diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index a9a925d8..7bdb67a7 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -1951,8 +1951,8 @@ }, { "id": 47, - "type": "table", - "title": "Platform Test Failures (24h)", + "type": "bargauge", + "title": "Platform Suite Pass Rate (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1965,41 +1965,49 @@ }, "targets": [ { - "expr": "sort_desc(sum by (suite) (label_replace(increase(ariadne_task_runs_total{status!=\"ok\"}[24h]), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(increase(metis_builds_total{status=\"error\"}[24h]), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(increase(metis_flashes_total{status=\"error\"}[24h]), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"failed\"}[24h]), \"suite\", \"ananke\", \"__name__\", \".*\") or increase(platform_quality_gate_runs_total{status!~\"ok|passed|success\"}[24h])))", + "expr": "sort_desc((label_replace((100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[24h]))) / clamp_min((sum(increase(ariadne_task_runs_total[24h]))), 1)) and on() ((sum(increase(ariadne_task_runs_total[24h]))) > 0), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace((100 * ((sum(increase(metis_builds_total{status=\"ok\"}[24h])) + sum(increase(metis_flashes_total{status=\"ok\"}[24h])))) / clamp_min(((sum(increase(metis_builds_total[24h])) + sum(increase(metis_flashes_total[24h])))), 1)) and on() (((sum(increase(metis_builds_total[24h])) + sum(increase(metis_flashes_total[24h])))) > 0), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace((100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[24h]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[24h]))), 1)) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[24h]))) > 0), \"suite\", \"ananke\", \"__name__\", \".*\")) or ((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total[24h]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total[24h]))) > 0)))", "refId": "A", + "legendFormat": "{{suite}}", "instant": true } ], "fieldConfig": { "defaults": { - "unit": "none", - "custom": { - "filterable": true + "unit": "percent", + "min": 0, + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 80 + }, + { + "color": "green", + "value": 95 + } + ] } }, "overrides": [] }, "options": { - "showHeader": true, - "columnFilters": false, - "cellHeight": "sm", - "footer": { - "show": false + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false } }, "transformations": [ - { - "id": "labelsToFields", - "options": {} - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - } - } - }, { "id": "sortBy", "options": { @@ -2017,7 +2025,7 @@ "targetBlank": true } ], - "description": "Failures by suite in the last 24 hours. This is a per-suite breakdown, not a single opaque total." + "description": "24-hour per-suite pass-rate snapshot. This complements the 7-day trend by showing each suite's current quality posture." }, { "id": 30, diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 673347d5..92fda082 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -1960,8 +1960,8 @@ data: }, { "id": 47, - "type": "table", - "title": "Platform Test Failures (24h)", + "type": "bargauge", + "title": "Platform Suite Pass Rate (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1974,41 +1974,49 @@ data: }, "targets": [ { - "expr": "sort_desc(sum by (suite) (label_replace(increase(ariadne_task_runs_total{status!=\"ok\"}[24h]), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(increase(metis_builds_total{status=\"error\"}[24h]), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(increase(metis_flashes_total{status=\"error\"}[24h]), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"failed\"}[24h]), \"suite\", \"ananke\", \"__name__\", \".*\") or increase(platform_quality_gate_runs_total{status!~\"ok|passed|success\"}[24h])))", + "expr": "sort_desc((label_replace((100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[24h]))) / clamp_min((sum(increase(ariadne_task_runs_total[24h]))), 1)) and on() ((sum(increase(ariadne_task_runs_total[24h]))) > 0), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace((100 * ((sum(increase(metis_builds_total{status=\"ok\"}[24h])) + sum(increase(metis_flashes_total{status=\"ok\"}[24h])))) / clamp_min(((sum(increase(metis_builds_total[24h])) + sum(increase(metis_flashes_total[24h])))), 1)) and on() (((sum(increase(metis_builds_total[24h])) + sum(increase(metis_flashes_total[24h])))) > 0), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace((100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[24h]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[24h]))), 1)) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[24h]))) > 0), \"suite\", \"ananke\", \"__name__\", \".*\")) or ((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total[24h]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total[24h]))) > 0)))", "refId": "A", + "legendFormat": "{{suite}}", "instant": true } ], "fieldConfig": { "defaults": { - "unit": "none", - "custom": { - "filterable": true + "unit": "percent", + "min": 0, + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 80 + }, + { + "color": "green", + "value": 95 + } + ] } }, "overrides": [] }, "options": { - "showHeader": true, - "columnFilters": false, - "cellHeight": "sm", - "footer": { - "show": false + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false } }, "transformations": [ - { - "id": "labelsToFields", - "options": {} - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - } - } - }, { "id": "sortBy", "options": { @@ -2026,7 +2034,7 @@ data: "targetBlank": true } ], - "description": "Failures by suite in the last 24 hours. This is a per-suite breakdown, not a single opaque total." + "description": "24-hour per-suite pass-rate snapshot. This complements the 7-day trend by showing each suite's current quality posture." }, { "id": 30,