monitoring: make test panel point-based and failure-by-suite

This commit is contained in:
Brad Stein 2026-04-09 19:27:48 -03:00
parent 5cf9a16d97
commit ad1cbd6f85
5 changed files with 139 additions and 140 deletions

View File

@ -441,13 +441,22 @@ TEST_FAILURES_24H_TOTAL = (
'(sum(increase(ananke_quality_gate_runs_total{suite="ananke",status="failed"}[24h])) or on() vector(0)) + ' '(sum(increase(ananke_quality_gate_runs_total{suite="ananke",status="failed"}[24h])) or on() vector(0)) + '
'(sum(increase(platform_quality_gate_runs_total{status!~"ok|passed|success"}[24h])) or on() vector(0))' '(sum(increase(platform_quality_gate_runs_total{status!~"ok|passed|success"}[24h])) or on() vector(0))'
) )
# PromQL for the "Platform Test Failures (24h)" table: the increase in failed
# runs over the trailing 24h, summed per `suite` label and sorted descending.
#
# Ariadne, Metis and Ananke series get a synthetic `suite` label through
# label_replace (the `.*` regex on `__name__` always matches, so the label is
# set unconditionally). platform_quality_gate_runs_total is used with whatever
# `suite` label it already carries (presumably set by the publisher -- verify).
#
# PromQL `or` keeps a right-hand element only when no left-hand element has the
# same label set, and increase() drops the metric name. Both Metis branches are
# tagged suite="metis", so a build series and a flash series whose remaining
# labels are identical would collapse into one and the flash failures would be
# silently lost. The extra `source` label (same values PLATFORM_TEST_ACTIVITY_30D
# uses) keeps them distinct until `sum by (suite)` folds them back together.
#
# NOTE: the generated dashboard JSON embeds this expression; regenerate the
# dashboards after changing it.
PLATFORM_TEST_FAILURES_24H_BY_SUITE = (
    'sort_desc(sum by (suite) ('
    'label_replace(increase(ariadne_task_runs_total{status!="ok"}[24h]), "suite", "ariadne", "__name__", ".*") '
    'or label_replace(label_replace(increase(metis_builds_total{status="error"}[24h]), "suite", "metis", "__name__", ".*"), "source", "metis-build", "__name__", ".*") '
    'or label_replace(label_replace(increase(metis_flashes_total{status="error"}[24h]), "suite", "metis", "__name__", ".*"), "source", "metis-flash", "__name__", ".*") '
    'or label_replace(increase(ananke_quality_gate_runs_total{suite="ananke",status="failed"}[24h]), "suite", "ananke", "__name__", ".*") '
    'or increase(platform_quality_gate_runs_total{status!~"ok|passed|success"}[24h])'
    '))'
)
PLATFORM_TEST_ACTIVITY_30D = ( PLATFORM_TEST_ACTIVITY_30D = (
'label_replace(sum by (status) (increase(ariadne_task_runs_total[30d])), "source", "ariadne", "__name__", ".*") ' 'label_replace(sum by (status) (increase(ariadne_task_runs_total[30d])), "source", "ariadne", "__name__", ".*") '
'or label_replace(sum by (status) (increase(metis_builds_total[30d])), "source", "metis-build", "__name__", ".*") ' 'or label_replace(sum by (status) (increase(metis_builds_total[30d])), "source", "metis-build", "__name__", ".*") '
'or label_replace(sum by (status) (increase(metis_flashes_total[30d])), "source", "metis-flash", "__name__", ".*") ' 'or label_replace(sum by (status) (increase(metis_flashes_total[30d])), "source", "metis-flash", "__name__", ".*") '
'or label_replace(sum by (status) (increase(ananke_quality_gate_runs_total{suite="ananke"}[30d])), "source", "ananke-quality", "__name__", ".*")' 'or label_replace(sum by (status) (increase(ananke_quality_gate_runs_total{suite="ananke"}[30d])), "source", "ananke-quality", "__name__", ".*")'
) )
PLATFORM_TEST_POINT_WINDOW = "$__interval" PLATFORM_TEST_POINT_WINDOW = "1h"
ARIADNE_SUITE_OK_INTERVAL = f'sum(increase(ariadne_task_runs_total{{status="ok"}}[{PLATFORM_TEST_POINT_WINDOW}]))' ARIADNE_SUITE_OK_INTERVAL = f'sum(increase(ariadne_task_runs_total{{status="ok"}}[{PLATFORM_TEST_POINT_WINDOW}]))'
ARIADNE_SUITE_TOTAL_INTERVAL = f'sum(increase(ariadne_task_runs_total[{PLATFORM_TEST_POINT_WINDOW}]))' ARIADNE_SUITE_TOTAL_INTERVAL = f'sum(increase(ariadne_task_runs_total[{PLATFORM_TEST_POINT_WINDOW}]))'
METIS_SUITE_OK_INTERVAL = ( METIS_SUITE_OK_INTERVAL = (
@ -1554,7 +1563,7 @@ def build_overview():
targets=PLATFORM_TEST_SUCCESS_RATE_SUITE_TARGETS, targets=PLATFORM_TEST_SUCCESS_RATE_SUITE_TARGETS,
legend_display="table", legend_display="table",
legend_placement="right", legend_placement="right",
legend_calcs=["last"], legend_calcs=["lastNotNull"],
links=link_to("atlas-jobs"), links=link_to("atlas-jobs"),
) )
test_success["fieldConfig"]["defaults"]["min"] = 0 test_success["fieldConfig"]["defaults"]["min"] = 0
@ -1573,19 +1582,25 @@ def build_overview():
"Per-run interval pass points (0-100) for each software suite over the last 30 days. Points are connected to show trend; missing-run intervals are ignored." "Per-run interval pass points (0-100) for each software suite over the last 30 days. Points are connected to show trend; missing-run intervals are ignored."
) )
panels.append(test_success) panels.append(test_success)
test_failures = stat_panel( panels.append(
47, table_panel(
"Platform Test Failures (24h)", 47,
TEST_FAILURES_24H_TOTAL, "Platform Test Failures (24h)",
{"h": 5, "w": 6, "x": 18, "y": 7}, PLATFORM_TEST_FAILURES_24H_BY_SUITE,
unit="none", {"h": 5, "w": 6, "x": 18, "y": 7},
decimals=0, unit="none",
instant=True, instant=True,
thresholds=count_thresholds, transformations=[
links=link_to("atlas-jobs"), {"id": "labelsToFields", "options": {}},
{"id": "organize", "options": {"excludeByName": {"Time": True}}},
{"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}},
],
options={"showHeader": True, "cellHeight": "sm"},
footer={"show": False},
)
) )
test_failures["description"] = "Total failed test events in the last 24h across Ariadne, Metis, Ananke, and any suites publishing platform_quality_gate_runs_total." panels[-1]["links"] = link_to("atlas-jobs")
panels.append(test_failures) panels[-1]["description"] = "Failures by suite in the last 24 hours. This is a per-suite breakdown, not a single opaque total."
panels.append( panels.append(
stat_panel( stat_panel(

View File

@ -1253,52 +1253,52 @@
"targets": [ "targets": [
{ {
"refId": "A", "refId": "A",
"expr": "(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[$__interval]))) / clamp_min((sum(increase(ariadne_task_runs_total[$__interval]))), 1)) and on() ((sum(increase(ariadne_task_runs_total[$__interval]))) > 0)", "expr": "(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[1h]))) / clamp_min((sum(increase(ariadne_task_runs_total[1h]))), 1)) and on() ((sum(increase(ariadne_task_runs_total[1h]))) > 0)",
"legendFormat": "ariadne" "legendFormat": "ariadne"
}, },
{ {
"refId": "B", "refId": "B",
"expr": "(100 * ((sum(increase(metis_builds_total{status=\"ok\"}[$__interval])) + sum(increase(metis_flashes_total{status=\"ok\"}[$__interval])))) / clamp_min(((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))), 1)) and on() (((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))) > 0)", "expr": "(100 * ((sum(increase(metis_builds_total{status=\"ok\"}[1h])) + sum(increase(metis_flashes_total{status=\"ok\"}[1h])))) / clamp_min(((sum(increase(metis_builds_total[1h])) + sum(increase(metis_flashes_total[1h])))), 1)) and on() (((sum(increase(metis_builds_total[1h])) + sum(increase(metis_flashes_total[1h])))) > 0)",
"legendFormat": "metis" "legendFormat": "metis"
}, },
{ {
"refId": "C", "refId": "C",
"expr": "(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[$__interval]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))), 1)) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[1h]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0)",
"legendFormat": "ananke" "legendFormat": "ananke"
}, },
{ {
"refId": "D", "refId": "D",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0)",
"legendFormat": "atlasbot" "legendFormat": "atlasbot"
}, },
{ {
"refId": "E", "refId": "E",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0)",
"legendFormat": "lesavka" "legendFormat": "lesavka"
}, },
{ {
"refId": "F", "refId": "F",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0)",
"legendFormat": "pegasus" "legendFormat": "pegasus"
}, },
{ {
"refId": "G", "refId": "G",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0)",
"legendFormat": "soteria" "legendFormat": "soteria"
}, },
{ {
"refId": "H", "refId": "H",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0)",
"legendFormat": "titan-iac" "legendFormat": "titan-iac"
}, },
{ {
"refId": "I", "refId": "I",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0)",
"legendFormat": "bstein-home" "legendFormat": "bstein-home"
}, },
{ {
"refId": "J", "refId": "J",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0)",
"legendFormat": "arcanagon" "legendFormat": "arcanagon"
} }
], ],

View File

@ -1861,52 +1861,52 @@
"targets": [ "targets": [
{ {
"refId": "A", "refId": "A",
"expr": "(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[$__interval]))) / clamp_min((sum(increase(ariadne_task_runs_total[$__interval]))), 1)) and on() ((sum(increase(ariadne_task_runs_total[$__interval]))) > 0)", "expr": "(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[1h]))) / clamp_min((sum(increase(ariadne_task_runs_total[1h]))), 1)) and on() ((sum(increase(ariadne_task_runs_total[1h]))) > 0)",
"legendFormat": "ariadne" "legendFormat": "ariadne"
}, },
{ {
"refId": "B", "refId": "B",
"expr": "(100 * ((sum(increase(metis_builds_total{status=\"ok\"}[$__interval])) + sum(increase(metis_flashes_total{status=\"ok\"}[$__interval])))) / clamp_min(((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))), 1)) and on() (((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))) > 0)", "expr": "(100 * ((sum(increase(metis_builds_total{status=\"ok\"}[1h])) + sum(increase(metis_flashes_total{status=\"ok\"}[1h])))) / clamp_min(((sum(increase(metis_builds_total[1h])) + sum(increase(metis_flashes_total[1h])))), 1)) and on() (((sum(increase(metis_builds_total[1h])) + sum(increase(metis_flashes_total[1h])))) > 0)",
"legendFormat": "metis" "legendFormat": "metis"
}, },
{ {
"refId": "C", "refId": "C",
"expr": "(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[$__interval]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))), 1)) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[1h]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0)",
"legendFormat": "ananke" "legendFormat": "ananke"
}, },
{ {
"refId": "D", "refId": "D",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0)",
"legendFormat": "atlasbot" "legendFormat": "atlasbot"
}, },
{ {
"refId": "E", "refId": "E",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0)",
"legendFormat": "lesavka" "legendFormat": "lesavka"
}, },
{ {
"refId": "F", "refId": "F",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0)",
"legendFormat": "pegasus" "legendFormat": "pegasus"
}, },
{ {
"refId": "G", "refId": "G",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0)",
"legendFormat": "soteria" "legendFormat": "soteria"
}, },
{ {
"refId": "H", "refId": "H",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0)",
"legendFormat": "titan-iac" "legendFormat": "titan-iac"
}, },
{ {
"refId": "I", "refId": "I",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0)",
"legendFormat": "bstein-home" "legendFormat": "bstein-home"
}, },
{ {
"refId": "J", "refId": "J",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0)",
"legendFormat": "arcanagon" "legendFormat": "arcanagon"
} }
], ],
@ -1932,7 +1932,7 @@
"displayMode": "table", "displayMode": "table",
"placement": "right", "placement": "right",
"calcs": [ "calcs": [
"last" "lastNotNull"
] ]
}, },
"tooltip": { "tooltip": {
@ -1951,7 +1951,7 @@
}, },
{ {
"id": 47, "id": 47,
"type": "stat", "type": "table",
"title": "Platform Test Failures (24h)", "title": "Platform Test Failures (24h)",
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
@ -1965,59 +1965,51 @@
}, },
"targets": [ "targets": [
{ {
"expr": "(sum(increase(ariadne_task_runs_total{status!=\"ok\"}[24h])) or on() vector(0)) + (sum(increase(metis_builds_total{status=\"error\"}[24h])) or on() vector(0)) + (sum(increase(metis_flashes_total{status=\"error\"}[24h])) or on() vector(0)) + (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"failed\"}[24h])) or on() vector(0)) + (sum(increase(platform_quality_gate_runs_total{status!~\"ok|passed|success\"}[24h])) or on() vector(0))", "expr": "sort_desc(sum by (suite) (label_replace(increase(ariadne_task_runs_total{status!=\"ok\"}[24h]), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(increase(metis_builds_total{status=\"error\"}[24h]), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(increase(metis_flashes_total{status=\"error\"}[24h]), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"failed\"}[24h]), \"suite\", \"ananke\", \"__name__\", \".*\") or increase(platform_quality_gate_runs_total{status!~\"ok|passed|success\"}[24h])))",
"refId": "A", "refId": "A",
"instant": true "instant": true
} }
], ],
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
}
]
},
"unit": "none", "unit": "none",
"custom": { "custom": {
"displayMode": "auto" "filterable": true
}, }
"decimals": 0
}, },
"overrides": [] "overrides": []
}, },
"options": { "options": {
"colorMode": "value", "showHeader": true,
"graphMode": "area", "columnFilters": false,
"justifyMode": "center", "cellHeight": "sm",
"reduceOptions": { "footer": {
"calcs": [ "show": false
"lastNotNull" }
],
"fields": "",
"values": false
},
"textMode": "value"
}, },
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
}
}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
],
"links": [ "links": [
{ {
"title": "Open atlas-jobs dashboard", "title": "Open atlas-jobs dashboard",
@ -2025,7 +2017,7 @@
"targetBlank": true "targetBlank": true
} }
], ],
"description": "Total failed test events in the last 24h across Ariadne, Metis, Ananke, and any suites publishing platform_quality_gate_runs_total." "description": "Failures by suite in the last 24 hours. This is a per-suite breakdown, not a single opaque total."
}, },
{ {
"id": 30, "id": 30,

View File

@ -1262,52 +1262,52 @@ data:
"targets": [ "targets": [
{ {
"refId": "A", "refId": "A",
"expr": "(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[$__interval]))) / clamp_min((sum(increase(ariadne_task_runs_total[$__interval]))), 1)) and on() ((sum(increase(ariadne_task_runs_total[$__interval]))) > 0)", "expr": "(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[1h]))) / clamp_min((sum(increase(ariadne_task_runs_total[1h]))), 1)) and on() ((sum(increase(ariadne_task_runs_total[1h]))) > 0)",
"legendFormat": "ariadne" "legendFormat": "ariadne"
}, },
{ {
"refId": "B", "refId": "B",
"expr": "(100 * ((sum(increase(metis_builds_total{status=\"ok\"}[$__interval])) + sum(increase(metis_flashes_total{status=\"ok\"}[$__interval])))) / clamp_min(((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))), 1)) and on() (((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))) > 0)", "expr": "(100 * ((sum(increase(metis_builds_total{status=\"ok\"}[1h])) + sum(increase(metis_flashes_total{status=\"ok\"}[1h])))) / clamp_min(((sum(increase(metis_builds_total[1h])) + sum(increase(metis_flashes_total[1h])))), 1)) and on() (((sum(increase(metis_builds_total[1h])) + sum(increase(metis_flashes_total[1h])))) > 0)",
"legendFormat": "metis" "legendFormat": "metis"
}, },
{ {
"refId": "C", "refId": "C",
"expr": "(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[$__interval]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))), 1)) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[1h]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0)",
"legendFormat": "ananke" "legendFormat": "ananke"
}, },
{ {
"refId": "D", "refId": "D",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0)",
"legendFormat": "atlasbot" "legendFormat": "atlasbot"
}, },
{ {
"refId": "E", "refId": "E",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0)",
"legendFormat": "lesavka" "legendFormat": "lesavka"
}, },
{ {
"refId": "F", "refId": "F",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0)",
"legendFormat": "pegasus" "legendFormat": "pegasus"
}, },
{ {
"refId": "G", "refId": "G",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0)",
"legendFormat": "soteria" "legendFormat": "soteria"
}, },
{ {
"refId": "H", "refId": "H",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0)",
"legendFormat": "titan-iac" "legendFormat": "titan-iac"
}, },
{ {
"refId": "I", "refId": "I",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0)",
"legendFormat": "bstein-home" "legendFormat": "bstein-home"
}, },
{ {
"refId": "J", "refId": "J",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0)",
"legendFormat": "arcanagon" "legendFormat": "arcanagon"
} }
], ],

View File

@ -1870,52 +1870,52 @@ data:
"targets": [ "targets": [
{ {
"refId": "A", "refId": "A",
"expr": "(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[$__interval]))) / clamp_min((sum(increase(ariadne_task_runs_total[$__interval]))), 1)) and on() ((sum(increase(ariadne_task_runs_total[$__interval]))) > 0)", "expr": "(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[1h]))) / clamp_min((sum(increase(ariadne_task_runs_total[1h]))), 1)) and on() ((sum(increase(ariadne_task_runs_total[1h]))) > 0)",
"legendFormat": "ariadne" "legendFormat": "ariadne"
}, },
{ {
"refId": "B", "refId": "B",
"expr": "(100 * ((sum(increase(metis_builds_total{status=\"ok\"}[$__interval])) + sum(increase(metis_flashes_total{status=\"ok\"}[$__interval])))) / clamp_min(((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))), 1)) and on() (((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))) > 0)", "expr": "(100 * ((sum(increase(metis_builds_total{status=\"ok\"}[1h])) + sum(increase(metis_flashes_total{status=\"ok\"}[1h])))) / clamp_min(((sum(increase(metis_builds_total[1h])) + sum(increase(metis_flashes_total[1h])))), 1)) and on() (((sum(increase(metis_builds_total[1h])) + sum(increase(metis_flashes_total[1h])))) > 0)",
"legendFormat": "metis" "legendFormat": "metis"
}, },
{ {
"refId": "C", "refId": "C",
"expr": "(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[$__interval]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))), 1)) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[1h]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0)",
"legendFormat": "ananke" "legendFormat": "ananke"
}, },
{ {
"refId": "D", "refId": "D",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0)",
"legendFormat": "atlasbot" "legendFormat": "atlasbot"
}, },
{ {
"refId": "E", "refId": "E",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0)",
"legendFormat": "lesavka" "legendFormat": "lesavka"
}, },
{ {
"refId": "F", "refId": "F",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0)",
"legendFormat": "pegasus" "legendFormat": "pegasus"
}, },
{ {
"refId": "G", "refId": "G",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0)",
"legendFormat": "soteria" "legendFormat": "soteria"
}, },
{ {
"refId": "H", "refId": "H",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0)",
"legendFormat": "titan-iac" "legendFormat": "titan-iac"
}, },
{ {
"refId": "I", "refId": "I",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0)",
"legendFormat": "bstein-home" "legendFormat": "bstein-home"
}, },
{ {
"refId": "J", "refId": "J",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[$__interval]))) > 0)", "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0)",
"legendFormat": "arcanagon" "legendFormat": "arcanagon"
} }
], ],
@ -1941,7 +1941,7 @@ data:
"displayMode": "table", "displayMode": "table",
"placement": "right", "placement": "right",
"calcs": [ "calcs": [
"last" "lastNotNull"
] ]
}, },
"tooltip": { "tooltip": {
@ -1960,7 +1960,7 @@ data:
}, },
{ {
"id": 47, "id": 47,
"type": "stat", "type": "table",
"title": "Platform Test Failures (24h)", "title": "Platform Test Failures (24h)",
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
@ -1974,59 +1974,51 @@ data:
}, },
"targets": [ "targets": [
{ {
"expr": "(sum(increase(ariadne_task_runs_total{status!=\"ok\"}[24h])) or on() vector(0)) + (sum(increase(metis_builds_total{status=\"error\"}[24h])) or on() vector(0)) + (sum(increase(metis_flashes_total{status=\"error\"}[24h])) or on() vector(0)) + (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"failed\"}[24h])) or on() vector(0)) + (sum(increase(platform_quality_gate_runs_total{status!~\"ok|passed|success\"}[24h])) or on() vector(0))", "expr": "sort_desc(sum by (suite) (label_replace(increase(ariadne_task_runs_total{status!=\"ok\"}[24h]), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(increase(metis_builds_total{status=\"error\"}[24h]), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(increase(metis_flashes_total{status=\"error\"}[24h]), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"failed\"}[24h]), \"suite\", \"ananke\", \"__name__\", \".*\") or increase(platform_quality_gate_runs_total{status!~\"ok|passed|success\"}[24h])))",
"refId": "A", "refId": "A",
"instant": true "instant": true
} }
], ],
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
}
]
},
"unit": "none", "unit": "none",
"custom": { "custom": {
"displayMode": "auto" "filterable": true
}, }
"decimals": 0
}, },
"overrides": [] "overrides": []
}, },
"options": { "options": {
"colorMode": "value", "showHeader": true,
"graphMode": "area", "columnFilters": false,
"justifyMode": "center", "cellHeight": "sm",
"reduceOptions": { "footer": {
"calcs": [ "show": false
"lastNotNull" }
],
"fields": "",
"values": false
},
"textMode": "value"
}, },
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
}
}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
],
"links": [ "links": [
{ {
"title": "Open atlas-jobs dashboard", "title": "Open atlas-jobs dashboard",
@ -2034,7 +2026,7 @@ data:
"targetBlank": true "targetBlank": true
} }
], ],
"description": "Total failed test events in the last 24h across Ariadne, Metis, Ananke, and any suites publishing platform_quality_gate_runs_total." "description": "Failures by suite in the last 24 hours. This is a per-suite breakdown, not a single opaque total."
}, },
{ {
"id": 30, "id": 30,