monitoring/atlas: resize test/ops rows and source overview tests from atlas-jobs

This commit is contained in:
Brad Stein 2026-04-09 13:39:55 -03:00
parent 764bfe189e
commit 293cd83999
3 changed files with 142 additions and 126 deletions

View File

@ -1237,7 +1237,7 @@ def build_overview():
panel_id,
title,
f"{expr}",
{"h": 3, "w": 6, "x": 6 * idx, "y": 5},
{"h": 2, "w": 6, "x": 6 * idx, "y": 5},
unit=unit,
thresholds=PERCENT_THRESHOLDS if unit == "percent" else None,
text_mode="name_and_value",
@ -1286,7 +1286,7 @@ def build_overview():
panel_id,
title,
expr,
{"h": 3, "w": 6, "x": 6 * idx, "y": 8},
{"h": 2, "w": 6, "x": 6 * idx, "y": 7},
unit=unit,
thresholds=PERCENT_THRESHOLDS if unit == "percent" else None,
links=link_to("atlas-storage"),
@ -1308,7 +1308,7 @@ def build_overview():
40,
"UPS Current Load",
None,
{"h": 6, "w": 4, "x": 0, "y": 14},
{"h": 5, "w": 4, "x": 0, "y": 13},
unit="none",
decimals=1,
text_mode="name_and_value",
@ -1355,7 +1355,7 @@ def build_overview():
41,
"UPS History (Power Draw)",
None,
{"h": 6, "w": 4, "x": 4, "y": 14},
{"h": 5, "w": 4, "x": 4, "y": 13},
unit="watt",
targets=[
{"refId": "A", "expr": ANANKE_UPS_DRAW_WATTS_DB_SERIES, "legendFormat": ANANKE_UPS_DB_NAME},
@ -1372,7 +1372,7 @@ def build_overview():
42,
"Current Climate",
None,
{"h": 6, "w": 4, "x": 8, "y": 14},
{"h": 5, "w": 4, "x": 8, "y": 13},
unit="none",
decimals=2,
text_mode="value",
@ -1395,7 +1395,7 @@ def build_overview():
43,
"Climate History",
None,
{"h": 6, "w": 4, "x": 12, "y": 14},
{"h": 5, "w": 4, "x": 12, "y": 13},
unit="celsius",
targets=[
{"refId": "A", "expr": CLIMATE_TEMP_SERIES, "legendFormat": "Temperature (°C)"},
@ -1422,7 +1422,7 @@ def build_overview():
140,
"Fan Activity",
None,
{"h": 6, "w": 4, "x": 16, "y": 14},
{"h": 5, "w": 4, "x": 16, "y": 13},
unit="none",
decimals=0,
text_mode="name_and_value",
@ -1448,7 +1448,7 @@ def build_overview():
141,
"Fan History (0-10)",
None,
{"h": 6, "w": 4, "x": 20, "y": 14},
{"h": 5, "w": 4, "x": 20, "y": 13},
unit="none",
max_value=10,
targets=[
@ -1468,7 +1468,7 @@ def build_overview():
44,
"One-off Job Pods >1h",
f"({ONEOFF_JOB_POD_AGE_HOURS}) > 1",
{"h": 3, "w": 6, "x": 0, "y": 11},
{"h": 4, "w": 6, "x": 0, "y": 9},
unit="h",
instant=True,
transformations=[
@ -1484,7 +1484,7 @@ def build_overview():
45,
"Ariadne Attempts (24h)",
"sum(increase(ariadne_task_runs_total[24h]))",
{"h": 3, "w": 6, "x": 6, "y": 11},
{"h": 4, "w": 6, "x": 6, "y": 9},
unit="none",
decimals=0,
links=link_to("atlas-jobs"),
@ -1494,29 +1494,31 @@ def build_overview():
46,
"Platform Test Success Rate",
None,
{"h": 3, "w": 6, "x": 12, "y": 11},
{"h": 4, "w": 6, "x": 12, "y": 9},
unit="percent",
targets=[
{
"refId": "A",
"expr": PLATFORM_TEST_SUCCESS_RATE_BY_SUITE_SERIES,
"legendFormat": "{{suite}}",
"datasource": {"type": "datasource", "uid": "-- Dashboard --"},
"dashboardUid": "atlas-jobs",
"panelId": 19,
}
],
legend_display="list",
legend_placement="bottom",
legend_display="table",
legend_placement="right",
legend_calcs=["last"],
links=link_to("atlas-jobs"),
)
test_success["datasource"] = {"type": "datasource", "uid": "-- Dashboard --"}
test_success["description"] = (
"Rolling pass rate per platform suite (Ariadne task automation + Metis build/flash pipelines + Ananke quality gate). "
"Each line tracks pass percentage over time for its suite."
"Sourced directly from Atlas Jobs internal panel #19 (atlas-jobs) so Overview reuses the exact same suite-level test stream."
)
panels.append(test_success)
test_failures = stat_panel(
47,
"Platform Test Failures (24h)",
TEST_FAILURES_24H_TOTAL,
{"h": 3, "w": 6, "x": 18, "y": 11},
{"h": 4, "w": 6, "x": 18, "y": 9},
unit="none",
decimals=0,
instant=True,
@ -1533,7 +1535,7 @@ def build_overview():
30,
"Mail Sent (1d)",
'max(postmark_outbound_sent{window="1d"})',
{"h": 3, "w": 4, "x": 0, "y": 20},
{"h": 2, "w": 4, "x": 0, "y": 18},
unit="none",
links=link_to("atlas-mail"),
)
@ -1544,7 +1546,7 @@ def build_overview():
"type": "stat",
"title": "Mail Bounces (1d)",
"datasource": PROM_DS,
"gridPos": {"h": 3, "w": 4, "x": 8, "y": 20},
"gridPos": {"h": 2, "w": 4, "x": 8, "y": 18},
"targets": [
{
"expr": 'max(postmark_outbound_bounce_rate{window="1d"})',
@ -1590,7 +1592,7 @@ def build_overview():
32,
"Mail Success Rate (1d)",
'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)',
{"h": 3, "w": 4, "x": 4, "y": 20},
{"h": 2, "w": 4, "x": 4, "y": 18},
unit="percent",
thresholds=mail_success_thresholds,
decimals=1,
@ -1602,7 +1604,7 @@ def build_overview():
33,
"Mail Limit Used (30d)",
"max(postmark_sending_limit_used_percent)",
{"h": 3, "w": 4, "x": 12, "y": 20},
{"h": 2, "w": 4, "x": 12, "y": 18},
unit="percent",
thresholds=mail_limit_thresholds,
decimals=1,
@ -1614,7 +1616,7 @@ def build_overview():
34,
"Postgres Connections Used",
POSTGRES_CONN_USED,
{"h": 3, "w": 4, "x": 16, "y": 20},
{"h": 2, "w": 4, "x": 16, "y": 18},
decimals=0,
text_mode="name_and_value",
legend="{{conn}}",
@ -1626,7 +1628,7 @@ def build_overview():
35,
"Postgres Hottest Connections",
POSTGRES_CONN_HOTTEST,
{"h": 3, "w": 4, "x": 20, "y": 20},
{"h": 2, "w": 4, "x": 20, "y": 18},
unit="none",
decimals=0,
text_mode="name_and_value",

View File

@ -503,7 +503,7 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 6,
"x": 0,
"y": 5
@ -580,7 +580,7 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 6,
"x": 6,
"y": 5
@ -657,7 +657,7 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 6,
"x": 12,
"y": 5
@ -726,7 +726,7 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 6,
"x": 18,
"y": 5
@ -795,10 +795,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 6,
"x": 0,
"y": 8
"y": 7
},
"targets": [
{
@ -870,10 +870,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 6,
"x": 6,
"y": 8
"y": 7
},
"targets": [
{
@ -945,10 +945,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 6,
"x": 12,
"y": 8
"y": 7
},
"targets": [
{
@ -1012,10 +1012,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 6,
"x": 18,
"y": 8
"y": 7
},
"targets": [
{
@ -1079,10 +1079,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
"h": 5,
"w": 4,
"x": 0,
"y": 14
"y": 13
},
"targets": [
{
@ -1301,10 +1301,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
"h": 5,
"w": 4,
"x": 4,
"y": 14
"y": 13
},
"targets": [
{
@ -1355,10 +1355,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
"h": 5,
"w": 4,
"x": 8,
"y": 14
"y": 13
},
"targets": [
{
@ -1459,10 +1459,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
"h": 5,
"w": 4,
"x": 12,
"y": 14
"y": 13
},
"targets": [
{
@ -1533,10 +1533,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
"h": 5,
"w": 4,
"x": 16,
"y": 14
"y": 13
},
"targets": [
{
@ -1625,10 +1625,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
"h": 5,
"w": 4,
"x": 20,
"y": 14
"y": 13
},
"targets": [
{
@ -1685,10 +1685,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 4,
"w": 6,
"x": 0,
"y": 11
"y": 9
},
"targets": [
{
@ -1744,10 +1744,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 4,
"w": 6,
"x": 6,
"y": 11
"y": 9
},
"targets": [
{
@ -1808,20 +1808,24 @@
"type": "timeseries",
"title": "Platform Test Success Rate",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
"type": "datasource",
"uid": "-- Dashboard --"
},
"gridPos": {
"h": 3,
"h": 4,
"w": 6,
"x": 12,
"y": 11
"y": 9
},
"targets": [
{
"refId": "A",
"expr": "label_replace(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(ariadne_task_runs_total[$__interval])) or on() vector(0)), 1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(100 * (sum(increase(metis_builds_total{status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(metis_builds_total[$__interval])) or on() vector(0)), 1), \"suite\", \"metis-build\", \"__name__\", \".*\") or label_replace(100 * (sum(increase(metis_flashes_total{status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(metis_flashes_total[$__interval])) or on() vector(0)), 1), \"suite\", \"metis-flash\", \"__name__\", \".*\") or label_replace(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval])) or on() vector(0)), 1), \"suite\", \"ananke-quality\", \"__name__\", \".*\")",
"legendFormat": "{{suite}}"
"datasource": {
"type": "datasource",
"uid": "-- Dashboard --"
},
"dashboardUid": "atlas-jobs",
"panelId": 19
}
],
"fieldConfig": {
@ -1832,8 +1836,11 @@
},
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
"displayMode": "table",
"placement": "right",
"calcs": [
"last"
]
},
"tooltip": {
"mode": "multi"
@ -1846,7 +1853,7 @@
"targetBlank": true
}
],
"description": "Rolling pass rate per platform suite (Ariadne task automation + Metis build/flash pipelines + Ananke quality gate). Each line tracks pass percentage over time for its suite."
"description": "Sourced directly from Atlas Jobs internal panel #19 (atlas-jobs) so Overview reuses the exact same suite-level test stream."
},
{
"id": 47,
@ -1857,10 +1864,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 4,
"w": 6,
"x": 18,
"y": 11
"y": 9
},
"targets": [
{
@ -1935,10 +1942,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 4,
"x": 0,
"y": 20
"y": 18
},
"targets": [
{
@ -2002,10 +2009,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 4,
"x": 8,
"y": 20
"y": 18
},
"targets": [
{
@ -2107,10 +2114,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 4,
"x": 4,
"y": 20
"y": 18
},
"targets": [
{
@ -2183,10 +2190,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 4,
"x": 12,
"y": 20
"y": 18
},
"targets": [
{
@ -2259,10 +2266,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 4,
"x": 16,
"y": 20
"y": 18
},
"targets": [
{
@ -2322,10 +2329,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 4,
"x": 20,
"y": 20
"y": 18
},
"targets": [
{

View File

@ -512,7 +512,7 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 6,
"x": 0,
"y": 5
@ -589,7 +589,7 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 6,
"x": 6,
"y": 5
@ -666,7 +666,7 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 6,
"x": 12,
"y": 5
@ -735,7 +735,7 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 6,
"x": 18,
"y": 5
@ -804,10 +804,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 6,
"x": 0,
"y": 8
"y": 7
},
"targets": [
{
@ -879,10 +879,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 6,
"x": 6,
"y": 8
"y": 7
},
"targets": [
{
@ -954,10 +954,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 6,
"x": 12,
"y": 8
"y": 7
},
"targets": [
{
@ -1021,10 +1021,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 6,
"x": 18,
"y": 8
"y": 7
},
"targets": [
{
@ -1088,10 +1088,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
"h": 5,
"w": 4,
"x": 0,
"y": 14
"y": 13
},
"targets": [
{
@ -1310,10 +1310,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
"h": 5,
"w": 4,
"x": 4,
"y": 14
"y": 13
},
"targets": [
{
@ -1364,10 +1364,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
"h": 5,
"w": 4,
"x": 8,
"y": 14
"y": 13
},
"targets": [
{
@ -1468,10 +1468,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
"h": 5,
"w": 4,
"x": 12,
"y": 14
"y": 13
},
"targets": [
{
@ -1542,10 +1542,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
"h": 5,
"w": 4,
"x": 16,
"y": 14
"y": 13
},
"targets": [
{
@ -1634,10 +1634,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
"h": 5,
"w": 4,
"x": 20,
"y": 14
"y": 13
},
"targets": [
{
@ -1694,10 +1694,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 4,
"w": 6,
"x": 0,
"y": 11
"y": 9
},
"targets": [
{
@ -1753,10 +1753,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 4,
"w": 6,
"x": 6,
"y": 11
"y": 9
},
"targets": [
{
@ -1817,20 +1817,24 @@ data:
"type": "timeseries",
"title": "Platform Test Success Rate",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
"type": "datasource",
"uid": "-- Dashboard --"
},
"gridPos": {
"h": 3,
"h": 4,
"w": 6,
"x": 12,
"y": 11
"y": 9
},
"targets": [
{
"refId": "A",
"expr": "label_replace(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(ariadne_task_runs_total[$__interval])) or on() vector(0)), 1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(100 * (sum(increase(metis_builds_total{status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(metis_builds_total[$__interval])) or on() vector(0)), 1), \"suite\", \"metis-build\", \"__name__\", \".*\") or label_replace(100 * (sum(increase(metis_flashes_total{status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(metis_flashes_total[$__interval])) or on() vector(0)), 1), \"suite\", \"metis-flash\", \"__name__\", \".*\") or label_replace(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval])) or on() vector(0)), 1), \"suite\", \"ananke-quality\", \"__name__\", \".*\")",
"legendFormat": "{{suite}}"
"datasource": {
"type": "datasource",
"uid": "-- Dashboard --"
},
"dashboardUid": "atlas-jobs",
"panelId": 19
}
],
"fieldConfig": {
@ -1841,8 +1845,11 @@ data:
},
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
"displayMode": "table",
"placement": "right",
"calcs": [
"last"
]
},
"tooltip": {
"mode": "multi"
@ -1855,7 +1862,7 @@ data:
"targetBlank": true
}
],
"description": "Rolling pass rate per platform suite (Ariadne task automation + Metis build/flash pipelines + Ananke quality gate). Each line tracks pass percentage over time for its suite."
"description": "Sourced directly from Atlas Jobs internal panel #19 (atlas-jobs) so Overview reuses the exact same suite-level test stream."
},
{
"id": 47,
@ -1866,10 +1873,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 4,
"w": 6,
"x": 18,
"y": 11
"y": 9
},
"targets": [
{
@ -1944,10 +1951,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 4,
"x": 0,
"y": 20
"y": 18
},
"targets": [
{
@ -2011,10 +2018,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 4,
"x": 8,
"y": 20
"y": 18
},
"targets": [
{
@ -2116,10 +2123,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 4,
"x": 4,
"y": 20
"y": 18
},
"targets": [
{
@ -2192,10 +2199,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 4,
"x": 12,
"y": 20
"y": 18
},
"targets": [
{
@ -2268,10 +2275,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 4,
"x": 16,
"y": 20
"y": 18
},
"targets": [
{
@ -2331,10 +2338,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 3,
"h": 2,
"w": 4,
"x": 20,
"y": 20
"y": 18
},
"targets": [
{