monitoring: align overview panels with jobs and point-based suite rates

This commit is contained in:
Brad Stein 2026-04-09 16:35:14 -03:00
parent f8c1243dfd
commit 5cf9a16d97
7 changed files with 393 additions and 228 deletions

View File

@ -438,7 +438,8 @@ TEST_FAILURES_24H_TOTAL = (
'(sum(increase(ariadne_task_runs_total{status!="ok"}[24h])) or on() vector(0)) + '
'(sum(increase(metis_builds_total{status="error"}[24h])) or on() vector(0)) + '
'(sum(increase(metis_flashes_total{status="error"}[24h])) or on() vector(0)) + '
'(sum(increase(ananke_quality_gate_runs_total{suite="ananke",status="failed"}[24h])) or on() vector(0))'
'(sum(increase(ananke_quality_gate_runs_total{suite="ananke",status="failed"}[24h])) or on() vector(0)) + '
'(sum(increase(platform_quality_gate_runs_total{status!~"ok|passed|success"}[24h])) or on() vector(0))'
)
PLATFORM_TEST_ACTIVITY_30D = (
'label_replace(sum by (status) (increase(ariadne_task_runs_total[30d])), "source", "ariadne", "__name__", ".*") '
@ -446,32 +447,35 @@ PLATFORM_TEST_ACTIVITY_30D = (
'or label_replace(sum by (status) (increase(metis_flashes_total[30d])), "source", "metis-flash", "__name__", ".*") '
'or label_replace(sum by (status) (increase(ananke_quality_gate_runs_total{suite="ananke"}[30d])), "source", "ananke-quality", "__name__", ".*")'
)
PLATFORM_TEST_ROLLING_WINDOW = "30d"
ARIADNE_SUITE_OK_INTERVAL = f'sum(increase(ariadne_task_runs_total{{status="ok"}}[{PLATFORM_TEST_ROLLING_WINDOW}]))'
ARIADNE_SUITE_TOTAL_INTERVAL = f'sum(increase(ariadne_task_runs_total[{PLATFORM_TEST_ROLLING_WINDOW}]))'
PLATFORM_TEST_POINT_WINDOW = "$__interval"
ARIADNE_SUITE_OK_INTERVAL = f'sum(increase(ariadne_task_runs_total{{status="ok"}}[{PLATFORM_TEST_POINT_WINDOW}]))'
ARIADNE_SUITE_TOTAL_INTERVAL = f'sum(increase(ariadne_task_runs_total[{PLATFORM_TEST_POINT_WINDOW}]))'
METIS_SUITE_OK_INTERVAL = (
f'(sum(increase(metis_builds_total{{status="ok"}}[{PLATFORM_TEST_ROLLING_WINDOW}])) + '
f'sum(increase(metis_flashes_total{{status="ok"}}[{PLATFORM_TEST_ROLLING_WINDOW}])))'
f'(sum(increase(metis_builds_total{{status="ok"}}[{PLATFORM_TEST_POINT_WINDOW}])) + '
f'sum(increase(metis_flashes_total{{status="ok"}}[{PLATFORM_TEST_POINT_WINDOW}])))'
)
METIS_SUITE_TOTAL_INTERVAL = (
f'(sum(increase(metis_builds_total[{PLATFORM_TEST_ROLLING_WINDOW}])) + '
f'sum(increase(metis_flashes_total[{PLATFORM_TEST_ROLLING_WINDOW}])))'
f'(sum(increase(metis_builds_total[{PLATFORM_TEST_POINT_WINDOW}])) + '
f'sum(increase(metis_flashes_total[{PLATFORM_TEST_POINT_WINDOW}])))'
)
ANANKE_SUITE_OK_INTERVAL = (
f'sum(increase(ananke_quality_gate_runs_total{{suite="ananke",status="ok"}}[{PLATFORM_TEST_ROLLING_WINDOW}]))'
f'sum(increase(ananke_quality_gate_runs_total{{suite="ananke",status="ok"}}[{PLATFORM_TEST_POINT_WINDOW}]))'
)
ANANKE_SUITE_TOTAL_INTERVAL = (
f'sum(increase(ananke_quality_gate_runs_total{{suite="ananke"}}[{PLATFORM_TEST_ROLLING_WINDOW}]))'
f'sum(increase(ananke_quality_gate_runs_total{{suite="ananke"}}[{PLATFORM_TEST_POINT_WINDOW}]))'
)
PLATFORM_TEST_SUCCESS_RATE_ARIADNE_SERIES = (
f'100 * ({ARIADNE_SUITE_OK_INTERVAL}) / clamp_min(({ARIADNE_SUITE_TOTAL_INTERVAL}), 1)'
f'(100 * ({ARIADNE_SUITE_OK_INTERVAL}) / clamp_min(({ARIADNE_SUITE_TOTAL_INTERVAL}), 1)) '
f'and on() (({ARIADNE_SUITE_TOTAL_INTERVAL}) > 0)'
)
PLATFORM_TEST_SUCCESS_RATE_METIS_SERIES = (
f'100 * ({METIS_SUITE_OK_INTERVAL}) / clamp_min(({METIS_SUITE_TOTAL_INTERVAL}), 1)'
f'(100 * ({METIS_SUITE_OK_INTERVAL}) / clamp_min(({METIS_SUITE_TOTAL_INTERVAL}), 1)) '
f'and on() (({METIS_SUITE_TOTAL_INTERVAL}) > 0)'
)
PLATFORM_TEST_SUCCESS_RATE_ANANKE_SERIES = (
f'100 * ({ANANKE_SUITE_OK_INTERVAL}) / clamp_min(({ANANKE_SUITE_TOTAL_INTERVAL}), 1)'
f'(100 * ({ANANKE_SUITE_OK_INTERVAL}) / clamp_min(({ANANKE_SUITE_TOTAL_INTERVAL}), 1)) '
f'and on() (({ANANKE_SUITE_TOTAL_INTERVAL}) > 0)'
)
PLATFORM_TEST_GENERIC_SUITE_NAMES = [
@ -487,9 +491,10 @@ PLATFORM_TEST_GENERIC_SUITE_TARGETS = [
{
"refId": chr(ord("D") + index),
"expr": (
f'100 * (sum(increase(platform_quality_gate_runs_total{{suite="{suite}",status=~"ok|passed|success"}}'
f'[{PLATFORM_TEST_ROLLING_WINDOW}]))) / '
f'clamp_min((sum(increase(platform_quality_gate_runs_total{{suite="{suite}"}}[{PLATFORM_TEST_ROLLING_WINDOW}]))), 1)'
f'(100 * (sum(increase(platform_quality_gate_runs_total{{suite="{suite}",status=~"ok|passed|success"}}'
f'[{PLATFORM_TEST_POINT_WINDOW}]))) / '
f'clamp_min((sum(increase(platform_quality_gate_runs_total{{suite="{suite}"}}[{PLATFORM_TEST_POINT_WINDOW}]))), 1)) '
f'and on() ((sum(increase(platform_quality_gate_runs_total{{suite="{suite}"}}[{PLATFORM_TEST_POINT_WINDOW}]))) > 0)'
),
"legendFormat": suite,
}
@ -1341,10 +1346,10 @@ def build_overview():
text_mode="name_and_value",
targets=[
{"refId": "A", "expr": ANANKE_UPS_DRAW_WATTS_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Draw (W)", "instant": True},
{"refId": "B", "expr": ANANKE_UPS_RUNTIME_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Discharge ETA", "instant": True},
{"refId": "B", "expr": ANANKE_UPS_RUNTIME_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Discharge", "instant": True},
{"refId": "C", "expr": ANANKE_UPS_ON_BATTERY_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Status", "instant": True},
{"refId": "D", "expr": ANANKE_UPS_DRAW_WATTS_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Draw (W)", "instant": True},
{"refId": "E", "expr": ANANKE_UPS_RUNTIME_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Discharge ETA", "instant": True},
{"refId": "E", "expr": ANANKE_UPS_RUNTIME_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Discharge", "instant": True},
{"refId": "F", "expr": ANANKE_UPS_ON_BATTERY_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Status", "instant": True},
],
field_overrides=[
@ -1357,11 +1362,11 @@ def build_overview():
"properties": [{"id": "unit", "value": "watt"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}],
},
{
"matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Discharge ETA"},
"matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Discharge"},
"properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}],
},
{
"matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Discharge ETA"},
"matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Discharge"},
"properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}],
},
{
@ -1374,7 +1379,7 @@ def build_overview():
},
],
links=link_to("atlas-power"),
description="Per-UPS live snapshot: current draw, discharge ETA, and charging/discharging status.",
description="Per-UPS live snapshot: current draw, discharge, and charging/discharging status.",
)
)
panels.append(
@ -1491,31 +1496,54 @@ def build_overview():
)
panels.append(
table_panel(
bargauge_panel(
44,
"One-off Job Pods >1h",
f"({ONEOFF_JOB_POD_AGE_HOURS}) > 1",
"One-off Job Pods (age hours)",
ONEOFF_JOB_POD_AGE_HOURS,
{"h": 5, "w": 6, "x": 0, "y": 7},
unit="h",
instant=True,
transformations=[
{"id": "labelsToFields", "options": {}},
{"id": "organize", "options": {"excludeByName": {"Time": True}}},
{"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}},
],
options={"showHeader": True, "cellHeight": "sm"},
legend="{{namespace}}/{{pod}}",
thresholds=age_thresholds,
limit=12,
decimals=2,
links=link_to("atlas-jobs"),
)
)
panels.append(
stat_panel(
45,
"Ariadne Attempts (24h)",
"sum(increase(ariadne_task_runs_total[24h]))",
{"h": 5, "w": 6, "x": 6, "y": 7},
unit="none",
decimals=0,
links=link_to("atlas-jobs"),
)
{
"id": 45,
"type": "timeseries",
"title": "Ariadne Attempts / Failures",
"datasource": PROM_DS,
"gridPos": {"h": 5, "w": 6, "x": 6, "y": 7},
"targets": [
{"expr": ARIADNE_TASK_ATTEMPTS_SERIES, "refId": "A", "legendFormat": "Attempts"},
{"expr": ARIADNE_TASK_FAILURES_SERIES, "refId": "B", "legendFormat": "Failures"},
],
"fieldConfig": {
"defaults": {"unit": "none"},
"overrides": [
{
"matcher": {"id": "byName", "options": "Attempts"},
"properties": [
{"id": "color", "value": {"mode": "fixed", "fixedColor": "green"}}
],
},
{
"matcher": {"id": "byName", "options": "Failures"},
"properties": [
{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}
],
},
],
},
"options": {
"legend": {"displayMode": "table", "placement": "right"},
"tooltip": {"mode": "multi"},
},
"links": link_to("atlas-jobs"),
}
)
test_success = timeseries_panel(
46,
@ -1531,8 +1559,18 @@ def build_overview():
)
test_success["fieldConfig"]["defaults"]["min"] = 0
test_success["fieldConfig"]["defaults"]["max"] = 100
test_success["fieldConfig"]["defaults"]["custom"] = {
"drawStyle": "line",
"lineInterpolation": "linear",
"lineWidth": 2,
"fillOpacity": 10,
"showPoints": "always",
"pointSize": 4,
"spanNulls": True,
}
test_success["timeFrom"] = "30d"
test_success["description"] = (
"Application-level rolling pass rate (0-100) over the last 30 days. Includes Ariadne/Metis/Ananke and auto-picks additional suite lines when platform_quality_gate_runs_total is emitted."
"Per-run interval pass points (0-100) for each software suite over the last 30 days. Points are connected to show trend; missing-run intervals are ignored."
)
panels.append(test_success)
test_failures = stat_panel(
@ -1546,9 +1584,7 @@ def build_overview():
thresholds=count_thresholds,
links=link_to("atlas-jobs"),
)
test_failures["description"] = (
"This summary is sourced from the Atlas Jobs internal dashboard rather than a separate overview-only query."
)
test_failures["description"] = "Total failed test events in the last 24h across Ariadne, Metis, Ananke, and any suites publishing platform_quality_gate_runs_total."
panels.append(test_failures)
panels.append(
@ -3010,8 +3046,17 @@ def build_jobs_dashboard():
)
suite_panel["fieldConfig"]["defaults"]["min"] = 0
suite_panel["fieldConfig"]["defaults"]["max"] = 100
suite_panel["fieldConfig"]["defaults"]["custom"] = {
"drawStyle": "line",
"lineInterpolation": "linear",
"lineWidth": 2,
"fillOpacity": 10,
"showPoints": "always",
"pointSize": 4,
"spanNulls": True,
}
suite_panel["description"] = (
"Application-level rolling pass percentage over the last 30 days. Existing suites: ariadne, metis, ananke; additional suites appear automatically when platform_quality_gate_runs_total is published."
"Per-run interval pass points (0-100) per suite. Existing suites: ariadne, metis, ananke; additional suites appear automatically when platform_quality_gate_runs_total is published."
)
panels.append(suite_panel)
@ -3052,10 +3097,10 @@ def build_power_dashboard():
text_mode="name_and_value",
targets=[
{"refId": "A", "expr": ANANKE_UPS_DRAW_WATTS_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Draw (W)", "instant": True},
{"refId": "B", "expr": ANANKE_UPS_RUNTIME_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Discharge ETA", "instant": True},
{"refId": "B", "expr": ANANKE_UPS_RUNTIME_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Discharge", "instant": True},
{"refId": "C", "expr": ANANKE_UPS_ON_BATTERY_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Status", "instant": True},
{"refId": "D", "expr": ANANKE_UPS_DRAW_WATTS_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Draw (W)", "instant": True},
{"refId": "E", "expr": ANANKE_UPS_RUNTIME_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Discharge ETA", "instant": True},
{"refId": "E", "expr": ANANKE_UPS_RUNTIME_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Discharge", "instant": True},
{"refId": "F", "expr": ANANKE_UPS_ON_BATTERY_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Status", "instant": True},
],
field_overrides=[
@ -3068,11 +3113,11 @@ def build_power_dashboard():
"properties": [{"id": "unit", "value": "watt"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}],
},
{
"matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Discharge ETA"},
"matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Discharge"},
"properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}],
},
{
"matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Discharge ETA"},
"matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Discharge"},
"properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}],
},
{

View File

@ -1253,52 +1253,52 @@
"targets": [
{
"refId": "A",
"expr": "100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[30d]))) / clamp_min((sum(increase(ariadne_task_runs_total[30d]))), 1)",
"expr": "(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[$__interval]))) / clamp_min((sum(increase(ariadne_task_runs_total[$__interval]))), 1)) and on() ((sum(increase(ariadne_task_runs_total[$__interval]))) > 0)",
"legendFormat": "ariadne"
},
{
"refId": "B",
"expr": "100 * ((sum(increase(metis_builds_total{status=\"ok\"}[30d])) + sum(increase(metis_flashes_total{status=\"ok\"}[30d])))) / clamp_min(((sum(increase(metis_builds_total[30d])) + sum(increase(metis_flashes_total[30d])))), 1)",
"expr": "(100 * ((sum(increase(metis_builds_total{status=\"ok\"}[$__interval])) + sum(increase(metis_flashes_total{status=\"ok\"}[$__interval])))) / clamp_min(((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))), 1)) and on() (((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))) > 0)",
"legendFormat": "metis"
},
{
"refId": "C",
"expr": "100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[30d]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[$__interval]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))), 1)) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))) > 0)",
"legendFormat": "ananke"
},
{
"refId": "D",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[$__interval]))) > 0)",
"legendFormat": "atlasbot"
},
{
"refId": "E",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[$__interval]))) > 0)",
"legendFormat": "lesavka"
},
{
"refId": "F",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[$__interval]))) > 0)",
"legendFormat": "pegasus"
},
{
"refId": "G",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[$__interval]))) > 0)",
"legendFormat": "soteria"
},
{
"refId": "H",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[$__interval]))) > 0)",
"legendFormat": "titan-iac"
},
{
"refId": "I",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[$__interval]))) > 0)",
"legendFormat": "bstein-home"
},
{
"refId": "J",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[$__interval]))) > 0)",
"legendFormat": "arcanagon"
}
],
@ -1306,7 +1306,16 @@
"defaults": {
"unit": "percent",
"min": 0,
"max": 100
"max": 100,
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"lineWidth": 2,
"fillOpacity": 10,
"showPoints": "always",
"pointSize": 4,
"spanNulls": true
}
},
"overrides": []
},
@ -1319,7 +1328,7 @@
"mode": "multi"
}
},
"description": "Application-level rolling pass percentage over the last 30 days. Existing suites: ariadne, metis, ananke; additional suites appear automatically when platform_quality_gate_runs_total is published."
"description": "Per-run interval pass points (0-100) per suite. Existing suites: ariadne, metis, ananke; additional suites appear automatically when platform_quality_gate_runs_total is published."
}
],
"time": {

View File

@ -1094,7 +1094,7 @@
{
"refId": "B",
"expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0)",
"legendFormat": "Pyrphoros Discharge ETA",
"legendFormat": "Pyrphoros Discharge",
"instant": true
},
{
@ -1112,7 +1112,7 @@
{
"refId": "E",
"expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0)",
"legendFormat": "Statera Discharge ETA",
"legendFormat": "Statera Discharge",
"instant": true
},
{
@ -1183,7 +1183,7 @@
{
"matcher": {
"id": "byName",
"options": "Pyrphoros Discharge ETA"
"options": "Pyrphoros Discharge"
},
"properties": [
{
@ -1199,7 +1199,7 @@
{
"matcher": {
"id": "byName",
"options": "Statera Discharge ETA"
"options": "Statera Discharge"
},
"properties": [
{
@ -1290,7 +1290,7 @@
"targetBlank": true
}
],
"description": "Per-UPS live snapshot: current draw, discharge ETA, and charging/discharging status."
"description": "Per-UPS live snapshot: current draw, discharge, and charging/discharging status."
},
{
"id": 41,
@ -1678,8 +1678,8 @@
},
{
"id": 44,
"type": "table",
"title": "One-off Job Pods >1h",
"type": "bargauge",
"title": "One-off Job Pods (age hours)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -1692,38 +1692,61 @@
},
"targets": [
{
"expr": "(((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"})) > 1",
"expr": "sort_desc(((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"}))",
"refId": "A",
"legendFormat": "{{namespace}}/{{pod}}",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "h",
"custom": {
"filterable": true
}
"min": 0,
"max": null,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 6
},
{
"color": "orange",
"value": 24
},
{
"color": "red",
"value": 48
}
]
},
"decimals": 2
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false,
"cellHeight": "sm"
"displayMode": "gradient",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
}
},
"links": [
{
"title": "Open atlas-jobs dashboard",
"url": "/d/atlas-jobs",
"targetBlank": true
}
],
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
}
}
},
{
"id": "sortBy",
"options": {
@ -1732,13 +1755,19 @@
],
"order": "desc"
}
},
{
"id": "limit",
"options": {
"limit": 12
}
}
]
},
{
"id": 45,
"type": "stat",
"title": "Ariadne Attempts (24h)",
"type": "timeseries",
"title": "Ariadne Attempts / Failures",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -1751,49 +1780,61 @@
},
"targets": [
{
"expr": "sum(increase(ariadne_task_runs_total[24h]))",
"refId": "A"
"expr": "sum(increase(ariadne_task_runs_total[$__interval]))",
"refId": "A",
"legendFormat": "Attempts"
},
{
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
"refId": "B",
"legendFormat": "Failures"
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
"unit": "none"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Attempts"
},
"properties": [
{
"color": "rgba(115, 115, 115, 1)",
"value": null
},
{
"color": "green",
"value": 1
"id": "color",
"value": {
"mode": "fixed",
"fixedColor": "green"
}
}
]
},
"unit": "none",
"custom": {
"displayMode": "auto"
},
"decimals": 0
},
"overrides": []
{
"matcher": {
"id": "byName",
"options": "Failures"
},
"properties": [
{
"id": "color",
"value": {
"mode": "fixed",
"fixedColor": "red"
}
}
]
}
]
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
"legend": {
"displayMode": "table",
"placement": "right"
},
"textMode": "value"
"tooltip": {
"mode": "multi"
}
},
"links": [
{
@ -1820,52 +1861,52 @@
"targets": [
{
"refId": "A",
"expr": "100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[30d]))) / clamp_min((sum(increase(ariadne_task_runs_total[30d]))), 1)",
"expr": "(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[$__interval]))) / clamp_min((sum(increase(ariadne_task_runs_total[$__interval]))), 1)) and on() ((sum(increase(ariadne_task_runs_total[$__interval]))) > 0)",
"legendFormat": "ariadne"
},
{
"refId": "B",
"expr": "100 * ((sum(increase(metis_builds_total{status=\"ok\"}[30d])) + sum(increase(metis_flashes_total{status=\"ok\"}[30d])))) / clamp_min(((sum(increase(metis_builds_total[30d])) + sum(increase(metis_flashes_total[30d])))), 1)",
"expr": "(100 * ((sum(increase(metis_builds_total{status=\"ok\"}[$__interval])) + sum(increase(metis_flashes_total{status=\"ok\"}[$__interval])))) / clamp_min(((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))), 1)) and on() (((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))) > 0)",
"legendFormat": "metis"
},
{
"refId": "C",
"expr": "100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[30d]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[$__interval]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))), 1)) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))) > 0)",
"legendFormat": "ananke"
},
{
"refId": "D",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[$__interval]))) > 0)",
"legendFormat": "atlasbot"
},
{
"refId": "E",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[$__interval]))) > 0)",
"legendFormat": "lesavka"
},
{
"refId": "F",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[$__interval]))) > 0)",
"legendFormat": "pegasus"
},
{
"refId": "G",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[$__interval]))) > 0)",
"legendFormat": "soteria"
},
{
"refId": "H",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[$__interval]))) > 0)",
"legendFormat": "titan-iac"
},
{
"refId": "I",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[$__interval]))) > 0)",
"legendFormat": "bstein-home"
},
{
"refId": "J",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[$__interval]))) > 0)",
"legendFormat": "arcanagon"
}
],
@ -1873,7 +1914,16 @@
"defaults": {
"unit": "percent",
"min": 0,
"max": 100
"max": 100,
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"lineWidth": 2,
"fillOpacity": 10,
"showPoints": "always",
"pointSize": 4,
"spanNulls": true
}
},
"overrides": []
},
@ -1896,7 +1946,8 @@
"targetBlank": true
}
],
"description": "Application-level rolling pass rate (0-100) over the last 30 days. Includes Ariadne/Metis/Ananke and auto-picks additional suite lines when platform_quality_gate_runs_total is emitted."
"timeFrom": "30d",
"description": "Per-run interval pass points (0-100) for each software suite over the last 30 days. Points are connected to show trend; missing-run intervals are ignored."
},
{
"id": 47,
@ -1914,7 +1965,7 @@
},
"targets": [
{
"expr": "(sum(increase(ariadne_task_runs_total{status!=\"ok\"}[24h])) or on() vector(0)) + (sum(increase(metis_builds_total{status=\"error\"}[24h])) or on() vector(0)) + (sum(increase(metis_flashes_total{status=\"error\"}[24h])) or on() vector(0)) + (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"failed\"}[24h])) or on() vector(0))",
"expr": "(sum(increase(ariadne_task_runs_total{status!=\"ok\"}[24h])) or on() vector(0)) + (sum(increase(metis_builds_total{status=\"error\"}[24h])) or on() vector(0)) + (sum(increase(metis_flashes_total{status=\"error\"}[24h])) or on() vector(0)) + (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"failed\"}[24h])) or on() vector(0)) + (sum(increase(platform_quality_gate_runs_total{status!~\"ok|passed|success\"}[24h])) or on() vector(0))",
"refId": "A",
"instant": true
}
@ -1974,7 +2025,7 @@
"targetBlank": true
}
],
"description": "This summary is sourced from the Atlas Jobs internal dashboard rather than a separate overview-only query."
"description": "Total failed test events in the last 24h across Ariadne, Metis, Ananke, and any suites publishing platform_quality_gate_runs_total."
},
{
"id": 30,

View File

@ -28,7 +28,7 @@
{
"refId": "B",
"expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0)",
"legendFormat": "Pyrphoros Discharge ETA",
"legendFormat": "Pyrphoros Discharge",
"instant": true
},
{
@ -46,7 +46,7 @@
{
"refId": "E",
"expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0)",
"legendFormat": "Statera Discharge ETA",
"legendFormat": "Statera Discharge",
"instant": true
},
{
@ -117,7 +117,7 @@
{
"matcher": {
"id": "byName",
"options": "Pyrphoros Discharge ETA"
"options": "Pyrphoros Discharge"
},
"properties": [
{
@ -133,7 +133,7 @@
{
"matcher": {
"id": "byName",
"options": "Statera Discharge ETA"
"options": "Statera Discharge"
},
"properties": [
{

View File

@ -1262,52 +1262,52 @@ data:
"targets": [
{
"refId": "A",
"expr": "100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[30d]))) / clamp_min((sum(increase(ariadne_task_runs_total[30d]))), 1)",
"expr": "(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[$__interval]))) / clamp_min((sum(increase(ariadne_task_runs_total[$__interval]))), 1)) and on() ((sum(increase(ariadne_task_runs_total[$__interval]))) > 0)",
"legendFormat": "ariadne"
},
{
"refId": "B",
"expr": "100 * ((sum(increase(metis_builds_total{status=\"ok\"}[30d])) + sum(increase(metis_flashes_total{status=\"ok\"}[30d])))) / clamp_min(((sum(increase(metis_builds_total[30d])) + sum(increase(metis_flashes_total[30d])))), 1)",
"expr": "(100 * ((sum(increase(metis_builds_total{status=\"ok\"}[$__interval])) + sum(increase(metis_flashes_total{status=\"ok\"}[$__interval])))) / clamp_min(((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))), 1)) and on() (((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))) > 0)",
"legendFormat": "metis"
},
{
"refId": "C",
"expr": "100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[30d]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[$__interval]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))), 1)) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))) > 0)",
"legendFormat": "ananke"
},
{
"refId": "D",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[$__interval]))) > 0)",
"legendFormat": "atlasbot"
},
{
"refId": "E",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[$__interval]))) > 0)",
"legendFormat": "lesavka"
},
{
"refId": "F",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[$__interval]))) > 0)",
"legendFormat": "pegasus"
},
{
"refId": "G",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[$__interval]))) > 0)",
"legendFormat": "soteria"
},
{
"refId": "H",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[$__interval]))) > 0)",
"legendFormat": "titan-iac"
},
{
"refId": "I",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[$__interval]))) > 0)",
"legendFormat": "bstein-home"
},
{
"refId": "J",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[$__interval]))) > 0)",
"legendFormat": "arcanagon"
}
],
@ -1315,7 +1315,16 @@ data:
"defaults": {
"unit": "percent",
"min": 0,
"max": 100
"max": 100,
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"lineWidth": 2,
"fillOpacity": 10,
"showPoints": "always",
"pointSize": 4,
"spanNulls": true
}
},
"overrides": []
},
@ -1328,7 +1337,7 @@ data:
"mode": "multi"
}
},
"description": "Application-level rolling pass percentage over the last 30 days. Existing suites: ariadne, metis, ananke; additional suites appear automatically when platform_quality_gate_runs_total is published."
"description": "Pass percentage (0-100) per suite, computed for each query interval that had at least one run; intervals with no runs produce no point. Existing suites: ariadne, metis, ananke; additional suites appear automatically when platform_quality_gate_runs_total is published."
}
],
"time": {

View File

@ -1103,7 +1103,7 @@ data:
{
"refId": "B",
"expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0)",
"legendFormat": "Pyrphoros Discharge ETA",
"legendFormat": "Pyrphoros Discharge",
"instant": true
},
{
@ -1121,7 +1121,7 @@ data:
{
"refId": "E",
"expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0)",
"legendFormat": "Statera Discharge ETA",
"legendFormat": "Statera Discharge",
"instant": true
},
{
@ -1192,7 +1192,7 @@ data:
{
"matcher": {
"id": "byName",
"options": "Pyrphoros Discharge ETA"
"options": "Pyrphoros Discharge"
},
"properties": [
{
@ -1208,7 +1208,7 @@ data:
{
"matcher": {
"id": "byName",
"options": "Statera Discharge ETA"
"options": "Statera Discharge"
},
"properties": [
{
@ -1299,7 +1299,7 @@ data:
"targetBlank": true
}
],
"description": "Per-UPS live snapshot: current draw, discharge ETA, and charging/discharging status."
"description": "Per-UPS live snapshot: current draw, discharge runtime, and charging/discharging status."
},
{
"id": 41,
@ -1687,8 +1687,8 @@ data:
},
{
"id": 44,
"type": "table",
"title": "One-off Job Pods >1h",
"type": "bargauge",
"title": "One-off Job Pods (age hours)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -1701,38 +1701,61 @@ data:
},
"targets": [
{
"expr": "(((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"})) > 1",
"expr": "sort_desc(((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"}))",
"refId": "A",
"legendFormat": "{{namespace}}/{{pod}}",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "h",
"custom": {
"filterable": true
}
"min": 0,
"max": null,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 6
},
{
"color": "orange",
"value": 24
},
{
"color": "red",
"value": 48
}
]
},
"decimals": 2
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false,
"cellHeight": "sm"
"displayMode": "gradient",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
}
},
"links": [
{
"title": "Open atlas-jobs dashboard",
"url": "/d/atlas-jobs",
"targetBlank": true
}
],
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
}
}
},
{
"id": "sortBy",
"options": {
@ -1741,13 +1764,19 @@ data:
],
"order": "desc"
}
},
{
"id": "limit",
"options": {
"limit": 12
}
}
]
},
{
"id": 45,
"type": "stat",
"title": "Ariadne Attempts (24h)",
"type": "timeseries",
"title": "Ariadne Attempts / Failures",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -1760,49 +1789,61 @@ data:
},
"targets": [
{
"expr": "sum(increase(ariadne_task_runs_total[24h]))",
"refId": "A"
"expr": "sum(increase(ariadne_task_runs_total[$__interval]))",
"refId": "A",
"legendFormat": "Attempts"
},
{
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
"refId": "B",
"legendFormat": "Failures"
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
"unit": "none"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Attempts"
},
"properties": [
{
"color": "rgba(115, 115, 115, 1)",
"value": null
},
{
"color": "green",
"value": 1
"id": "color",
"value": {
"mode": "fixed",
"fixedColor": "green"
}
}
]
},
"unit": "none",
"custom": {
"displayMode": "auto"
},
"decimals": 0
},
"overrides": []
{
"matcher": {
"id": "byName",
"options": "Failures"
},
"properties": [
{
"id": "color",
"value": {
"mode": "fixed",
"fixedColor": "red"
}
}
]
}
]
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
"legend": {
"displayMode": "table",
"placement": "right"
},
"textMode": "value"
"tooltip": {
"mode": "multi"
}
},
"links": [
{
@ -1829,52 +1870,52 @@ data:
"targets": [
{
"refId": "A",
"expr": "100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[30d]))) / clamp_min((sum(increase(ariadne_task_runs_total[30d]))), 1)",
"expr": "(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[$__interval]))) / clamp_min((sum(increase(ariadne_task_runs_total[$__interval]))), 1)) and on() ((sum(increase(ariadne_task_runs_total[$__interval]))) > 0)",
"legendFormat": "ariadne"
},
{
"refId": "B",
"expr": "100 * ((sum(increase(metis_builds_total{status=\"ok\"}[30d])) + sum(increase(metis_flashes_total{status=\"ok\"}[30d])))) / clamp_min(((sum(increase(metis_builds_total[30d])) + sum(increase(metis_flashes_total[30d])))), 1)",
"expr": "(100 * ((sum(increase(metis_builds_total{status=\"ok\"}[$__interval])) + sum(increase(metis_flashes_total{status=\"ok\"}[$__interval])))) / clamp_min(((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))), 1)) and on() (((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))) > 0)",
"legendFormat": "metis"
},
{
"refId": "C",
"expr": "100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[30d]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[$__interval]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))), 1)) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))) > 0)",
"legendFormat": "ananke"
},
{
"refId": "D",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[$__interval]))) > 0)",
"legendFormat": "atlasbot"
},
{
"refId": "E",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[$__interval]))) > 0)",
"legendFormat": "lesavka"
},
{
"refId": "F",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[$__interval]))) > 0)",
"legendFormat": "pegasus"
},
{
"refId": "G",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[$__interval]))) > 0)",
"legendFormat": "soteria"
},
{
"refId": "H",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[$__interval]))) > 0)",
"legendFormat": "titan-iac"
},
{
"refId": "I",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[$__interval]))) > 0)",
"legendFormat": "bstein-home"
},
{
"refId": "J",
"expr": "100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[30d]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[30d]))), 1)",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[$__interval]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[$__interval]))) > 0)",
"legendFormat": "arcanagon"
}
],
@ -1882,7 +1923,16 @@ data:
"defaults": {
"unit": "percent",
"min": 0,
"max": 100
"max": 100,
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"lineWidth": 2,
"fillOpacity": 10,
"showPoints": "always",
"pointSize": 4,
"spanNulls": true
}
},
"overrides": []
},
@ -1905,7 +1955,8 @@ data:
"targetBlank": true
}
],
"description": "Application-level rolling pass rate (0-100) over the last 30 days. Includes Ariadne/Metis/Ananke and auto-picks additional suite lines when platform_quality_gate_runs_total is emitted."
"timeFrom": "30d",
"description": "Pass percentage (0-100) for each software suite, computed per query interval over the last 30 days. Points are connected to show the trend; intervals with no runs are skipped."
},
{
"id": 47,
@ -1923,7 +1974,7 @@ data:
},
"targets": [
{
"expr": "(sum(increase(ariadne_task_runs_total{status!=\"ok\"}[24h])) or on() vector(0)) + (sum(increase(metis_builds_total{status=\"error\"}[24h])) or on() vector(0)) + (sum(increase(metis_flashes_total{status=\"error\"}[24h])) or on() vector(0)) + (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"failed\"}[24h])) or on() vector(0))",
"expr": "(sum(increase(ariadne_task_runs_total{status!=\"ok\"}[24h])) or on() vector(0)) + (sum(increase(metis_builds_total{status=\"error\"}[24h])) or on() vector(0)) + (sum(increase(metis_flashes_total{status=\"error\"}[24h])) or on() vector(0)) + (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"failed\"}[24h])) or on() vector(0)) + (sum(increase(platform_quality_gate_runs_total{status!~\"ok|passed|success\"}[24h])) or on() vector(0))",
"refId": "A",
"instant": true
}
@ -1983,7 +2034,7 @@ data:
"targetBlank": true
}
],
"description": "This summary is sourced from the Atlas Jobs internal dashboard rather than a separate overview-only query."
"description": "Total failed test events in the last 24h across Ariadne, Metis, Ananke, and any suites publishing platform_quality_gate_runs_total."
},
{
"id": 30,

View File

@ -37,7 +37,7 @@ data:
{
"refId": "B",
"expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0)",
"legendFormat": "Pyrphoros Discharge ETA",
"legendFormat": "Pyrphoros Discharge",
"instant": true
},
{
@ -55,7 +55,7 @@ data:
{
"refId": "E",
"expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0)",
"legendFormat": "Statera Discharge ETA",
"legendFormat": "Statera Discharge",
"instant": true
},
{
@ -126,7 +126,7 @@ data:
{
"matcher": {
"id": "byName",
"options": "Pyrphoros Discharge ETA"
"options": "Pyrphoros Discharge"
},
"properties": [
{
@ -142,7 +142,7 @@ data:
{
"matcher": {
"id": "byName",
"options": "Statera Discharge ETA"
"options": "Statera Discharge"
},
"properties": [
{