monitoring(overview): place six power/climate panels on one row and fix test/job data

This commit is contained in:
Brad Stein 2026-04-04 01:33:15 -03:00
parent cdc3c081f5
commit 55b96c0675
5 changed files with 246 additions and 268 deletions

View File

@ -419,17 +419,28 @@ ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS = (
"(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600"
)
# Bare PromQL selector (metric name only, no label matchers) for the Ariadne
# access-request series; callers are expected to wrap it in their own
# aggregation or range function.
ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total"
TEST_REPO_SELECTOR = 'repo=~"ariadne|metis"'
TEST_CI_COVERAGE = f'ariadne_ci_coverage_percent{{{TEST_REPO_SELECTOR}}}'
TEST_CI_TESTS = f'ariadne_ci_tests_total{{{TEST_REPO_SELECTOR}}}'
TEST_SUCCESS_RATE = (
"100 * "
f'sum(max_over_time(ariadne_ci_tests_total{{{TEST_REPO_SELECTOR},result="passed"}}[30d])) '
"/ clamp_min("
f'sum(max_over_time(ariadne_ci_tests_total{{{TEST_REPO_SELECTOR},result=~"passed|failed|error"}}[30d])), 1)'
# PromQL: number of status="ok" events over the trailing 30 days, summed across
# the Ariadne task-run counter and the Metis build/flash counters.
# Each term carries its own `or on() vector(0)` fallback so that a counter with
# no samples contributes 0 instead of making the whole addition return no data.
PLATFORM_TEST_SUCCESS_EVENTS_30D = " + ".join(
    f'(sum(increase({counter}{{status="ok"}}[30d])) or on() vector(0))'
    for counter in (
        "ariadne_task_runs_total",
        "metis_builds_total",
        "metis_flashes_total",
    )
)
TEST_FAILURES_24H = (
f'sum by (result) (max_over_time(ariadne_ci_tests_total{{{TEST_REPO_SELECTOR},result=~"failed|error"}}[24h]))'
# PromQL: number of events of any status over the trailing 30 days, summed
# across the same three counters used for the success count. This is the
# denominator of the platform success rate.
# The per-term `or on() vector(0)` keeps the sum defined when a counter has no
# samples in the window.
PLATFORM_TEST_TOTAL_EVENTS_30D = " + ".join(
    f"(sum(increase({counter}[30d])) or on() vector(0))"
    for counter in (
        "ariadne_task_runs_total",
        "metis_builds_total",
        "metis_flashes_total",
    )
)
# PromQL: platform success percentage over 30 days = 100 * ok events / all events.
# clamp_min(..., 1) guards the division, so a window with zero events renders
# as 0 rather than NaN or "no data".
TEST_SUCCESS_RATE = (
    "100 * (" + PLATFORM_TEST_SUCCESS_EVENTS_30D + ") "
    "/ clamp_min((" + PLATFORM_TEST_TOTAL_EVENTS_30D + "), 1)"
)
# PromQL: number of failed platform events over the trailing 24 hours, summed
# across Ariadne task runs and Metis builds/flashes. Each term falls back to 0
# via `or on() vector(0)` so the stat shows 0 instead of "no data".
# NOTE(review): the Ariadne term counts every status other than "ok", while the
# Metis terms count only status="error" -- confirm this asymmetry is intended.
TEST_FAILURES_24H_TOTAL = " + ".join(
    f"(sum(increase({selector}[24h])) or on() vector(0))"
    for selector in (
        'ariadne_task_runs_total{status!="ok"}',
        'metis_builds_total{status="error"}',
        'metis_flashes_total{status="error"}',
    )
)
# PromQL: 30-day event counts broken down by status for each platform counter,
# unioned into a single result for the activity table.
# label_replace() with the ".*" pattern always matches, so it is used here only
# to stamp a constant "source" label on every series; that label keeps the
# three per-status result sets distinct when they are combined with `or`.
PLATFORM_TEST_ACTIVITY_30D = " or ".join(
    f"label_replace(sum by (status) (increase({counter}[30d])), "
    f'"source", "{source}", "__name__", ".*")'
    for counter, source in (
        ("ariadne_task_runs_total", "ariadne"),
        ("metis_builds_total", "metis-build"),
        ("metis_flashes_total", "metis-flash"),
    )
)
# Label matcher shared by the Hecate power queries (scrape job "hecate-power").
HECATE_SELECTOR = 'job="hecate-power"'
# PromQL: sum of the on-battery series under that matcher, falling back to 0
# when no such series exists.
HECATE_UPS_ON_BATTERY = (
    "sum(hecate_ups_on_battery{" + HECATE_SELECTOR + "}) or on() vector(0)"
)
@ -1267,7 +1278,7 @@ def build_overview():
40,
"UPS Current Load",
None,
{"h": 5, "w": 8, "x": 0, "y": 11},
{"h": 6, "w": 4, "x": 0, "y": 11},
unit="none",
decimals=1,
text_mode="name_and_value",
@ -1296,15 +1307,15 @@ def build_overview():
41,
"UPS History (Power Draw)",
None,
{"h": 5, "w": 8, "x": 8, "y": 11},
{"h": 6, "w": 4, "x": 4, "y": 11},
unit="watt",
targets=[
{"refId": "A", "expr": HECATE_UPS_DRAW_WATTS_DB_SERIES, "legendFormat": "titan-db"},
{"refId": "B", "expr": HECATE_UPS_DRAW_WATTS_TETHYS_SERIES, "legendFormat": "tethys"},
{"refId": "C", "expr": HECATE_UPS_DRAW_WATTS_TOTAL_SERIES, "legendFormat": "combined"},
],
legend_display="table",
legend_placement="right",
legend_display="list",
legend_placement="bottom",
links=link_to("atlas-power"),
)
)
@ -1313,7 +1324,7 @@ def build_overview():
42,
"Current Climate",
None,
{"h": 5, "w": 8, "x": 16, "y": 11},
{"h": 6, "w": 4, "x": 8, "y": 11},
unit="none",
decimals=2,
text_mode="name_and_value",
@ -1334,7 +1345,7 @@ def build_overview():
43,
"Climate History",
None,
{"h": 5, "w": 8, "x": 0, "y": 16},
{"h": 6, "w": 4, "x": 12, "y": 11},
unit="celsius",
targets=[
{"refId": "A", "expr": CLIMATE_TEMP_SERIES, "legendFormat": "Temperature (°C)"},
@ -1351,8 +1362,8 @@ def build_overview():
],
}
],
legend_display="table",
legend_placement="right",
legend_display="list",
legend_placement="bottom",
links=link_to("atlas-power"),
)
)
@ -1361,7 +1372,7 @@ def build_overview():
140,
"Fan Activity",
None,
{"h": 5, "w": 8, "x": 8, "y": 16},
{"h": 6, "w": 4, "x": 16, "y": 11},
unit="none",
decimals=1,
text_mode="name_and_value",
@ -1387,7 +1398,7 @@ def build_overview():
141,
"Fan History (0-10)",
None,
{"h": 5, "w": 8, "x": 16, "y": 16},
{"h": 6, "w": 4, "x": 20, "y": 11},
unit="none",
max_value=10,
targets=[
@ -1396,22 +1407,26 @@ def build_overview():
{"refId": "C", "expr": CLIMATE_FAN_OUTSIDE_INLET_SERIES, "legendFormat": "Outside Inlet"},
{"refId": "D", "expr": CLIMATE_FAN_INTERIOR_SERIES, "legendFormat": "Interior"},
],
legend_display="table",
legend_placement="right",
legend_display="list",
legend_placement="bottom",
links=link_to("atlas-power"),
)
)
panels.append(
stat_panel(
table_panel(
44,
"One-off Job Pods >1h",
f"sum(({ONEOFF_JOB_POD_AGE_HOURS}) > bool 1) or on() vector(0)",
{"h": 3, "w": 6, "x": 0, "y": 21},
unit="none",
f"({ONEOFF_JOB_POD_AGE_HOURS}) > 1",
{"h": 3, "w": 6, "x": 0, "y": 17},
unit="h",
instant=True,
thresholds=count_thresholds,
links=link_to("atlas-jobs"),
transformations=[
{"id": "labelsToFields", "options": {}},
{"id": "organize", "options": {"excludeByName": {"Time": True}}},
{"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}},
],
options={"showHeader": True, "cellHeight": "sm"},
)
)
panels.append(
@ -1419,7 +1434,7 @@ def build_overview():
45,
"Ariadne Attempts (24h)",
"sum(increase(ariadne_task_runs_total[24h]))",
{"h": 3, "w": 6, "x": 6, "y": 21},
{"h": 3, "w": 6, "x": 6, "y": 17},
unit="none",
decimals=0,
links=link_to("atlas-jobs"),
@ -1429,7 +1444,7 @@ def build_overview():
46,
"Platform Test Success Rate",
TEST_SUCCESS_RATE,
{"h": 3, "w": 6, "x": 12, "y": 21},
{"h": 3, "w": 6, "x": 12, "y": 17},
unit="percent",
decimals=2,
thresholds={
@ -1451,8 +1466,8 @@ def build_overview():
test_failures = stat_panel(
47,
"Platform Test Failures (24h)",
"sum(max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=~\"failed|error\"}[24h]))",
{"h": 3, "w": 6, "x": 18, "y": 21},
TEST_FAILURES_24H_TOTAL,
{"h": 3, "w": 6, "x": 18, "y": 17},
unit="none",
decimals=0,
instant=True,
@ -1469,7 +1484,7 @@ def build_overview():
30,
"Mail Sent (1d)",
'max(postmark_outbound_sent{window="1d"})',
{"h": 3, "w": 4, "x": 0, "y": 24},
{"h": 3, "w": 4, "x": 0, "y": 20},
unit="none",
links=link_to("atlas-mail"),
)
@ -1480,7 +1495,7 @@ def build_overview():
"type": "stat",
"title": "Mail Bounces (1d)",
"datasource": PROM_DS,
"gridPos": {"h": 3, "w": 4, "x": 8, "y": 24},
"gridPos": {"h": 3, "w": 4, "x": 8, "y": 20},
"targets": [
{
"expr": 'max(postmark_outbound_bounce_rate{window="1d"})',
@ -1526,7 +1541,7 @@ def build_overview():
32,
"Mail Success Rate (1d)",
'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)',
{"h": 3, "w": 4, "x": 4, "y": 24},
{"h": 3, "w": 4, "x": 4, "y": 20},
unit="percent",
thresholds=mail_success_thresholds,
decimals=1,
@ -1538,7 +1553,7 @@ def build_overview():
33,
"Mail Limit Used (30d)",
"max(postmark_sending_limit_used_percent)",
{"h": 3, "w": 4, "x": 12, "y": 24},
{"h": 3, "w": 4, "x": 12, "y": 20},
unit="percent",
thresholds=mail_limit_thresholds,
decimals=1,
@ -1550,7 +1565,7 @@ def build_overview():
34,
"Postgres Connections Used",
POSTGRES_CONN_USED,
{"h": 3, "w": 4, "x": 16, "y": 24},
{"h": 3, "w": 4, "x": 16, "y": 20},
decimals=0,
text_mode="name_and_value",
legend="{{conn}}",
@ -1562,7 +1577,7 @@ def build_overview():
35,
"Postgres Hottest Connections",
POSTGRES_CONN_HOTTEST,
{"h": 3, "w": 4, "x": 20, "y": 24},
{"h": 3, "w": 4, "x": 20, "y": 20},
unit="none",
decimals=0,
text_mode="name_and_value",
@ -1580,7 +1595,7 @@ def build_overview():
11,
"Namespace CPU Share",
namespace_cpu_share_expr(cpu_scope),
{"h": 9, "w": 8, "x": 0, "y": 27},
{"h": 9, "w": 8, "x": 0, "y": 23},
links=namespace_scope_links("namespace_scope_cpu"),
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
)
@ -1590,7 +1605,7 @@ def build_overview():
12,
"Namespace GPU Share",
namespace_gpu_share_expr(gpu_scope),
{"h": 9, "w": 8, "x": 8, "y": 27},
{"h": 9, "w": 8, "x": 8, "y": 23},
links=namespace_scope_links("namespace_scope_gpu"),
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
)
@ -1600,7 +1615,7 @@ def build_overview():
13,
"Namespace RAM Share",
namespace_ram_share_expr(ram_scope),
{"h": 9, "w": 8, "x": 16, "y": 27},
{"h": 9, "w": 8, "x": 16, "y": 23},
links=namespace_scope_links("namespace_scope_ram"),
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
)
@ -1612,7 +1627,7 @@ def build_overview():
14,
"Worker Node CPU",
node_cpu_expr(worker_filter),
{"h": 12, "w": 12, "x": 0, "y": 43},
{"h": 12, "w": 12, "x": 0, "y": 39},
unit="percent",
legend="{{node}}",
legend_calcs=["last"],
@ -1626,7 +1641,7 @@ def build_overview():
15,
"Worker Node RAM",
node_mem_expr(worker_filter),
{"h": 12, "w": 12, "x": 12, "y": 43},
{"h": 12, "w": 12, "x": 12, "y": 39},
unit="percent",
legend="{{node}}",
legend_calcs=["last"],
@ -1641,7 +1656,7 @@ def build_overview():
16,
"Control plane CPU",
node_cpu_expr(CONTROL_ALL_REGEX),
{"h": 10, "w": 12, "x": 0, "y": 55},
{"h": 10, "w": 12, "x": 0, "y": 51},
unit="percent",
legend="{{node}}",
legend_display="table",
@ -1653,7 +1668,7 @@ def build_overview():
17,
"Control plane RAM",
node_mem_expr(CONTROL_ALL_REGEX),
{"h": 10, "w": 12, "x": 12, "y": 55},
{"h": 10, "w": 12, "x": 12, "y": 51},
unit="percent",
legend="{{node}}",
legend_display="table",
@ -1666,7 +1681,7 @@ def build_overview():
28,
"Node Pod Share",
'(sum(kube_pod_info{pod!="" , node!=""}) by (node) / clamp_min(sum(kube_pod_info{pod!="" , node!=""}), 1)) * 100',
{"h": 10, "w": 12, "x": 0, "y": 65},
{"h": 10, "w": 12, "x": 0, "y": 61},
)
)
panels.append(
@ -1674,7 +1689,7 @@ def build_overview():
29,
"Top Nodes by Pod Count",
'topk(12, sum(kube_pod_info{pod!="" , node!=""}) by (node))',
{"h": 10, "w": 12, "x": 12, "y": 65},
{"h": 10, "w": 12, "x": 12, "y": 61},
unit="none",
limit=12,
decimals=0,
@ -1696,7 +1711,7 @@ def build_overview():
18,
"Cluster Ingress Throughput",
NET_INGRESS_EXPR,
{"h": 7, "w": 8, "x": 0, "y": 36},
{"h": 7, "w": 8, "x": 0, "y": 32},
unit="Bps",
legend="Ingress (Traefik)",
legend_display="list",
@ -1709,7 +1724,7 @@ def build_overview():
19,
"Cluster Egress Throughput",
NET_EGRESS_EXPR,
{"h": 7, "w": 8, "x": 8, "y": 36},
{"h": 7, "w": 8, "x": 8, "y": 32},
unit="Bps",
legend="Egress (Traefik)",
legend_display="list",
@ -1722,7 +1737,7 @@ def build_overview():
20,
"Intra-Cluster Throughput",
NET_INTERNAL_EXPR,
{"h": 7, "w": 8, "x": 16, "y": 36},
{"h": 7, "w": 8, "x": 16, "y": 32},
unit="Bps",
legend="Internal traffic",
legend_display="list",
@ -1736,7 +1751,7 @@ def build_overview():
21,
"Root Filesystem Usage",
root_usage_expr(),
{"h": 16, "w": 12, "x": 0, "y": 75},
{"h": 16, "w": 12, "x": 0, "y": 71},
unit="percent",
legend="{{node}}",
legend_calcs=["last"],
@ -1751,7 +1766,7 @@ def build_overview():
22,
"Nodes Closest to Full Root Disks",
f"topk(12, {root_usage_expr()})",
{"h": 16, "w": 12, "x": 12, "y": 75},
{"h": 16, "w": 12, "x": 12, "y": 71},
unit="percent",
thresholds=PERCENT_THRESHOLDS,
links=link_to("atlas-storage"),
@ -2889,27 +2904,26 @@ def build_jobs_dashboard():
)
coverage_panel = stat_panel(
17,
"Platform CI Coverage (%)",
TEST_CI_COVERAGE,
"Platform Test Success Rate (30d)",
TEST_SUCCESS_RATE,
{"h": 6, "w": 4, "x": 8, "y": 11},
unit="percent",
decimals=1,
decimals=2,
instant=True,
legend="{{branch}}",
)
coverage_panel["description"] = "Internal source panel for Atlas Overview automation test rollups."
coverage_panel["description"] = "Internal rollup across Ariadne task runs and Metis build/flash outcomes."
panels.append(coverage_panel)
tests_panel = table_panel(
18,
"Platform CI Tests (Ariadne + Metis)",
TEST_CI_TESTS,
"Platform Test Activity (30d)",
PLATFORM_TEST_ACTIVITY_30D,
{"h": 6, "w": 12, "x": 12, "y": 11},
unit="none",
transformations=[{"id": "labelsToFields", "options": {}}, {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}],
instant=True,
)
tests_panel["description"] = (
"Atlas Overview test panels depend on these internal repo-tagged CI series from Ariadne and Metis."
"Atlas Overview test panels depend on this internal activity table sourced from Ariadne and Metis counters."
)
panels.append(tests_panel)

View File

@ -1125,7 +1125,7 @@
{
"id": 17,
"type": "stat",
"title": "Platform CI Coverage (%)",
"title": "Platform Test Success Rate (30d)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -1138,9 +1138,8 @@
},
"targets": [
{
"expr": "ariadne_ci_coverage_percent{repo=~\"ariadne|metis\"}",
"expr": "100 * ((sum(increase(ariadne_task_runs_total{status=\"ok\"}[30d])) or on() vector(0)) + (sum(increase(metis_builds_total{status=\"ok\"}[30d])) or on() vector(0)) + (sum(increase(metis_flashes_total{status=\"ok\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(ariadne_task_runs_total[30d])) or on() vector(0)) + (sum(increase(metis_builds_total[30d])) or on() vector(0)) + (sum(increase(metis_flashes_total[30d])) or on() vector(0))), 1)",
"refId": "A",
"legendFormat": "{{branch}}",
"instant": true
}
],
@ -1167,7 +1166,7 @@
"custom": {
"displayMode": "auto"
},
"decimals": 1
"decimals": 2
},
"overrides": []
},
@ -1184,12 +1183,12 @@
},
"textMode": "value"
},
"description": "Internal source panel for Atlas Overview automation test rollups."
"description": "Internal rollup across Ariadne task runs and Metis build/flash outcomes."
},
{
"id": 18,
"type": "table",
"title": "Platform CI Tests (Ariadne + Metis)",
"title": "Platform Test Activity (30d)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -1202,7 +1201,7 @@
},
"targets": [
{
"expr": "ariadne_ci_tests_total{repo=~\"ariadne|metis\"}",
"expr": "label_replace(sum by (status) (increase(ariadne_task_runs_total[30d])), \"source\", \"ariadne\", \"__name__\", \".*\") or label_replace(sum by (status) (increase(metis_builds_total[30d])), \"source\", \"metis-build\", \"__name__\", \".*\") or label_replace(sum by (status) (increase(metis_flashes_total[30d])), \"source\", \"metis-flash\", \"__name__\", \".*\")",
"refId": "A",
"instant": true
}
@ -1235,7 +1234,7 @@
}
}
],
"description": "Atlas Overview test panels depend on these internal repo-tagged CI series from Ariadne and Metis."
"description": "Atlas Overview test panels depend on this internal activity table sourced from Ariadne and Metis counters."
}
],
"time": {

View File

@ -1079,8 +1079,8 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"w": 8,
"h": 6,
"w": 4,
"x": 0,
"y": 11
},
@ -1277,9 +1277,9 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"w": 8,
"x": 8,
"h": 6,
"w": 4,
"x": 4,
"y": 11
},
"targets": [
@ -1307,8 +1307,8 @@
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right"
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi"
@ -1331,9 +1331,9 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"w": 8,
"x": 16,
"h": 6,
"w": 4,
"x": 8,
"y": 11
},
"targets": [
@ -1433,10 +1433,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"w": 8,
"x": 0,
"y": 16
"h": 6,
"w": 4,
"x": 12,
"y": 11
},
"targets": [
{
@ -1483,8 +1483,8 @@
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right"
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi"
@ -1507,10 +1507,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"w": 8,
"x": 8,
"y": 16
"h": 6,
"w": 4,
"x": 16,
"y": 11
},
"targets": [
{
@ -1599,10 +1599,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"w": 8,
"x": 16,
"y": 16
"h": 6,
"w": 4,
"x": 20,
"y": 11
},
"targets": [
{
@ -1635,8 +1635,8 @@
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right"
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi"
@ -1652,7 +1652,7 @@
},
{
"id": 44,
"type": "stat",
"type": "table",
"title": "One-off Job Pods >1h",
"datasource": {
"type": "prometheus",
@ -1662,67 +1662,50 @@
"h": 3,
"w": 6,
"x": 0,
"y": 21
"y": 17
},
"targets": [
{
"expr": "sum((((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"})) > bool 1) or on() vector(0)",
"expr": "(((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"})) > 1",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
}
]
},
"unit": "none",
"unit": "h",
"custom": {
"displayMode": "auto"
"filterable": true
}
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value"
"showHeader": true,
"columnFilters": false,
"cellHeight": "sm"
},
"links": [
"transformations": [
{
"title": "Open atlas-jobs dashboard",
"url": "/d/atlas-jobs",
"targetBlank": true
"id": "labelsToFields",
"options": {}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
}
}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
},
@ -1738,7 +1721,7 @@
"h": 3,
"w": 6,
"x": 6,
"y": 21
"y": 17
},
"targets": [
{
@ -1806,11 +1789,11 @@
"h": 3,
"w": 6,
"x": 12,
"y": 21
"y": 17
},
"targets": [
{
"expr": "100 * sum(max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=\"passed\"}[30d])) / clamp_min(sum(max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=~\"passed|failed|error\"}[30d])), 1)",
"expr": "100 * ((sum(increase(ariadne_task_runs_total{status=\"ok\"}[30d])) or on() vector(0)) + (sum(increase(metis_builds_total{status=\"ok\"}[30d])) or on() vector(0)) + (sum(increase(metis_flashes_total{status=\"ok\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(ariadne_task_runs_total[30d])) or on() vector(0)) + (sum(increase(metis_builds_total[30d])) or on() vector(0)) + (sum(increase(metis_flashes_total[30d])) or on() vector(0))), 1)",
"refId": "A"
}
],
@ -1883,11 +1866,11 @@
"h": 3,
"w": 6,
"x": 18,
"y": 21
"y": 17
},
"targets": [
{
"expr": "sum(max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=~\"failed|error\"}[24h]))",
"expr": "(sum(increase(ariadne_task_runs_total{status!=\"ok\"}[24h])) or on() vector(0)) + (sum(increase(metis_builds_total{status=\"error\"}[24h])) or on() vector(0)) + (sum(increase(metis_flashes_total{status=\"error\"}[24h])) or on() vector(0))",
"refId": "A",
"instant": true
}
@ -1961,7 +1944,7 @@
"h": 3,
"w": 4,
"x": 0,
"y": 24
"y": 20
},
"targets": [
{
@ -2028,7 +2011,7 @@
"h": 3,
"w": 4,
"x": 8,
"y": 24
"y": 20
},
"targets": [
{
@ -2133,7 +2116,7 @@
"h": 3,
"w": 4,
"x": 4,
"y": 24
"y": 20
},
"targets": [
{
@ -2209,7 +2192,7 @@
"h": 3,
"w": 4,
"x": 12,
"y": 24
"y": 20
},
"targets": [
{
@ -2285,7 +2268,7 @@
"h": 3,
"w": 4,
"x": 16,
"y": 24
"y": 20
},
"targets": [
{
@ -2348,7 +2331,7 @@
"h": 3,
"w": 4,
"x": 20,
"y": 24
"y": 20
},
"targets": [
{
@ -2411,7 +2394,7 @@
"h": 9,
"w": 8,
"x": 0,
"y": 27
"y": 23
},
"targets": [
{
@ -2480,7 +2463,7 @@
"h": 9,
"w": 8,
"x": 8,
"y": 27
"y": 23
},
"targets": [
{
@ -2549,7 +2532,7 @@
"h": 9,
"w": 8,
"x": 16,
"y": 27
"y": 23
},
"targets": [
{
@ -2618,7 +2601,7 @@
"h": 12,
"w": 12,
"x": 0,
"y": 43
"y": 39
},
"targets": [
{
@ -2665,7 +2648,7 @@
"h": 12,
"w": 12,
"x": 12,
"y": 43
"y": 39
},
"targets": [
{
@ -2712,7 +2695,7 @@
"h": 10,
"w": 12,
"x": 0,
"y": 55
"y": 51
},
"targets": [
{
@ -2749,7 +2732,7 @@
"h": 10,
"w": 12,
"x": 12,
"y": 55
"y": 51
},
"targets": [
{
@ -2786,7 +2769,7 @@
"h": 10,
"w": 12,
"x": 0,
"y": 65
"y": 61
},
"targets": [
{
@ -2837,7 +2820,7 @@
"h": 10,
"w": 12,
"x": 12,
"y": 65
"y": 61
},
"targets": [
{
@ -2918,7 +2901,7 @@
"h": 7,
"w": 8,
"x": 0,
"y": 36
"y": 32
},
"targets": [
{
@ -2962,7 +2945,7 @@
"h": 7,
"w": 8,
"x": 8,
"y": 36
"y": 32
},
"targets": [
{
@ -3006,7 +2989,7 @@
"h": 7,
"w": 8,
"x": 16,
"y": 36
"y": 32
},
"targets": [
{
@ -3050,7 +3033,7 @@
"h": 16,
"w": 12,
"x": 0,
"y": 75
"y": 71
},
"targets": [
{
@ -3098,7 +3081,7 @@
"h": 16,
"w": 12,
"x": 12,
"y": 75
"y": 71
},
"targets": [
{

View File

@ -1134,7 +1134,7 @@ data:
{
"id": 17,
"type": "stat",
"title": "Platform CI Coverage (%)",
"title": "Platform Test Success Rate (30d)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -1147,9 +1147,8 @@ data:
},
"targets": [
{
"expr": "ariadne_ci_coverage_percent{repo=~\"ariadne|metis\"}",
"expr": "100 * ((sum(increase(ariadne_task_runs_total{status=\"ok\"}[30d])) or on() vector(0)) + (sum(increase(metis_builds_total{status=\"ok\"}[30d])) or on() vector(0)) + (sum(increase(metis_flashes_total{status=\"ok\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(ariadne_task_runs_total[30d])) or on() vector(0)) + (sum(increase(metis_builds_total[30d])) or on() vector(0)) + (sum(increase(metis_flashes_total[30d])) or on() vector(0))), 1)",
"refId": "A",
"legendFormat": "{{branch}}",
"instant": true
}
],
@ -1176,7 +1175,7 @@ data:
"custom": {
"displayMode": "auto"
},
"decimals": 1
"decimals": 2
},
"overrides": []
},
@ -1193,12 +1192,12 @@ data:
},
"textMode": "value"
},
"description": "Internal source panel for Atlas Overview automation test rollups."
"description": "Internal rollup across Ariadne task runs and Metis build/flash outcomes."
},
{
"id": 18,
"type": "table",
"title": "Platform CI Tests (Ariadne + Metis)",
"title": "Platform Test Activity (30d)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -1211,7 +1210,7 @@ data:
},
"targets": [
{
"expr": "ariadne_ci_tests_total{repo=~\"ariadne|metis\"}",
"expr": "label_replace(sum by (status) (increase(ariadne_task_runs_total[30d])), \"source\", \"ariadne\", \"__name__\", \".*\") or label_replace(sum by (status) (increase(metis_builds_total[30d])), \"source\", \"metis-build\", \"__name__\", \".*\") or label_replace(sum by (status) (increase(metis_flashes_total[30d])), \"source\", \"metis-flash\", \"__name__\", \".*\")",
"refId": "A",
"instant": true
}
@ -1244,7 +1243,7 @@ data:
}
}
],
"description": "Atlas Overview test panels depend on these internal repo-tagged CI series from Ariadne and Metis."
"description": "Atlas Overview test panels depend on this internal activity table sourced from Ariadne and Metis counters."
}
],
"time": {

View File

@ -1088,8 +1088,8 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"w": 8,
"h": 6,
"w": 4,
"x": 0,
"y": 11
},
@ -1286,9 +1286,9 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"w": 8,
"x": 8,
"h": 6,
"w": 4,
"x": 4,
"y": 11
},
"targets": [
@ -1316,8 +1316,8 @@ data:
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right"
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi"
@ -1340,9 +1340,9 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"w": 8,
"x": 16,
"h": 6,
"w": 4,
"x": 8,
"y": 11
},
"targets": [
@ -1442,10 +1442,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"w": 8,
"x": 0,
"y": 16
"h": 6,
"w": 4,
"x": 12,
"y": 11
},
"targets": [
{
@ -1492,8 +1492,8 @@ data:
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right"
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi"
@ -1516,10 +1516,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"w": 8,
"x": 8,
"y": 16
"h": 6,
"w": 4,
"x": 16,
"y": 11
},
"targets": [
{
@ -1608,10 +1608,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"w": 8,
"x": 16,
"y": 16
"h": 6,
"w": 4,
"x": 20,
"y": 11
},
"targets": [
{
@ -1644,8 +1644,8 @@ data:
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right"
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi"
@ -1661,7 +1661,7 @@ data:
},
{
"id": 44,
"type": "stat",
"type": "table",
"title": "One-off Job Pods >1h",
"datasource": {
"type": "prometheus",
@ -1671,67 +1671,50 @@ data:
"h": 3,
"w": 6,
"x": 0,
"y": 21
"y": 17
},
"targets": [
{
"expr": "sum((((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"})) > bool 1) or on() vector(0)",
"expr": "(((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"})) > 1",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
}
]
},
"unit": "none",
"unit": "h",
"custom": {
"displayMode": "auto"
"filterable": true
}
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value"
"showHeader": true,
"columnFilters": false,
"cellHeight": "sm"
},
"links": [
"transformations": [
{
"title": "Open atlas-jobs dashboard",
"url": "/d/atlas-jobs",
"targetBlank": true
"id": "labelsToFields",
"options": {}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
}
}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
},
@ -1747,7 +1730,7 @@ data:
"h": 3,
"w": 6,
"x": 6,
"y": 21
"y": 17
},
"targets": [
{
@ -1815,11 +1798,11 @@ data:
"h": 3,
"w": 6,
"x": 12,
"y": 21
"y": 17
},
"targets": [
{
"expr": "100 * sum(max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=\"passed\"}[30d])) / clamp_min(sum(max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=~\"passed|failed|error\"}[30d])), 1)",
"expr": "100 * ((sum(increase(ariadne_task_runs_total{status=\"ok\"}[30d])) or on() vector(0)) + (sum(increase(metis_builds_total{status=\"ok\"}[30d])) or on() vector(0)) + (sum(increase(metis_flashes_total{status=\"ok\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(ariadne_task_runs_total[30d])) or on() vector(0)) + (sum(increase(metis_builds_total[30d])) or on() vector(0)) + (sum(increase(metis_flashes_total[30d])) or on() vector(0))), 1)",
"refId": "A"
}
],
@ -1892,11 +1875,11 @@ data:
"h": 3,
"w": 6,
"x": 18,
"y": 21
"y": 17
},
"targets": [
{
"expr": "sum(max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=~\"failed|error\"}[24h]))",
"expr": "(sum(increase(ariadne_task_runs_total{status!=\"ok\"}[24h])) or on() vector(0)) + (sum(increase(metis_builds_total{status=\"error\"}[24h])) or on() vector(0)) + (sum(increase(metis_flashes_total{status=\"error\"}[24h])) or on() vector(0))",
"refId": "A",
"instant": true
}
@ -1970,7 +1953,7 @@ data:
"h": 3,
"w": 4,
"x": 0,
"y": 24
"y": 20
},
"targets": [
{
@ -2037,7 +2020,7 @@ data:
"h": 3,
"w": 4,
"x": 8,
"y": 24
"y": 20
},
"targets": [
{
@ -2142,7 +2125,7 @@ data:
"h": 3,
"w": 4,
"x": 4,
"y": 24
"y": 20
},
"targets": [
{
@ -2218,7 +2201,7 @@ data:
"h": 3,
"w": 4,
"x": 12,
"y": 24
"y": 20
},
"targets": [
{
@ -2294,7 +2277,7 @@ data:
"h": 3,
"w": 4,
"x": 16,
"y": 24
"y": 20
},
"targets": [
{
@ -2357,7 +2340,7 @@ data:
"h": 3,
"w": 4,
"x": 20,
"y": 24
"y": 20
},
"targets": [
{
@ -2420,7 +2403,7 @@ data:
"h": 9,
"w": 8,
"x": 0,
"y": 27
"y": 23
},
"targets": [
{
@ -2489,7 +2472,7 @@ data:
"h": 9,
"w": 8,
"x": 8,
"y": 27
"y": 23
},
"targets": [
{
@ -2558,7 +2541,7 @@ data:
"h": 9,
"w": 8,
"x": 16,
"y": 27
"y": 23
},
"targets": [
{
@ -2627,7 +2610,7 @@ data:
"h": 12,
"w": 12,
"x": 0,
"y": 43
"y": 39
},
"targets": [
{
@ -2674,7 +2657,7 @@ data:
"h": 12,
"w": 12,
"x": 12,
"y": 43
"y": 39
},
"targets": [
{
@ -2721,7 +2704,7 @@ data:
"h": 10,
"w": 12,
"x": 0,
"y": 55
"y": 51
},
"targets": [
{
@ -2758,7 +2741,7 @@ data:
"h": 10,
"w": 12,
"x": 12,
"y": 55
"y": 51
},
"targets": [
{
@ -2795,7 +2778,7 @@ data:
"h": 10,
"w": 12,
"x": 0,
"y": 65
"y": 61
},
"targets": [
{
@ -2846,7 +2829,7 @@ data:
"h": 10,
"w": 12,
"x": 12,
"y": 65
"y": 61
},
"targets": [
{
@ -2927,7 +2910,7 @@ data:
"h": 7,
"w": 8,
"x": 0,
"y": 36
"y": 32
},
"targets": [
{
@ -2971,7 +2954,7 @@ data:
"h": 7,
"w": 8,
"x": 8,
"y": 36
"y": 32
},
"targets": [
{
@ -3015,7 +2998,7 @@ data:
"h": 7,
"w": 8,
"x": 16,
"y": 36
"y": 32
},
"targets": [
{
@ -3059,7 +3042,7 @@ data:
"h": 16,
"w": 12,
"x": 0,
"y": 75
"y": 71
},
"targets": [
{
@ -3107,7 +3090,7 @@ data:
"h": 16,
"w": 12,
"x": 12,
"y": 75
"y": 71
},
"targets": [
{