monitoring: align overview generator with restored layout

This commit is contained in:
jenkins 2026-05-12 04:19:36 -03:00
parent d1cdb4fd13
commit 6811958b52
3 changed files with 409 additions and 203 deletions

View File

@ -1242,6 +1242,7 @@ def bargauge_panel(
instant=False,
overrides=None,
data_links=None,
include_color=True,
):
"""Return a bar gauge panel with label-aware reduction."""
cleaned_expr = expr.strip()
@ -1250,6 +1251,26 @@ def bargauge_panel(
expr = f"sort_desc({expr})"
elif sort_order == "asc":
expr = f"sort({expr})"
defaults = {}
if include_color:
defaults["color"] = {"mode": "thresholds"}
defaults.update(
{
"unit": unit,
"min": 0,
"max": 100 if unit == "percent" else None,
"thresholds": thresholds
or {
"mode": "absolute",
"steps": [
{"color": "green", "value": None},
{"color": "yellow", "value": 50},
{"color": "orange", "value": 70},
{"color": "red", "value": 85},
],
},
}
)
panel = {
"id": panel_id,
"type": "bargauge",
@ -1265,22 +1286,7 @@ def bargauge_panel(
}
],
"fieldConfig": {
"defaults": {
"color": {"mode": "thresholds"},
"unit": unit,
"min": 0,
"max": 100 if unit == "percent" else None,
"thresholds": thresholds
or {
"mode": "absolute",
"steps": [
{"color": "green", "value": None},
{"color": "yellow", "value": 50},
{"color": "orange", "value": 70},
{"color": "red", "value": 85},
],
},
},
"defaults": defaults,
"overrides": [],
},
"options": {
@ -1368,6 +1374,11 @@ def link_to(uid):
]
def overview_link_to(uid):
    """Build the single-entry panel link list pointing at dashboard ``uid``.

    The link keeps the historical Overview wording ("Open <uid> dashboard"),
    targets ``/d/<uid>``, and opens in a new browser tab.
    """
    dashboard_link = {
        "title": f"Open {uid} dashboard",
        "url": f"/d/{uid}",
        "targetBlank": True,
    }
    return [dashboard_link]
# ---------------------------------------------------------------------------
# Dashboard builders
# ---------------------------------------------------------------------------
@ -1375,6 +1386,52 @@ def link_to(uid):
def build_overview():
panels = []
overview_link = overview_link_to
climate_drop_labels = "job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group"
climate_temp_series = f"max without ({climate_drop_labels}) (typhon_temperature_celsius != 0)"
climate_humidity_series = f"max without ({climate_drop_labels}) (typhon_relative_humidity_percent != 0)"
climate_pressure_series = f"max without ({climate_drop_labels}) (typhon_vpd_kpa != 0)"
overview_pvc_backup_age = (
'max by (namespace, pvc) (((time() - pvc_backup_last_success_timestamp_seconds{driver="restic"}) / 3600) '
'or on(namespace,pvc,volume,driver) ((((pvc_backup_health_reason{driver="restic",reason=~"missing|no_completed|lookup_failed|unknown_timestamp"} > 0) '
'* (pvc_backup_count{driver="restic"} > bool 0)) * 999))) or on() vector(0)'
)
def overview_metric_pair_expr(first_expr, first_name, second_expr, second_name):
    """Join two PromQL expressions into one query, tagging each with a ``metric`` label.

    Each expression is wrapped in ``label_replace`` so its result carries
    ``metric="<name>"``; the two wrapped expressions are then combined with
    ``or`` so a single target can use ``{{metric}}`` as its legend.
    """
    tagged = [
        f'label_replace({expr}, "metric", "{name}", "__name__", ".*")'
        for expr, name in ((first_expr, first_name), (second_expr, second_name))
    ]
    return " or ".join(tagged)
def overview_platform_test_success_targets():
    """Return one Prometheus target per platform test suite for the Overview panel.

    Each target computes the percentage of ``platform_quality_gate_runs_total``
    increases over the last hour whose ``status`` matches
    ``PLATFORM_TEST_SUCCESS_STATUS``. The rate is only kept when the suite had
    runs in that hour; otherwise the query falls back to ``vector(0)``.
    refIds are assigned sequentially starting at "A".
    """
    # (legend shown in the panel, regex matched against the ``suite`` label)
    suite_patterns = (
        ("ariadne", "ariadne"),
        ("metis", "metis"),
        ("ananke", "ananke"),
        ("atlasbot", "atlasbot"),
        ("lesavka", "lesavka"),
        ("pegasus", "pegasus|pegasus-health|pegasus_health"),
        ("soteria", "soteria"),
        ("titan-iac", "titan-iac|titan_iac"),
        ("bstein-home", "bstein-home|bstein_home"),
        ("arcanagon", "arcanagon"),
        ("data-prepper", "data-prepper|data_prepper"),
    )
    result = []
    for ref_offset, (suite_label, pattern) in enumerate(suite_patterns):
        all_runs = f'sum(increase(platform_quality_gate_runs_total{{suite=~"{pattern}"}}[1h]))'
        ok_runs = (
            f'sum(increase(platform_quality_gate_runs_total{{suite=~"{pattern}",'
            f'status=~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[1h]))'
        )
        # clamp_min guards the division; the "> 0" filter drops the rate when nothing ran.
        rate = f"(100 * ({ok_runs}) / clamp_min(({all_runs}), 1))"
        had_runs = f"(({all_runs}) > 0)"
        result.append(
            {
                "refId": chr(ord("A") + ref_offset),
                "expr": f"{rate} and on() {had_runs} or on() vector(0)",
                "legendFormat": suite_label,
            }
        )
    return result
age_thresholds = {
"mode": "absolute",
@ -1415,7 +1472,7 @@ def build_overview():
{"color": "red", "value": 3},
],
},
"links": link_to("atlas-pods"),
"links": overview_link("atlas-pods"),
},
{
"id": 5,
@ -1431,7 +1488,7 @@ def build_overview():
{"color": "red", "value": 3},
],
},
"links": link_to("atlas-pods"),
"links": overview_link("atlas-pods"),
},
{
"id": 27,
@ -1459,7 +1516,7 @@ def build_overview():
{"color": "red", "value": 3},
],
},
"links": link_to("atlas-pods"),
"links": overview_link("atlas-pods"),
},
{
"id": 6,
@ -1475,7 +1532,7 @@ def build_overview():
{"color": "red", "value": 3},
],
},
"links": link_to("atlas-pods"),
"links": overview_link("atlas-pods"),
},
{
"id": 1,
@ -1560,7 +1617,7 @@ def build_overview():
text_mode="name_and_value" if is_hottest_panel else "value",
legend="{{node}}" if is_hottest_panel else None,
instant=is_hottest_panel,
links=link_to("atlas-storage" if panel_id in {23, 24, 25, 26} else "atlas-nodes"),
links=overview_link("atlas-storage" if panel_id in {23, 24, 25, 26} else "atlas-nodes"),
)
)
@ -1591,189 +1648,259 @@ def build_overview():
{"color": "green", "value": 98},
],
}
status_mapping = [
{
"type": "value",
"options": {
"0": {"text": "⚡ Charging"},
"1": {"text": "🔋 Discharging"},
},
}
]
panels.append(
stat_panel(
40,
"UPS Current Load",
ups_text = {"titleSize": 14, "valueSize": 30}
for panel_id, title, draw_expr, runtime_expr, y_pos in [
(40, "Pyrphoros UPS Current", ANANKE_UPS_DRAW_WATTS_DB, ANANKE_UPS_RUNTIME_DB, 7),
(144, "Statera UPS Current", ANANKE_UPS_DRAW_WATTS_TETHYS, ANANKE_UPS_RUNTIME_TETHYS, 10),
]:
panel = stat_panel(
panel_id,
title,
None,
{"h": 6, "w": 4, "x": 0, "y": 12},
{"h": 3, "w": 6, "x": 0, "y": y_pos},
unit="none",
decimals=1,
text_mode="name_and_value",
targets=[
{"refId": "A", "expr": ANANKE_UPS_DRAW_WATTS_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Draw (W)", "instant": True},
{"refId": "B", "expr": ANANKE_UPS_RUNTIME_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Discharge", "instant": True},
{"refId": "C", "expr": ANANKE_UPS_ON_BATTERY_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Status", "instant": True},
{"refId": "D", "expr": ANANKE_UPS_DRAW_WATTS_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Draw (W)", "instant": True},
{"refId": "E", "expr": ANANKE_UPS_RUNTIME_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Discharge", "instant": True},
{"refId": "F", "expr": ANANKE_UPS_ON_BATTERY_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Status", "instant": True},
{
"expr": overview_metric_pair_expr(draw_expr, "Draw", runtime_expr, "Runtime"),
"refId": "A",
"legendFormat": "{{metric}}",
"instant": True,
}
],
field_overrides=[
{
"matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Draw (W)"},
"properties": [{"id": "unit", "value": "watt"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}],
},
{
"matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Draw (W)"},
"properties": [{"id": "unit", "value": "watt"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}],
},
{
"matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Discharge"},
"properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}],
},
{
"matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Discharge"},
"properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}],
},
{
"matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Status"},
"properties": [{"id": "mappings", "value": status_mapping}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}],
},
{
"matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Status"},
"properties": [{"id": "mappings", "value": status_mapping}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}],
},
{"matcher": {"id": "byName", "options": "Draw"}, "properties": [{"id": "unit", "value": "watt"}]},
{"matcher": {"id": "byName", "options": "Runtime"}, "properties": [{"id": "unit", "value": "s"}]},
],
orientation="horizontal",
wide_layout=True,
links=link_to("atlas-power"),
description="Per-UPS live snapshot: current draw, discharge, and charging/discharging status.",
links=overview_link("atlas-power"),
)
)
panel["options"]["text"] = ups_text
panels.append(panel)
panels.append(
timeseries_panel(
41,
"UPS History (Power Draw)",
None,
{"h": 6, "w": 4, "x": 4, "y": 12},
{"h": 6, "w": 6, "x": 6, "y": 7},
unit="watt",
targets=[
{"refId": "A", "expr": ANANKE_UPS_DRAW_WATTS_DB_SERIES, "legendFormat": ANANKE_UPS_DB_NAME},
{"refId": "B", "expr": ANANKE_UPS_DRAW_WATTS_TETHYS_SERIES, "legendFormat": ANANKE_UPS_TETHYS_NAME},
],
legend_display="list",
legend_placement="bottom",
links=link_to("atlas-power"),
legend_display="table",
legend_placement="right",
links=overview_link("atlas-power"),
)
)
panels.append(
stat_panel(
42,
"Current Climate",
None,
{"h": 6, "w": 4, "x": 8, "y": 12},
unit="none",
decimals=2,
text_mode="value",
targets=[
{"refId": "A", "expr": CLIMATE_TEMP_MAX, "legendFormat": "Tent Temp (°C)", "instant": True},
{"refId": "B", "expr": CLIMATE_PRESSURE_CURRENT, "legendFormat": "Tent VPD (kPa)", "instant": True},
{"refId": "C", "expr": CLIMATE_HUMIDITY_MAX, "legendFormat": "Tent RH (%)", "instant": True},
{"refId": "D", "expr": CLIMATE_DEWPOINT_CURRENT, "legendFormat": "Dew Point (°C)", "instant": True},
],
field_overrides=[
{"matcher": {"id": "byName", "options": "Tent Temp (°C)"}, "properties": [{"id": "unit", "value": "celsius"}]},
{"matcher": {"id": "byName", "options": "Tent VPD (kPa)"}, "properties": [{"id": "unit", "value": "suffix:kPa"}]},
{"matcher": {"id": "byName", "options": "Tent RH (%)"}, "properties": [{"id": "unit", "value": "percent"}]},
{"matcher": {"id": "byName", "options": "Dew Point (°C)"}, "properties": [{"id": "unit", "value": "celsius"}]},
],
links=link_to("atlas-power"),
description="Current tent temperature, humidity, VPD, and dew point.",
orientation="horizontal",
wide_layout=True,
)
temp_panel = stat_panel(
42,
"Current Enclosure Temperature",
None,
{"h": 3, "w": 6, "x": 0, "y": 13},
unit="none",
text_mode="name_and_value",
targets=[
{
"expr": overview_metric_pair_expr(
f"max({climate_temp_series}) or on() vector(0)",
"°C",
f"max(({climate_temp_series}) * 9 / 5 + 32) or on() vector(0)",
"°F",
),
"refId": "A",
"legendFormat": "{{metric}}",
"instant": True,
}
],
field_overrides=[
{"matcher": {"id": "byName", "options": "°C"}, "properties": [{"id": "unit", "value": "celsius"}]},
{"matcher": {"id": "byName", "options": "°F"}, "properties": [{"id": "unit", "value": "fahrenheit"}]},
],
links=overview_link("atlas-power"),
)
temp_panel["options"]["text"] = ups_text
panels.append(temp_panel)
climate_panel = stat_panel(
143,
"Current Enclosure Climate",
None,
{"h": 3, "w": 6, "x": 0, "y": 16},
unit="none",
text_mode="name_and_value",
targets=[
{
"expr": overview_metric_pair_expr(
f"max({climate_humidity_series}) or on() vector(0)",
"%RH",
f"max({climate_pressure_series}) or on() vector(0)",
"kPa",
),
"refId": "A",
"legendFormat": "{{metric}}",
"instant": True,
}
],
field_overrides=[
{"matcher": {"id": "byName", "options": "%RH"}, "properties": [{"id": "unit", "value": "suffix:%RH"}]},
{"matcher": {"id": "byName", "options": "kPa"}, "properties": [{"id": "unit", "value": "suffix:kPa"}]},
],
links=overview_link("atlas-power"),
)
climate_panel["options"]["text"] = ups_text
panels.append(climate_panel)
panels.append(
timeseries_panel(
43,
"Climate History",
"Enclosure Climate History",
None,
{"h": 6, "w": 4, "x": 12, "y": 12},
unit="celsius",
{"h": 6, "w": 6, "x": 6, "y": 13},
unit="none",
targets=[
{"refId": "A", "expr": CLIMATE_TEMP_SERIES, "legendFormat": "Temperature (°C)"},
{"refId": "B", "expr": CLIMATE_HUMIDITY_SERIES, "legendFormat": "Humidity (%)"},
{"refId": "C", "expr": CLIMATE_PRESSURE_SERIES, "legendFormat": "VPD (kPa)"},
{"refId": "D", "expr": CLIMATE_DEWPOINT_SERIES, "legendFormat": "Dew Point (°C)"},
{"refId": "A", "expr": climate_temp_series, "legendFormat": "C"},
{"refId": "B", "expr": climate_humidity_series, "legendFormat": "RH"},
{"refId": "C", "expr": climate_pressure_series, "legendFormat": "P"},
{"refId": "D", "expr": f"(min_over_time({climate_temp_series}[$__range]) - 0.08)", "legendFormat": "C bound min"},
{"refId": "E", "expr": f"(max_over_time({climate_temp_series}[$__range]) + 0.08)", "legendFormat": "C bound max"},
{"refId": "F", "expr": f"clamp_min((min_over_time({climate_humidity_series}[$__range]) - 0.35), 0)", "legendFormat": "RH bound min"},
{"refId": "G", "expr": f"clamp_max((max_over_time({climate_humidity_series}[$__range]) + 0.35), 100)", "legendFormat": "RH bound max"},
{"refId": "H", "expr": f"clamp_min((min_over_time({climate_pressure_series}[$__range]) - 0.03), 0)", "legendFormat": "P bound min"},
{"refId": "I", "expr": f"(max_over_time({climate_pressure_series}[$__range]) + 0.03)", "legendFormat": "P bound max"},
],
field_overrides=[
{
"matcher": {"id": "byName", "options": "Humidity (%)"},
"matcher": {"id": "byName", "options": "C"},
"properties": [
{"id": "unit", "value": "percent"},
{"id": "unit", "value": "suffix:°C"},
{"id": "decimals", "value": 2},
{"id": "custom.axisPlacement", "value": "left"},
{"id": "custom.axisCenteredZero", "value": False},
],
},
{
"matcher": {"id": "byName", "options": "VPD (kPa)"},
"matcher": {"id": "byRegexp", "options": "C bound .*"},
"properties": [
{"id": "unit", "value": "none"},
{"id": "custom.axisPlacement", "value": "right"},
{"id": "custom.axisLabel", "value": "kPa"},
{"id": "decimals", "value": 2},
{"id": "unit", "value": "suffix:°C"},
{"id": "custom.axisPlacement", "value": "left"},
{"id": "custom.axisCenteredZero", "value": False},
{"id": "custom.hideFrom", "value": {"legend": True, "tooltip": True, "viz": False}},
{"id": "custom.lineWidth", "value": 0},
{"id": "custom.fillOpacity", "value": 0},
{"id": "custom.showPoints", "value": "never"},
{"id": "color", "value": {"mode": "fixed", "fixedColor": "transparent"}},
],
}
},
{
"matcher": {"id": "byName", "options": "RH"},
"properties": [
{"id": "unit", "value": "suffix:%"},
{"id": "decimals", "value": 2},
{"id": "custom.axisPlacement", "value": "right"},
{"id": "custom.axisCenteredZero", "value": False},
],
},
{
"matcher": {"id": "byRegexp", "options": "RH bound .*"},
"properties": [
{"id": "unit", "value": "suffix:%"},
{"id": "custom.axisPlacement", "value": "right"},
{"id": "custom.axisCenteredZero", "value": False},
{"id": "custom.hideFrom", "value": {"legend": True, "tooltip": True, "viz": False}},
{"id": "custom.lineWidth", "value": 0},
{"id": "custom.fillOpacity", "value": 0},
{"id": "custom.showPoints", "value": "never"},
{"id": "color", "value": {"mode": "fixed", "fixedColor": "transparent"}},
],
},
{
"matcher": {"id": "byName", "options": "P"},
"properties": [
{"id": "unit", "value": "suffix:kPa"},
{"id": "custom.axisPlacement", "value": "right"},
{"id": "decimals", "value": 2},
{"id": "custom.axisCenteredZero", "value": False},
],
},
{
"matcher": {"id": "byRegexp", "options": "P bound .*"},
"properties": [
{"id": "unit", "value": "suffix:kPa"},
{"id": "custom.axisPlacement", "value": "right"},
{"id": "custom.axisCenteredZero", "value": False},
{"id": "custom.hideFrom", "value": {"legend": True, "tooltip": True, "viz": False}},
{"id": "custom.lineWidth", "value": 0},
{"id": "custom.fillOpacity", "value": 0},
{"id": "custom.showPoints", "value": "never"},
{"id": "color", "value": {"mode": "fixed", "fixedColor": "transparent"}},
],
},
],
legend_display="list",
legend_placement="bottom",
links=link_to("atlas-power"),
description="Two-axis chart: tent temperature/humidity/dew point (left axis) and VPD in kPa (right axis).",
links=overview_link("atlas-power"),
description="Temperature on left axis, humidity and pressure on right axis with dynamic bound series so small swings remain visible.",
)
)
panels.append(
stat_panel(
140,
"Fan Activity",
None,
{"h": 6, "w": 4, "x": 16, "y": 12},
unit="none",
decimals=0,
text_mode="name_and_value",
targets=[
{"refId": "A", "expr": f"round({CLIMATE_FAN_OUTLET_CURRENT})", "legendFormat": "Inside Outlet", "instant": True},
{"refId": "B", "expr": f"round({CLIMATE_FAN_INSIDE_INLET_CURRENT})", "legendFormat": "Inside Inlet", "instant": True},
{"refId": "C", "expr": f"round({CLIMATE_FAN_OUTSIDE_INLET_CURRENT})", "legendFormat": "Outside Inlet", "instant": True},
{"refId": "D", "expr": f"round({CLIMATE_FAN_INTERIOR_CURRENT})", "legendFormat": "Interior Fans", "instant": True},
panels[-1]["fieldConfig"]["defaults"]["custom"] = {
"drawStyle": "line",
"lineInterpolation": "linear",
"lineWidth": 2,
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": True,
}
# Current fan level per controller port. Each port's value is rounded, falls
# back to vector(0) when the series is absent, and is tagged with a "metric"
# label so one instant query can feed every tile via the {{metric}} legend.
# Built from a table so all four clauses share one template (the first clause
# previously used a stray `labels and "..."` short-circuit the others lacked).
fan_ports = [
    ("1", "Outlet"),
    ("2", "Inlet - In"),
    ("3", "Inlet - Out"),
    ("4", "Interior"),
]
fan_current = " or ".join(
    f'label_replace((round(max(max without ({climate_drop_labels}) '
    f'(typhon_fan_speed_level{{port="{port}"}})) or on() vector(0))), '
    f'"metric", "{name}", "__name__", ".*")'
    for port, name in fan_ports
)
fan_panel = stat_panel(
140,
"Fan Activity",
None,
{"h": 6, "w": 6, "x": 12, "y": 13},
unit="none",
decimals=0,
text_mode="name_and_value",
targets=[{"expr": fan_current, "refId": "A", "legendFormat": "{{metric}}", "instant": True}],
field_overrides=[
{"matcher": {"id": "byName", "options": "Outlet"}, "properties": [{"id": "decimals", "value": 0}]},
{"matcher": {"id": "byName", "options": "Inlet - In"}, "properties": [{"id": "decimals", "value": 0}]},
{"matcher": {"id": "byName", "options": "Inlet - Out"}, "properties": [{"id": "decimals", "value": 0}]},
{"matcher": {"id": "byName", "options": "Interior"}, "properties": [{"id": "decimals", "value": 0}]},
],
thresholds={
"mode": "absolute",
"steps": [
{"color": "green", "value": None},
{"color": "yellow", "value": 7},
{"color": "red", "value": 9},
],
thresholds={
"mode": "absolute",
"steps": [
{"color": "green", "value": None},
{"color": "yellow", "value": 7},
{"color": "red", "value": 9},
],
},
orientation="horizontal",
wide_layout=True,
links=link_to("atlas-power"),
)
},
orientation="vertical",
wide_layout=False,
links=overview_link("atlas-power"),
)
fan_panel["options"]["text"] = {"valueSize": 26}
panels.append(fan_panel)
panels.append(
timeseries_panel(
141,
"Fan History (0-10)",
None,
{"h": 6, "w": 4, "x": 20, "y": 12},
{"h": 6, "w": 6, "x": 18, "y": 13},
unit="none",
max_value=10,
targets=[
{"refId": "A", "expr": CLIMATE_FAN_OUTLET_SERIES, "legendFormat": "Inside Outlet"},
{"refId": "B", "expr": CLIMATE_FAN_INSIDE_INLET_SERIES, "legendFormat": "Inside Inlet"},
{"refId": "C", "expr": CLIMATE_FAN_OUTSIDE_INLET_SERIES, "legendFormat": "Outside Inlet"},
{"refId": "D", "expr": CLIMATE_FAN_INTERIOR_SERIES, "legendFormat": "Interior Fans"},
{"refId": "A", "expr": f'max without ({climate_drop_labels}) (typhon_fan_speed_level{{port="1"}})', "legendFormat": "Outlet"},
{"refId": "B", "expr": f'max without ({climate_drop_labels}) (typhon_fan_speed_level{{port="2"}})', "legendFormat": "Inlet - Inside"},
{"refId": "C", "expr": f'max without ({climate_drop_labels}) (typhon_fan_speed_level{{port="3"}})', "legendFormat": "Inlet - Outside"},
{"refId": "D", "expr": f'max without ({climate_drop_labels}) (typhon_fan_speed_level{{port="4"}})', "legendFormat": "Interior"},
],
legend_display="list",
legend_placement="bottom",
links=link_to("atlas-power"),
legend_display="table",
legend_placement="right",
links=overview_link("atlas-power"),
)
)
@ -1782,14 +1909,15 @@ def build_overview():
44,
"One-off Job Pods (age hours)",
ONEOFF_JOB_POD_AGE_HOURS,
{"h": 5, "w": 6, "x": 0, "y": 7},
{"h": 5, "w": 8, "x": 0, "y": 32},
unit="h",
instant=True,
legend="{{namespace}}/{{pod}}",
thresholds=age_thresholds,
limit=12,
decimals=2,
links=link_to("atlas-jobs"),
links=overview_link("atlas-jobs"),
include_color=False,
)
)
panels.append(
@ -1798,10 +1926,10 @@ def build_overview():
"type": "timeseries",
"title": "Ariadne Attempts / Failures",
"datasource": PROM_DS,
"gridPos": {"h": 5, "w": 6, "x": 6, "y": 7},
"gridPos": {"h": 6, "w": 6, "x": 12, "y": 7},
"targets": [
{"expr": ARIADNE_TASK_ATTEMPTS_SERIES, "refId": "A", "legendFormat": "Attempts"},
{"expr": ARIADNE_TASK_FAILURES_SERIES, "refId": "B", "legendFormat": "Failures"},
{"expr": f"{ARIADNE_TASK_ATTEMPTS_SERIES} or on() vector(0)", "refId": "A", "legendFormat": "Attempts"},
{"expr": f"{ARIADNE_TASK_FAILURES_SERIES} or on() vector(0)", "refId": "B", "legendFormat": "Failures"},
],
"fieldConfig": {
"defaults": {"unit": "none"},
@ -1824,20 +1952,20 @@ def build_overview():
"legend": {"displayMode": "table", "placement": "right"},
"tooltip": {"mode": "multi"},
},
"links": link_to("atlas-jobs"),
"links": overview_link("atlas-jobs"),
}
)
test_success = timeseries_panel(
46,
"Platform Test Success Rate",
None,
{"h": 5, "w": 6, "x": 12, "y": 7},
{"h": 6, "w": 6, "x": 18, "y": 7},
unit="percent",
targets=PLATFORM_TEST_SUCCESS_RATE_SUITE_TARGETS,
targets=overview_platform_test_success_targets(),
legend_display="table",
legend_placement="right",
legend_calcs=["lastNotNull"],
links=link_to("atlas-jobs"),
links=overview_link("atlas-testing"),
)
test_success["fieldConfig"]["defaults"]["min"] = 0
test_success["fieldConfig"]["defaults"]["max"] = 100
@ -1855,12 +1983,94 @@ def build_overview():
"Per-run interval pass points (0-100) for each software suite over the last 7 days. Points are connected to show trend; missing-run intervals are ignored."
)
panels.append(test_success)
for panel_id, title, metric, x_pos, description in [
(
142,
"Jenkins Last Success (h, newest first)",
"ariadne_jenkins_build_weather_job_last_success_timestamp_seconds",
8,
"Top 6 most recent Jenkins successes by age (newest first). Green means last run succeeded; red means last run did not succeed. Use Atlas Jobs for the full list.",
),
(
243,
"Jenkins Last Failure (h, newest first)",
"ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds",
12,
"Top 6 most recent Jenkins failures by age (newest first). Green means last run succeeded; red means last run did not succeed. Use Atlas Jobs for the full list.",
),
]:
base_expr = f"min by (exported_job,job_url,weather_icon) ((time() - {metric}) / 3600)"
topk_expr = f"sort(bottomk(6, {base_expr}))"
success_expr = (
f'label_replace(({topk_expr}) and on(exported_job,job_url,weather_icon) '
'(max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 1), '
'"run_state", "ok", "exported_job", ".*")'
)
failure_expr = (
f'label_replace(({topk_expr}) and on(exported_job,job_url,weather_icon) '
'(max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) != 1), '
'"run_state", "bad", "exported_job", ".*")'
)
panels.append(
{
"id": panel_id,
"type": "stat",
"title": title,
"datasource": PROM_DS,
"gridPos": {"h": 5, "w": 4, "x": x_pos, "y": 32},
"targets": [
{
"refId": "A",
"expr": f"sort(({success_expr}) or ({failure_expr}))",
"instant": True,
}
],
"fieldConfig": {
"defaults": {
"unit": "h",
"decimals": 1,
"min": 0,
"displayName": "${__field.labels.weather_icon} ${__field.labels.exported_job}",
"links": [
{
"title": "Open Jenkins job",
"url": "https://ci.bstein.dev/job/${__field.labels.exported_job}/",
"targetBlank": True,
}
],
},
"overrides": [
{
"matcher": {"id": "byRegexp", "options": '.*run_state="ok".*'},
"properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "green"}}],
},
{
"matcher": {"id": "byRegexp", "options": '.*run_state="bad".*'},
"properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}],
},
],
},
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "left",
"orientation": "horizontal",
"wideLayout": True,
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
"textMode": "name_and_value",
"text": {"titleSize": 11, "valueSize": 11},
},
"transformations": [{"id": "sortBy", "options": {"fields": ["Value"], "order": "asc"}}],
"links": overview_link("atlas-jobs"),
"description": description,
}
)
panels.append(
bargauge_panel(
47,
"PVC Backup Health / Age",
PVC_BACKUP_AGE_HOURS_BY_PVC,
{"h": 5, "w": 6, "x": 18, "y": 7},
overview_pvc_backup_age,
{"h": 5, "w": 8, "x": 16, "y": 32},
unit="h",
instant=True,
legend="{{namespace}}/{{pvc}}",
@ -1874,11 +2084,12 @@ def build_overview():
{"color": "red", "value": 50},
],
},
include_color=False,
)
)
panels[-1]["links"] = link_to("atlas-storage")
panels[-1]["links"] = overview_link("atlas-storage")
panels[-1]["description"] = (
"Oldest successful backup age in hours by PVC with nightly cadence thresholds (green <=20h, yellow <40h, orange <50h, red >=50h). PVCs with missing or unhealthy backup state are forced to 999h so critical bars stay visible."
"Backup age in hours computed from last-success timestamps for restic-managed PVCs (nightly target: <=20h green, <40h yellow, <50h orange, >=50h red). PVCs that have backup history but currently no successful backup (missing/no_completed/error) are pinned to 999h for visibility."
)
panels.append(
@ -1886,9 +2097,9 @@ def build_overview():
30,
"Mail Sent (1d)",
'max(postmark_outbound_sent{window="1d"})',
{"h": 2, "w": 4, "x": 0, "y": 18},
{"h": 2, "w": 4, "x": 0, "y": 19},
unit="none",
links=link_to("atlas-mail"),
links=overview_link("atlas-mail"),
)
)
panels.append(
@ -1897,7 +2108,7 @@ def build_overview():
"type": "stat",
"title": "Mail Bounces (1d)",
"datasource": PROM_DS,
"gridPos": {"h": 2, "w": 4, "x": 8, "y": 18},
"gridPos": {"h": 2, "w": 4, "x": 8, "y": 19},
"targets": [
{
"expr": 'max(postmark_outbound_bounce_rate{window="1d"})',
@ -1935,7 +2146,7 @@ def build_overview():
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
"textMode": "name_and_value",
},
"links": link_to("atlas-mail"),
"links": overview_link("atlas-mail"),
}
)
panels.append(
@ -1943,11 +2154,11 @@ def build_overview():
32,
"Mail Success Rate (1d)",
'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)',
{"h": 2, "w": 4, "x": 4, "y": 18},
{"h": 2, "w": 4, "x": 4, "y": 19},
unit="percent",
thresholds=mail_success_thresholds,
decimals=1,
links=link_to("atlas-mail"),
links=overview_link("atlas-mail"),
)
)
panels.append(
@ -1955,11 +2166,11 @@ def build_overview():
33,
"Mail Limit Used (30d)",
"max(postmark_sending_limit_used_percent)",
{"h": 2, "w": 4, "x": 12, "y": 18},
{"h": 2, "w": 4, "x": 12, "y": 19},
unit="percent",
thresholds=mail_limit_thresholds,
decimals=1,
links=link_to("atlas-mail"),
links=overview_link("atlas-mail"),
)
)
panels.append(
@ -1967,7 +2178,7 @@ def build_overview():
34,
"Postgres Connections Used",
POSTGRES_CONN_USED,
{"h": 2, "w": 4, "x": 16, "y": 18},
{"h": 2, "w": 4, "x": 16, "y": 19},
decimals=0,
text_mode="name_and_value",
legend="{{conn}}",
@ -1979,7 +2190,7 @@ def build_overview():
35,
"Postgres Hottest Connections",
POSTGRES_CONN_HOTTEST,
{"h": 2, "w": 4, "x": 20, "y": 18},
{"h": 2, "w": 4, "x": 20, "y": 19},
unit="none",
decimals=0,
text_mode="name_and_value",
@ -2029,13 +2240,13 @@ def build_overview():
14,
"Worker Node CPU",
node_cpu_expr(worker_filter),
{"h": 12, "w": 12, "x": 0, "y": 39},
{"h": 12, "w": 12, "x": 0, "y": 44},
unit="percent",
legend="{{node}}",
legend_calcs=["last"],
legend_display="table",
legend_placement="right",
links=link_to("atlas-nodes"),
links=overview_link("atlas-nodes"),
)
)
panels.append(
@ -2043,13 +2254,13 @@ def build_overview():
15,
"Worker Node RAM",
node_mem_expr(worker_filter),
{"h": 12, "w": 12, "x": 12, "y": 39},
{"h": 12, "w": 12, "x": 12, "y": 44},
unit="percent",
legend="{{node}}",
legend_calcs=["last"],
legend_display="table",
legend_placement="right",
links=link_to("atlas-nodes"),
links=overview_link("atlas-nodes"),
)
)
@ -2058,7 +2269,7 @@ def build_overview():
16,
"Control plane CPU",
node_cpu_expr(CONTROL_ALL_REGEX),
{"h": 10, "w": 12, "x": 0, "y": 51},
{"h": 10, "w": 12, "x": 0, "y": 56},
unit="percent",
legend="{{node}}",
legend_display="table",
@ -2070,7 +2281,7 @@ def build_overview():
17,
"Control plane RAM",
node_mem_expr(CONTROL_ALL_REGEX),
{"h": 10, "w": 12, "x": 12, "y": 51},
{"h": 10, "w": 12, "x": 12, "y": 56},
unit="percent",
legend="{{node}}",
legend_display="table",
@ -2083,7 +2294,7 @@ def build_overview():
28,
"Node Pod Share",
'(sum(kube_pod_info{pod!="" , node!=""}) by (node) / clamp_min(sum(kube_pod_info{pod!="" , node!=""}), 1)) * 100',
{"h": 10, "w": 12, "x": 0, "y": 61},
{"h": 10, "w": 12, "x": 0, "y": 66},
)
)
panels.append(
@ -2091,7 +2302,7 @@ def build_overview():
29,
"Top Nodes by Pod Count",
'topk(12, sum(kube_pod_info{pod!="" , node!=""}) by (node))',
{"h": 10, "w": 12, "x": 12, "y": 61},
{"h": 10, "w": 12, "x": 12, "y": 66},
unit="none",
limit=12,
decimals=0,
@ -2105,6 +2316,7 @@ def build_overview():
],
},
instant=True,
include_color=False,
)
)
@ -2113,12 +2325,12 @@ def build_overview():
18,
"Cluster Ingress Throughput",
NET_INGRESS_EXPR,
{"h": 7, "w": 8, "x": 0, "y": 32},
{"h": 7, "w": 8, "x": 0, "y": 37},
unit="Bps",
legend="Ingress (Traefik)",
legend_display="list",
legend_placement="bottom",
links=link_to("atlas-network"),
links=overview_link("atlas-network"),
)
)
panels.append(
@ -2126,12 +2338,12 @@ def build_overview():
19,
"Cluster Egress Throughput",
NET_EGRESS_EXPR,
{"h": 7, "w": 8, "x": 8, "y": 32},
{"h": 7, "w": 8, "x": 8, "y": 37},
unit="Bps",
legend="Egress (Traefik)",
legend_display="list",
legend_placement="bottom",
links=link_to("atlas-network"),
links=overview_link("atlas-network"),
)
)
panels.append(
@ -2139,12 +2351,12 @@ def build_overview():
20,
"Intra-Cluster Throughput",
NET_INTERNAL_EXPR,
{"h": 7, "w": 8, "x": 16, "y": 32},
{"h": 7, "w": 8, "x": 16, "y": 37},
unit="Bps",
legend="Internal traffic",
legend_display="list",
legend_placement="bottom",
links=link_to("atlas-network"),
links=overview_link("atlas-network"),
)
)
@ -2153,14 +2365,14 @@ def build_overview():
21,
"Root Filesystem Usage",
root_usage_expr(),
{"h": 16, "w": 12, "x": 0, "y": 71},
{"h": 16, "w": 12, "x": 0, "y": 76},
unit="percent",
legend="{{node}}",
legend_calcs=["last"],
legend_display="table",
legend_placement="right",
time_from="30d",
links=link_to("atlas-storage"),
links=overview_link("atlas-storage"),
)
)
panels.append(
@ -2168,14 +2380,14 @@ def build_overview():
22,
"Nodes Closest to Full Astraios Disks",
astraios_usage_expr(),
{"h": 16, "w": 12, "x": 12, "y": 71},
{"h": 16, "w": 12, "x": 12, "y": 76},
unit="percent",
legend="{{node}}",
legend_calcs=["last"],
legend_display="table",
legend_placement="right",
time_from="1w",
links=link_to("atlas-storage"),
links=overview_link("atlas-storage"),
)
)
return {
@ -2197,13 +2409,7 @@ def build_overview():
},
"time": {"from": "now-1h", "to": "now"},
"refresh": "1m",
"links": [
{
"title": "Atlas Testing (Internal)",
"url": "/d/atlas-jobs",
"targetBlank": False,
}
],
"links": [],
}

View File

@ -479,7 +479,7 @@
"overrides": []
},
"options": {
"displayMode": "basic",
"displayMode": "gradient",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [

View File

@ -488,7 +488,7 @@ data:
"overrides": []
},
"options": {
"displayMode": "basic",
"displayMode": "gradient",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [