From e0b124ca4ef573929127c15b6f56612cb4e20181 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 8 Apr 2026 23:33:17 -0300 Subject: [PATCH] monitoring: switch power telemetry to ananke metrics --- scripts/dashboards_render_atlas.py | 240 ++++++++++-------- .../monitoring/dashboards/atlas-jobs.json | 38 +++ .../monitoring/dashboards/atlas-overview.json | 74 ++---- .../monitoring/dashboards/atlas-power.json | 18 +- .../monitoring/grafana-dashboard-jobs.yaml | 38 +++ .../grafana-dashboard-overview.yaml | 74 ++---- .../monitoring/grafana-dashboard-power.yaml | 18 +- services/monitoring/helmrelease.yaml | 4 +- 8 files changed, 274 insertions(+), 230 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index f75857d1..dd02d950 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -442,70 +442,83 @@ PLATFORM_TEST_ACTIVITY_30D = ( 'or label_replace(sum by (status) (increase(metis_builds_total[30d])), "source", "metis-build", "__name__", ".*") ' 'or label_replace(sum by (status) (increase(metis_flashes_total[30d])), "source", "metis-flash", "__name__", ".*")' ) -HECATE_SELECTOR = 'job="hecate-power"' -HECATE_UPS_DB_NAME = "Pyrphoros" -HECATE_UPS_DB_NODE = "titan-db" -HECATE_UPS_TETHYS_NAME = "Statera" -HECATE_UPS_TETHYS_NODE = "titan-24" -HECATE_UPS_ON_BATTERY = f"sum(hecate_ups_on_battery{{{HECATE_SELECTOR}}}) or on() vector(0)" -HECATE_UPS_LOW_BATTERY = f"sum(hecate_ups_low_battery{{{HECATE_SELECTOR}}}) or on() vector(0)" -HECATE_UPS_RUNTIME_MIN = f"min(hecate_ups_runtime_seconds{{{HECATE_SELECTOR}}}) or on() vector(0)" -HECATE_UPS_RUNTIME_HEADROOM_PERCENT = ( - f"100 * min(hecate_ups_runtime_seconds{{{HECATE_SELECTOR}}}) / " - f"clamp_min(max(hecate_ups_threshold_seconds{{{HECATE_SELECTOR}}}), 1)" +PLATFORM_TEST_SUCCESS_RATE_BY_SUITE_SERIES = ( + 'label_replace(100 * (sum(increase(ariadne_task_runs_total{status="ok"}[$__interval])) or on() vector(0)) ' + '/ clamp_min((sum(increase(ariadne_task_runs_total[$__interval])) or on() vector(0)), 1), ' + '"suite", "ariadne", "__name__", ".*") ' + 'or label_replace(100 * (sum(increase(metis_builds_total{status="ok"}[$__interval])) or on() vector(0)) ' + '/ clamp_min((sum(increase(metis_builds_total[$__interval])) or on() vector(0)), 1), ' + '"suite", "metis-build", "__name__", ".*") ' + 'or label_replace(100 * (sum(increase(metis_flashes_total{status="ok"}[$__interval])) or on() vector(0)) ' + '/ clamp_min((sum(increase(metis_flashes_total[$__interval])) or on() vector(0)), 1), ' + '"suite", "metis-flash", "__name__", ".*")' ) -HECATE_UPS_TRIGGER_COUNT_1D = f"increase(hecate_shutdown_triggers_total{{{HECATE_SELECTOR}}}[1d]) or on() vector(0)" -HECATE_UPS_RUNTIME_DB = ( - f'max(hecate_ups_runtime_seconds{{{HECATE_SELECTOR},instance="titan-db"}}) or on() vector(0)' +ANANKE_SELECTOR = 'job="ananke-power"' +ANANKE_UPS_DB_NAME = "Pyrphoros" +ANANKE_UPS_DB_NODE = "titan-db" +ANANKE_UPS_TETHYS_NAME = "Statera" +ANANKE_UPS_TETHYS_NODE = "titan-24" +ANANKE_UPS_DB_SELECTOR = f'{ANANKE_SELECTOR},source="{ANANKE_UPS_DB_NAME}"' +ANANKE_UPS_TETHYS_SELECTOR = f'{ANANKE_SELECTOR},source="{ANANKE_UPS_TETHYS_NAME}"' +ANANKE_UPS_ON_BATTERY = f"sum(ananke_ups_on_battery{{{ANANKE_SELECTOR}}}) or on() vector(0)" +ANANKE_UPS_LOW_BATTERY = f"sum(ananke_ups_low_battery{{{ANANKE_SELECTOR}}}) or on() vector(0)" +ANANKE_UPS_RUNTIME_MIN = f"min(ananke_ups_runtime_seconds{{{ANANKE_SELECTOR}}}) or on() vector(0)" +ANANKE_UPS_RUNTIME_HEADROOM_PERCENT = ( + f"100 * min(ananke_ups_runtime_seconds{{{ANANKE_SELECTOR}}}) / " + f"clamp_min(max(ananke_ups_threshold_seconds{{{ANANKE_SELECTOR}}}), 1)" ) -HECATE_UPS_RUNTIME_TETHYS = ( - f'max(hecate_ups_runtime_seconds{{{HECATE_SELECTOR},instance="titan-24"}}) or on() vector(0)' +ANANKE_UPS_TRIGGER_COUNT_1D = f"increase(ananke_shutdown_triggers_total{{{ANANKE_SELECTOR}}}[1d]) or on() vector(0)" +ANANKE_UPS_RUNTIME_DB = ( + f'max(ananke_ups_runtime_seconds{{{ANANKE_UPS_DB_SELECTOR}}}) or on() vector(0)' ) -HECATE_UPS_ON_BATTERY_DB = ( - f'max(hecate_ups_on_battery{{{HECATE_SELECTOR},instance="titan-db"}}) or on() vector(0)' +ANANKE_UPS_RUNTIME_TETHYS = ( + f'max(ananke_ups_runtime_seconds{{{ANANKE_UPS_TETHYS_SELECTOR}}}) or on() vector(0)' ) -HECATE_UPS_ON_BATTERY_TETHYS = ( - f'max(hecate_ups_on_battery{{{HECATE_SELECTOR},instance="titan-24"}}) or on() vector(0)' +ANANKE_UPS_ON_BATTERY_DB = ( + f'max(ananke_ups_on_battery{{{ANANKE_UPS_DB_SELECTOR}}}) or on() vector(0)' ) -HECATE_UPS_BATTERY_CHARGE_DB = ( - f'max(hecate_ups_battery_charge_percent{{{HECATE_SELECTOR},instance="titan-db"}}) or on() vector(0)' +ANANKE_UPS_ON_BATTERY_TETHYS = ( + f'max(ananke_ups_on_battery{{{ANANKE_UPS_TETHYS_SELECTOR}}}) or on() vector(0)' ) -HECATE_UPS_BATTERY_CHARGE_TETHYS = ( - f'max(hecate_ups_battery_charge_percent{{{HECATE_SELECTOR},instance="titan-24"}}) or on() vector(0)' +ANANKE_UPS_BATTERY_CHARGE_DB = ( + f'max(ananke_ups_battery_charge_percent{{{ANANKE_UPS_DB_SELECTOR}}}) or on() vector(0)' ) -HECATE_UPS_LOAD_DB = ( - f'max(hecate_ups_load_percent{{{HECATE_SELECTOR},instance="titan-db"}}) or on() vector(0)' +ANANKE_UPS_BATTERY_CHARGE_TETHYS = ( + f'max(ananke_ups_battery_charge_percent{{{ANANKE_UPS_TETHYS_SELECTOR}}}) or on() vector(0)' ) -HECATE_UPS_LOAD_TETHYS = ( - f'max(hecate_ups_load_percent{{{HECATE_SELECTOR},instance="titan-24"}}) or on() vector(0)' +ANANKE_UPS_LOAD_DB = ( + f'max(ananke_ups_load_percent{{{ANANKE_UPS_DB_SELECTOR}}}) or on() vector(0)' ) -HECATE_UPS_DRAW_WATTS_DB = ( - f'max((hecate_ups_load_percent{{{HECATE_SELECTOR},instance="titan-db"}} ' - f'* hecate_ups_power_nominal_watts{{{HECATE_SELECTOR},instance="titan-db"}}) / 100) or on() vector(0)' +ANANKE_UPS_LOAD_TETHYS = ( + f'max(ananke_ups_load_percent{{{ANANKE_UPS_TETHYS_SELECTOR}}}) or on() vector(0)' ) -HECATE_UPS_DRAW_WATTS_TETHYS = ( - f'max((hecate_ups_load_percent{{{HECATE_SELECTOR},instance="titan-24"}} ' - f'* hecate_ups_power_nominal_watts{{{HECATE_SELECTOR},instance="titan-24"}}) / 100) or on() vector(0)' +ANANKE_UPS_DRAW_WATTS_DB = ( + f'max((ananke_ups_load_percent{{{ANANKE_UPS_DB_SELECTOR}}} ' + f'* ananke_ups_power_nominal_watts{{{ANANKE_UPS_DB_SELECTOR}}}) / 100) or on() vector(0)' ) -HECATE_UPS_DRAW_WATTS_TOTAL = ( - f'sum((hecate_ups_load_percent{{{HECATE_SELECTOR}}} * hecate_ups_power_nominal_watts{{{HECATE_SELECTOR}}}) / 100) ' +ANANKE_UPS_DRAW_WATTS_TETHYS = ( + f'max((ananke_ups_load_percent{{{ANANKE_UPS_TETHYS_SELECTOR}}} ' + f'* ananke_ups_power_nominal_watts{{{ANANKE_UPS_TETHYS_SELECTOR}}}) / 100) or on() vector(0)' +) +ANANKE_UPS_DRAW_WATTS_TOTAL = ( + f'sum((ananke_ups_load_percent{{{ANANKE_SELECTOR}}} * ananke_ups_power_nominal_watts{{{ANANKE_SELECTOR}}}) / 100) ' "or on() vector(0)" ) -HECATE_UPS_DRAW_WATTS_DB_SERIES = ( - f'((hecate_ups_load_percent{{{HECATE_SELECTOR},instance="titan-db"}} ' - f'* hecate_ups_power_nominal_watts{{{HECATE_SELECTOR},instance="titan-db"}}) / 100)' +ANANKE_UPS_DRAW_WATTS_DB_SERIES = ( + f'((ananke_ups_load_percent{{{ANANKE_UPS_DB_SELECTOR}}} ' + f'* ananke_ups_power_nominal_watts{{{ANANKE_UPS_DB_SELECTOR}}}) / 100)' ) -HECATE_UPS_DRAW_WATTS_TETHYS_SERIES = ( - f'((hecate_ups_load_percent{{{HECATE_SELECTOR},instance="titan-24"}} ' - f'* hecate_ups_power_nominal_watts{{{HECATE_SELECTOR},instance="titan-24"}}) / 100)' +ANANKE_UPS_DRAW_WATTS_TETHYS_SERIES = ( + f'((ananke_ups_load_percent{{{ANANKE_UPS_TETHYS_SELECTOR}}} ' + f'* ananke_ups_power_nominal_watts{{{ANANKE_UPS_TETHYS_SELECTOR}}}) / 100)' ) -HECATE_UPS_DRAW_WATTS_TOTAL_SERIES = ( - f'sum((hecate_ups_load_percent{{{HECATE_SELECTOR}}} * hecate_ups_power_nominal_watts{{{HECATE_SELECTOR}}}) / 100)' +ANANKE_UPS_DRAW_WATTS_TOTAL_SERIES = ( + f'sum((ananke_ups_load_percent{{{ANANKE_SELECTOR}}} * ananke_ups_power_nominal_watts{{{ANANKE_SELECTOR}}}) / 100)' ) -HECATE_UPS_RUNTIME_BY_SOURCE = f"hecate_ups_runtime_seconds{{{HECATE_SELECTOR}}}" -HECATE_UPS_LOAD_BY_SOURCE = f"hecate_ups_load_percent{{{HECATE_SELECTOR}}}" -HECATE_UPS_CHARGE_BY_SOURCE = f"hecate_ups_battery_charge_percent{{{HECATE_SELECTOR}}}" -HECATE_UPS_TRIGGER_BY_SOURCE = f"hecate_ups_trigger_active{{{HECATE_SELECTOR}}}" +ANANKE_UPS_RUNTIME_BY_SOURCE = f"ananke_ups_runtime_seconds{{{ANANKE_SELECTOR}}}" +ANANKE_UPS_LOAD_BY_SOURCE = f"ananke_ups_load_percent{{{ANANKE_SELECTOR}}}" +ANANKE_UPS_CHARGE_BY_SOURCE = f"ananke_ups_battery_charge_percent{{{ANANKE_SELECTOR}}}" +ANANKE_UPS_TRIGGER_BY_SOURCE = f"ananke_ups_trigger_active{{{ANANKE_SELECTOR}}}" CLIMATE_SENSOR_COUNT = "count(atlas_climate_temperature_celsius) or on() vector(0)" CLIMATE_TEMP_MAX = "max(atlas_climate_tent_temperature_celsius) or max(atlas_climate_temperature_celsius) or on() vector(0)" CLIMATE_PRESSURE_CURRENT = "max(atlas_climate_tent_pressure_kpa) or max(atlas_climate_pressure_kpa) or on() vector(0)" @@ -1293,37 +1306,37 @@ def build_overview(): decimals=1, text_mode="name_and_value", targets=[ - {"refId": "A", "expr": HECATE_UPS_DRAW_WATTS_DB, "legendFormat": f"{HECATE_UPS_DB_NAME} Draw (W)", "instant": True}, - {"refId": "B", "expr": HECATE_UPS_RUNTIME_DB, "legendFormat": f"{HECATE_UPS_DB_NAME} Discharge ETA", "instant": True}, - {"refId": "C", "expr": HECATE_UPS_ON_BATTERY_DB, "legendFormat": f"{HECATE_UPS_DB_NAME} Status", "instant": True}, - {"refId": "D", "expr": HECATE_UPS_DRAW_WATTS_TETHYS, "legendFormat": f"{HECATE_UPS_TETHYS_NAME} Draw (W)", "instant": True}, - {"refId": "E", "expr": HECATE_UPS_RUNTIME_TETHYS, "legendFormat": f"{HECATE_UPS_TETHYS_NAME} Discharge ETA", "instant": True}, - {"refId": "F", "expr": HECATE_UPS_ON_BATTERY_TETHYS, "legendFormat": f"{HECATE_UPS_TETHYS_NAME} Status", "instant": True}, + {"refId": "A", "expr": ANANKE_UPS_DRAW_WATTS_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Draw (W)", "instant": True}, + {"refId": "B", "expr": ANANKE_UPS_RUNTIME_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Discharge ETA", "instant": True}, + {"refId": "C", "expr": ANANKE_UPS_ON_BATTERY_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Status", "instant": True}, + {"refId": "D", "expr": ANANKE_UPS_DRAW_WATTS_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Draw (W)", "instant": True}, + {"refId": "E", "expr": ANANKE_UPS_RUNTIME_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Discharge ETA", "instant": True}, + {"refId": "F", "expr": ANANKE_UPS_ON_BATTERY_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Status", "instant": True}, ], field_overrides=[ { - "matcher": {"id": "byName", "options": f"{HECATE_UPS_DB_NAME} Draw (W)"}, - "properties": [{"id": "unit", "value": "watt"}, {"id": "description", "value": f"Attached node: {HECATE_UPS_DB_NODE}"}], + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Draw (W)"}, + "properties": [{"id": "unit", "value": "watt"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}], }, { - "matcher": {"id": "byName", "options": f"{HECATE_UPS_TETHYS_NAME} Draw (W)"}, - "properties": [{"id": "unit", "value": "watt"}, {"id": "description", "value": f"Attached node: {HECATE_UPS_TETHYS_NODE}"}], + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Draw (W)"}, + "properties": [{"id": "unit", "value": "watt"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}], }, { - "matcher": {"id": "byName", "options": f"{HECATE_UPS_DB_NAME} Discharge ETA"}, - "properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {HECATE_UPS_DB_NODE}"}], + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Discharge ETA"}, + "properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}], }, { - "matcher": {"id": "byName", "options": f"{HECATE_UPS_TETHYS_NAME} Discharge ETA"}, - "properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {HECATE_UPS_TETHYS_NODE}"}], + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Discharge ETA"}, + "properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}], }, { - "matcher": {"id": "byName", "options": f"{HECATE_UPS_DB_NAME} Status"}, - "properties": [{"id": "mappings", "value": status_mapping}, {"id": "description", "value": f"Attached node: {HECATE_UPS_DB_NODE}"}], + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Status"}, + "properties": [{"id": "mappings", "value": status_mapping}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}], }, { - "matcher": {"id": "byName", "options": f"{HECATE_UPS_TETHYS_NAME} Status"}, - "properties": [{"id": "mappings", "value": status_mapping}, {"id": "description", "value": f"Attached node: {HECATE_UPS_TETHYS_NODE}"}], + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Status"}, + "properties": [{"id": "mappings", "value": status_mapping}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}], }, ], links=link_to("atlas-power"), @@ -1338,9 +1351,9 @@ def build_overview(): {"h": 6, "w": 4, "x": 4, "y": 14}, unit="watt", targets=[ - {"refId": "A", "expr": HECATE_UPS_DRAW_WATTS_DB_SERIES, "legendFormat": HECATE_UPS_DB_NAME}, - {"refId": "B", "expr": HECATE_UPS_DRAW_WATTS_TETHYS_SERIES, "legendFormat": HECATE_UPS_TETHYS_NAME}, - {"refId": "C", "expr": HECATE_UPS_DRAW_WATTS_TOTAL_SERIES, "legendFormat": "combined"}, + {"refId": "A", "expr": ANANKE_UPS_DRAW_WATTS_DB_SERIES, "legendFormat": ANANKE_UPS_DB_NAME}, + {"refId": "B", "expr": ANANKE_UPS_DRAW_WATTS_TETHYS_SERIES, "legendFormat": ANANKE_UPS_TETHYS_NAME}, + {"refId": "C", "expr": ANANKE_UPS_DRAW_WATTS_TOTAL_SERIES, "legendFormat": "combined"}, ], legend_display="list", legend_placement="bottom", @@ -1470,27 +1483,26 @@ def build_overview(): links=link_to("atlas-jobs"), ) ) - test_success = stat_panel( + test_success = timeseries_panel( 46, "Platform Test Success Rate", - TEST_SUCCESS_RATE, + None, {"h": 3, "w": 6, "x": 12, "y": 11}, unit="percent", - decimals=2, - thresholds={ - "mode": "absolute", - "steps": [ - {"color": "red", "value": None}, - {"color": "orange", "value": 90}, - {"color": "yellow", "value": 97}, - {"color": "green", "value": 99}, - ], - }, + targets=[ + { + "refId": "A", + "expr": PLATFORM_TEST_SUCCESS_RATE_BY_SUITE_SERIES, + "legendFormat": "{{suite}}", + } + ], + legend_display="list", + legend_placement="bottom", links=link_to("atlas-jobs"), ) test_success["description"] = ( - "Atlas Overview mirrors the Atlas Jobs internal dashboard for automation test health. " - "This panel rolls up the shared Ariadne and Metis CI metrics from that internal dashboard." + "Rolling pass rate per platform suite (Ariadne task automation + Metis build/flash pipelines). " + "Each line tracks pass percentage over time for its suite." ) panels.append(test_success) test_failures = stat_panel( @@ -2956,6 +2968,26 @@ def build_jobs_dashboard(): "Atlas Overview test panels depend on this internal activity table sourced from Ariadne and Metis counters." ) panels.append(tests_panel) + suite_panel = timeseries_panel( + 19, + "Platform Test Success Rate by Suite", + None, + {"h": 6, "w": 16, "x": 8, "y": 17}, + unit="percent", + targets=[ + { + "refId": "A", + "expr": PLATFORM_TEST_SUCCESS_RATE_BY_SUITE_SERIES, + "legendFormat": "{{suite}}", + } + ], + legend_display="list", + legend_placement="bottom", + ) + suite_panel["description"] = ( + "Per-suite pass percentage over time. Used by Atlas Overview and kept here for detailed triage." + ) + panels.append(suite_panel) return { "uid": "atlas-jobs", @@ -2993,37 +3025,37 @@ def build_power_dashboard(): decimals=1, text_mode="name_and_value", targets=[ - {"refId": "A", "expr": HECATE_UPS_DRAW_WATTS_DB, "legendFormat": f"{HECATE_UPS_DB_NAME} Draw (W)", "instant": True}, - {"refId": "B", "expr": HECATE_UPS_RUNTIME_DB, "legendFormat": f"{HECATE_UPS_DB_NAME} Discharge ETA", "instant": True}, - {"refId": "C", "expr": HECATE_UPS_ON_BATTERY_DB, "legendFormat": f"{HECATE_UPS_DB_NAME} Status", "instant": True}, - {"refId": "D", "expr": HECATE_UPS_DRAW_WATTS_TETHYS, "legendFormat": f"{HECATE_UPS_TETHYS_NAME} Draw (W)", "instant": True}, - {"refId": "E", "expr": HECATE_UPS_RUNTIME_TETHYS, "legendFormat": f"{HECATE_UPS_TETHYS_NAME} Discharge ETA", "instant": True}, - {"refId": "F", "expr": HECATE_UPS_ON_BATTERY_TETHYS, "legendFormat": f"{HECATE_UPS_TETHYS_NAME} Status", "instant": True}, + {"refId": "A", "expr": ANANKE_UPS_DRAW_WATTS_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Draw (W)", "instant": True}, + {"refId": "B", "expr": ANANKE_UPS_RUNTIME_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Discharge ETA", "instant": True}, + {"refId": "C", "expr": ANANKE_UPS_ON_BATTERY_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Status", "instant": True}, + {"refId": "D", "expr": ANANKE_UPS_DRAW_WATTS_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Draw (W)", "instant": True}, + {"refId": "E", "expr": ANANKE_UPS_RUNTIME_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Discharge ETA", "instant": True}, + {"refId": "F", "expr": ANANKE_UPS_ON_BATTERY_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Status", "instant": True}, ], field_overrides=[ { - "matcher": {"id": "byName", "options": f"{HECATE_UPS_DB_NAME} Draw (W)"}, - "properties": [{"id": "unit", "value": "watt"}, {"id": "description", "value": f"Attached node: {HECATE_UPS_DB_NODE}"}], + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Draw (W)"}, + "properties": [{"id": "unit", "value": "watt"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}], }, { - "matcher": {"id": "byName", "options": f"{HECATE_UPS_TETHYS_NAME} Draw (W)"}, - "properties": [{"id": "unit", "value": "watt"}, {"id": "description", "value": f"Attached node: {HECATE_UPS_TETHYS_NODE}"}], + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Draw (W)"}, + "properties": [{"id": "unit", "value": "watt"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}], }, { - "matcher": {"id": "byName", "options": f"{HECATE_UPS_DB_NAME} Discharge ETA"}, - "properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {HECATE_UPS_DB_NODE}"}], + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Discharge ETA"}, + "properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}], }, { - "matcher": {"id": "byName", "options": f"{HECATE_UPS_TETHYS_NAME} Discharge ETA"}, - "properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {HECATE_UPS_TETHYS_NODE}"}], + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Discharge ETA"}, + "properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}], }, { - "matcher": {"id": "byName", "options": f"{HECATE_UPS_DB_NAME} Status"}, - "properties": [{"id": "mappings", "value": status_mapping}, {"id": "description", "value": f"Attached node: {HECATE_UPS_DB_NODE}"}], + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Status"}, + "properties": [{"id": "mappings", "value": status_mapping}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}], }, { - "matcher": {"id": "byName", "options": f"{HECATE_UPS_TETHYS_NAME} Status"}, - "properties": [{"id": "mappings", "value": status_mapping}, {"id": "description", "value": f"Attached node: {HECATE_UPS_TETHYS_NODE}"}], + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Status"}, + "properties": [{"id": "mappings", "value": status_mapping}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}], }, ], description=( @@ -3039,9 +3071,9 @@ def build_power_dashboard(): {"h": 8, "w": 12, "x": 12, "y": 0}, unit="watt", targets=[ - {"refId": "A", "expr": HECATE_UPS_DRAW_WATTS_DB_SERIES, "legendFormat": HECATE_UPS_DB_NAME}, - {"refId": "B", "expr": HECATE_UPS_DRAW_WATTS_TETHYS_SERIES, "legendFormat": HECATE_UPS_TETHYS_NAME}, - {"refId": "C", "expr": HECATE_UPS_DRAW_WATTS_TOTAL_SERIES, "legendFormat": "combined"}, + {"refId": "A", "expr": ANANKE_UPS_DRAW_WATTS_DB_SERIES, "legendFormat": ANANKE_UPS_DB_NAME}, + {"refId": "B", "expr": ANANKE_UPS_DRAW_WATTS_TETHYS_SERIES, "legendFormat": ANANKE_UPS_TETHYS_NAME}, + {"refId": "C", "expr": ANANKE_UPS_DRAW_WATTS_TOTAL_SERIES, "legendFormat": "combined"}, ], legend_display="table", legend_placement="right", diff --git a/services/monitoring/dashboards/atlas-jobs.json b/services/monitoring/dashboards/atlas-jobs.json index 7799aec4..37bf2f4f 100644 --- a/services/monitoring/dashboards/atlas-jobs.json +++ b/services/monitoring/dashboards/atlas-jobs.json @@ -1235,6 +1235,44 @@ } ], "description": "Atlas Overview test panels depend on this internal activity table sourced from Ariadne and Metis counters." + }, + { + "id": 19, + "type": "timeseries", + "title": "Platform Test Success Rate by Suite", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 16, + "x": 8, + "y": 17 + }, + "targets": [ + { + "refId": "A", + "expr": "label_replace(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(ariadne_task_runs_total[$__interval])) or on() vector(0)), 1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(100 * (sum(increase(metis_builds_total{status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(metis_builds_total[$__interval])) or on() vector(0)), 1), \"suite\", \"metis-build\", \"__name__\", \".*\") or label_replace(100 * (sum(increase(metis_flashes_total{status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(metis_flashes_total[$__interval])) or on() vector(0)), 1), \"suite\", \"metis-flash\", \"__name__\", \".*\")", + "legendFormat": "{{suite}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "description": "Per-suite pass percentage over time. Used by Atlas Overview and kept here for detailed triage." } ], "time": { diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index c6949bfd..de85c5d6 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -1087,37 +1087,37 @@ "targets": [ { "refId": "A", - "expr": "max((hecate_ups_load_percent{job=\"hecate-power\",instance=\"titan-db\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\",instance=\"titan-db\"}) / 100) or on() vector(0)", + "expr": "max((ananke_ups_load_percent{job=\"ananke-power\",source=\"Pyrphoros\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Pyrphoros\"}) / 100) or on() vector(0)", "legendFormat": "Pyrphoros Draw (W)", "instant": true }, { "refId": "B", - "expr": "max(hecate_ups_runtime_seconds{job=\"hecate-power\",instance=\"titan-db\"}) or on() vector(0)", + "expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0)", "legendFormat": "Pyrphoros Discharge ETA", "instant": true }, { "refId": "C", - "expr": "max(hecate_ups_on_battery{job=\"hecate-power\",instance=\"titan-db\"}) or on() vector(0)", + "expr": "max(ananke_ups_on_battery{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0)", "legendFormat": "Pyrphoros Status", "instant": true }, { "refId": "D", - "expr": "max((hecate_ups_load_percent{job=\"hecate-power\",instance=\"titan-24\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\",instance=\"titan-24\"}) / 100) or on() vector(0)", + "expr": "max((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100) or on() vector(0)", "legendFormat": "Statera Draw (W)", "instant": true }, { "refId": "E", - "expr": "max(hecate_ups_runtime_seconds{job=\"hecate-power\",instance=\"titan-24\"}) or on() vector(0)", + "expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0)", "legendFormat": "Statera Discharge ETA", "instant": true }, { "refId": "F", - "expr": "max(hecate_ups_on_battery{job=\"hecate-power\",instance=\"titan-24\"}) or on() vector(0)", + "expr": "max(ananke_ups_on_battery{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0)", "legendFormat": "Statera Status", "instant": true } @@ -1309,17 +1309,17 @@ "targets": [ { "refId": "A", - "expr": "((hecate_ups_load_percent{job=\"hecate-power\",instance=\"titan-db\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\",instance=\"titan-db\"}) / 100)", + "expr": "((ananke_ups_load_percent{job=\"ananke-power\",source=\"Pyrphoros\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Pyrphoros\"}) / 100)", "legendFormat": "Pyrphoros" }, { "refId": "B", - "expr": "((hecate_ups_load_percent{job=\"hecate-power\",instance=\"titan-24\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\",instance=\"titan-24\"}) / 100)", + "expr": "((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100)", "legendFormat": "Statera" }, { "refId": "C", - "expr": "sum((hecate_ups_load_percent{job=\"hecate-power\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\"}) / 100)", + "expr": "sum((ananke_ups_load_percent{job=\"ananke-power\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\"}) / 100)", "legendFormat": "combined" } ], @@ -1805,7 +1805,7 @@ }, { "id": 46, - "type": "stat", + "type": "timeseries", "title": "Platform Test Success Rate", "datasource": { "type": "prometheus", @@ -1819,57 +1819,25 @@ }, "targets": [ { - "expr": "100 * ((sum(increase(ariadne_task_runs_total{status=\"ok\"}[30d])) or on() vector(0)) + (sum(increase(metis_builds_total{status=\"ok\"}[30d])) or on() vector(0)) + (sum(increase(metis_flashes_total{status=\"ok\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(ariadne_task_runs_total[30d])) or on() vector(0)) + (sum(increase(metis_builds_total[30d])) or on() vector(0)) + (sum(increase(metis_flashes_total[30d])) or on() vector(0))), 1)", - "refId": "A" + "refId": "A", + "expr": "label_replace(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(ariadne_task_runs_total[$__interval])) or on() vector(0)), 1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(100 * (sum(increase(metis_builds_total{status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(metis_builds_total[$__interval])) or on() vector(0)), 1), \"suite\", \"metis-build\", \"__name__\", \".*\") or label_replace(100 * (sum(increase(metis_flashes_total{status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(metis_flashes_total[$__interval])) or on() vector(0)), 1), \"suite\", \"metis-flash\", \"__name__\", \".*\")", + "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "orange", - "value": 90 - }, - { - "color": "yellow", - "value": 97 - }, - { - "color": "green", - "value": 99 - } - ] - }, - "unit": "percent", - "custom": { - "displayMode": "auto" - }, - "decimals": 2 + "unit": "percent" }, "overrides": [] }, "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "legend": { + "displayMode": "list", + "placement": "bottom" }, - "textMode": "value" + "tooltip": { + "mode": "multi" + } }, "links": [ { @@ -1878,7 +1846,7 @@ "targetBlank": true } ], - "description": "Atlas Overview mirrors the Atlas Jobs internal dashboard for automation test health. This panel rolls up the shared Ariadne and Metis CI metrics from that internal dashboard." + "description": "Rolling pass rate per platform suite (Ariadne task automation + Metis build/flash pipelines). Each line tracks pass percentage over time for its suite." }, { "id": 47, diff --git a/services/monitoring/dashboards/atlas-power.json b/services/monitoring/dashboards/atlas-power.json index 3313f5ef..1e8df3b8 100644 --- a/services/monitoring/dashboards/atlas-power.json +++ b/services/monitoring/dashboards/atlas-power.json @@ -21,37 +21,37 @@ "targets": [ { "refId": "A", - "expr": "max((hecate_ups_load_percent{job=\"hecate-power\",instance=\"titan-db\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\",instance=\"titan-db\"}) / 100) or on() vector(0)", + "expr": "max((ananke_ups_load_percent{job=\"ananke-power\",source=\"Pyrphoros\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Pyrphoros\"}) / 100) or on() vector(0)", "legendFormat": "Pyrphoros Draw (W)", "instant": true }, { "refId": "B", - "expr": "max(hecate_ups_runtime_seconds{job=\"hecate-power\",instance=\"titan-db\"}) or on() vector(0)", + "expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0)", "legendFormat": "Pyrphoros Discharge ETA", "instant": true }, { "refId": "C", - "expr": "max(hecate_ups_on_battery{job=\"hecate-power\",instance=\"titan-db\"}) or on() vector(0)", + "expr": "max(ananke_ups_on_battery{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0)", "legendFormat": "Pyrphoros Status", "instant": true }, { "refId": "D", - "expr": "max((hecate_ups_load_percent{job=\"hecate-power\",instance=\"titan-24\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\",instance=\"titan-24\"}) / 100) or on() vector(0)", + "expr": "max((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100) or on() vector(0)", "legendFormat": "Statera Draw (W)", "instant": true }, { "refId": "E", - "expr": "max(hecate_ups_runtime_seconds{job=\"hecate-power\",instance=\"titan-24\"}) or on() vector(0)", + "expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0)", "legendFormat": "Statera Discharge ETA", "instant": true }, { "refId": "F", - "expr": "max(hecate_ups_on_battery{job=\"hecate-power\",instance=\"titan-24\"}) or on() vector(0)", + "expr": "max(ananke_ups_on_battery{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0)", "legendFormat": "Statera Status", "instant": true } @@ -236,17 +236,17 @@ "targets": [ { "refId": "A", - "expr": "((hecate_ups_load_percent{job=\"hecate-power\",instance=\"titan-db\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\",instance=\"titan-db\"}) / 100)", + "expr": "((ananke_ups_load_percent{job=\"ananke-power\",source=\"Pyrphoros\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Pyrphoros\"}) / 100)", "legendFormat": "Pyrphoros" }, { "refId": "B", - "expr": "((hecate_ups_load_percent{job=\"hecate-power\",instance=\"titan-24\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\",instance=\"titan-24\"}) / 100)", + "expr": "((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100)", "legendFormat": "Statera" }, { "refId": "C", - "expr": "sum((hecate_ups_load_percent{job=\"hecate-power\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\"}) / 100)", + "expr": "sum((ananke_ups_load_percent{job=\"ananke-power\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\"}) / 100)", "legendFormat": "combined" } ], diff --git a/services/monitoring/grafana-dashboard-jobs.yaml b/services/monitoring/grafana-dashboard-jobs.yaml index 1dc455d4..60f261f2 100644 --- a/services/monitoring/grafana-dashboard-jobs.yaml +++ b/services/monitoring/grafana-dashboard-jobs.yaml @@ -1244,6 +1244,44 @@ data: } ], "description": "Atlas Overview test panels depend on this internal activity table sourced from Ariadne and Metis counters." + }, + { + "id": 19, + "type": "timeseries", + "title": "Platform Test Success Rate by Suite", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 16, + "x": 8, + "y": 17 + }, + "targets": [ + { + "refId": "A", + "expr": "label_replace(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(ariadne_task_runs_total[$__interval])) or on() vector(0)), 1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(100 * (sum(increase(metis_builds_total{status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(metis_builds_total[$__interval])) or on() vector(0)), 1), \"suite\", \"metis-build\", \"__name__\", \".*\") or label_replace(100 * (sum(increase(metis_flashes_total{status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(metis_flashes_total[$__interval])) or on() vector(0)), 1), \"suite\", \"metis-flash\", \"__name__\", \".*\")", + "legendFormat": "{{suite}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "description": "Per-suite pass percentage over time. Used by Atlas Overview and kept here for detailed triage." } ], "time": { diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 2115ea78..945c684c 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -1096,37 +1096,37 @@ data: "targets": [ { "refId": "A", - "expr": "max((hecate_ups_load_percent{job=\"hecate-power\",instance=\"titan-db\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\",instance=\"titan-db\"}) / 100) or on() vector(0)", + "expr": "max((ananke_ups_load_percent{job=\"ananke-power\",source=\"Pyrphoros\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Pyrphoros\"}) / 100) or on() vector(0)", "legendFormat": "Pyrphoros Draw (W)", "instant": true }, { "refId": "B", - "expr": "max(hecate_ups_runtime_seconds{job=\"hecate-power\",instance=\"titan-db\"}) or on() vector(0)", + "expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0)", "legendFormat": "Pyrphoros Discharge ETA", "instant": true }, { "refId": "C", - "expr": "max(hecate_ups_on_battery{job=\"hecate-power\",instance=\"titan-db\"}) or on() vector(0)", + "expr": "max(ananke_ups_on_battery{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0)", "legendFormat": "Pyrphoros Status", "instant": true }, { "refId": "D", - "expr": "max((hecate_ups_load_percent{job=\"hecate-power\",instance=\"titan-24\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\",instance=\"titan-24\"}) / 100) or on() vector(0)", + "expr": "max((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100) or on() vector(0)", "legendFormat": "Statera Draw (W)", "instant": true }, { "refId": "E", - "expr": "max(hecate_ups_runtime_seconds{job=\"hecate-power\",instance=\"titan-24\"}) or on() vector(0)", + "expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0)", "legendFormat": "Statera Discharge ETA", "instant": true }, { "refId": "F", - "expr": "max(hecate_ups_on_battery{job=\"hecate-power\",instance=\"titan-24\"}) or on() vector(0)", + "expr": "max(ananke_ups_on_battery{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0)", "legendFormat": "Statera Status", "instant": true } @@ -1318,17 +1318,17 @@ data: "targets": [ { "refId": "A", - "expr": "((hecate_ups_load_percent{job=\"hecate-power\",instance=\"titan-db\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\",instance=\"titan-db\"}) / 100)", + "expr": "((ananke_ups_load_percent{job=\"ananke-power\",source=\"Pyrphoros\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Pyrphoros\"}) / 100)", "legendFormat": "Pyrphoros" }, { "refId": "B", - "expr": "((hecate_ups_load_percent{job=\"hecate-power\",instance=\"titan-24\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\",instance=\"titan-24\"}) / 100)", + "expr": "((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100)", "legendFormat": "Statera" }, { "refId": "C", - "expr": "sum((hecate_ups_load_percent{job=\"hecate-power\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\"}) / 100)", + "expr": "sum((ananke_ups_load_percent{job=\"ananke-power\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\"}) / 100)", "legendFormat": "combined" } ], @@ -1814,7 +1814,7 @@ data: }, { "id": 46, - "type": "stat", + "type": "timeseries", "title": "Platform Test Success Rate", "datasource": { "type": "prometheus", @@ -1828,57 +1828,25 @@ data: }, "targets": [ { - "expr": "100 * ((sum(increase(ariadne_task_runs_total{status=\"ok\"}[30d])) or on() vector(0)) + (sum(increase(metis_builds_total{status=\"ok\"}[30d])) or on() vector(0)) + (sum(increase(metis_flashes_total{status=\"ok\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(ariadne_task_runs_total[30d])) or on() vector(0)) + (sum(increase(metis_builds_total[30d])) or on() vector(0)) + (sum(increase(metis_flashes_total[30d])) or on() vector(0))), 1)", - "refId": "A" + "refId": "A", + "expr": "label_replace(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(ariadne_task_runs_total[$__interval])) or on() vector(0)), 1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(100 * (sum(increase(metis_builds_total{status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(metis_builds_total[$__interval])) or on() vector(0)), 1), \"suite\", \"metis-build\", \"__name__\", \".*\") or label_replace(100 * (sum(increase(metis_flashes_total{status=\"ok\"}[$__interval])) or on() vector(0)) / clamp_min((sum(increase(metis_flashes_total[$__interval])) or on() vector(0)), 1), \"suite\", \"metis-flash\", \"__name__\", \".*\")", + "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "orange", - "value": 90 - }, - { - "color": "yellow", - "value": 97 - }, - { - "color": "green", - "value": 99 - } - ] - }, - "unit": "percent", - "custom": { - "displayMode": "auto" - }, - "decimals": 2 + "unit": "percent" }, "overrides": [] }, "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "legend": { + "displayMode": "list", + "placement": "bottom" }, - "textMode": "value" + "tooltip": { + "mode": "multi" + } }, "links": [ { @@ -1887,7 +1855,7 @@ data: "targetBlank": true } ], - "description": "Atlas Overview mirrors the Atlas Jobs internal dashboard for automation test health. This panel rolls up the shared Ariadne and Metis CI metrics from that internal dashboard." + "description": "Rolling pass rate per platform suite (Ariadne task automation + Metis build/flash pipelines). Each line tracks pass percentage over time for its suite." }, { "id": 47, diff --git a/services/monitoring/grafana-dashboard-power.yaml b/services/monitoring/grafana-dashboard-power.yaml index 96bbb87b..294bd75a 100644 --- a/services/monitoring/grafana-dashboard-power.yaml +++ b/services/monitoring/grafana-dashboard-power.yaml @@ -30,37 +30,37 @@ data: "targets": [ { "refId": "A", - "expr": "max((hecate_ups_load_percent{job=\"hecate-power\",instance=\"titan-db\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\",instance=\"titan-db\"}) / 100) or on() vector(0)", + "expr": "max((ananke_ups_load_percent{job=\"ananke-power\",source=\"Pyrphoros\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Pyrphoros\"}) / 100) or on() vector(0)", "legendFormat": "Pyrphoros Draw (W)", "instant": true }, { "refId": "B", - "expr": "max(hecate_ups_runtime_seconds{job=\"hecate-power\",instance=\"titan-db\"}) or on() vector(0)", + "expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0)", "legendFormat": "Pyrphoros Discharge ETA", "instant": true }, { "refId": "C", - "expr": "max(hecate_ups_on_battery{job=\"hecate-power\",instance=\"titan-db\"}) or on() vector(0)", + "expr": "max(ananke_ups_on_battery{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0)", "legendFormat": "Pyrphoros Status", "instant": true }, { "refId": "D", - "expr": "max((hecate_ups_load_percent{job=\"hecate-power\",instance=\"titan-24\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\",instance=\"titan-24\"}) / 100) or on() vector(0)", + "expr": "max((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100) or on() vector(0)", "legendFormat": "Statera Draw (W)", "instant": true }, { "refId": "E", - "expr": "max(hecate_ups_runtime_seconds{job=\"hecate-power\",instance=\"titan-24\"}) or on() vector(0)", + "expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0)", "legendFormat": "Statera Discharge ETA", "instant": true }, { "refId": "F", - "expr": "max(hecate_ups_on_battery{job=\"hecate-power\",instance=\"titan-24\"}) or on() vector(0)", + "expr": "max(ananke_ups_on_battery{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0)", "legendFormat": "Statera Status", "instant": true } @@ -245,17 +245,17 @@ data: "targets": [ { "refId": "A", - "expr": "((hecate_ups_load_percent{job=\"hecate-power\",instance=\"titan-db\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\",instance=\"titan-db\"}) / 100)", + "expr": "((ananke_ups_load_percent{job=\"ananke-power\",source=\"Pyrphoros\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Pyrphoros\"}) / 100)", "legendFormat": "Pyrphoros" }, { "refId": "B", - "expr": "((hecate_ups_load_percent{job=\"hecate-power\",instance=\"titan-24\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\",instance=\"titan-24\"}) / 100)", + "expr": "((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100)", "legendFormat": "Statera" }, { "refId": "C", - "expr": "sum((hecate_ups_load_percent{job=\"hecate-power\"} * hecate_ups_power_nominal_watts{job=\"hecate-power\"}) / 100)", + "expr": "sum((ananke_ups_load_percent{job=\"ananke-power\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\"}) / 100)", "legendFormat": "combined" } ], diff --git a/services/monitoring/helmrelease.yaml b/services/monitoring/helmrelease.yaml index ec836405..48280947 100644 --- a/services/monitoring/helmrelease.yaml +++ b/services/monitoring/helmrelease.yaml @@ -224,8 +224,8 @@ spec: target_label: instance replacement: titan-jh - # --- Hecate power telemetry (host-level daemon on UPS hosts) --- - - job_name: "hecate-power" + # --- Ananke power telemetry (host-level daemon on UPS hosts) --- + - job_name: "ananke-power" static_configs: - targets: ["192.168.22.10:9560"] labels: