From 411bc6b90df5880a7379e0299354ec65ceaa6def Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 18 Apr 2026 14:50:59 -0300 Subject: [PATCH] monitoring: elevate Atlas Testing dashboard and no-data fallbacks --- scripts/dashboards_render_atlas.py | 537 +++--- .../monitoring/dashboards/atlas-jobs.json | 1612 +++++++---------- .../monitoring/dashboards/atlas-overview.json | 96 +- .../monitoring/grafana-dashboard-jobs.yaml | 1612 +++++++---------- .../grafana-dashboard-overview.yaml | 96 +- 5 files changed, 1747 insertions(+), 2206 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 42857e88..ea1772e8 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -501,40 +501,65 @@ PLATFORM_TEST_SUITE_NAMES = [ "data-prepper", ] PLATFORM_TEST_SUITE_MATCHER = "|".join(PLATFORM_TEST_SUITE_NAMES) +PLATFORM_TEST_SUCCESS_STATUS = "ok|passed|success" PLATFORM_TEST_SUCCESS_EVENTS_30D = ( - f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"ok|passed|success"}}[30d])) or on() vector(0))' + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[30d])) or on() vector(0))' ) PLATFORM_TEST_TOTAL_EVENTS_30D = ( f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}"}}[30d])) or on() vector(0))' ) +PLATFORM_TEST_SUCCESS_EVENTS_7D = ( + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[7d])) or on() vector(0))' +) +PLATFORM_TEST_TOTAL_EVENTS_7D = ( + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}"}}[7d])) or on() vector(0))' +) +PLATFORM_TEST_SUCCESS_EVENTS_24H = ( + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[24h])) or on() vector(0))' +) +PLATFORM_TEST_TOTAL_EVENTS_24H = ( + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}"}}[24h])) or on() vector(0))' +) TEST_SUCCESS_RATE = ( f"100 * ({PLATFORM_TEST_SUCCESS_EVENTS_30D}) / clamp_min(({PLATFORM_TEST_TOTAL_EVENTS_30D}), 1)" ) +TEST_SUCCESS_RATE_7D = ( + f"100 * ({PLATFORM_TEST_SUCCESS_EVENTS_7D}) / clamp_min(({PLATFORM_TEST_TOTAL_EVENTS_7D}), 1)" +) +TEST_SUCCESS_RATE_24H = ( + f"100 * ({PLATFORM_TEST_SUCCESS_EVENTS_24H}) / clamp_min(({PLATFORM_TEST_TOTAL_EVENTS_24H}), 1)" +) TEST_FAILURES_24H_TOTAL = ( - f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status!~"ok|passed|success"}}[24h])) or on() vector(0))' + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status!~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[24h])) or on() vector(0))' ) PLATFORM_TEST_FAILURES_24H_BY_SUITE = ( - f'sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status!~"ok|passed|success"}}[24h])))' + f'sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status!~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[24h])))' ) PLATFORM_TEST_ACTIVITY_30D = ( f'sum by (suite, status) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}"}}[30d]))' ) +PLATFORM_TEST_RUNS_24H_TOTAL = PLATFORM_TEST_TOTAL_EVENTS_24H +PLATFORM_TEST_ACTIVE_SUITES_24H = ( + f'sum((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}"}}[24h])) > 0)) ' + "or on() vector(0)" +) PLATFORM_TEST_POINT_WINDOW = "1h" PLATFORM_TEST_SUCCESS_RATE_SUITE_TARGETS = [ { "refId": chr(ord("A") + index), "expr": ( - f'(100 * (sum(increase(platform_quality_gate_runs_total{{suite="{suite}",status=~"ok|passed|success"}}' + f'(100 * (sum(increase(platform_quality_gate_runs_total{{suite="{suite}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}"}}' f'[{PLATFORM_TEST_POINT_WINDOW}]))) / ' f'clamp_min((sum(increase(platform_quality_gate_runs_total{{suite="{suite}"}}[{PLATFORM_TEST_POINT_WINDOW}]))), 1)) ' - f'and on() ((sum(increase(platform_quality_gate_runs_total{{suite="{suite}"}}[{PLATFORM_TEST_POINT_WINDOW}]))) > 0)' + f'and on() ((sum(increase(platform_quality_gate_runs_total{{suite="{suite}"}}[{PLATFORM_TEST_POINT_WINDOW}]))) > 0) ' + "or on() vector(0)" ), "legendFormat": suite, } for index, suite in enumerate(PLATFORM_TEST_SUITE_NAMES) ] PLATFORM_TEST_SUCCESS_RATE_24H_BY_SUITE = ( - f'sort_desc((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"ok|passed|success"}}[24h]))) ' + f'sort_desc((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[24h]))) ' f'/ clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}"}}[24h]))), 1)) ' f'and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}"}}[24h]))) > 0))' ) @@ -906,6 +931,7 @@ def table_panel( filterable=True, footer=None, format=None, + description=None, ): """Return a Grafana table panel definition.""" # Optional PromQL subquery helpers in expr: share(), etc. @@ -930,6 +956,8 @@ def table_panel( } if transformations: panel["transformations"] = transformations + if description: + panel["description"] = description return panel @@ -1026,6 +1054,25 @@ def namespace_scope_links(var_name): ] +def testing_suite_variable(): + options = [{"text": suite, "value": suite, "selected": False} for suite in PLATFORM_TEST_SUITE_NAMES] + return { + "name": "suite", + "label": "Suite", + "type": "custom", + "query": ",".join(PLATFORM_TEST_SUITE_NAMES), + "current": {"text": "All", "value": "$__all", "selected": True}, + "options": options, + "hide": 0, + "multi": False, + "includeAll": True, + "allValue": PLATFORM_TEST_SUITE_MATCHER, + "refresh": 1, + "sort": 1, + "skipUrlSync": False, + } + + def bargauge_panel( panel_id, title, @@ -1120,8 +1167,27 @@ def text_panel(panel_id, title, content, grid): } +DASHBOARD_LINK_TITLES = { + "atlas-overview": "Open Atlas Overview", + "atlas-pods": "Open Atlas Pods", + "atlas-nodes": "Open Atlas Nodes", + "atlas-storage": "Open Atlas Storage", + "atlas-network": "Open Atlas Network", + "atlas-mail": "Open Atlas Mail", + "atlas-jobs": "Open Atlas Testing", + "atlas-power": "Open Atlas Power", + "atlas-gpu": "Open Atlas GPU", +} + + def link_to(uid): - return [{"title": f"Open {uid} dashboard", "url": f"/d/{uid}", "targetBlank": True}] + return [ + { + "title": DASHBOARD_LINK_TITLES.get(uid, f"Open {uid} dashboard"), + "url": f"/d/{uid}", + "targetBlank": True, + } + ] # --------------------------------------------------------------------------- @@ -1959,7 +2025,13 @@ def build_overview(): }, "time": {"from": "now-1h", "to": "now"}, "refresh": "1m", - "links": [], + "links": [ + { + "title": "Atlas Testing (Internal)", + "url": "/d/atlas-jobs", + "targetBlank": False, + } + ], } @@ -2844,274 +2916,161 @@ def build_mail_dashboard(): def build_jobs_dashboard(): panels = [] - age_thresholds = { - "mode": "absolute", - "steps": [ - {"color": "green", "value": None}, - {"color": "yellow", "value": 6}, - {"color": "orange", "value": 24}, - {"color": "red", "value": 48}, - ], - } - recent_error_thresholds = { + suite_var = "${suite:regex}" + success_thresholds = { "mode": "absolute", "steps": [ {"color": "red", "value": None}, - {"color": "orange", "value": 1}, - {"color": "yellow", "value": 6}, - {"color": "green", "value": 24}, + {"color": "orange", "value": 70}, + {"color": "yellow", "value": 85}, + {"color": "green", "value": 95}, ], } - - task_error_thresholds = { + count_thresholds = { "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 1}, {"color": "orange", "value": 3}, - {"color": "red", "value": 5}, + {"color": "red", "value": 6}, ], } panels.append( - bargauge_panel( + text_panel( 1, - "Ariadne Task Errors (range)", - ARIADNE_TASK_ERRORS_RANGE, - {"h": 7, "w": 8, "x": 0, "y": 0}, - unit="none", + "Testing Modes", + ( + "### Atlas Testing\n" + "- **Overview mode**: set `Suite=All` to compare every project.\n" + "- **Suite drilldown mode**: choose one `Suite` to inspect run outcomes, coverage drift, and failure mix.\n" + "- Jenkins drilldown: [ananke](https://jenkins.bstein.dev/job/ananke/) · " + "[ariadne](https://jenkins.bstein.dev/job/ariadne/) · " + "[atlasbot](https://jenkins.bstein.dev/job/atlasbot/) · " + "[bstein-dev-home](https://jenkins.bstein.dev/job/bstein-dev-home/) · " + "[lesavka](https://jenkins.bstein.dev/job/lesavka/) · " + "[metis](https://jenkins.bstein.dev/job/metis/) · " + "[pegasus](https://jenkins.bstein.dev/job/pegasus/) · " + "[titan-iac](https://jenkins.bstein.dev/job/titan-iac/) · " + "[typhon](https://jenkins.bstein.dev/job/typhon/)" + ), + {"h": 4, "w": 24, "x": 0, "y": 0}, + ) + ) + + panels.append( + stat_panel( + 2, + "Success Rate (24h)", + TEST_SUCCESS_RATE_24H, + {"h": 6, "w": 4, "x": 0, "y": 4}, + unit="percent", + decimals=2, instant=True, - legend="{{task}}", - thresholds=task_error_thresholds, + thresholds=success_thresholds, ) ) panels.append( - { - "id": 2, - "type": "timeseries", - "title": "Ariadne Attempts / Failures", - "datasource": PROM_DS, - "gridPos": {"h": 7, "w": 8, "x": 8, "y": 0}, - "targets": [ - {"expr": ARIADNE_TASK_ATTEMPTS_SERIES, "refId": "A", "legendFormat": "Attempts"}, - {"expr": ARIADNE_TASK_FAILURES_SERIES, "refId": "B", "legendFormat": "Failures"}, - ], - "fieldConfig": { - "defaults": {"unit": "none"}, - "overrides": [ - { - "matcher": {"id": "byName", "options": "Attempts"}, - "properties": [ - {"id": "color", "value": {"mode": "fixed", "fixedColor": "green"}} - ], - }, - { - "matcher": {"id": "byName", "options": "Failures"}, - "properties": [ - {"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}} - ], - }, - ], - }, - "options": { - "legend": {"displayMode": "table", "placement": "right"}, - "tooltip": {"mode": "multi"}, - }, - } - ) - panels.append( - bargauge_panel( + stat_panel( 3, - "One-off Job Pods (age hours)", - ONEOFF_JOB_POD_AGE_HOURS, - {"h": 7, "w": 8, "x": 16, "y": 0}, - unit="h", - instant=True, - legend="{{namespace}}/{{pod}}", - thresholds=age_thresholds, - limit=12, + "Success Rate (7d)", + TEST_SUCCESS_RATE_7D, + {"h": 6, "w": 4, "x": 4, "y": 4}, + unit="percent", decimals=2, + instant=True, + thresholds=success_thresholds, ) ) panels.append( stat_panel( 4, - "Ariadne Schedules Stale (>36h)", - ARIADNE_SCHEDULE_STALE_COUNT, - {"h": 4, "w": 4, "x": 0, "y": 7}, - unit="none", - thresholds={ - "mode": "absolute", - "steps": [ - {"color": "green", "value": None}, - {"color": "yellow", "value": 1}, - {"color": "orange", "value": 2}, - {"color": "red", "value": 3}, - ], - }, + "Success Rate (30d)", + TEST_SUCCESS_RATE, + {"h": 6, "w": 4, "x": 8, "y": 4}, + unit="percent", + decimals=2, + instant=True, + thresholds=success_thresholds, ) ) panels.append( stat_panel( 5, - "Ariadne Schedules Missing Success", - ARIADNE_SCHEDULE_MISSING_COUNT, - {"h": 4, "w": 4, "x": 4, "y": 7}, + "Failures (24h)", + TEST_FAILURES_24H_TOTAL, + {"h": 6, "w": 4, "x": 12, "y": 4}, unit="none", + instant=True, + thresholds=count_thresholds, ) ) panels.append( stat_panel( 6, - "Ariadne Schedules Failed Last Run", - ARIADNE_SCHEDULE_FAILED_COUNT, - {"h": 4, "w": 4, "x": 8, "y": 7}, + "Runs (24h)", + PLATFORM_TEST_RUNS_24H_TOTAL, + {"h": 6, "w": 4, "x": 16, "y": 4}, unit="none", + instant=True, + thresholds={ + "mode": "absolute", + "steps": [{"color": "red", "value": None}, {"color": "green", "value": 1}], + }, ) ) panels.append( stat_panel( 7, - "Ariadne Task Errors (1h)", - ARIADNE_TASK_ERRORS_1H_TOTAL, - {"h": 4, "w": 4, "x": 12, "y": 7}, + "Suites Active (24h)", + PLATFORM_TEST_ACTIVE_SUITES_24H, + {"h": 6, "w": 4, "x": 20, "y": 4}, unit="none", + decimals=0, + instant=True, + thresholds={ + "mode": "absolute", + "steps": [ + {"color": "red", "value": None}, + {"color": "yellow", "value": 4}, + {"color": "green", "value": 8}, + ], + }, ) ) + panels.append( - stat_panel( + bargauge_panel( 8, - "Ariadne Task Errors (24h)", - ARIADNE_TASK_ERRORS_24H_TOTAL, - {"h": 4, "w": 4, "x": 16, "y": 7}, - unit="none", + "Suite Scoreboard: Success Rate (24h)", + PLATFORM_TEST_SUCCESS_RATE_24H_BY_SUITE, + {"h": 8, "w": 12, "x": 0, "y": 10}, + unit="percent", + instant=True, + legend="{{suite}}", + thresholds=success_thresholds, + decimals=2, ) ) panels.append( - stat_panel( + bargauge_panel( 9, - "Ariadne Task Runs (1h)", - ARIADNE_TASK_RUNS_1H_TOTAL, - {"h": 4, "w": 4, "x": 20, "y": 7}, - unit="none", - ) - ) - panels.append( - bargauge_panel( - 10, - "Ariadne Schedule Last Error (hours ago)", - ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS, - {"h": 6, "w": 12, "x": 0, "y": 17}, - unit="h", - instant=True, - legend="{{task}}", - thresholds=recent_error_thresholds, - decimals=2, - ) - ) - panels.append( - bargauge_panel( - 11, - "Ariadne Schedule Last Success (hours ago)", - ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS, - {"h": 6, "w": 12, "x": 12, "y": 17}, - unit="h", - instant=True, - legend="{{task}}", - thresholds=age_thresholds, - decimals=2, - ) - ) - panels.append( - bargauge_panel( - 12, - "Ariadne Fast Schedule Last Success (hours ago)", - ARIADNE_FAST_SCHEDULE_LAST_SUCCESS_RANGE_HOURS, - {"h": 6, "w": 12, "x": 0, "y": 23}, - unit="h", - instant=True, - legend="{{task}}", - thresholds=age_thresholds, - decimals=2, - ) - ) - panels.append( - bargauge_panel( - 13, - "Ariadne Fast Schedule Next Run (hours from now)", - ARIADNE_FAST_SCHEDULE_NEXT_RUN_HOURS, - {"h": 6, "w": 12, "x": 12, "y": 23}, - unit="h", - instant=True, - legend="{{task}}", - thresholds=age_thresholds, - decimals=2, - ) - ) - panels.append( - bargauge_panel( - 14, - "Ariadne Task Errors (1h)", - ARIADNE_TASK_ERRORS_1H, - {"h": 6, "w": 12, "x": 0, "y": 29}, + "Suite Scoreboard: Failures (24h)", + PLATFORM_TEST_FAILURES_24H_BY_SUITE, + {"h": 8, "w": 12, "x": 12, "y": 10}, unit="none", instant=True, - legend="{{task}}", - thresholds=task_error_thresholds, + legend="{{suite}}", + thresholds=count_thresholds, + decimals=0, ) ) - panels.append( - bargauge_panel( - 15, - "Ariadne Task Errors (30d)", - ARIADNE_TASK_ERRORS_30D, - {"h": 6, "w": 12, "x": 12, "y": 29}, - unit="none", - instant=True, - legend="{{task}}", - thresholds=task_error_thresholds, - ) - ) - panels.append( - bargauge_panel( - 16, - "Ariadne Access Requests", - ARIADNE_ACCESS_REQUESTS, - {"h": 6, "w": 8, "x": 0, "y": 11}, - unit="none", - instant=True, - legend="{{status}}", - ) - ) - coverage_panel = stat_panel( - 17, - "Platform Test Success Rate (30d)", - TEST_SUCCESS_RATE, - {"h": 6, "w": 4, "x": 8, "y": 11}, - unit="percent", - decimals=2, - instant=True, - ) - coverage_panel["description"] = "Internal rollup across Ariadne task runs and Metis build/flash outcomes." - panels.append(coverage_panel) - tests_panel = table_panel( - 18, - "Platform Test Activity (30d)", - PLATFORM_TEST_ACTIVITY_30D, - {"h": 6, "w": 12, "x": 12, "y": 11}, - unit="none", - transformations=[{"id": "labelsToFields", "options": {}}, {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}], - instant=True, - ) - tests_panel["description"] = ( - "Atlas Overview test panels depend on this internal activity table sourced from Ariadne and Metis counters." - ) - panels.append(tests_panel) + suite_panel = timeseries_panel( - 19, - "Platform Test Success Rate by Suite", + 10, + "Suite Success History (1h points)", None, - {"h": 6, "w": 16, "x": 8, "y": 17}, + {"h": 8, "w": 24, "x": 0, "y": 18}, unit="percent", targets=PLATFORM_TEST_SUCCESS_RATE_SUITE_TARGETS, legend_display="list", @@ -3123,27 +3082,153 @@ def build_jobs_dashboard(): "drawStyle": "line", "lineInterpolation": "linear", "lineWidth": 2, - "fillOpacity": 10, + "fillOpacity": 8, "showPoints": "always", - "pointSize": 4, + "pointSize": 3, "spanNulls": True, } - suite_panel["description"] = ( - "Per-run interval pass points (0-100) per suite. Existing suites: ariadne, metis, ananke; additional suites appear automatically when platform_quality_gate_runs_total is published." - ) panels.append(suite_panel) + panels.append( + table_panel( + 11, + "Suite Activity Matrix (30d)", + PLATFORM_TEST_ACTIVITY_30D, + {"h": 8, "w": 24, "x": 0, "y": 26}, + unit="none", + transformations=[ + {"id": "labelsToFields", "options": {}}, + {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}, + ], + instant=True, + description="Totals by suite and status over the last 30 days.", + ) + ) + + panels.append( + timeseries_panel( + 12, + "Selected Suite Run Outcomes", + None, + {"h": 8, "w": 12, "x": 0, "y": 34}, + unit="none", + targets=[ + { + "refId": "A", + "expr": f'sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[$__interval])) or on() vector(0)', + "legendFormat": "Success", + }, + { + "refId": "B", + "expr": f'sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status!~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[$__interval])) or on() vector(0)', + "legendFormat": "Failure", + }, + { + "refId": "C", + "expr": f'sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[$__interval])) or on() vector(0)', + "legendFormat": "Total", + }, + ], + legend_display="table", + legend_placement="right", + legend_calcs=["lastNotNull", "sum"], + description="Use Suite Drilldown to isolate one project.", + ) + ) + + panels.append( + timeseries_panel( + 13, + "Selected Suite Hygiene & Coverage History", + None, + {"h": 8, "w": 12, "x": 12, "y": 34}, + unit="none", + targets=[ + { + "refId": "A", + "expr": f'max_over_time(platform_quality_gate_workspace_line_coverage_percent{{suite=~"{suite_var}"}}[$__interval])', + "legendFormat": "{{suite}} coverage %", + }, + { + "refId": "B", + "expr": f'max_over_time(platform_quality_gate_source_lines_over_500_total{{suite=~"{suite_var}"}}[$__interval])', + "legendFormat": "{{suite}} files >500 LOC", + }, + ], + legend_display="table", + legend_placement="right", + legend_calcs=["lastNotNull", "max"], + description="Coverage and LOC hygiene trend for selected suite(s).", + ) + ) + + panels.append( + table_panel( + 14, + "Selected Suite Failure Mix (30d)", + f'sum by (suite, status) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status!~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[30d]))', + {"h": 8, "w": 12, "x": 0, "y": 42}, + unit="none", + transformations=[ + {"id": "labelsToFields", "options": {}}, + {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}, + ], + instant=True, + description="Breakdown of non-success outcomes by status over 30 days.", + ) + ) + + panels.append( + table_panel( + 15, + "Selected Suite Latest Test Counters", + f'sum by (suite, result, __name__) ({{__name__=~".*_quality_gate_tests_total",suite=~"{suite_var}"}})', + {"h": 8, "w": 12, "x": 12, "y": 42}, + unit="none", + transformations=[ + {"id": "labelsToFields", "options": {}}, + {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}, + ], + instant=True, + description="Latest per-suite counters (passed/failed/error/skipped/total) from each pipeline exporter.", + ) + ) + + panels.append( + table_panel( + 16, + "Selected Suite Check-Level Failures", + ( + f'sum by (suite, check, result, __name__) ' + f'({{__name__=~".*_quality_gate_checks_total",suite=~"{suite_var}",result!~"ok|passed|success"}})' + ), + {"h": 8, "w": 24, "x": 0, "y": 50}, + unit="none", + transformations=[ + {"id": "labelsToFields", "options": {}}, + {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}, + ], + instant=True, + description="Per-check failure details for suites that publish quality_gate_checks_total metrics.", + ) + ) + return { "uid": "atlas-jobs", - "title": "Atlas Jobs", + "title": "Atlas Testing", "folderUid": PRIVATE_FOLDER, "editable": True, "panels": panels, - "time": {"from": "now-7d", "to": "now"}, + "time": {"from": "now-30d", "to": "now"}, "annotations": {"list": []}, "schemaVersion": 39, "style": "dark", - "tags": ["atlas", "jobs", "ariadne"], + "tags": ["atlas", "testing", "quality-gate", "ci"], + "templating": { + "list": [ + testing_suite_variable(), + ] + }, } diff --git a/services/monitoring/dashboards/atlas-jobs.json b/services/monitoring/dashboards/atlas-jobs.json index a54789eb..33267af1 100644 --- a/services/monitoring/dashboards/atlas-jobs.json +++ b/services/monitoring/dashboards/atlas-jobs.json @@ -1,255 +1,44 @@ { "uid": "atlas-jobs", - "title": "Atlas Jobs", + "title": "Atlas Testing", "folderUid": "atlas-internal", "editable": true, "panels": [ { "id": 1, - "type": "bargauge", - "title": "Ariadne Task Errors (range)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, + "type": "text", + "title": "Testing Modes", "gridPos": { - "h": 7, - "w": 8, + "h": 4, + "w": 24, "x": 0, "y": 0 }, - "targets": [ - { - "expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[$__range])))", - "refId": "A", - "legendFormat": "{{task}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "none", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 1 - }, - { - "color": "orange", - "value": 3 - }, - { - "color": "red", - "value": 5 - } - ] - } - }, - "overrides": [] - }, + "datasource": null, "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ] - }, - { - "id": 2, - "type": "timeseries", - "title": "Ariadne Attempts / Failures", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 0 - }, - "targets": [ - { - "expr": "sum(increase(ariadne_task_runs_total[$__interval]))", - "refId": "A", - "legendFormat": "Attempts" - }, - { - "expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))", - "refId": "B", - "legendFormat": "Failures" - } - ], - "fieldConfig": { - "defaults": { - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Attempts" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "green" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Failures" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "red" - } - } - ] - } - ] - }, - "options": { - "legend": { - "displayMode": "table", - "placement": "right" - }, - "tooltip": { - "mode": "multi" - } + "mode": "markdown", + "content": "### Atlas Testing\n- **Overview mode**: set `Suite=All` to compare every project.\n- **Suite drilldown mode**: choose one `Suite` to inspect run outcomes, coverage drift, and failure mix.\n- Jenkins drilldown: [ananke](https://jenkins.bstein.dev/job/ananke/) \u00b7 [ariadne](https://jenkins.bstein.dev/job/ariadne/) \u00b7 [atlasbot](https://jenkins.bstein.dev/job/atlasbot/) \u00b7 [bstein-dev-home](https://jenkins.bstein.dev/job/bstein-dev-home/) \u00b7 [lesavka](https://jenkins.bstein.dev/job/lesavka/) \u00b7 [metis](https://jenkins.bstein.dev/job/metis/) \u00b7 [pegasus](https://jenkins.bstein.dev/job/pegasus/) \u00b7 [titan-iac](https://jenkins.bstein.dev/job/titan-iac/) \u00b7 [typhon](https://jenkins.bstein.dev/job/typhon/)" } }, { - "id": 3, - "type": "bargauge", - "title": "One-off Job Pods (age hours)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 0 - }, - "targets": [ - { - "expr": "sort_desc(((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"}))", - "refId": "A", - "legendFormat": "{{namespace}}/{{pod}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "h", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "orange", - "value": 24 - }, - { - "color": "red", - "value": 48 - } - ] - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - }, - { - "id": "limit", - "options": { - "limit": 12 - } - } - ] - }, - { - "id": 4, + "id": 2, "type": "stat", - "title": "Ariadne Schedules Stale (>36h)", + "title": "Success Rate (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, + "h": 6, "w": 4, "x": 0, - "y": 7 + "y": 4 }, "targets": [ { - "expr": "sum((((time() - ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room|schedule.pod_cleaner|schedule.image_sweeper|schedule.metis_k3s_token_sync|schedule.platform_quality_suite_probe)$\"})) > bool 129600)) or on() vector(0)", - "refId": "A" + "expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[24h])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h])) or on() vector(0))), 1)", + "refId": "A", + "instant": true } ], "fieldConfig": { @@ -262,27 +51,168 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "red", "value": null }, - { - "color": "yellow", - "value": 1 - }, { "color": "orange", - "value": 2 + "value": 70 }, { - "color": "red", - "value": 3 + "color": "yellow", + "value": 85 + }, + { + "color": "green", + "value": 95 } ] }, - "unit": "none", + "unit": "percent", "custom": { "displayMode": "auto" - } + }, + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 3, + "type": "stat", + "title": "Success Rate (7d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 4, + "y": 4 + }, + "targets": [ + { + "expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[7d])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[7d])) or on() vector(0))), 1)", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "yellow", + "value": 85 + }, + { + "color": "green", + "value": 95 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 4, + "type": "stat", + "title": "Success Rate (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 8, + "y": 4 + }, + "targets": [ + { + "expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d])) or on() vector(0))), 1)", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "yellow", + "value": 85 + }, + { + "color": "green", + "value": 95 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 2 }, "overrides": [] }, @@ -303,21 +233,22 @@ { "id": 5, "type": "stat", - "title": "Ariadne Schedules Missing Success", + "title": "Failures (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, + "h": 6, "w": 4, - "x": 4, - "y": 7 + "x": 12, + "y": 4 }, "targets": [ { - "expr": "count((ariadne_schedule_next_run_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room|schedule.pod_cleaner|schedule.opensearch_prune|schedule.image_sweeper|schedule.metis_k3s_token_sync|schedule.platform_quality_suite_probe)$\"} unless on(task) ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room|schedule.pod_cleaner|schedule.image_sweeper|schedule.metis_k3s_token_sync|schedule.platform_quality_suite_probe)$\"})) or on() vector(0)", - "refId": "A" + "expr": "(sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status!~\"ok|passed|success\"}[24h])) or on() vector(0))", + "refId": "A", + "instant": true } ], "fieldConfig": { @@ -330,12 +261,20 @@ "mode": "absolute", "steps": [ { - "color": "rgba(115, 115, 115, 1)", + "color": "green", "value": null }, { - "color": "green", + "color": "yellow", "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 6 } ] }, @@ -363,21 +302,22 @@ { "id": 6, "type": "stat", - "title": "Ariadne Schedules Failed Last Run", + "title": "Runs (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, + "h": 6, "w": 4, - "x": 8, - "y": 7 + "x": 16, + "y": 4 }, "targets": [ { - "expr": "sum(((1 - ariadne_schedule_last_status{task=~\"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room|schedule.pod_cleaner|schedule.image_sweeper|schedule.metis_k3s_token_sync|schedule.platform_quality_suite_probe)$\"}) > bool 0)) or on() vector(0)", - "refId": "A" + "expr": "(sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h])) or on() vector(0))", + "refId": "A", + "instant": true } ], "fieldConfig": { @@ -390,7 +330,7 @@ "mode": "absolute", "steps": [ { - "color": "rgba(115, 115, 115, 1)", + "color": "red", "value": null }, { @@ -423,21 +363,22 @@ { "id": 7, "type": "stat", - "title": "Ariadne Task Errors (1h)", + "title": "Suites Active (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, + "h": 6, "w": 4, - "x": 12, - "y": 7 + "x": 20, + "y": 4 }, "targets": [ { - "expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[1h]))", - "refId": "A" + "expr": "sum((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h])) > 0)) or on() vector(0)", + "refId": "A", + "instant": true } ], "fieldConfig": { @@ -450,19 +391,24 @@ "mode": "absolute", "steps": [ { - "color": "rgba(115, 115, 115, 1)", + "color": "red", "value": null }, + { + "color": "yellow", + "value": 4 + }, { "color": "green", - "value": 1 + "value": 8 } ] }, "unit": "none", "custom": { "displayMode": "auto" - } + }, + "decimals": 0 }, "overrides": [] }, @@ -482,151 +428,31 @@ }, { "id": 8, - "type": "stat", - "title": "Ariadne Task Errors (24h)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 16, - "y": 7 - }, - "targets": [ - { - "expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[24h]))", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(115, 115, 115, 1)", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - } - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 9, - "type": "stat", - "title": "Ariadne Task Runs (1h)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 7 - }, - "targets": [ - { - "expr": "sum(increase(ariadne_task_runs_total[1h]))", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(115, 115, 115, 1)", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - } - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 10, "type": "bargauge", - "title": "Ariadne Schedule Last Error (hours ago)", + "title": "Suite Scoreboard: Success Rate (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 17 + "y": 10 }, "targets": [ { - "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room|schedule.pod_cleaner|schedule.opensearch_prune|schedule.image_sweeper|schedule.metis_k3s_token_sync|schedule.platform_quality_suite_probe)$\"}[$__range])) / 3600)", + "expr": "sort_desc((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h]))) > 0))", "refId": "A", - "legendFormat": "{{task}}", + "legendFormat": "{{suite}}", "instant": true } ], "fieldConfig": { "defaults": { - "unit": "h", + "unit": "percent", "min": 0, - "max": null, + "max": 100, "thresholds": { "mode": "absolute", "steps": [ @@ -634,468 +460,21 @@ "color": "red", "value": null }, - { - "color": "orange", - "value": 1 - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "green", - "value": 24 - } - ] - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ] - }, - { - "id": 11, - "type": "bargauge", - "title": "Ariadne Schedule Last Success (hours ago)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 17 - }, - "targets": [ - { - "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room|schedule.pod_cleaner|schedule.opensearch_prune|schedule.image_sweeper|schedule.metis_k3s_token_sync|schedule.platform_quality_suite_probe)$\"}[$__range])) / 3600)", - "refId": "A", - "legendFormat": "{{task}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "h", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "orange", - "value": 24 - }, - { - "color": "red", - "value": 48 - } - ] - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ] - }, - { - "id": 12, - "type": "bargauge", - "title": "Ariadne Fast Schedule Last Success (hours ago)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 23 - }, - "targets": [ - { - "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_seed_room|schedule.pod_cleaner|schedule.opensearch_prune|schedule.image_sweeper|schedule.metis_k3s_token_sync|schedule.platform_quality_suite_probe)$\"}[$__range])) / 3600)", - "refId": "A", - "legendFormat": "{{task}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "h", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "orange", - "value": 24 - }, - { - "color": "red", - "value": 48 - } - ] - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ] - }, - { - "id": 13, - "type": "bargauge", - "title": "Ariadne Fast Schedule Next Run (hours from now)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 23 - }, - "targets": [ - { - "expr": "sort_desc(((ariadne_schedule_next_run_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room|schedule.pod_cleaner|schedule.opensearch_prune|schedule.image_sweeper|schedule.metis_k3s_token_sync|schedule.platform_quality_suite_probe)$\"} - time()) / 3600))", - "refId": "A", - "legendFormat": "{{task}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "h", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "orange", - "value": 24 - }, - { - "color": "red", - "value": 48 - } - ] - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ] - }, - { - "id": 14, - "type": "bargauge", - "title": "Ariadne Task Errors (1h)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 29 - }, - "targets": [ - { - "expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[1h])))", - "refId": "A", - "legendFormat": "{{task}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "none", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 1 - }, - { - "color": "orange", - "value": 3 - }, - { - "color": "red", - "value": 5 - } - ] - } - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ] - }, - { - "id": 15, - "type": "bargauge", - "title": "Ariadne Task Errors (30d)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 29 - }, - "targets": [ - { - "expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[30d])))", - "refId": "A", - "legendFormat": "{{task}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "none", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 1 - }, - { - "color": "orange", - "value": 3 - }, - { - "color": "red", - "value": 5 - } - ] - } - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ] - }, - { - "id": 16, - "type": "bargauge", - "title": "Ariadne Access Requests", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 0, - "y": 11 - }, - "targets": [ - { - "expr": "sort_desc(ariadne_access_requests_total)", - "refId": "A", - "legendFormat": "{{status}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "none", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 50 - }, { "color": "orange", "value": 70 }, { - "color": "red", + "color": "yellow", "value": 85 + }, + { + "color": "green", + "value": 95 } ] - } + }, + "decimals": 2 }, "overrides": [] }, @@ -1123,81 +502,191 @@ ] }, { - "id": 17, - "type": "stat", - "title": "Platform Test Success Rate (30d)", + "id": 9, + "type": "bargauge", + "title": "Suite Scoreboard: Failures (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 6, - "w": 4, - "x": 8, - "y": 11 + "h": 8, + "w": 12, + "x": 12, + "y": 10 }, "targets": [ { - "expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d])) or on() vector(0))), 1)", + "expr": "sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status!~\"ok|passed|success\"}[24h])))", "refId": "A", + "legendFormat": "{{suite}}", "instant": true } ], "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], + "unit": "none", + "min": 0, + "max": null, "thresholds": { "mode": "absolute", "steps": [ { - "color": "rgba(115, 115, 115, 1)", + "color": "green", "value": null }, { - "color": "green", + "color": "yellow", "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 6 } ] }, - "unit": "percent", - "custom": { - "displayMode": "auto" - }, - "decimals": 2 + "decimals": 0 }, "overrides": [] }, "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", + "displayMode": "gradient", + "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false - }, - "textMode": "value" + } }, - "description": "Internal rollup across Ariadne task runs and Metis build/flash outcomes." + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] }, { - "id": 18, - "type": "table", - "title": "Platform Test Activity (30d)", + "id": 10, + "type": "timeseries", + "title": "Suite Success History (1h points)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 11 + "h": 8, + "w": 24, + "x": 0, + "y": 18 + }, + "targets": [ + { + "refId": "A", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "ariadne" + }, + { + "refId": "B", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "metis" + }, + { + "refId": "C", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "ananke" + }, + { + "refId": "D", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "atlasbot" + }, + { + "refId": "E", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "lesavka" + }, + { + "refId": "F", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "pegasus" + }, + { + "refId": "G", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "soteria" + }, + { + "refId": "H", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "titan-iac" + }, + { + "refId": "I", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "bstein-home" + }, + { + "refId": "J", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "arcanagon" + }, + { + "refId": "K", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "data-prepper" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "custom": { + "drawStyle": "line", + "lineInterpolation": "linear", + "lineWidth": 2, + "fillOpacity": 8, + "showPoints": "always", + "pointSize": 3, + "spanNulls": true + } + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 11, + "type": "table", + "title": "Suite Activity Matrix (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 26 }, "targets": [ { @@ -1234,110 +723,263 @@ } } ], - "description": "Atlas Overview test panels depend on this internal activity table sourced from Ariadne and Metis counters." + "description": "Totals by suite and status over the last 30 days." }, { - "id": 19, + "id": 12, "type": "timeseries", - "title": "Platform Test Success Rate by Suite", + "title": "Selected Suite Run Outcomes", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 6, - "w": 16, - "x": 8, - "y": 17 + "h": 8, + "w": 12, + "x": 0, + "y": 34 }, "targets": [ { "refId": "A", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0)", - "legendFormat": "ariadne" + "expr": "sum(increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",status=~\"ok|passed|success\"}[$__interval])) or on() vector(0)", + "legendFormat": "Success" }, { "refId": "B", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0)", - "legendFormat": "metis" + "expr": "sum(increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",status!~\"ok|passed|success\"}[$__interval])) or on() vector(0)", + "legendFormat": "Failure" }, { "refId": "C", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0)", - "legendFormat": "ananke" - }, - { - "refId": "D", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0)", - "legendFormat": "atlasbot" - }, - { - "refId": "E", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0)", - "legendFormat": "lesavka" - }, - { - "refId": "F", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0)", - "legendFormat": "pegasus" - }, - { - "refId": "G", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0)", - "legendFormat": "soteria" - }, - { - "refId": "H", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0)", - "legendFormat": "titan-iac" - }, - { - "refId": "I", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0)", - "legendFormat": "bstein-home" - }, - { - "refId": "J", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0)", - "legendFormat": "arcanagon" - }, - { - "refId": "K", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0)", - "legendFormat": "data-prepper" + "expr": "sum(increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\"}[$__interval])) or on() vector(0)", + "legendFormat": "Total" } ], "fieldConfig": { "defaults": { - "unit": "percent", - "min": 0, - "max": 100, - "custom": { - "drawStyle": "line", - "lineInterpolation": "linear", - "lineWidth": 2, - "fillOpacity": 10, - "showPoints": "always", - "pointSize": 4, - "spanNulls": true - } + "unit": "none" }, "overrides": [] }, "options": { "legend": { - "displayMode": "list", - "placement": "bottom" + "displayMode": "table", + "placement": "right", + "calcs": [ + "lastNotNull", + "sum" + ] }, "tooltip": { "mode": "multi" } }, - "description": "Per-run interval pass points (0-100) per suite. Existing suites: ariadne, metis, ananke; additional suites appear automatically when platform_quality_gate_runs_total is published." + "description": "Use Suite Drilldown to isolate one project." + }, + { + "id": 13, + "type": "timeseries", + "title": "Selected Suite Hygiene & Coverage History", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 34 + }, + "targets": [ + { + "refId": "A", + "expr": "max_over_time(platform_quality_gate_workspace_line_coverage_percent{suite=~\"${suite:regex}\"}[$__interval])", + "legendFormat": "{{suite}} coverage %" + }, + { + "refId": "B", + "expr": "max_over_time(platform_quality_gate_source_lines_over_500_total{suite=~\"${suite:regex}\"}[$__interval])", + "legendFormat": "{{suite}} files >500 LOC" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "right", + "calcs": [ + "lastNotNull", + "max" + ] + }, + "tooltip": { + "mode": "multi" + } + }, + "description": "Coverage and LOC hygiene trend for selected suite(s)." + }, + { + "id": 14, + "type": "table", + "title": "Selected Suite Failure Mix (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 42 + }, + "targets": [ + { + "expr": "sum by (suite, status) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",status!~\"ok|passed|success\"}[30d]))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ], + "description": "Breakdown of non-success outcomes by status over 30 days." + }, + { + "id": 15, + "type": "table", + "title": "Selected Suite Latest Test Counters", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 42 + }, + "targets": [ + { + "expr": "sum by (suite, result, __name__) ({__name__=~\".*_quality_gate_tests_total\",suite=~\"${suite:regex}\"})", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ], + "description": "Latest per-suite counters (passed/failed/error/skipped/total) from each pipeline exporter." + }, + { + "id": 16, + "type": "table", + "title": "Selected Suite Check-Level Failures", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 50 + }, + "targets": [ + { + "expr": "sum by (suite, check, result, __name__) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",result!~\"ok|passed|success\"})", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ], + "description": "Per-check failure details for suites that publish quality_gate_checks_total metrics." } ], "time": { - "from": "now-7d", + "from": "now-30d", "to": "now" }, "annotations": { @@ -1347,7 +989,87 @@ "style": "dark", "tags": [ "atlas", - "jobs", - "ariadne" - ] + "testing", + "quality-gate", + "ci" + ], + "templating": { + "list": [ + { + "name": "suite", + "label": "Suite", + "type": "custom", + "query": "ariadne,metis,ananke,atlasbot,lesavka,pegasus,soteria,titan-iac,bstein-home,arcanagon,data-prepper", + "current": { + "text": "All", + "value": "$__all", + "selected": true + }, + "options": [ + { + "text": "ariadne", + "value": "ariadne", + "selected": false + }, + { + "text": "metis", + "value": "metis", + "selected": false + }, + { + "text": "ananke", + "value": "ananke", + "selected": false + }, + { + "text": "atlasbot", + "value": "atlasbot", + "selected": false + }, + { + "text": "lesavka", + "value": "lesavka", + "selected": false + }, + { + "text": "pegasus", + "value": "pegasus", + "selected": false + }, + { + "text": "soteria", + "value": "soteria", + "selected": false + }, + { + "text": "titan-iac", + "value": "titan-iac", + "selected": false + }, + { + "text": "bstein-home", + "value": "bstein-home", + "selected": false + }, + { + "text": "arcanagon", + "value": "arcanagon", + "selected": false + }, + { + "text": "data-prepper", + "value": "data-prepper", + "selected": false + } + ], + "hide": 0, + "multi": false, + "includeAll": true, + "allValue": "ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper", + "refresh": 1, + "sort": 1, + "skipUrlSync": false + } + ] + } } diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index cc24c27a..0992bdcc 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -129,7 +129,7 @@ }, "links": [ { - "title": "Open atlas-pods dashboard", + "title": "Open Atlas Pods", "url": "/d/atlas-pods", "targetBlank": true } @@ -204,7 +204,7 @@ }, "links": [ { - "title": "Open atlas-pods dashboard", + "title": "Open Atlas Pods", "url": "/d/atlas-pods", "targetBlank": true } @@ -352,7 +352,7 @@ }, "links": [ { - "title": "Open atlas-pods dashboard", + "title": "Open Atlas Pods", "url": "/d/atlas-pods", "targetBlank": true } @@ -427,7 +427,7 @@ }, "links": [ { - "title": "Open atlas-pods dashboard", + "title": "Open Atlas Pods", "url": "/d/atlas-pods", "targetBlank": true } @@ -565,7 +565,7 @@ }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -642,7 +642,7 @@ }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -711,7 +711,7 @@ }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -780,7 +780,7 @@ }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -855,7 +855,7 @@ }, "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -930,7 +930,7 @@ }, "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -997,7 +997,7 @@ }, "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -1064,7 +1064,7 @@ }, "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -1287,7 +1287,7 @@ }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } @@ -1342,7 +1342,7 @@ }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } @@ -1481,7 +1481,7 @@ }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } @@ -1578,7 +1578,7 @@ }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } @@ -1673,7 +1673,7 @@ }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } @@ -1733,7 +1733,7 @@ }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } @@ -1804,7 +1804,7 @@ }, "links": [ { - "title": "Open atlas-jobs dashboard", + "title": "Open Atlas Testing", "url": "/d/atlas-jobs", "targetBlank": true } @@ -1901,7 +1901,7 @@ }, "links": [ { - "title": "Open atlas-jobs dashboard", + "title": "Open Atlas Testing", "url": "/d/atlas-jobs", "targetBlank": true } @@ -1924,57 +1924,57 @@ "targets": [ { "refId": "A", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "ariadne" }, { "refId": "B", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "metis" }, { "refId": "C", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "ananke" }, { "refId": "D", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "atlasbot" }, { "refId": "E", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "lesavka" }, { "refId": "F", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "pegasus" }, { "refId": "G", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "soteria" }, { "refId": "H", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "titan-iac" }, { "refId": "I", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "bstein-home" }, { "refId": "J", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "arcanagon" }, { "refId": "K", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "data-prepper" } ], @@ -2009,7 +2009,7 @@ }, "links": [ { - "title": "Open atlas-jobs dashboard", + "title": "Open Atlas Testing", "url": "/d/atlas-jobs", "targetBlank": true } @@ -2092,7 +2092,7 @@ ], "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -2160,7 +2160,7 @@ }, "links": [ { - "title": "Open atlas-mail dashboard", + "title": "Open Atlas Mail", "url": "/d/atlas-mail", "targetBlank": true } @@ -2265,7 +2265,7 @@ }, "links": [ { - "title": "Open atlas-mail dashboard", + "title": "Open Atlas Mail", "url": "/d/atlas-mail", "targetBlank": true } @@ -2341,7 +2341,7 @@ }, "links": [ { - "title": "Open atlas-mail dashboard", + "title": "Open Atlas Mail", "url": "/d/atlas-mail", "targetBlank": true } @@ -2417,7 +2417,7 @@ }, "links": [ { - "title": "Open atlas-mail dashboard", + "title": "Open Atlas Mail", "url": "/d/atlas-mail", "targetBlank": true } @@ -2797,7 +2797,7 @@ }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -2844,7 +2844,7 @@ }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -3094,7 +3094,7 @@ }, "links": [ { - "title": "Open atlas-network dashboard", + "title": "Open Atlas Network", "url": "/d/atlas-network", "targetBlank": true } @@ -3138,7 +3138,7 @@ }, "links": [ { - "title": "Open atlas-network dashboard", + "title": "Open Atlas Network", "url": "/d/atlas-network", "targetBlank": true } @@ -3182,7 +3182,7 @@ }, "links": [ { - "title": "Open atlas-network dashboard", + "title": "Open Atlas Network", "url": "/d/atlas-network", "targetBlank": true } @@ -3230,7 +3230,7 @@ "timeFrom": "30d", "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -3278,7 +3278,7 @@ "timeFrom": "1w", "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -3402,5 +3402,11 @@ "to": "now" }, "refresh": "1m", - "links": [] + "links": [ + { + "title": "Atlas Testing (Internal)", + "url": "/d/atlas-jobs", + "targetBlank": false + } + ] } diff --git a/services/monitoring/grafana-dashboard-jobs.yaml b/services/monitoring/grafana-dashboard-jobs.yaml index dd2b019b..127c068a 100644 --- a/services/monitoring/grafana-dashboard-jobs.yaml +++ b/services/monitoring/grafana-dashboard-jobs.yaml @@ -9,256 +9,45 @@ data: atlas-jobs.json: | { "uid": "atlas-jobs", - "title": "Atlas Jobs", + "title": "Atlas Testing", "folderUid": "atlas-internal", "editable": true, "panels": [ { "id": 1, - "type": "bargauge", - "title": "Ariadne Task Errors (range)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, + "type": "text", + "title": "Testing Modes", "gridPos": { - "h": 7, - "w": 8, + "h": 4, + "w": 24, "x": 0, "y": 0 }, - "targets": [ - { - "expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[$__range])))", - "refId": "A", - "legendFormat": "{{task}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "none", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 1 - }, - { - "color": "orange", - "value": 3 - }, - { - "color": "red", - "value": 5 - } - ] - } - }, - "overrides": [] - }, + "datasource": null, "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ] - }, - { - "id": 2, - "type": "timeseries", - "title": "Ariadne Attempts / Failures", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 0 - }, - "targets": [ - { - "expr": "sum(increase(ariadne_task_runs_total[$__interval]))", - "refId": "A", - "legendFormat": "Attempts" - }, - { - "expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))", - "refId": "B", - "legendFormat": "Failures" - } - ], - "fieldConfig": { - "defaults": { - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Attempts" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "green" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Failures" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "red" - } - } - ] - } - ] - }, - "options": { - "legend": { - "displayMode": "table", - "placement": "right" - }, - "tooltip": { - "mode": "multi" - } + "mode": "markdown", + "content": "### Atlas Testing\n- **Overview mode**: set `Suite=All` to compare every project.\n- **Suite drilldown mode**: choose one `Suite` to inspect run outcomes, coverage drift, and failure mix.\n- Jenkins drilldown: [ananke](https://jenkins.bstein.dev/job/ananke/) \u00b7 [ariadne](https://jenkins.bstein.dev/job/ariadne/) \u00b7 [atlasbot](https://jenkins.bstein.dev/job/atlasbot/) \u00b7 [bstein-dev-home](https://jenkins.bstein.dev/job/bstein-dev-home/) \u00b7 [lesavka](https://jenkins.bstein.dev/job/lesavka/) \u00b7 [metis](https://jenkins.bstein.dev/job/metis/) \u00b7 [pegasus](https://jenkins.bstein.dev/job/pegasus/) \u00b7 [titan-iac](https://jenkins.bstein.dev/job/titan-iac/) \u00b7 [typhon](https://jenkins.bstein.dev/job/typhon/)" } }, { - "id": 3, - "type": "bargauge", - "title": "One-off Job Pods (age hours)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 0 - }, - "targets": [ - { - "expr": "sort_desc(((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"}))", - "refId": "A", - "legendFormat": "{{namespace}}/{{pod}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "h", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "orange", - "value": 24 - }, - { - "color": "red", - "value": 48 - } - ] - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - }, - { - "id": "limit", - "options": { - "limit": 12 - } - } - ] - }, - { - "id": 4, + "id": 2, "type": "stat", - "title": "Ariadne Schedules Stale (>36h)", + "title": "Success Rate (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, + "h": 6, "w": 4, "x": 0, - "y": 7 + "y": 4 }, "targets": [ { - "expr": "sum((((time() - ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room|schedule.pod_cleaner|schedule.image_sweeper|schedule.metis_k3s_token_sync|schedule.platform_quality_suite_probe)$\"})) > bool 129600)) or on() vector(0)", - "refId": "A" + "expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[24h])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h])) or on() vector(0))), 1)", + "refId": "A", + "instant": true } ], "fieldConfig": { @@ -271,27 +60,168 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "red", "value": null }, - { - "color": "yellow", - "value": 1 - }, { "color": "orange", - "value": 2 + "value": 70 }, { - "color": "red", - "value": 3 + "color": "yellow", + "value": 85 + }, + { + "color": "green", + "value": 95 } ] }, - "unit": "none", + "unit": "percent", "custom": { "displayMode": "auto" - } + }, + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 3, + "type": "stat", + "title": "Success Rate (7d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 4, + "y": 4 + }, + "targets": [ + { + "expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[7d])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[7d])) or on() vector(0))), 1)", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "yellow", + "value": 85 + }, + { + "color": "green", + "value": 95 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 4, + "type": "stat", + "title": "Success Rate (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 8, + "y": 4 + }, + "targets": [ + { + "expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d])) or on() vector(0))), 1)", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "yellow", + "value": 85 + }, + { + "color": "green", + "value": 95 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 2 }, "overrides": [] }, @@ -312,21 +242,22 @@ data: { "id": 5, "type": "stat", - "title": "Ariadne Schedules Missing Success", + "title": "Failures (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, + "h": 6, "w": 4, - "x": 4, - "y": 7 + "x": 12, + "y": 4 }, "targets": [ { - "expr": "count((ariadne_schedule_next_run_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room|schedule.pod_cleaner|schedule.opensearch_prune|schedule.image_sweeper|schedule.metis_k3s_token_sync|schedule.platform_quality_suite_probe)$\"} unless on(task) ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room|schedule.pod_cleaner|schedule.image_sweeper|schedule.metis_k3s_token_sync|schedule.platform_quality_suite_probe)$\"})) or on() vector(0)", - "refId": "A" + "expr": "(sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status!~\"ok|passed|success\"}[24h])) or on() vector(0))", + "refId": "A", + "instant": true } ], "fieldConfig": { @@ -339,12 +270,20 @@ data: "mode": "absolute", "steps": [ { - "color": "rgba(115, 115, 115, 1)", + "color": "green", "value": null }, { - "color": "green", + "color": "yellow", "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 6 } ] }, @@ -372,21 +311,22 @@ data: { "id": 6, "type": "stat", - "title": "Ariadne Schedules Failed Last Run", + "title": "Runs (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, + "h": 6, "w": 4, - "x": 8, - "y": 7 + "x": 16, + "y": 4 }, "targets": [ { - "expr": "sum(((1 - ariadne_schedule_last_status{task=~\"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room|schedule.pod_cleaner|schedule.image_sweeper|schedule.metis_k3s_token_sync|schedule.platform_quality_suite_probe)$\"}) > bool 0)) or on() vector(0)", - "refId": "A" + "expr": "(sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h])) or on() vector(0))", + "refId": "A", + "instant": true } ], "fieldConfig": { @@ -399,7 +339,7 @@ data: "mode": "absolute", "steps": [ { - "color": "rgba(115, 115, 115, 1)", + "color": "red", "value": null }, { @@ -432,21 +372,22 @@ data: { "id": 7, "type": "stat", - "title": "Ariadne Task Errors (1h)", + "title": "Suites Active (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, + "h": 6, "w": 4, - "x": 12, - "y": 7 + "x": 20, + "y": 4 }, "targets": [ { - "expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[1h]))", - "refId": "A" + "expr": "sum((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h])) > 0)) or on() vector(0)", + "refId": "A", + "instant": true } ], "fieldConfig": { @@ -459,19 +400,24 @@ data: "mode": "absolute", "steps": [ { - "color": "rgba(115, 115, 115, 1)", + "color": "red", "value": null }, + { + "color": "yellow", + "value": 4 + }, { "color": "green", - "value": 1 + "value": 8 } ] }, "unit": "none", "custom": { "displayMode": "auto" - } + }, + "decimals": 0 }, "overrides": [] }, @@ -491,151 +437,31 @@ data: }, { "id": 8, - "type": "stat", - "title": "Ariadne Task Errors (24h)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 16, - "y": 7 - }, - "targets": [ - { - "expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[24h]))", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(115, 115, 115, 1)", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - } - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 9, - "type": "stat", - "title": "Ariadne Task Runs (1h)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 7 - }, - "targets": [ - { - "expr": "sum(increase(ariadne_task_runs_total[1h]))", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(115, 115, 115, 1)", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - } - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 10, "type": "bargauge", - "title": "Ariadne Schedule Last Error (hours ago)", + "title": "Suite Scoreboard: Success Rate (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 17 + "y": 10 }, "targets": [ { - "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room|schedule.pod_cleaner|schedule.opensearch_prune|schedule.image_sweeper|schedule.metis_k3s_token_sync|schedule.platform_quality_suite_probe)$\"}[$__range])) / 3600)", + "expr": "sort_desc((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h]))) > 0))", "refId": "A", - "legendFormat": "{{task}}", + "legendFormat": "{{suite}}", "instant": true } ], "fieldConfig": { "defaults": { - "unit": "h", + "unit": "percent", "min": 0, - "max": null, + "max": 100, "thresholds": { "mode": "absolute", "steps": [ @@ -643,468 +469,21 @@ data: "color": "red", "value": null }, - { - "color": "orange", - "value": 1 - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "green", - "value": 24 - } - ] - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ] - }, - { - "id": 11, - "type": "bargauge", - "title": "Ariadne Schedule Last Success (hours ago)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 17 - }, - "targets": [ - { - "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room|schedule.pod_cleaner|schedule.opensearch_prune|schedule.image_sweeper|schedule.metis_k3s_token_sync|schedule.platform_quality_suite_probe)$\"}[$__range])) / 3600)", - "refId": "A", - "legendFormat": "{{task}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "h", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "orange", - "value": 24 - }, - { - "color": "red", - "value": 48 - } - ] - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ] - }, - { - "id": 12, - "type": "bargauge", - "title": "Ariadne Fast Schedule Last Success (hours ago)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 23 - }, - "targets": [ - { - "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_seed_room|schedule.pod_cleaner|schedule.opensearch_prune|schedule.image_sweeper|schedule.metis_k3s_token_sync|schedule.platform_quality_suite_probe)$\"}[$__range])) / 3600)", - "refId": "A", - "legendFormat": "{{task}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "h", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "orange", - "value": 24 - }, - { - "color": "red", - "value": 48 - } - ] - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ] - }, - { - "id": 13, - "type": "bargauge", - "title": "Ariadne Fast Schedule Next Run (hours from now)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 23 - }, - "targets": [ - { - "expr": "sort_desc(((ariadne_schedule_next_run_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room|schedule.pod_cleaner|schedule.opensearch_prune|schedule.image_sweeper|schedule.metis_k3s_token_sync|schedule.platform_quality_suite_probe)$\"} - time()) / 3600))", - "refId": "A", - "legendFormat": "{{task}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "h", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "orange", - "value": 24 - }, - { - "color": "red", - "value": 48 - } - ] - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ] - }, - { - "id": 14, - "type": "bargauge", - "title": "Ariadne Task Errors (1h)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 29 - }, - "targets": [ - { - "expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[1h])))", - "refId": "A", - "legendFormat": "{{task}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "none", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 1 - }, - { - "color": "orange", - "value": 3 - }, - { - "color": "red", - "value": 5 - } - ] - } - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ] - }, - { - "id": 15, - "type": "bargauge", - "title": "Ariadne Task Errors (30d)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 29 - }, - "targets": [ - { - "expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[30d])))", - "refId": "A", - "legendFormat": "{{task}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "none", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 1 - }, - { - "color": "orange", - "value": 3 - }, - { - "color": "red", - "value": 5 - } - ] - } - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ] - }, - { - "id": 16, - "type": "bargauge", - "title": "Ariadne Access Requests", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 0, - "y": 11 - }, - "targets": [ - { - "expr": "sort_desc(ariadne_access_requests_total)", - "refId": "A", - "legendFormat": "{{status}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "none", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 50 - }, { "color": "orange", "value": 70 }, { - "color": "red", + "color": "yellow", "value": 85 + }, + { + "color": "green", + "value": 95 } ] - } + }, + "decimals": 2 }, "overrides": [] }, @@ -1132,81 +511,191 @@ data: ] }, { - "id": 17, - "type": "stat", - "title": "Platform Test Success Rate (30d)", + "id": 9, + "type": "bargauge", + "title": "Suite Scoreboard: Failures (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 6, - "w": 4, - "x": 8, - "y": 11 + "h": 8, + "w": 12, + "x": 12, + "y": 10 }, "targets": [ { - "expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d])) or on() vector(0))), 1)", + "expr": "sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status!~\"ok|passed|success\"}[24h])))", "refId": "A", + "legendFormat": "{{suite}}", "instant": true } ], "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], + "unit": "none", + "min": 0, + "max": null, "thresholds": { "mode": "absolute", "steps": [ { - "color": "rgba(115, 115, 115, 1)", + "color": "green", "value": null }, { - "color": "green", + "color": "yellow", "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 6 } ] }, - "unit": "percent", - "custom": { - "displayMode": "auto" - }, - "decimals": 2 + "decimals": 0 }, "overrides": [] }, "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", + "displayMode": "gradient", + "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false - }, - "textMode": "value" + } }, - "description": "Internal rollup across Ariadne task runs and Metis build/flash outcomes." + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] }, { - "id": 18, - "type": "table", - "title": "Platform Test Activity (30d)", + "id": 10, + "type": "timeseries", + "title": "Suite Success History (1h points)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 11 + "h": 8, + "w": 24, + "x": 0, + "y": 18 + }, + "targets": [ + { + "refId": "A", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "ariadne" + }, + { + "refId": "B", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "metis" + }, + { + "refId": "C", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "ananke" + }, + { + "refId": "D", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "atlasbot" + }, + { + "refId": "E", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "lesavka" + }, + { + "refId": "F", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "pegasus" + }, + { + "refId": "G", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "soteria" + }, + { + "refId": "H", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "titan-iac" + }, + { + "refId": "I", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "bstein-home" + }, + { + "refId": "J", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "arcanagon" + }, + { + "refId": "K", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0) or on() vector(0)", + "legendFormat": "data-prepper" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "custom": { + "drawStyle": "line", + "lineInterpolation": "linear", + "lineWidth": 2, + "fillOpacity": 8, + "showPoints": "always", + "pointSize": 3, + "spanNulls": true + } + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 11, + "type": "table", + "title": "Suite Activity Matrix (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 26 }, "targets": [ { @@ -1243,110 +732,263 @@ data: } } ], - "description": "Atlas Overview test panels depend on this internal activity table sourced from Ariadne and Metis counters." + "description": "Totals by suite and status over the last 30 days." }, { - "id": 19, + "id": 12, "type": "timeseries", - "title": "Platform Test Success Rate by Suite", + "title": "Selected Suite Run Outcomes", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 6, - "w": 16, - "x": 8, - "y": 17 + "h": 8, + "w": 12, + "x": 0, + "y": 34 }, "targets": [ { "refId": "A", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0)", - "legendFormat": "ariadne" + "expr": "sum(increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",status=~\"ok|passed|success\"}[$__interval])) or on() vector(0)", + "legendFormat": "Success" }, { "refId": "B", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0)", - "legendFormat": "metis" + "expr": "sum(increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",status!~\"ok|passed|success\"}[$__interval])) or on() vector(0)", + "legendFormat": "Failure" }, { "refId": "C", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0)", - "legendFormat": "ananke" - }, - { - "refId": "D", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0)", - "legendFormat": "atlasbot" - }, - { - "refId": "E", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0)", - "legendFormat": "lesavka" - }, - { - "refId": "F", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0)", - "legendFormat": "pegasus" - }, - { - "refId": "G", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0)", - "legendFormat": "soteria" - }, - { - "refId": "H", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0)", - "legendFormat": "titan-iac" - }, - { - "refId": "I", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0)", - "legendFormat": "bstein-home" - }, - { - "refId": "J", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0)", - "legendFormat": "arcanagon" - }, - { - "refId": "K", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0)", - "legendFormat": "data-prepper" + "expr": "sum(increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\"}[$__interval])) or on() vector(0)", + "legendFormat": "Total" } ], "fieldConfig": { "defaults": { - "unit": "percent", - "min": 0, - "max": 100, - "custom": { - "drawStyle": "line", - "lineInterpolation": "linear", - "lineWidth": 2, - "fillOpacity": 10, - "showPoints": "always", - "pointSize": 4, - "spanNulls": true - } + "unit": "none" }, "overrides": [] }, "options": { "legend": { - "displayMode": "list", - "placement": "bottom" + "displayMode": "table", + "placement": "right", + "calcs": [ + "lastNotNull", + "sum" + ] }, "tooltip": { "mode": "multi" } }, - "description": "Per-run interval pass points (0-100) per suite. Existing suites: ariadne, metis, ananke; additional suites appear automatically when platform_quality_gate_runs_total is published." + "description": "Use Suite Drilldown to isolate one project." + }, + { + "id": 13, + "type": "timeseries", + "title": "Selected Suite Hygiene & Coverage History", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 34 + }, + "targets": [ + { + "refId": "A", + "expr": "max_over_time(platform_quality_gate_workspace_line_coverage_percent{suite=~\"${suite:regex}\"}[$__interval])", + "legendFormat": "{{suite}} coverage %" + }, + { + "refId": "B", + "expr": "max_over_time(platform_quality_gate_source_lines_over_500_total{suite=~\"${suite:regex}\"}[$__interval])", + "legendFormat": "{{suite}} files >500 LOC" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "right", + "calcs": [ + "lastNotNull", + "max" + ] + }, + "tooltip": { + "mode": "multi" + } + }, + "description": "Coverage and LOC hygiene trend for selected suite(s)." + }, + { + "id": 14, + "type": "table", + "title": "Selected Suite Failure Mix (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 42 + }, + "targets": [ + { + "expr": "sum by (suite, status) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",status!~\"ok|passed|success\"}[30d]))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ], + "description": "Breakdown of non-success outcomes by status over 30 days." + }, + { + "id": 15, + "type": "table", + "title": "Selected Suite Latest Test Counters", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 42 + }, + "targets": [ + { + "expr": "sum by (suite, result, __name__) ({__name__=~\".*_quality_gate_tests_total\",suite=~\"${suite:regex}\"})", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ], + "description": "Latest per-suite counters (passed/failed/error/skipped/total) from each pipeline exporter." + }, + { + "id": 16, + "type": "table", + "title": "Selected Suite Check-Level Failures", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 50 + }, + "targets": [ + { + "expr": "sum by (suite, check, result, __name__) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",result!~\"ok|passed|success\"})", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ], + "description": "Per-check failure details for suites that publish quality_gate_checks_total metrics." } ], "time": { - "from": "now-7d", + "from": "now-30d", "to": "now" }, "annotations": { @@ -1356,7 +998,87 @@ data: "style": "dark", "tags": [ "atlas", - "jobs", - "ariadne" - ] + "testing", + "quality-gate", + "ci" + ], + "templating": { + "list": [ + { + "name": "suite", + "label": "Suite", + "type": "custom", + "query": "ariadne,metis,ananke,atlasbot,lesavka,pegasus,soteria,titan-iac,bstein-home,arcanagon,data-prepper", + "current": { + "text": "All", + "value": "$__all", + "selected": true + }, + "options": [ + { + "text": "ariadne", + "value": "ariadne", + "selected": false + }, + { + "text": "metis", + "value": "metis", + "selected": false + }, + { + "text": "ananke", + "value": "ananke", + "selected": false + }, + { + "text": "atlasbot", + "value": "atlasbot", + "selected": false + }, + { + "text": "lesavka", + "value": "lesavka", + "selected": false + }, + { + "text": "pegasus", + "value": "pegasus", + "selected": false + }, + { + "text": "soteria", + "value": "soteria", + "selected": false + }, + { + "text": "titan-iac", + "value": "titan-iac", + "selected": false + }, + { + "text": "bstein-home", + "value": "bstein-home", + "selected": false + }, + { + "text": "arcanagon", + "value": "arcanagon", + "selected": false + }, + { + "text": "data-prepper", + "value": "data-prepper", + "selected": false + } + ], + "hide": 0, + "multi": false, + "includeAll": true, + "allValue": "ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper", + "refresh": 1, + "sort": 1, + "skipUrlSync": false + } + ] + } } diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index bfa84784..44bcbad4 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -138,7 +138,7 @@ data: }, "links": [ { - "title": "Open atlas-pods dashboard", + "title": "Open Atlas Pods", "url": "/d/atlas-pods", "targetBlank": true } @@ -213,7 +213,7 @@ data: }, "links": [ { - "title": "Open atlas-pods dashboard", + "title": "Open Atlas Pods", "url": "/d/atlas-pods", "targetBlank": true } @@ -361,7 +361,7 @@ data: }, "links": [ { - "title": "Open atlas-pods dashboard", + "title": "Open Atlas Pods", "url": "/d/atlas-pods", "targetBlank": true } @@ -436,7 +436,7 @@ data: }, "links": [ { - "title": "Open atlas-pods dashboard", + "title": "Open Atlas Pods", "url": "/d/atlas-pods", "targetBlank": true } @@ -574,7 +574,7 @@ data: }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -651,7 +651,7 @@ data: }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -720,7 +720,7 @@ data: }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -789,7 +789,7 @@ data: }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -864,7 +864,7 @@ data: }, "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -939,7 +939,7 @@ data: }, "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -1006,7 +1006,7 @@ data: }, "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -1073,7 +1073,7 @@ data: }, "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -1296,7 +1296,7 @@ data: }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } @@ -1351,7 +1351,7 @@ data: }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } @@ -1490,7 +1490,7 @@ data: }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } @@ -1587,7 +1587,7 @@ data: }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } @@ -1682,7 +1682,7 @@ data: }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } @@ -1742,7 +1742,7 @@ data: }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } @@ -1813,7 +1813,7 @@ data: }, "links": [ { - "title": "Open atlas-jobs dashboard", + "title": "Open Atlas Testing", "url": "/d/atlas-jobs", "targetBlank": true } @@ -1910,7 +1910,7 @@ data: }, "links": [ { - "title": "Open atlas-jobs dashboard", + "title": "Open Atlas Testing", "url": "/d/atlas-jobs", "targetBlank": true } @@ -1933,57 +1933,57 @@ data: "targets": [ { "refId": "A", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "ariadne" }, { "refId": "B", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "metis" }, { "refId": "C", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "ananke" }, { "refId": "D", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "atlasbot" }, { "refId": "E", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "lesavka" }, { "refId": "F", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "pegasus" }, { "refId": "G", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "soteria" }, { "refId": "H", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "titan-iac" }, { "refId": "I", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "bstein-home" }, { "refId": "J", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "arcanagon" }, { "refId": "K", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "data-prepper" } ], @@ -2018,7 +2018,7 @@ data: }, "links": [ { - "title": "Open atlas-jobs dashboard", + "title": "Open Atlas Testing", "url": "/d/atlas-jobs", "targetBlank": true } @@ -2101,7 +2101,7 @@ data: ], "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -2169,7 +2169,7 @@ data: }, "links": [ { - "title": "Open atlas-mail dashboard", + "title": "Open Atlas Mail", "url": "/d/atlas-mail", "targetBlank": true } @@ -2274,7 +2274,7 @@ data: }, "links": [ { - "title": "Open atlas-mail dashboard", + "title": "Open Atlas Mail", "url": "/d/atlas-mail", "targetBlank": true } @@ -2350,7 +2350,7 @@ data: }, "links": [ { - "title": "Open atlas-mail dashboard", + "title": "Open Atlas Mail", "url": "/d/atlas-mail", "targetBlank": true } @@ -2426,7 +2426,7 @@ data: }, "links": [ { - "title": "Open atlas-mail dashboard", + "title": "Open Atlas Mail", "url": "/d/atlas-mail", "targetBlank": true } @@ -2806,7 +2806,7 @@ data: }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -2853,7 +2853,7 @@ data: }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -3103,7 +3103,7 @@ data: }, "links": [ { - "title": "Open atlas-network dashboard", + "title": "Open Atlas Network", "url": "/d/atlas-network", "targetBlank": true } @@ -3147,7 +3147,7 @@ data: }, "links": [ { - "title": "Open atlas-network dashboard", + "title": "Open Atlas Network", "url": "/d/atlas-network", "targetBlank": true } @@ -3191,7 +3191,7 @@ data: }, "links": [ { - "title": "Open atlas-network dashboard", + "title": "Open Atlas Network", "url": "/d/atlas-network", "targetBlank": true } @@ -3239,7 +3239,7 @@ data: "timeFrom": "30d", "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -3287,7 +3287,7 @@ data: "timeFrom": "1w", "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -3411,5 +3411,11 @@ data: "to": "now" }, "refresh": "1m", - "links": [] + "links": [ + { + "title": "Atlas Testing (Internal)", + "url": "/d/atlas-jobs", + "targetBlank": false + } + ] }