From 9a20f4f8544d07876741f285675cb96d620d2376 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 18 Apr 2026 17:47:06 -0300 Subject: [PATCH] monitoring(testing): redesign atlas testing dashboard and unify suite aliases --- scripts/dashboards_render_atlas.py | 458 ++++++--- .../monitoring/dashboards/atlas-overview.json | 22 +- .../monitoring/dashboards/atlas-testing.json | 969 ++++++++++++++---- .../grafana-dashboard-overview.yaml | 22 +- .../monitoring/grafana-dashboard-testing.yaml | 969 ++++++++++++++---- 5 files changed, 1909 insertions(+), 531 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 79807dd7..8853f903 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -535,9 +535,28 @@ PLATFORM_TEST_SUITE_NAMES = [ "arcanagon", "data-prepper", ] -PLATFORM_TEST_SUITE_MATCHER = "|".join(PLATFORM_TEST_SUITE_NAMES) +PLATFORM_TEST_SUCCESS_STATUS = "ok|passed|success" +PLATFORM_TEST_SUITE_VALUE_BY_NAME = { + "ariadne": "ariadne", + "metis": "metis", + "ananke": "ananke", + "atlasbot": "atlasbot", + "lesavka": "lesavka", + "pegasus": "pegasus|pegasus-health|pegasus_health", + "soteria": "soteria", + "titan-iac": "titan-iac|titan_iac", + "bstein-home": "bstein-home|bstein_home", + "arcanagon": "arcanagon", + "data-prepper": "data-prepper|data_prepper", +} +PLATFORM_TEST_SUITE_MATCHER = "|".join( + PLATFORM_TEST_SUITE_VALUE_BY_NAME.get(suite, suite) for suite in PLATFORM_TEST_SUITE_NAMES +) +PLATFORM_TEST_SUITE_VARIABLE_ALL_MATCHER = "|".join( + PLATFORM_TEST_SUITE_VALUE_BY_NAME.get(suite, suite) for suite in PLATFORM_TEST_SUITE_NAMES +) PLATFORM_TEST_SUCCESS_EVENTS_30D = ( - f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"ok|passed|success"}}[30d])) or on() vector(0))' + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[30d])) or on() vector(0))' ) PLATFORM_TEST_TOTAL_EVENTS_30D = ( f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}"}}[30d])) or on() vector(0))' @@ -546,10 +565,10 @@ TEST_SUCCESS_RATE = ( f"100 * ({PLATFORM_TEST_SUCCESS_EVENTS_30D}) / clamp_min(({PLATFORM_TEST_TOTAL_EVENTS_30D}), 1)" ) TEST_FAILURES_24H_TOTAL = ( - f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status!~"ok|passed|success"}}[24h])) or on() vector(0))' + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status!~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[24h])) or on() vector(0))' ) PLATFORM_TEST_FAILURES_24H_BY_SUITE = ( - f'sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status!~"ok|passed|success"}}[24h])))' + f'sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status!~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[24h])))' ) PLATFORM_TEST_ACTIVITY_30D = ( f'sum by (suite, status) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}"}}[30d]))' @@ -559,10 +578,10 @@ PLATFORM_TEST_SUCCESS_RATE_SUITE_TARGETS = [ { "refId": chr(ord("A") + index), "expr": ( - f'(100 * (sum(increase(platform_quality_gate_runs_total{{suite="{suite}",status=~"ok|passed|success"}}' + f'(100 * (sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_VALUE_BY_NAME.get(suite, suite)}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}"}}' f'[{PLATFORM_TEST_POINT_WINDOW}]))) / ' - f'clamp_min((sum(increase(platform_quality_gate_runs_total{{suite="{suite}"}}[{PLATFORM_TEST_POINT_WINDOW}]))), 1)) ' - f'and on() ((sum(increase(platform_quality_gate_runs_total{{suite="{suite}"}}[{PLATFORM_TEST_POINT_WINDOW}]))) > 0) ' + f'clamp_min((sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_VALUE_BY_NAME.get(suite, suite)}"}}[{PLATFORM_TEST_POINT_WINDOW}]))), 1)) ' + f'and on() ((sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_VALUE_BY_NAME.get(suite, suite)}"}}[{PLATFORM_TEST_POINT_WINDOW}]))) > 0) ' "or on() vector(0)" ), "legendFormat": suite, @@ -570,7 +589,7 @@ PLATFORM_TEST_SUCCESS_RATE_SUITE_TARGETS = [ for index, suite in enumerate(PLATFORM_TEST_SUITE_NAMES) ] PLATFORM_TEST_SUCCESS_RATE_24H_BY_SUITE = ( - f'sort_desc((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"ok|passed|success"}}[24h]))) ' + f'sort_desc((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[24h]))) ' f'/ clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}"}}[24h]))), 1)) ' f'and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}"}}[24h]))) > 0))' ) @@ -1224,7 +1243,7 @@ def table_panel( return panel -def pie_panel(panel_id, title, expr, grid, *, links=None, description=None): +def pie_panel(panel_id, title, expr, grid, *, links=None, description=None, legend="{{namespace}}", unit="percent", instant=False): """Return a pie chart panel with readable namespace labels.""" panel = { "id": panel_id, @@ -1232,10 +1251,10 @@ def pie_panel(panel_id, title, expr, grid, *, links=None, description=None): "title": title, "datasource": PROM_DS, "gridPos": grid, - "targets": [{"expr": expr, "refId": "A", "legendFormat": "{{namespace}}"}], + "targets": [{"expr": expr, "refId": "A", "legendFormat": legend, **({"instant": True} if instant else {})}], "fieldConfig": { "defaults": { - "unit": "percent", + "unit": unit, "color": {"mode": "palette-classic"}, }, "overrides": [], @@ -1295,6 +1314,36 @@ def namespace_scope_variable(var_name, label): } +def testing_suite_variable(): + options = [ + { + "text": suite, + "value": PLATFORM_TEST_SUITE_VALUE_BY_NAME.get(suite, suite), + "selected": False, + } + for suite in PLATFORM_TEST_SUITE_NAMES + ] + query = ",".join( + f"{suite} : {PLATFORM_TEST_SUITE_VALUE_BY_NAME.get(suite, suite)}" + for suite in PLATFORM_TEST_SUITE_NAMES + ) + return { + "name": "suite", + "label": "Suite", + "type": "custom", + "query": query, + "current": {"text": "All", "value": "$__all", "selected": True}, + "options": options, + "hide": 0, + "multi": False, + "includeAll": True, + "allValue": PLATFORM_TEST_SUITE_VARIABLE_ALL_MATCHER, + "refresh": 1, + "sort": 1, + "skipUrlSync": False, + } + + def namespace_scope_links(var_name): def with_value(value): encoded = urllib.parse.quote(value, safe="") @@ -4014,6 +4063,55 @@ def build_jobs_dashboard(): def build_testing_dashboard(): panels = [] + suite_var = "${suite}" + success = PLATFORM_TEST_SUCCESS_STATUS + runs_24h = ( + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[24h])) or on() vector(0))' + ) + runs_30d = ( + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[30d])) or on() vector(0))' + ) + success_24h = ( + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status=~"{success}"}}[24h])) or on() vector(0))' + ) + success_30d = ( + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status=~"{success}"}}[30d])) or on() vector(0))' + ) + failures_24h = ( + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status!~"{success}"}}[24h])) or on() vector(0))' + ) + success_rate_24h = f"100 * ({success_24h}) / clamp_min(({runs_24h}), 1)" + success_rate_30d = f"100 * ({success_30d}) / clamp_min(({runs_30d}), 1)" + suite_index_30d = ( + f'sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[30d]))' + ) + coverage_by_suite = ( + f'(max by (suite) ({{__name__=~".*_quality_gate_coverage_percent",suite=~"{suite_var}"}})) ' + f'or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent{{suite=~"{suite_var}"}}))' + ) + coverage_with_missing = ( + f"({coverage_by_suite}) or on(suite) (0 * ({suite_index_30d}) - 1)" + ) + coverage_gap = f"clamp_min(95 - ({coverage_by_suite}), 0)" + smell_by_suite = ( + f'max by (suite) (platform_quality_gate_source_lines_over_500_total{{suite=~"{suite_var}"}})' + ) + smell_with_missing = ( + f"({smell_by_suite}) or on(suite) (0 * ({suite_index_30d}) - 1)" + ) + average_coverage = f"(avg(({coverage_by_suite})) or on() vector(0))" + suites_loc_violating = f'(sum((({smell_by_suite}) > bool 0)) or on() vector(0))' + success_rate_by_suite_24h = ( + f'sort_desc((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status=~"{success}"}}[24h]))) ' + f'/ clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[24h]))), 1)) ' + f'and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[24h]))) > 0))' + ) + success_history_by_suite = ( + f'(100 * (sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status=~"{success}"}}[$__interval]))) ' + f'/ clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[$__interval]))), 1)) ' + f'and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[$__interval]))) > 0))' + ) + pass_rate_thresholds = { "mode": "absolute", "steps": [ @@ -4032,6 +4130,15 @@ def build_testing_dashboard(): {"color": "red", "value": 5}, ], } + coverage_gap_thresholds = { + "mode": "absolute", + "steps": [ + {"color": "green", "value": None}, + {"color": "yellow", "value": 1}, + {"color": "orange", "value": 5}, + {"color": "red", "value": 10}, + ], + } smell_thresholds = { "mode": "absolute", "steps": [ @@ -4043,49 +4150,97 @@ def build_testing_dashboard(): ], } - pass_rate_panel = stat_panel( - 1, - "Platform Test Success Rate (30d)", - TEST_SUCCESS_RATE, - {"h": 4, "w": 6, "x": 0, "y": 0}, - unit="percent", - decimals=2, - instant=True, - thresholds=pass_rate_thresholds, + panels.append( + text_panel( + 1, + "Testing Modes", + ( + "### Atlas Testing\n" + "- **Overview mode**: keep `Suite=All` to compare every project.\n" + "- **Drilldown mode**: choose one suite to isolate quality checks, failures, and trends.\n" + "- Goal line: **95% coverage** and **0 files over 500 LOC** for every suite." + ), + {"h": 3, "w": 24, "x": 0, "y": 0}, + ) ) - pass_rate_panel["description"] = "Overall success rate across tracked suites over the last 30 days." - panels.append(pass_rate_panel) - failures_panel = stat_panel( - 2, - "Platform Test Failures (24h)", - TEST_FAILURES_24H_TOTAL, - {"h": 4, "w": 6, "x": 6, "y": 0}, - unit="none", - instant=True, - thresholds=failures_thresholds, + panels.append( + stat_panel( + 2, + "Success Rate (24h)", + success_rate_24h, + {"h": 5, "w": 4, "x": 0, "y": 3}, + unit="percent", + decimals=2, + instant=True, + thresholds=pass_rate_thresholds, + ) ) - failures_panel["description"] = "Total failed runs in the last 24 hours." - panels.append(failures_panel) - activity_panel = table_panel( - 3, - "Platform Test Activity (30d)", - PLATFORM_TEST_ACTIVITY_30D, - {"h": 8, "w": 12, "x": 12, "y": 0}, - unit="none", - transformations=[ - {"id": "labelsToFields", "options": {}}, - {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}, - ], - instant=True, + panels.append( + stat_panel( + 3, + "Success Rate (30d)", + success_rate_30d, + {"h": 5, "w": 4, "x": 4, "y": 3}, + unit="percent", + decimals=2, + instant=True, + thresholds=pass_rate_thresholds, + ) + ) + panels.append( + stat_panel( + 4, + "Failures (24h)", + failures_24h, + {"h": 5, "w": 4, "x": 8, "y": 3}, + unit="none", + instant=True, + thresholds=failures_thresholds, + ) + ) + panels.append( + stat_panel( + 5, + "Runs (24h)", + runs_24h, + {"h": 5, "w": 4, "x": 12, "y": 3}, + unit="none", + instant=True, + thresholds={ + "mode": "absolute", + "steps": [{"color": "red", "value": None}, {"color": "green", "value": 1}], + }, + ) + ) + panels.append( + stat_panel( + 6, + "Avg Coverage (%)", + average_coverage, + {"h": 5, "w": 4, "x": 16, "y": 3}, + unit="percent", + decimals=2, + instant=True, + thresholds=pass_rate_thresholds, + ) + ) + panels.append( + stat_panel( + 7, + "Suites with LOC >500", + suites_loc_violating, + {"h": 5, "w": 4, "x": 20, "y": 3}, + unit="none", + instant=True, + thresholds=smell_thresholds, + ) ) - activity_panel["description"] = "Suite/status event counts over 30 days." - panels.append(activity_panel) panels.append( bargauge_panel( - 4, - "Platform Test Failures by Suite (24h)", - PLATFORM_TEST_FAILURES_24H_BY_SUITE, - {"h": 8, "w": 12, "x": 0, "y": 8}, + 8, + "Failures by Suite (24h)", + f'sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status!~"{success}"}}[24h]))', + {"h": 8, "w": 8, "x": 0, "y": 8}, unit="none", instant=True, legend="{{suite}}", @@ -4094,10 +4249,10 @@ def build_testing_dashboard(): ) panels.append( bargauge_panel( - 5, - "Platform Test Success Rate by Suite (24h, lowest first)", - PLATFORM_TEST_SUCCESS_RATE_24H_BY_SUITE, - {"h": 8, "w": 12, "x": 12, "y": 8}, + 9, + "Success Rate by Suite (24h)", + success_rate_by_suite_24h, + {"h": 8, "w": 8, "x": 8, "y": 8}, unit="percent", instant=True, legend="{{suite}}", @@ -4106,34 +4261,146 @@ def build_testing_dashboard(): decimals=2, ) ) - suite_panel = timeseries_panel( - 6, - "Platform Test Success Rate by Suite", - None, + coverage_gap_panel = bargauge_panel( + 10, + "Coverage Gap to 95% by Suite", + coverage_gap, + {"h": 8, "w": 8, "x": 16, "y": 8}, + unit="percent", + instant=True, + legend="{{suite}}", + sort_order="desc", + thresholds=coverage_gap_thresholds, + decimals=2, + ) + coverage_gap_panel["description"] = "Gap from the 95% target. 0 means the suite is at or above target." + panels.append(coverage_gap_panel) + history_panel = timeseries_panel( + 11, + "Success History by Suite", + success_history_by_suite, {"h": 8, "w": 24, "x": 0, "y": 16}, unit="percent", - targets=PLATFORM_TEST_SUCCESS_RATE_SUITE_TARGETS, + legend="{{suite}}", legend_display="list", legend_placement="bottom", ) - suite_panel["fieldConfig"]["defaults"]["min"] = 0 - suite_panel["fieldConfig"]["defaults"]["max"] = 100 - suite_panel["fieldConfig"]["defaults"]["custom"] = { + history_panel["fieldConfig"]["defaults"]["min"] = 0 + history_panel["fieldConfig"]["defaults"]["max"] = 100 + history_panel["fieldConfig"]["defaults"]["custom"] = { "drawStyle": "line", "lineInterpolation": "linear", "lineWidth": 2, - "fillOpacity": 10, + "fillOpacity": 8, "showPoints": "always", - "pointSize": 4, + "pointSize": 3, "spanNulls": True, } - suite_panel["description"] = "Trend line per suite. Flat gaps mean no runs in that interval." - panels.append(suite_panel) + history_panel["description"] = "Trend per suite. In drilldown mode this becomes the selected suite history." + panels.append(history_panel) + panels.append( + timeseries_panel( + 12, + "Run Outcomes (Selected Scope)", + None, + {"h": 8, "w": 8, "x": 0, "y": 24}, + unit="none", + targets=[ + { + "refId": "A", + "expr": f'sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status=~"{success}"}}[$__interval])) or on() vector(0)', + "legendFormat": "Success", + }, + { + "refId": "B", + "expr": f'sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status!~"{success}"}}[$__interval])) or on() vector(0)', + "legendFormat": "Failure", + }, + { + "refId": "C", + "expr": f'sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[$__interval])) or on() vector(0)', + "legendFormat": "Total", + }, + ], + legend_display="list", + legend_placement="bottom", + legend_calcs=["lastNotNull", "sum"], + ) + ) + panels.append( + timeseries_panel( + 13, + "Coverage & LOC History (Selected Scope)", + None, + {"h": 8, "w": 8, "x": 8, "y": 24}, + unit="none", + targets=[ + { + "refId": "A", + "expr": f'max_over_time(platform_quality_gate_workspace_line_coverage_percent{{suite=~"{suite_var}"}}[$__interval])', + "legendFormat": "{{suite}} coverage %", + }, + { + "refId": "B", + "expr": f'max_over_time(platform_quality_gate_source_lines_over_500_total{{suite=~"{suite_var}"}}[$__interval])', + "legendFormat": "{{suite}} files >500 LOC", + }, + ], + legend_display="list", + legend_placement="bottom", + legend_calcs=["lastNotNull", "max"], + ) + ) + panels.append( + pie_panel( + 14, + "Run Status Mix (30d)", + f'sum by (status) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[30d]))', + {"h": 8, "w": 8, "x": 16, "y": 24}, + legend="{{status}}", + unit="none", + instant=True, + ) + ) + latest_tests_panel = bargauge_panel( + 15, + "Latest Test Counters (Suite + Result)", + f'sum by (suite, result) ({{__name__=~".*_quality_gate_tests_total",suite=~"{suite_var}"}})', + {"h": 8, "w": 12, "x": 0, "y": 32}, + unit="none", + instant=True, + legend="{{suite}} · {{result}}", + sort_order="desc", + limit=24, + ) + latest_tests_panel["description"] = ( + "Latest emitted test counters per suite/result. " + "If a suite is missing here, that exporter is not sending *_quality_gate_tests_total." + ) + panels.append(latest_tests_panel) + failing_checks_panel = bargauge_panel( + 16, + "Failing Checks (Suite + Check)", + ( + f'sum by (suite, check) ' + f'({{__name__=~".*_quality_gate_checks_total",suite=~"{suite_var}",result!~"{success}"}})' + ), + {"h": 8, "w": 12, "x": 12, "y": 32}, + unit="none", + instant=True, + legend="{{suite}} · {{check}}", + sort_order="desc", + limit=24, + thresholds=failures_thresholds, + ) + failing_checks_panel["description"] = "Top failing checks in the selected scope. Empty is healthy." + panels.append(failing_checks_panel) + coverage_panel = bargauge_panel( - 7, - "Quality Gate Coverage by Suite (%, gate 95)", - QUALITY_GATE_COVERAGE_BY_SUITE_WITH_MISSING, - {"h": 8, "w": 12, "x": 0, "y": 24}, + 17, + "Coverage by Suite (Latest, gate 95)", + coverage_with_missing, + {"h": 8, "w": 12, "x": 0, "y": 40}, unit="percent", instant=True, legend="{{suite}}", @@ -4141,63 +4408,23 @@ def build_testing_dashboard(): thresholds=pass_rate_thresholds, decimals=2, ) - coverage_panel["description"] = ( - "Latest reported per-suite line coverage. The quality gate target is 95%. " - "A value of -1 means that suite has runs but no coverage metric published yet." - ) coverage_panel["fieldConfig"]["defaults"]["mappings"] = [ - { - "type": "value", - "options": { - "-1": {"text": "missing"}, - }, - } + {"type": "value", "options": {"-1": {"text": "missing"}}} ] panels.append(coverage_panel) - coverage_gap_panel = bargauge_panel( - 8, - "Coverage Gap to 95% by Suite", - QUALITY_GATE_COVERAGE_GAP_BY_SUITE, - {"h": 8, "w": 12, "x": 12, "y": 24}, - unit="percent", - instant=True, - legend="{{suite}}", - sort_order="desc", - thresholds={ - "mode": "absolute", - "steps": [ - {"color": "green", "value": None}, - {"color": "yellow", "value": 1}, - {"color": "orange", "value": 5}, - {"color": "red", "value": 10}, - ], - }, - decimals=2, - ) - coverage_gap_panel["description"] = "How far each suite is below the 95% target (0 means at or above target)." - panels.append(coverage_gap_panel) smell_panel = bargauge_panel( - 9, - "Code Smell Infractions by Suite (files >500 LOC)", - QUALITY_GATE_SMELL_INFRACTIONS_BY_SUITE_WITH_MISSING, - {"h": 8, "w": 24, "x": 0, "y": 32}, + 18, + "Files >500 LOC by Suite (Latest)", + smell_with_missing, + {"h": 8, "w": 12, "x": 12, "y": 40}, unit="none", instant=True, legend="{{suite}}", sort_order="desc", thresholds=smell_thresholds, ) - smell_panel["description"] = ( - "Per-suite count of files violating the 500-line hygiene/code-smell threshold. " - "A value of -1 means that suite has runs but no smell-infraction metric published yet." - ) smell_panel["fieldConfig"]["defaults"]["mappings"] = [ - { - "type": "value", - "options": { - "-1": {"text": "missing"}, - }, - } + {"type": "value", "options": {"-1": {"text": "missing"}}} ] panels.append(smell_panel) @@ -4212,6 +4439,7 @@ def build_testing_dashboard(): "schemaVersion": 39, "style": "dark", "tags": ["atlas", "testing", "quality"], + "templating": {"list": [testing_suite_variable()]}, } diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index d02bd77d..09a42a40 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -2196,57 +2196,57 @@ "targets": [ { "refId": "A", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "ariadne" }, { "refId": "B", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"metis\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "metis" }, { "refId": "C", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "ananke" }, { "refId": "D", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "atlasbot" }, { "refId": "E", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"lesavka\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "lesavka" }, { "refId": "F", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "pegasus" }, { "refId": "G", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "soteria" }, { "refId": "H", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"titan-iac|titan_iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"titan-iac|titan_iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"titan-iac|titan_iac\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "titan-iac" }, { "refId": "I", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"bstein-home|bstein_home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"bstein-home|bstein_home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"bstein-home|bstein_home\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "bstein-home" }, { "refId": "J", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"arcanagon\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "arcanagon" }, { "refId": "K", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"data-prepper|data_prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"data-prepper|data_prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"data-prepper|data_prepper\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "data-prepper" } ], diff --git a/services/monitoring/dashboards/atlas-testing.json b/services/monitoring/dashboards/atlas-testing.json index 5b038589..ead90ab9 100644 --- a/services/monitoring/dashboards/atlas-testing.json +++ b/services/monitoring/dashboards/atlas-testing.json @@ -6,21 +6,37 @@ "panels": [ { "id": 1, + "type": "text", + "title": "Testing Modes", + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 0 + }, + "datasource": null, + "options": { + "mode": "markdown", + "content": "### Atlas Testing\n- **Overview mode**: keep `Suite=All` to compare every project.\n- **Drilldown mode**: choose one suite to isolate quality checks, failures, and trends.\n- Goal line: **95% coverage** and **0 files over 500 LOC** for every suite." + } + }, + { + "id": 2, "type": "stat", - "title": "Platform Test Success Rate (30d)", + "title": "Success Rate (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, - "w": 6, + "h": 5, + "w": 4, "x": 0, - "y": 0 + "y": 3 }, "targets": [ { - "expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d])) or on() vector(0))), 1)", + "expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",status=~\"ok|passed|success\"}[24h])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[24h])) or on() vector(0))), 1)", "refId": "A", "instant": true } @@ -72,26 +88,95 @@ "values": false }, "textMode": "value" - }, - "description": "Overall success rate across tracked suites over the last 30 days." + } }, { - "id": 2, + "id": 3, "type": "stat", - "title": "Platform Test Failures (24h)", + "title": "Success Rate (30d)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, - "w": 6, - "x": 6, - "y": 0 + "h": 5, + "w": 4, + "x": 4, + "y": 3 }, "targets": [ { - "expr": "(sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status!~\"ok|passed|success\"}[24h])) or on() vector(0))", + "expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",status=~\"ok|passed|success\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[30d])) or on() vector(0))), 1)", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 80 + }, + { + "color": "yellow", + "value": 95 + }, + { + "color": "green", + "value": 99 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 4, + "type": "stat", + "title": "Failures (24h)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 3 + }, + "targets": [ + { + "expr": "(sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",status!~\"ok|passed|success\"}[24h])) or on() vector(0))", "refId": "A", "instant": true } @@ -142,77 +227,229 @@ "values": false }, "textMode": "value" - }, - "description": "Total failed runs in the last 24 hours." + } }, { - "id": 3, - "type": "table", - "title": "Platform Test Activity (30d)", + "id": 5, + "type": "stat", + "title": "Runs (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 8, - "w": 12, + "h": 5, + "w": 4, "x": 12, - "y": 0 + "y": 3 }, "targets": [ { - "expr": "sum by (suite, status) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d]))", + "expr": "(sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[24h])) or on() vector(0))", "refId": "A", "instant": true } ], "fieldConfig": { "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, "unit": "none", "custom": { - "filterable": true + "displayMode": "auto" } }, "overrides": [] }, "options": { - "showHeader": true, - "columnFilters": false - }, - "transformations": [ - { - "id": "labelsToFields", - "options": {} + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ], - "description": "Suite/status event counts over 30 days." + "textMode": "value" + } }, { - "id": 4, + "id": 6, + "type": "stat", + "title": "Avg Coverage (%)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 3 + }, + "targets": [ + { + "expr": "(avg(((max by (suite) ({__name__=~\".*_quality_gate_coverage_percent\",suite=~\"${suite}\"})) or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent{suite=~\"${suite}\"})))) or on() vector(0))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 80 + }, + { + "color": "yellow", + "value": 95 + }, + { + "color": "green", + "value": 99 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 7, + "type": "stat", + "title": "Suites with LOC >500", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 3 + }, + "targets": [ + { + "expr": "(sum(((max by (suite) (platform_quality_gate_source_lines_over_500_total{suite=~\"${suite}\"})) > bool 0)) or on() vector(0))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 8, "type": "bargauge", - "title": "Platform Test Failures by Suite (24h)", + "title": "Failures by Suite (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { "h": 8, - "w": 12, + "w": 8, "x": 0, "y": 8 }, "targets": [ { - "expr": "sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status!~\"ok|passed|success\"}[24h])))", + "expr": "sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",status!~\"ok|passed|success\"}[24h])))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -271,22 +508,22 @@ ] }, { - "id": 5, + "id": 9, "type": "bargauge", - "title": "Platform Test Success Rate by Suite (24h, lowest first)", + "title": "Success Rate by Suite (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { "h": 8, - "w": 12, - "x": 12, + "w": 8, + "x": 8, "y": 8 }, "targets": [ { - "expr": "sort_desc((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h]))) > 0))", + "expr": "sort_desc((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[24h]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[24h]))) > 0))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -346,9 +583,85 @@ ] }, { - "id": 6, + "id": 10, + "type": "bargauge", + "title": "Coverage Gap to 95% by Suite", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 8 + }, + "targets": [ + { + "expr": "sort_desc(clamp_min(95 - ((max by (suite) ({__name__=~\".*_quality_gate_coverage_percent\",suite=~\"${suite}\"})) or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent{suite=~\"${suite}\"}))), 0))", + "refId": "A", + "legendFormat": "{{suite}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 5 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ], + "description": "Gap from the 95% target. 0 means the suite is at or above target." + }, + { + "id": 11, "type": "timeseries", - "title": "Platform Test Success Rate by Suite", + "title": "Success History by Suite", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -361,59 +674,9 @@ }, "targets": [ { + "expr": "(100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[$__interval]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[$__interval]))) > 0))", "refId": "A", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "ariadne" - }, - { - "refId": "B", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "metis" - }, - { - "refId": "C", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "ananke" - }, - { - "refId": "D", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "atlasbot" - }, - { - "refId": "E", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "lesavka" - }, - { - "refId": "F", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "pegasus" - }, - { - "refId": "G", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "soteria" - }, - { - "refId": "H", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "titan-iac" - }, - { - "refId": "I", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "bstein-home" - }, - { - "refId": "J", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "arcanagon" - }, - { - "refId": "K", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "data-prepper" + "legendFormat": "{{suite}}" } ], "fieldConfig": { @@ -425,9 +688,9 @@ "drawStyle": "line", "lineInterpolation": "linear", "lineWidth": 2, - "fillOpacity": 10, + "fillOpacity": 8, "showPoints": "always", - "pointSize": 4, + "pointSize": 3, "spanNulls": true } }, @@ -442,12 +705,161 @@ "mode": "multi" } }, - "description": "Trend line per suite. Flat gaps mean no runs in that interval." + "description": "Trend per suite. In drilldown mode this becomes the selected suite history." }, { - "id": 7, + "id": 12, + "type": "timeseries", + "title": "Run Outcomes (Selected Scope)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 24 + }, + "targets": [ + { + "refId": "A", + "expr": "sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",status=~\"ok|passed|success\"}[$__interval])) or on() vector(0)", + "legendFormat": "Success" + }, + { + "refId": "B", + "expr": "sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",status!~\"ok|passed|success\"}[$__interval])) or on() vector(0)", + "legendFormat": "Failure" + }, + { + "refId": "C", + "expr": "sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[$__interval])) or on() vector(0)", + "legendFormat": "Total" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "calcs": [ + "lastNotNull", + "sum" + ] + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 13, + "type": "timeseries", + "title": "Coverage & LOC History (Selected Scope)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 24 + }, + "targets": [ + { + "refId": "A", + "expr": "max_over_time(platform_quality_gate_workspace_line_coverage_percent{suite=~\"${suite}\"}[$__interval])", + "legendFormat": "{{suite}} coverage %" + }, + { + "refId": "B", + "expr": "max_over_time(platform_quality_gate_source_lines_over_500_total{suite=~\"${suite}\"}[$__interval])", + "legendFormat": "{{suite}} files >500 LOC" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "calcs": [ + "lastNotNull", + "max" + ] + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 14, + "type": "piechart", + "title": "Run Status Mix (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 24 + }, + "targets": [ + { + "expr": "sum by (status) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[30d]))", + "refId": "A", + "legendFormat": "{{status}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "color": { + "mode": "palette-classic" + } + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "right" + }, + "pieType": "pie", + "displayLabels": [], + "tooltip": { + "mode": "single" + }, + "colorScheme": "interpolateSpectral", + "colorBy": "value", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + } + }, + { + "id": 15, "type": "bargauge", - "title": "Quality Gate Coverage by Suite (%, gate 95)", + "title": "Latest Test Counters (Suite + Result)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -456,11 +868,173 @@ "h": 8, "w": 12, "x": 0, - "y": 24 + "y": 32 }, "targets": [ { - "expr": "sort(((max by (suite) ({__name__=~\".*_quality_gate_coverage_percent\"})) or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent))) or on(suite) (0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d]))) - 1))", + "expr": "sort_desc(sum by (suite, result) ({__name__=~\".*_quality_gate_tests_total\",suite=~\"${suite}\"}))", + "refId": "A", + "legendFormat": "{{suite}} \u00b7 {{result}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 85 + } + ] + } + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + }, + { + "id": "limit", + "options": { + "limit": 24 + } + } + ], + "description": "Latest emitted test counters per suite/result. If a suite is missing here, that exporter is not sending *_quality_gate_tests_total." + }, + { + "id": 16, + "type": "bargauge", + "title": "Failing Checks (Suite + Check)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 32 + }, + "targets": [ + { + "expr": "sort_desc(sum by (suite, check) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",result!~\"ok|passed|success\"}))", + "refId": "A", + "legendFormat": "{{suite}} \u00b7 {{check}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + } + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + }, + { + "id": "limit", + "options": { + "limit": 24 + } + } + ], + "description": "Top failing checks in the selected scope. Empty is healthy." + }, + { + "id": 17, + "type": "bargauge", + "title": "Coverage by Suite (Latest, gate 95)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 40 + }, + "targets": [ + { + "expr": "sort(((max by (suite) ({__name__=~\".*_quality_gate_coverage_percent\",suite=~\"${suite}\"})) or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent{suite=~\"${suite}\"}))) or on(suite) (0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[30d]))) - 1))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -527,13 +1101,12 @@ "order": "asc" } } - ], - "description": "Latest reported per-suite line coverage. The quality gate target is 95%. A value of -1 means that suite has runs but no coverage metric published yet." + ] }, { - "id": 8, + "id": 18, "type": "bargauge", - "title": "Coverage Gap to 95% by Suite", + "title": "Files >500 LOC by Suite (Latest)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -542,87 +1115,11 @@ "h": 8, "w": 12, "x": 12, - "y": 24 + "y": 40 }, "targets": [ { - "expr": "sort_desc(clamp_min(95 - ((max by (suite) ({__name__=~\".*_quality_gate_coverage_percent\"})) or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent))), 0))", - "refId": "A", - "legendFormat": "{{suite}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "percent", - "min": 0, - "max": 100, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 1 - }, - { - "color": "orange", - "value": 5 - }, - { - "color": "red", - "value": 10 - } - ] - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ], - "description": "How far each suite is below the 95% target (0 means at or above target)." - }, - { - "id": 9, - "type": "bargauge", - "title": "Code Smell Infractions by Suite (files >500 LOC)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 32 - }, - "targets": [ - { - "expr": "sort_desc((max by (suite) (platform_quality_gate_source_lines_over_500_total)) or on(suite) (0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d]))) - 1))", + "expr": "sort_desc((max by (suite) (platform_quality_gate_source_lines_over_500_total{suite=~\"${suite}\"})) or on(suite) (0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[30d]))) - 1))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -692,8 +1189,7 @@ "order": "desc" } } - ], - "description": "Per-suite count of files violating the 500-line hygiene/code-smell threshold. A value of -1 means that suite has runs but no smell-infraction metric published yet." + ] } ], "time": { @@ -709,5 +1205,84 @@ "atlas", "testing", "quality" - ] + ], + "templating": { + "list": [ + { + "name": "suite", + "label": "Suite", + "type": "custom", + "query": "ariadne : ariadne,metis : metis,ananke : ananke,atlasbot : atlasbot,lesavka : lesavka,pegasus : pegasus|pegasus-health|pegasus_health,soteria : soteria,titan-iac : titan-iac|titan_iac,bstein-home : bstein-home|bstein_home,arcanagon : arcanagon,data-prepper : data-prepper|data_prepper", + "current": { + "text": "All", + "value": "$__all", + "selected": true + }, + "options": [ + { + "text": "ariadne", + "value": "ariadne", + "selected": false + }, + { + "text": "metis", + "value": "metis", + "selected": false + }, + { + "text": "ananke", + "value": "ananke", + "selected": false + }, + { + "text": "atlasbot", + "value": "atlasbot", + "selected": false + }, + { + "text": "lesavka", + "value": "lesavka", + "selected": false + }, + { + "text": "pegasus", + "value": "pegasus|pegasus-health|pegasus_health", + "selected": false + }, + { + "text": "soteria", + "value": "soteria", + "selected": false + }, + { + "text": "titan-iac", + "value": "titan-iac|titan_iac", + "selected": false + }, + { + "text": "bstein-home", + "value": "bstein-home|bstein_home", + "selected": false + }, + { + "text": "arcanagon", + "value": "arcanagon", + "selected": false + }, + { + "text": "data-prepper", + "value": "data-prepper|data_prepper", + "selected": false + } + ], + "hide": 0, + "multi": false, + "includeAll": true, + "allValue": "ariadne|metis|ananke|atlasbot|lesavka|pegasus|pegasus-health|pegasus_health|soteria|titan-iac|titan_iac|bstein-home|bstein_home|arcanagon|data-prepper|data_prepper", + "refresh": 1, + "sort": 1, + "skipUrlSync": false + } + ] + } } diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 94a1dd00..a223f180 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -2205,57 +2205,57 @@ data: "targets": [ { "refId": "A", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "ariadne" }, { "refId": "B", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"metis\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "metis" }, { "refId": "C", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "ananke" }, { "refId": "D", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "atlasbot" }, { "refId": "E", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"lesavka\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "lesavka" }, { "refId": "F", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "pegasus" }, { "refId": "G", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "soteria" }, { "refId": "H", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"titan-iac|titan_iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"titan-iac|titan_iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"titan-iac|titan_iac\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "titan-iac" }, { "refId": "I", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"bstein-home|bstein_home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"bstein-home|bstein_home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"bstein-home|bstein_home\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "bstein-home" }, { "refId": "J", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"arcanagon\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "arcanagon" }, { "refId": "K", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"data-prepper|data_prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"data-prepper|data_prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"data-prepper|data_prepper\"}[1h]))) > 0) or on() vector(0)", "legendFormat": "data-prepper" } ], diff --git a/services/monitoring/grafana-dashboard-testing.yaml b/services/monitoring/grafana-dashboard-testing.yaml index 40ce6200..0d30a9ec 100644 --- a/services/monitoring/grafana-dashboard-testing.yaml +++ b/services/monitoring/grafana-dashboard-testing.yaml @@ -15,21 +15,37 @@ data: "panels": [ { "id": 1, + "type": "text", + "title": "Testing Modes", + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 0 + }, + "datasource": null, + "options": { + "mode": "markdown", + "content": "### Atlas Testing\n- **Overview mode**: keep `Suite=All` to compare every project.\n- **Drilldown mode**: choose one suite to isolate quality checks, failures, and trends.\n- Goal line: **95% coverage** and **0 files over 500 LOC** for every suite." + } + }, + { + "id": 2, "type": "stat", - "title": "Platform Test Success Rate (30d)", + "title": "Success Rate (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, - "w": 6, + "h": 5, + "w": 4, "x": 0, - "y": 0 + "y": 3 }, "targets": [ { - "expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d])) or on() vector(0))), 1)", + "expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",status=~\"ok|passed|success\"}[24h])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[24h])) or on() vector(0))), 1)", "refId": "A", "instant": true } @@ -81,26 +97,95 @@ data: "values": false }, "textMode": "value" - }, - "description": "Overall success rate across tracked suites over the last 30 days." + } }, { - "id": 2, + "id": 3, "type": "stat", - "title": "Platform Test Failures (24h)", + "title": "Success Rate (30d)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, - "w": 6, - "x": 6, - "y": 0 + "h": 5, + "w": 4, + "x": 4, + "y": 3 }, "targets": [ { - "expr": "(sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status!~\"ok|passed|success\"}[24h])) or on() vector(0))", + "expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",status=~\"ok|passed|success\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[30d])) or on() vector(0))), 1)", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 80 + }, + { + "color": "yellow", + "value": 95 + }, + { + "color": "green", + "value": 99 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 4, + "type": "stat", + "title": "Failures (24h)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 3 + }, + "targets": [ + { + "expr": "(sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",status!~\"ok|passed|success\"}[24h])) or on() vector(0))", "refId": "A", "instant": true } @@ -151,77 +236,229 @@ data: "values": false }, "textMode": "value" - }, - "description": "Total failed runs in the last 24 hours." + } }, { - "id": 3, - "type": "table", - "title": "Platform Test Activity (30d)", + "id": 5, + "type": "stat", + "title": "Runs (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 8, - "w": 12, + "h": 5, + "w": 4, "x": 12, - "y": 0 + "y": 3 }, "targets": [ { - "expr": "sum by (suite, status) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d]))", + "expr": "(sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[24h])) or on() vector(0))", "refId": "A", "instant": true } ], "fieldConfig": { "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, "unit": "none", "custom": { - "filterable": true + "displayMode": "auto" } }, "overrides": [] }, "options": { - "showHeader": true, - "columnFilters": false - }, - "transformations": [ - { - "id": "labelsToFields", - "options": {} + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ], - "description": "Suite/status event counts over 30 days." + "textMode": "value" + } }, { - "id": 4, + "id": 6, + "type": "stat", + "title": "Avg Coverage (%)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 3 + }, + "targets": [ + { + "expr": "(avg(((max by (suite) ({__name__=~\".*_quality_gate_coverage_percent\",suite=~\"${suite}\"})) or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent{suite=~\"${suite}\"})))) or on() vector(0))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 80 + }, + { + "color": "yellow", + "value": 95 + }, + { + "color": "green", + "value": 99 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 7, + "type": "stat", + "title": "Suites with LOC >500", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 3 + }, + "targets": [ + { + "expr": "(sum(((max by (suite) (platform_quality_gate_source_lines_over_500_total{suite=~\"${suite}\"})) > bool 0)) or on() vector(0))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 8, "type": "bargauge", - "title": "Platform Test Failures by Suite (24h)", + "title": "Failures by Suite (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { "h": 8, - "w": 12, + "w": 8, "x": 0, "y": 8 }, "targets": [ { - "expr": "sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status!~\"ok|passed|success\"}[24h])))", + "expr": "sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",status!~\"ok|passed|success\"}[24h])))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -280,22 +517,22 @@ data: ] }, { - "id": 5, + "id": 9, "type": "bargauge", - "title": "Platform Test Success Rate by Suite (24h, lowest first)", + "title": "Success Rate by Suite (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { "h": 8, - "w": 12, - "x": 12, + "w": 8, + "x": 8, "y": 8 }, "targets": [ { - "expr": "sort_desc((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h]))) > 0))", + "expr": "sort_desc((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[24h]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[24h]))) > 0))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -355,9 +592,85 @@ data: ] }, { - "id": 6, + "id": 10, + "type": "bargauge", + "title": "Coverage Gap to 95% by Suite", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 8 + }, + "targets": [ + { + "expr": "sort_desc(clamp_min(95 - ((max by (suite) ({__name__=~\".*_quality_gate_coverage_percent\",suite=~\"${suite}\"})) or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent{suite=~\"${suite}\"}))), 0))", + "refId": "A", + "legendFormat": "{{suite}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 5 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ], + "description": "Gap from the 95% target. 0 means the suite is at or above target." + }, + { + "id": 11, "type": "timeseries", - "title": "Platform Test Success Rate by Suite", + "title": "Success History by Suite", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -370,59 +683,9 @@ data: }, "targets": [ { + "expr": "(100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",status=~\"ok|passed|success\"}[$__interval]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[$__interval]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[$__interval]))) > 0))", "refId": "A", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "ariadne" - }, - { - "refId": "B", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "metis" - }, - { - "refId": "C", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "ananke" - }, - { - "refId": "D", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "atlasbot" - }, - { - "refId": "E", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "lesavka" - }, - { - "refId": "F", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "pegasus" - }, - { - "refId": "G", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "soteria" - }, - { - "refId": "H", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "titan-iac" - }, - { - "refId": "I", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "bstein-home" - }, - { - "refId": "J", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "arcanagon" - }, - { - "refId": "K", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "data-prepper" + "legendFormat": "{{suite}}" } ], "fieldConfig": { @@ -434,9 +697,9 @@ data: "drawStyle": "line", "lineInterpolation": "linear", "lineWidth": 2, - "fillOpacity": 10, + "fillOpacity": 8, "showPoints": "always", - "pointSize": 4, + "pointSize": 3, "spanNulls": true } }, @@ -451,12 +714,161 @@ data: "mode": "multi" } }, - "description": "Trend line per suite. Flat gaps mean no runs in that interval." + "description": "Trend per suite. In drilldown mode this becomes the selected suite history." }, { - "id": 7, + "id": 12, + "type": "timeseries", + "title": "Run Outcomes (Selected Scope)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 24 + }, + "targets": [ + { + "refId": "A", + "expr": "sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",status=~\"ok|passed|success\"}[$__interval])) or on() vector(0)", + "legendFormat": "Success" + }, + { + "refId": "B", + "expr": "sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",status!~\"ok|passed|success\"}[$__interval])) or on() vector(0)", + "legendFormat": "Failure" + }, + { + "refId": "C", + "expr": "sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[$__interval])) or on() vector(0)", + "legendFormat": "Total" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "calcs": [ + "lastNotNull", + "sum" + ] + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 13, + "type": "timeseries", + "title": "Coverage & LOC History (Selected Scope)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 24 + }, + "targets": [ + { + "refId": "A", + "expr": "max_over_time(platform_quality_gate_workspace_line_coverage_percent{suite=~\"${suite}\"}[$__interval])", + "legendFormat": "{{suite}} coverage %" + }, + { + "refId": "B", + "expr": "max_over_time(platform_quality_gate_source_lines_over_500_total{suite=~\"${suite}\"}[$__interval])", + "legendFormat": "{{suite}} files >500 LOC" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "calcs": [ + "lastNotNull", + "max" + ] + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 14, + "type": "piechart", + "title": "Run Status Mix (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 24 + }, + "targets": [ + { + "expr": "sum by (status) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[30d]))", + "refId": "A", + "legendFormat": "{{status}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "color": { + "mode": "palette-classic" + } + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "right" + }, + "pieType": "pie", + "displayLabels": [], + "tooltip": { + "mode": "single" + }, + "colorScheme": "interpolateSpectral", + "colorBy": "value", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + } + }, + { + "id": 15, "type": "bargauge", - "title": "Quality Gate Coverage by Suite (%, gate 95)", + "title": "Latest Test Counters (Suite + Result)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -465,11 +877,173 @@ data: "h": 8, "w": 12, "x": 0, - "y": 24 + "y": 32 }, "targets": [ { - "expr": "sort(((max by (suite) ({__name__=~\".*_quality_gate_coverage_percent\"})) or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent))) or on(suite) (0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d]))) - 1))", + "expr": "sort_desc(sum by (suite, result) ({__name__=~\".*_quality_gate_tests_total\",suite=~\"${suite}\"}))", + "refId": "A", + "legendFormat": "{{suite}} \u00b7 {{result}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 85 + } + ] + } + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + }, + { + "id": "limit", + "options": { + "limit": 24 + } + } + ], + "description": "Latest emitted test counters per suite/result. If a suite is missing here, that exporter is not sending *_quality_gate_tests_total." + }, + { + "id": 16, + "type": "bargauge", + "title": "Failing Checks (Suite + Check)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 32 + }, + "targets": [ + { + "expr": "sort_desc(sum by (suite, check) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",result!~\"ok|passed|success\"}))", + "refId": "A", + "legendFormat": "{{suite}} \u00b7 {{check}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + } + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + }, + { + "id": "limit", + "options": { + "limit": 24 + } + } + ], + "description": "Top failing checks in the selected scope. Empty is healthy." + }, + { + "id": 17, + "type": "bargauge", + "title": "Coverage by Suite (Latest, gate 95)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 40 + }, + "targets": [ + { + "expr": "sort(((max by (suite) ({__name__=~\".*_quality_gate_coverage_percent\",suite=~\"${suite}\"})) or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent{suite=~\"${suite}\"}))) or on(suite) (0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[30d]))) - 1))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -536,13 +1110,12 @@ data: "order": "asc" } } - ], - "description": "Latest reported per-suite line coverage. The quality gate target is 95%. A value of -1 means that suite has runs but no coverage metric published yet." + ] }, { - "id": 8, + "id": 18, "type": "bargauge", - "title": "Coverage Gap to 95% by Suite", + "title": "Files >500 LOC by Suite (Latest)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -551,87 +1124,11 @@ data: "h": 8, "w": 12, "x": 12, - "y": 24 + "y": 40 }, "targets": [ { - "expr": "sort_desc(clamp_min(95 - ((max by (suite) ({__name__=~\".*_quality_gate_coverage_percent\"})) or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent))), 0))", - "refId": "A", - "legendFormat": "{{suite}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "percent", - "min": 0, - "max": 100, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 1 - }, - { - "color": "orange", - "value": 5 - }, - { - "color": "red", - "value": 10 - } - ] - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ], - "description": "How far each suite is below the 95% target (0 means at or above target)." - }, - { - "id": 9, - "type": "bargauge", - "title": "Code Smell Infractions by Suite (files >500 LOC)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 32 - }, - "targets": [ - { - "expr": "sort_desc((max by (suite) (platform_quality_gate_source_lines_over_500_total)) or on(suite) (0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d]))) - 1))", + "expr": "sort_desc((max by (suite) (platform_quality_gate_source_lines_over_500_total{suite=~\"${suite}\"})) or on(suite) (0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\"}[30d]))) - 1))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -701,8 +1198,7 @@ data: "order": "desc" } } - ], - "description": "Per-suite count of files violating the 500-line hygiene/code-smell threshold. A value of -1 means that suite has runs but no smell-infraction metric published yet." + ] } ], "time": { @@ -718,5 +1214,84 @@ data: "atlas", "testing", "quality" - ] + ], + "templating": { + "list": [ + { + "name": "suite", + "label": "Suite", + "type": "custom", + "query": "ariadne : ariadne,metis : metis,ananke : ananke,atlasbot : atlasbot,lesavka : lesavka,pegasus : pegasus|pegasus-health|pegasus_health,soteria : soteria,titan-iac : titan-iac|titan_iac,bstein-home : bstein-home|bstein_home,arcanagon : arcanagon,data-prepper : data-prepper|data_prepper", + "current": { + "text": "All", + "value": "$__all", + "selected": true + }, + "options": [ + { + "text": "ariadne", + "value": "ariadne", + "selected": false + }, + { + "text": "metis", + "value": "metis", + "selected": false + }, + { + "text": "ananke", + "value": "ananke", + "selected": false + }, + { + "text": "atlasbot", + "value": "atlasbot", + "selected": false + }, + { + "text": "lesavka", + "value": "lesavka", + "selected": false + }, + { + "text": "pegasus", + "value": "pegasus|pegasus-health|pegasus_health", + "selected": false + }, + { + "text": "soteria", + "value": "soteria", + "selected": false + }, + { + "text": "titan-iac", + "value": "titan-iac|titan_iac", + "selected": false + }, + { + "text": "bstein-home", + "value": "bstein-home|bstein_home", + "selected": false + }, + { + "text": "arcanagon", + "value": "arcanagon", + "selected": false + }, + { + "text": "data-prepper", + "value": "data-prepper|data_prepper", + "selected": false + } + ], + "hide": 0, + "multi": false, + "includeAll": true, + "allValue": "ariadne|metis|ananke|atlasbot|lesavka|pegasus|pegasus-health|pegasus_health|soteria|titan-iac|titan_iac|bstein-home|bstein_home|arcanagon|data-prepper|data_prepper", + "refresh": 1, + "sort": 1, + "skipUrlSync": false + } + ] + } }