From 944a778c0a3cbdb60d9c316c84b46bfdeb9f1172 Mon Sep 17 00:00:00 2001 From: jenkins Date: Fri, 15 May 2026 19:52:46 -0300 Subject: [PATCH] monitoring: clarify testing dashboard health trends --- scripts/dashboards_render_atlas.py | 135 ++++- scripts/tests/test_dashboards_render_atlas.py | 17 +- .../monitoring/dashboards/atlas-gitops.json | 44 +- .../monitoring/dashboards/atlas-jobs.json | 564 ++++++++++++++---- .../monitoring/dashboards/atlas-mail.json | 34 +- .../monitoring/dashboards/atlas-network.json | 38 +- .../monitoring/dashboards/atlas-nodes.json | 30 +- .../monitoring/dashboards/atlas-overview.json | 182 +++--- .../monitoring/dashboards/atlas-pods.json | 24 +- .../monitoring/dashboards/atlas-power.json | 10 +- .../monitoring/dashboards/atlas-storage.json | 34 +- .../monitoring/dashboards/atlas-testing.json | 564 ++++++++++++++---- .../monitoring/grafana-dashboard-gitops.yaml | 44 +- .../monitoring/grafana-dashboard-jobs.yaml | 564 ++++++++++++++---- .../monitoring/grafana-dashboard-mail.yaml | 34 +- .../monitoring/grafana-dashboard-network.yaml | 38 +- .../monitoring/grafana-dashboard-nodes.yaml | 30 +- .../grafana-dashboard-overview.yaml | 182 +++--- .../monitoring/grafana-dashboard-pods.yaml | 24 +- .../monitoring/grafana-dashboard-power.yaml | 10 +- .../monitoring/grafana-dashboard-storage.yaml | 34 +- .../monitoring/grafana-dashboard-testing.yaml | 564 ++++++++++++++---- .../vmalert-atlas-availability.yaml | 8 +- 23 files changed, 2372 insertions(+), 836 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 98e18aca..c0e0960e 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -36,6 +36,25 @@ PROM_DS = {"type": "prometheus", "uid": "atlas-vm"} PUBLIC_FOLDER = "overview" PRIVATE_FOLDER = "atlas-internal" ASTRAIOS_MOUNTPOINT = "/mnt/astraios" +GLOBAL_STATUS_COLOR_TONES = { + "blue": "dark-blue", + "green": "dark-green", + "yellow": "dark-yellow", + "orange": "dark-orange", + "red": "dark-red", +} +COLOR_VALUE_KEYS = {"color", "fixedColor"} + + +def apply_global_status_palette(value, parent_key=None): + """Normalize generated Grafana status colors to the shared Atlas tones.""" + if isinstance(value, dict): + return {key: apply_global_status_palette(item, key) for key, item in value.items()} + if isinstance(value, list): + return [apply_global_status_palette(item, parent_key) for item in value] + if parent_key in COLOR_VALUE_KEYS and isinstance(value, str): + return GLOBAL_STATUS_COLOR_TONES.get(value, value) + return value PERCENT_THRESHOLDS = { "mode": "absolute", @@ -3367,6 +3386,7 @@ def build_jobs_dashboard(): workspace_coverage_selector = f'suite=~"{suite_var}",{exported}' smell_selector = f'suite=~"{suite_var}",{exported}' test_case_selector = f'suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__",{exported}' + all_test_case_selector = f'suite=~"{suite_var}",branch=~"{branch_var}",test!="__no_test_cases__",{exported}' build_info_selector = f'suite=~"{suite_var}",branch=~"{branch_var}",{exported}' selected_suite_universe = ( f'(sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[30d])) >= bool 0)' @@ -3463,38 +3483,70 @@ def build_jobs_dashboard(): check_regex_sonarqube = "sonarqube|sonar" check_regex_supply_chain = "ironbank|supply_chain|image_compliance|artifact_security" - def _check_state_series(regex: str, failed: bool) -> str: - state = f'result!~"{non_failure}"' if failed else f'result=~"{success}"' - core = f'sum by (suite) (increase({{{checks_selector},check=~"{regex}",{state}}}[$__interval]))' - return f'({core}) or on(suite) ({selected_suite_zero})' + def _check_state_percent_series(regex: str, failed: bool) -> str: + state = f'result!~"{non_failure}"' if failed else f'result=~"{non_failure}"' + state_checks = ( + f'sum by (suite) (max by (suite, check) (({{{checks_selector},check=~"{regex}",{state}}} > bool 0)))' + ) + total_checks = ( + f'sum by (suite) (max by (suite, check) (({{{checks_selector},check=~"{regex}"}} > bool 0)))' + ) + return f"(100 * ({state_checks}) / clamp_min(({total_checks}), 1)) and on(suite) (({total_checks}) > 0)" - problematic_tests_history_core = ( - f'topk(12, sum by (suite, test) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test!="__no_test_cases__",status="failed"}}))' + rollup_failed_tests = ( + f'sum by (suite, test) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test!="__no_test_cases__",status="failed"}})' ) + raw_failed_tests = ( + f'sum by (suite, test) (max_over_time(platform_quality_gate_test_case_result{{{all_test_case_selector},status="failed"}}[$__interval]))' + ) + problematic_tests_history_core = f"topk(12, (({rollup_failed_tests}) or on(suite, test) ({raw_failed_tests})))" problematic_tests_history = f"({problematic_tests_history_core}) or on() vector(0)" + rollup_failed_tests_30d = ( + f'sum by (suite, test) (sum_over_time(platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test!="__no_test_cases__",status="failed"}}[30d:1h]))' + ) + raw_failed_tests_30d = ( + f'sum by (suite, test) (increase(platform_quality_gate_test_case_result{{{all_test_case_selector},status="failed"}}[30d]))' + ) worst_test_per_suite_core = ( - f'topk by (suite) (1, sum by (suite, test) (increase(platform_quality_gate_test_case_result{{suite=~"{suite_var}",branch=~"{branch_var}",test!="__no_test_cases__",status="failed",{exported}}}[30d])))' + f"topk by (suite) (1, (({rollup_failed_tests_30d}) or on(suite, test) ({raw_failed_tests_30d})))" ) worst_test_per_suite = f"({worst_test_per_suite_core}) or on() vector(0)" + + def _selected_status_history(status: str) -> str: + rollup = ( + f'sum by (suite) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__",status="{status}"}})' + ) + raw = ( + f'sum by (suite) (max_over_time(platform_quality_gate_test_case_result{{{test_case_selector},status="{status}"}}[$__interval]))' + ) + return f"(({rollup}) or on(suite) ({raw}) or on(suite) ({selected_suite_zero}))" + + selected_passed_history = _selected_status_history("passed") + selected_failed_history = _selected_status_history("failed") + selected_skipped_history = _selected_status_history("skipped") + selected_total_history = ( + f'(sum by (suite) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__",status=~"passed|failed|error|skipped"}}) ' + f'or on(suite) sum by (suite) (max_over_time(platform_quality_gate_test_case_result{{{test_case_selector},status=~"passed|failed|error|skipped"}}[$__interval])))' + ) selected_test_pass_fail = [ { "refId": "A", - "expr": f'sum by (suite) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__",status="passed"}}) or on() vector(0)', + "expr": selected_passed_history, "legendFormat": "{{suite}} passed", }, { "refId": "B", - "expr": f'sum by (suite) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__",status="failed"}}) or on() vector(0)', + "expr": selected_failed_history, "legendFormat": "{{suite}} failed", }, { "refId": "C", - "expr": f'sum by (suite) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__",status="skipped"}}) or on() vector(0)', + "expr": selected_skipped_history, "legendFormat": "{{suite}} skipped", }, ] selected_test_pass_rate = ( - f'avg by (suite) (platform_quality:test_case_pass_rate:percent_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__"}})' + f"((100 * ({selected_passed_history}) / clamp_min(({selected_total_history}), 1)) or on(suite) ({selected_suite_zero}))" ) recent_branch_evidence = ( f'sort_desc(count by (suite, branch) (max_over_time(platform_quality_gate_build_info{{{build_info_selector}}}[30d])))' @@ -3772,47 +3824,57 @@ def build_jobs_dashboard(): ] def _append_check_trends(start_id: int, title_prefix: str, failed: bool, y: int) -> None: + trend_thresholds = failures_thresholds if failed else success_thresholds + trend_description = ( + "Current bad-state percentage for this check family, evaluated over time. " + "Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." + if failed + else "Current acceptable-state percentage for this check family, evaluated over time. " + "Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." + ) for index, (label, regex) in enumerate(check_dimensions[:4]): panel = timeseries_panel( start_id + index, - f"{title_prefix}: {label}", - _check_state_series(regex, failed), + f"{label} {title_prefix}", + _check_state_percent_series(regex, failed), {"h": 7, "w": 6, "x": index * 6, "y": y}, - unit="none", + unit="percent", legend="{{suite}}", legend_display="list", legend_placement="bottom", legend_calcs=[], ) - panel["description"] = ( - "One line per selected suite, counting check events in this state during each bucket. " - "Intervals without runs stay at zero rather than being treated as failures." - ) + panel["description"] = trend_description + panel["fieldConfig"]["defaults"]["thresholds"] = trend_thresholds panel["fieldConfig"]["defaults"]["min"] = 0 + panel["fieldConfig"]["defaults"]["max"] = 100 panel["fieldConfig"]["defaults"].setdefault("custom", {})["spanNulls"] = True + panel["fieldConfig"]["defaults"]["custom"]["showPoints"] = "never" + panel["fieldConfig"]["defaults"]["custom"]["lineWidth"] = 2 panels.append(panel) for index, (label, regex) in enumerate(check_dimensions[4:]): panel = timeseries_panel( start_id + 4 + index, - f"{title_prefix}: {label}", - _check_state_series(regex, failed), + f"{label} {title_prefix}", + _check_state_percent_series(regex, failed), {"h": 7, "w": 8, "x": index * 8, "y": y + 7}, - unit="none", + unit="percent", legend="{{suite}}", legend_display="list", legend_placement="bottom", legend_calcs=[], ) - panel["description"] = ( - "One line per selected suite, counting check events in this state during each bucket. " - "Intervals without runs stay at zero rather than being treated as failures." - ) + panel["description"] = trend_description + panel["fieldConfig"]["defaults"]["thresholds"] = trend_thresholds panel["fieldConfig"]["defaults"]["min"] = 0 + panel["fieldConfig"]["defaults"]["max"] = 100 panel["fieldConfig"]["defaults"].setdefault("custom", {})["spanNulls"] = True + panel["fieldConfig"]["defaults"]["custom"]["showPoints"] = "never" + panel["fieldConfig"]["defaults"]["custom"]["lineWidth"] = 2 panels.append(panel) - _append_check_trends(130, "Failure Trend", True, 29) - _append_check_trends(138, "Success Trend", False, 43) + _append_check_trends(130, "Failure Rate", True, 29) + _append_check_trends(138, "Healthy Rate", False, 43) panels.append( timeseries_panel( 145, @@ -4118,10 +4180,18 @@ def build_jobs_dashboard(): 12: {"h": 8, "w": 12, "x": 12, "y": 19}, 13: {"h": 8, "w": 12, "x": 0, "y": 27}, 14: {"h": 8, "w": 12, "x": 12, "y": 27}, - 145: {"h": 10, "w": 24, "x": 0, "y": 63}, - 147: {"h": 8, "w": 24, "x": 0, "y": 74}, + 145: {"h": 8, "w": 12, "x": 0, "y": 74}, + 147: {"h": 8, "w": 12, "x": 12, "y": 74}, 146: {"h": 8, "w": 12, "x": 0, "y": 83}, 152: {"h": 8, "w": 12, "x": 12, "y": 83}, + 27: {"h": 7, "w": 6, "x": 0, "y": 94}, + 28: {"h": 7, "w": 6, "x": 6, "y": 94}, + 29: {"h": 7, "w": 6, "x": 12, "y": 94}, + 30: {"h": 7, "w": 6, "x": 18, "y": 94}, + 148: {"h": 7, "w": 6, "x": 0, "y": 101}, + 151: {"h": 7, "w": 6, "x": 6, "y": 101}, + 149: {"h": 7, "w": 6, "x": 12, "y": 101}, + 150: {"h": 7, "w": 6, "x": 18, "y": 101}, 31: {"h": 6, "w": 4, "x": 0, "y": 111}, 32: {"h": 6, "w": 4, "x": 4, "y": 111}, 33: {"h": 6, "w": 4, "x": 8, "y": 111}, @@ -4136,13 +4206,13 @@ def build_jobs_dashboard(): row_panel(500, "Reliability And Run History", 18, panels=children([11, 12, 13, 14])), row_panel( 501, - "Failure Trends By Check", + "Check Failure Rates By Suite", 19, panels=children([130, 131, 132, 133, 134, 135, 136]), ), row_panel( 502, - "Success Trends By Check", + "Check Healthy Rates By Suite", 20, panels=children([138, 139, 140, 141, 142, 143, 144]), ), @@ -4726,12 +4796,13 @@ DASHBOARDS = { def write_json(uid, data): DASHBOARD_DIR.mkdir(parents=True, exist_ok=True) path = DASHBOARD_DIR / f"{uid}.json" + data = apply_global_status_palette(data) path.write_text(json.dumps(data, indent=2) + "\n") def render_configmap(uid, info): json_path = DASHBOARD_DIR / f"{uid}.json" - payload = json.dumps(json.loads(json_path.read_text()), indent=2) + payload = json.dumps(apply_global_status_palette(json.loads(json_path.read_text())), indent=2) indented = "\n".join(" " + line for line in payload.splitlines()) output_path = info["configmap"] content = CONFIG_TEMPLATE.format( diff --git a/scripts/tests/test_dashboards_render_atlas.py b/scripts/tests/test_dashboards_render_atlas.py index e369e525..5ab27a2a 100644 --- a/scripts/tests/test_dashboards_render_atlas.py +++ b/scripts/tests/test_dashboards_render_atlas.py @@ -158,16 +158,25 @@ def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint(): ) assert [row["title"] for row in rows] == [ "Reliability And Run History", - "Failure Trends By Check", - "Success Trends By Check", + "Check Failure Rates By Suite", + "Check Healthy Rates By Suite", "Test Drilldowns And Problem Tests", "Telemetry Completeness And Branches", "SonarQube Project Health", ] assert all(row["collapsed"] for row in rows) - assert "Failure Trend: Coverage" in nested_panels_by_title - assert "Success Trend: Supply Chain" in nested_panels_by_title + assert "Coverage Failure Rate" in nested_panels_by_title + assert "Supply Chain Healthy Rate" in nested_panels_by_title assert "Selected Test Pass Rate History" in nested_panels_by_title assert "Coverage Metrics Present by Suite" in nested_panels_by_title assert "SonarQube API Up" in nested_panels_by_title + + failure_rate_panel = nested_panels_by_title["Coverage Failure Rate"] + assert failure_rate_panel["fieldConfig"]["defaults"]["unit"] == "percent" + assert failure_rate_panel["fieldConfig"]["defaults"]["max"] == 100 + assert "increase(" not in failure_rate_panel["targets"][0]["expr"] + + pass_rate_panel = nested_panels_by_title["Selected Test Pass Rate History"] + assert "platform_quality_gate_test_case_result" in pass_rate_panel["targets"][0]["expr"] + assert "platform_quality:test_case_pass_rate:percent_1h" not in pass_rate_panel["targets"][0]["expr"] diff --git a/services/monitoring/dashboards/atlas-gitops.json b/services/monitoring/dashboards/atlas-gitops.json index 13391827..afb99aaa 100644 --- a/services/monitoring/dashboards/atlas-gitops.json +++ b/services/monitoring/dashboards/atlas-gitops.json @@ -36,11 +36,11 @@ "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "blue", + "color": "dark-blue", "value": 1 } ] @@ -97,15 +97,15 @@ "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 99 }, { - "color": "blue", + "color": "dark-blue", "value": 100 } ] @@ -162,11 +162,11 @@ "mode": "absolute", "steps": [ { - "color": "blue", + "color": "dark-blue", "value": null }, { - "color": "red", + "color": "dark-red", "value": 1 } ] @@ -222,15 +222,15 @@ "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 99 }, { - "color": "blue", + "color": "dark-blue", "value": 100 } ] @@ -287,11 +287,11 @@ "mode": "absolute", "steps": [ { - "color": "blue", + "color": "dark-blue", "value": null }, { - "color": "red", + "color": "dark-red", "value": 1 } ] @@ -355,11 +355,11 @@ "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "blue", + "color": "dark-blue", "value": 1 } ] @@ -394,11 +394,11 @@ "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "blue", + "color": "dark-blue", "value": 1 } ] @@ -506,11 +506,11 @@ "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "blue", + "color": "dark-blue", "value": 1 } ] @@ -574,11 +574,11 @@ "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "blue", + "color": "dark-blue", "value": 1 } ] @@ -642,11 +642,11 @@ "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "blue", + "color": "dark-blue", "value": 1 } ] diff --git a/services/monitoring/dashboards/atlas-jobs.json b/services/monitoring/dashboards/atlas-jobs.json index 47516baa..dbe67191 100644 --- a/services/monitoring/dashboards/atlas-jobs.json +++ b/services/monitoring/dashboards/atlas-jobs.json @@ -985,7 +985,7 @@ { "id": 501, "type": "row", - "title": "Failure Trends By Check", + "title": "Check Failure Rates By Suite", "gridPos": { "h": 1, "w": 24, @@ -997,7 +997,7 @@ { "id": 130, "type": "timeseries", - "title": "Failure Trend: Tests", + "title": "Tests Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1010,17 +1010,41 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1034,12 +1058,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 131, "type": "timeseries", - "title": "Failure Trend: Coverage", + "title": "Coverage Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1052,17 +1076,41 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1076,12 +1124,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 132, "type": "timeseries", - "title": "Failure Trend: LOC", + "title": "LOC Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1094,17 +1142,41 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1118,12 +1190,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 133, "type": "timeseries", - "title": "Failure Trend: Style", + "title": "Style Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1136,17 +1208,41 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1160,12 +1256,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 134, "type": "timeseries", - "title": "Failure Trend: Gate Glue", + "title": "Gate Glue Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1178,17 +1274,41 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1202,12 +1322,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 135, "type": "timeseries", - "title": "Failure Trend: SonarQube", + "title": "SonarQube Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1220,17 +1340,41 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1244,12 +1388,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 136, "type": "timeseries", - "title": "Failure Trend: Supply Chain", + "title": "Supply Chain Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1262,17 +1406,41 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1286,14 +1454,14 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." } ] }, { "id": 502, "type": "row", - "title": "Success Trends By Check", + "title": "Check Healthy Rates By Suite", "gridPos": { "h": 1, "w": 24, @@ -1305,7 +1473,7 @@ { "id": 138, "type": "timeseries", - "title": "Success Trend: Tests", + "title": "Tests Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1318,17 +1486,45 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1342,12 +1538,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 139, "type": "timeseries", - "title": "Success Trend: Coverage", + "title": "Coverage Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1360,17 +1556,45 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1384,12 +1608,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 140, "type": "timeseries", - "title": "Success Trend: LOC", + "title": "LOC Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1402,17 +1626,45 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1426,12 +1678,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 141, "type": "timeseries", - "title": "Success Trend: Style", + "title": "Style Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1444,17 +1696,45 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1468,12 +1748,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 142, "type": "timeseries", - "title": "Success Trend: Gate Glue", + "title": "Gate Glue Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1486,17 +1766,45 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1510,12 +1818,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 143, "type": "timeseries", - "title": "Success Trend: SonarQube", + "title": "SonarQube Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1528,17 +1836,45 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1552,12 +1888,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 144, "type": "timeseries", - "title": "Success Trend: Supply Chain", + "title": "Supply Chain Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1570,17 +1906,45 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1594,7 +1958,7 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." } ] }, @@ -1619,14 +1983,14 @@ "uid": "atlas-vm" }, "gridPos": { - "h": 10, - "w": 24, + "h": 8, + "w": 12, "x": 0, - "y": 63 + "y": 74 }, "targets": [ { - "expr": "(topk(12, sum by (suite, test) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",status=\"failed\"}))) or on() vector(0)", + "expr": "(topk(12, ((sum by (suite, test) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",status=\"failed\"})) or on(suite, test) (sum by (suite, test) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"failed\"}[$__interval])))))) or on() vector(0)", "refId": "A", "legendFormat": "{{suite}} - {{test}}" } @@ -1766,13 +2130,13 @@ }, "gridPos": { "h": 8, - "w": 24, - "x": 0, + "w": 12, + "x": 12, "y": 74 }, "targets": [ { - "expr": "sort_desc((topk by (suite) (1, sum by (suite, test) (increase(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",status=\"failed\",exported_job=\"platform-quality-ci\"}[30d])))) or on() vector(0))", + "expr": "sort_desc((topk by (suite) (1, ((sum by (suite, test) (sum_over_time(platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",status=\"failed\"}[30d:1h]))) or on(suite, test) (sum by (suite, test) (increase(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"failed\"}[30d])))))) or on() vector(0))", "refId": "A", "legendFormat": "{{suite}} \u00b7 {{test}}", "instant": true @@ -1965,17 +2329,17 @@ "targets": [ { "refId": "A", - "expr": "sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"passed\"}) or on() vector(0)", + "expr": "((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"passed\"})) or on(suite) (sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"passed\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))", "legendFormat": "{{suite}} passed" }, { "refId": "B", - "expr": "sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"failed\"}) or on() vector(0)", + "expr": "((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"failed\"})) or on(suite) (sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"failed\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))", "legendFormat": "{{suite}} failed" }, { "refId": "C", - "expr": "sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"skipped\"}) or on() vector(0)", + "expr": "((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"skipped\"})) or on(suite) (sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"skipped\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))", "legendFormat": "{{suite}} skipped" } ], @@ -2120,7 +2484,7 @@ }, "targets": [ { - "expr": "avg by (suite) (platform_quality:test_case_pass_rate:percent_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\"})", + "expr": "((100 * (((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"passed\"})) or on(suite) (sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"passed\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))) / clamp_min(((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=~\"passed|failed|error|skipped\"}) or on(suite) sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=~\"passed|failed|error|skipped\"}[$__interval])))), 1)) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -2303,7 +2667,7 @@ "h": 7, "w": 6, "x": 0, - "y": 81 + "y": 94 }, "targets": [ { @@ -2385,7 +2749,7 @@ "h": 7, "w": 6, "x": 6, - "y": 81 + "y": 94 }, "targets": [ { @@ -2467,7 +2831,7 @@ "h": 7, "w": 6, "x": 12, - "y": 81 + "y": 94 }, "targets": [ { @@ -2549,7 +2913,7 @@ "h": 7, "w": 6, "x": 18, - "y": 81 + "y": 94 }, "targets": [ { @@ -2628,10 +2992,10 @@ "uid": "atlas-vm" }, "gridPos": { - "h": 6, - "w": 12, + "h": 7, + "w": 6, "x": 0, - "y": 94 + "y": 101 }, "targets": [ { @@ -2710,10 +3074,10 @@ "uid": "atlas-vm" }, "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 94 + "h": 7, + "w": 6, + "x": 6, + "y": 101 }, "targets": [ { @@ -2793,9 +3157,9 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 0, - "y": 100 + "w": 6, + "x": 12, + "y": 101 }, "targets": [ { @@ -2960,9 +3324,9 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 100 + "w": 6, + "x": 18, + "y": 101 }, "targets": [ { diff --git a/services/monitoring/dashboards/atlas-mail.json b/services/monitoring/dashboards/atlas-mail.json index 67c17669..6d570802 100644 --- a/services/monitoring/dashboards/atlas-mail.json +++ b/services/monitoring/dashboards/atlas-mail.json @@ -38,7 +38,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -99,7 +99,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -164,19 +164,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 5 }, { - "color": "orange", + "color": "dark-orange", "value": 8 }, { - "color": "red", + "color": "dark-red", "value": 10 } ] @@ -254,19 +254,19 @@ "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "orange", + "color": "dark-orange", "value": 90 }, { - "color": "yellow", + "color": "dark-yellow", "value": 95 }, { - "color": "green", + "color": "dark-green", "value": 98 } ] @@ -323,19 +323,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 70 }, { - "color": "orange", + "color": "dark-orange", "value": 85 }, { - "color": "red", + "color": "dark-red", "value": 95 } ] @@ -396,7 +396,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -457,7 +457,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -518,7 +518,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] diff --git a/services/monitoring/dashboards/atlas-network.json b/services/monitoring/dashboards/atlas-network.json index 09e93835..9a87dbb1 100644 --- a/services/monitoring/dashboards/atlas-network.json +++ b/services/monitoring/dashboards/atlas-network.json @@ -34,19 +34,19 @@ "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "orange", + "color": "dark-orange", "value": 0.995 }, { - "color": "yellow", + "color": "dark-yellow", "value": 0.999 }, { - "color": "green", + "color": "dark-green", "value": 0.9995 } ] @@ -103,19 +103,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 1 }, { - "color": "orange", + "color": "dark-orange", "value": 2 }, { - "color": "red", + "color": "dark-red", "value": 4 } ] @@ -172,19 +172,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 1 }, { - "color": "orange", + "color": "dark-orange", "value": 2 }, { - "color": "red", + "color": "dark-red", "value": 4 } ] @@ -241,19 +241,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 200 }, { - "color": "orange", + "color": "dark-orange", "value": 350 }, { - "color": "red", + "color": "dark-red", "value": 500 } ] @@ -314,7 +314,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -374,7 +374,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -434,7 +434,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] diff --git a/services/monitoring/dashboards/atlas-nodes.json b/services/monitoring/dashboards/atlas-nodes.json index e7d08229..4221e55e 100644 --- a/services/monitoring/dashboards/atlas-nodes.json +++ b/services/monitoring/dashboards/atlas-nodes.json @@ -38,7 +38,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -99,7 +99,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -160,7 +160,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -216,19 +216,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 0.05 }, { - "color": "orange", + "color": "dark-orange", "value": 0.2 }, { - "color": "red", + "color": "dark-red", "value": 0.5 } ] @@ -285,19 +285,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 250 }, { - "color": "orange", + "color": "dark-orange", "value": 400 }, { - "color": "red", + "color": "dark-red", "value": 600 } ] @@ -354,19 +354,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 100 }, { - "color": "red", + "color": "dark-red", "value": 200 } ] diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 1e487af7..69bbd369 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -35,11 +35,11 @@ "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "green", + "color": "dark-green", "value": 3 } ] @@ -90,19 +90,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 1 }, { - "color": "orange", + "color": "dark-orange", "value": 2 }, { - "color": "red", + "color": "dark-red", "value": 3 } ] @@ -165,19 +165,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 1 }, { - "color": "orange", + "color": "dark-orange", "value": 2 }, { - "color": "red", + "color": "dark-red", "value": 3 } ] @@ -241,23 +241,23 @@ "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "orange", + "color": "dark-orange", "value": 0.99 }, { - "color": "yellow", + "color": "dark-yellow", "value": 0.999 }, { - "color": "green", + "color": "dark-green", "value": 0.9999 }, { - "color": "blue", + "color": "dark-blue", "value": 0.99999 } ] @@ -315,19 +315,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 1 }, { - "color": "orange", + "color": "dark-orange", "value": 2 }, { - "color": "red", + "color": "dark-red", "value": 3 } ] @@ -390,19 +390,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 1 }, { - "color": "orange", + "color": "dark-orange", "value": 2 }, { - "color": "red", + "color": "dark-red", "value": 3 } ] @@ -463,19 +463,19 @@ "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "orange", + "color": "dark-orange", "value": 18 }, { - "color": "yellow", + "color": "dark-yellow", "value": 19 }, { - "color": "green", + "color": "dark-green", "value": 20 } ] @@ -528,19 +528,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 75 }, { - "color": "red", + "color": "dark-red", "value": 91.5 } ] @@ -605,19 +605,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 75 }, { - "color": "red", + "color": "dark-red", "value": 91.5 } ] @@ -686,7 +686,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -755,7 +755,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -818,19 +818,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 75 }, { - "color": "red", + "color": "dark-red", "value": 91.5 } ] @@ -893,19 +893,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 75 }, { - "color": "red", + "color": "dark-red", "value": 91.5 } ] @@ -972,7 +972,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -1039,7 +1039,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -1108,7 +1108,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -1206,7 +1206,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -1353,7 +1353,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -1451,7 +1451,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -1936,11 +1936,11 @@ "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "blue", + "color": "dark-blue", "value": 100 } ] @@ -1972,15 +1972,15 @@ "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 99 }, { - "color": "blue", + "color": "dark-blue", "value": 100 } ] @@ -2000,11 +2000,11 @@ "mode": "absolute", "steps": [ { - "color": "blue", + "color": "dark-blue", "value": null }, { - "color": "red", + "color": "dark-red", "value": 1 } ] @@ -2091,19 +2091,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 6 }, { - "color": "orange", + "color": "dark-orange", "value": 24 }, { - "color": "red", + "color": "dark-red", "value": 48 } ] @@ -2189,7 +2189,7 @@ "id": "color", "value": { "mode": "fixed", - "fixedColor": "green" + "fixedColor": "dark-green" } } ] @@ -2204,7 +2204,7 @@ "id": "color", "value": { "mode": "fixed", - "fixedColor": "red" + "fixedColor": "dark-red" } } ] @@ -2384,7 +2384,7 @@ "id": "color", "value": { "mode": "fixed", - "fixedColor": "green" + "fixedColor": "dark-green" } } ] @@ -2399,7 +2399,7 @@ "id": "color", "value": { "mode": "fixed", - "fixedColor": "red" + "fixedColor": "dark-red" } } ] @@ -2491,7 +2491,7 @@ "id": "color", "value": { "mode": "fixed", - "fixedColor": "green" + "fixedColor": "dark-green" } } ] @@ -2506,7 +2506,7 @@ "id": "color", "value": { "mode": "fixed", - "fixedColor": "red" + "fixedColor": "dark-red" } } ] @@ -2583,19 +2583,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 20 }, { - "color": "orange", + "color": "dark-orange", "value": 40 }, { - "color": "red", + "color": "dark-red", "value": 50 } ] @@ -2668,7 +2668,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -2739,19 +2739,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 5 }, { - "color": "orange", + "color": "dark-orange", "value": 8 }, { - "color": "red", + "color": "dark-red", "value": 10 } ] @@ -2836,19 +2836,19 @@ "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "orange", + "color": "dark-orange", "value": 90 }, { - "color": "yellow", + "color": "dark-yellow", "value": 95 }, { - "color": "green", + "color": "dark-green", "value": 98 } ] @@ -2912,19 +2912,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 70 }, { - "color": "orange", + "color": "dark-orange", "value": 85 }, { - "color": "red", + "color": "dark-red", "value": 95 } ] @@ -2994,7 +2994,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -3057,7 +3057,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -3541,19 +3541,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 75 }, { - "color": "red", + "color": "dark-red", "value": 100 } ] diff --git a/services/monitoring/dashboards/atlas-pods.json b/services/monitoring/dashboards/atlas-pods.json index a20bff4c..a5c630b7 100644 --- a/services/monitoring/dashboards/atlas-pods.json +++ b/services/monitoring/dashboards/atlas-pods.json @@ -34,11 +34,11 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "red", + "color": "dark-red", "value": 1 } ] @@ -94,11 +94,11 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "red", + "color": "dark-red", "value": 1 } ] @@ -154,11 +154,11 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "red", + "color": "dark-red", "value": 1 } ] @@ -214,11 +214,11 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "red", + "color": "dark-red", "value": 1 } ] @@ -457,19 +457,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 75 }, { - "color": "red", + "color": "dark-red", "value": 100 } ] diff --git a/services/monitoring/dashboards/atlas-power.json b/services/monitoring/dashboards/atlas-power.json index 7d336fed..a1e7882b 100644 --- a/services/monitoring/dashboards/atlas-power.json +++ b/services/monitoring/dashboards/atlas-power.json @@ -70,7 +70,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -318,7 +318,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -537,15 +537,15 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 7 }, { - "color": "red", + "color": "dark-red", "value": 9 } ] diff --git a/services/monitoring/dashboards/atlas-storage.json b/services/monitoring/dashboards/atlas-storage.json index 0eca11c4..aaf6612a 100644 --- a/services/monitoring/dashboards/atlas-storage.json +++ b/services/monitoring/dashboards/atlas-storage.json @@ -34,19 +34,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 75 }, { - "color": "red", + "color": "dark-red", "value": 91.5 } ] @@ -102,19 +102,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 75 }, { - "color": "red", + "color": "dark-red", "value": 91.5 } ] @@ -174,7 +174,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -234,7 +234,7 @@ "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -440,19 +440,19 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 75 }, { - "color": "red", + "color": "dark-red", "value": 91.5 } ] @@ -508,15 +508,15 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 3600 }, { - "color": "red", + "color": "dark-red", "value": 10800 } ] diff --git a/services/monitoring/dashboards/atlas-testing.json b/services/monitoring/dashboards/atlas-testing.json index 8a50f5f7..4008032e 100644 --- a/services/monitoring/dashboards/atlas-testing.json +++ b/services/monitoring/dashboards/atlas-testing.json @@ -985,7 +985,7 @@ { "id": 501, "type": "row", - "title": "Failure Trends By Check", + "title": "Check Failure Rates By Suite", "gridPos": { "h": 1, "w": 24, @@ -997,7 +997,7 @@ { "id": 130, "type": "timeseries", - "title": "Failure Trend: Tests", + "title": "Tests Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1010,17 +1010,41 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1034,12 +1058,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 131, "type": "timeseries", - "title": "Failure Trend: Coverage", + "title": "Coverage Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1052,17 +1076,41 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1076,12 +1124,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 132, "type": "timeseries", - "title": "Failure Trend: LOC", + "title": "LOC Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1094,17 +1142,41 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1118,12 +1190,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 133, "type": "timeseries", - "title": "Failure Trend: Style", + "title": "Style Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1136,17 +1208,41 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1160,12 +1256,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 134, "type": "timeseries", - "title": "Failure Trend: Gate Glue", + "title": "Gate Glue Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1178,17 +1274,41 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1202,12 +1322,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 135, "type": "timeseries", - "title": "Failure Trend: SonarQube", + "title": "SonarQube Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1220,17 +1340,41 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1244,12 +1388,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 136, "type": "timeseries", - "title": "Failure Trend: Supply Chain", + "title": "Supply Chain Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1262,17 +1406,41 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1286,14 +1454,14 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." } ] }, { "id": 502, "type": "row", - "title": "Success Trends By Check", + "title": "Check Healthy Rates By Suite", "gridPos": { "h": 1, "w": 24, @@ -1305,7 +1473,7 @@ { "id": 138, "type": "timeseries", - "title": "Success Trend: Tests", + "title": "Tests Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1318,17 +1486,45 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1342,12 +1538,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 139, "type": "timeseries", - "title": "Success Trend: Coverage", + "title": "Coverage Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1360,17 +1556,45 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1384,12 +1608,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 140, "type": "timeseries", - "title": "Success Trend: LOC", + "title": "LOC Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1402,17 +1626,45 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1426,12 +1678,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 141, "type": "timeseries", - "title": "Success Trend: Style", + "title": "Style Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1444,17 +1696,45 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1468,12 +1748,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 142, "type": "timeseries", - "title": "Success Trend: Gate Glue", + "title": "Gate Glue Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1486,17 +1766,45 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1510,12 +1818,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 143, "type": "timeseries", - "title": "Success Trend: SonarQube", + "title": "SonarQube Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1528,17 +1836,45 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1552,12 +1888,12 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 144, "type": "timeseries", - "title": "Success Trend: Supply Chain", + "title": "Supply Chain Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1570,17 +1906,45 @@ }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1594,7 +1958,7 @@ "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." } ] }, @@ -1619,14 +1983,14 @@ "uid": "atlas-vm" }, "gridPos": { - "h": 10, - "w": 24, + "h": 8, + "w": 12, "x": 0, - "y": 63 + "y": 74 }, "targets": [ { - "expr": "(topk(12, sum by (suite, test) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",status=\"failed\"}))) or on() vector(0)", + "expr": "(topk(12, ((sum by (suite, test) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",status=\"failed\"})) or on(suite, test) (sum by (suite, test) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"failed\"}[$__interval])))))) or on() vector(0)", "refId": "A", "legendFormat": "{{suite}} - {{test}}" } @@ -1766,13 +2130,13 @@ }, "gridPos": { "h": 8, - "w": 24, - "x": 0, + "w": 12, + "x": 12, "y": 74 }, "targets": [ { - "expr": "sort_desc((topk by (suite) (1, sum by (suite, test) (increase(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",status=\"failed\",exported_job=\"platform-quality-ci\"}[30d])))) or on() vector(0))", + "expr": "sort_desc((topk by (suite) (1, ((sum by (suite, test) (sum_over_time(platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",status=\"failed\"}[30d:1h]))) or on(suite, test) (sum by (suite, test) (increase(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"failed\"}[30d])))))) or on() vector(0))", "refId": "A", "legendFormat": "{{suite}} \u00b7 {{test}}", "instant": true @@ -1965,17 +2329,17 @@ "targets": [ { "refId": "A", - "expr": "sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"passed\"}) or on() vector(0)", + "expr": "((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"passed\"})) or on(suite) (sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"passed\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))", "legendFormat": "{{suite}} passed" }, { "refId": "B", - "expr": "sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"failed\"}) or on() vector(0)", + "expr": "((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"failed\"})) or on(suite) (sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"failed\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))", "legendFormat": "{{suite}} failed" }, { "refId": "C", - "expr": "sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"skipped\"}) or on() vector(0)", + "expr": "((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"skipped\"})) or on(suite) (sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"skipped\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))", "legendFormat": "{{suite}} skipped" } ], @@ -2120,7 +2484,7 @@ }, "targets": [ { - "expr": "avg by (suite) (platform_quality:test_case_pass_rate:percent_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\"})", + "expr": "((100 * (((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"passed\"})) or on(suite) (sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"passed\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))) / clamp_min(((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=~\"passed|failed|error|skipped\"}) or on(suite) sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=~\"passed|failed|error|skipped\"}[$__interval])))), 1)) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -2303,7 +2667,7 @@ "h": 7, "w": 6, "x": 0, - "y": 81 + "y": 94 }, "targets": [ { @@ -2385,7 +2749,7 @@ "h": 7, "w": 6, "x": 6, - "y": 81 + "y": 94 }, "targets": [ { @@ -2467,7 +2831,7 @@ "h": 7, "w": 6, "x": 12, - "y": 81 + "y": 94 }, "targets": [ { @@ -2549,7 +2913,7 @@ "h": 7, "w": 6, "x": 18, - "y": 81 + "y": 94 }, "targets": [ { @@ -2628,10 +2992,10 @@ "uid": "atlas-vm" }, "gridPos": { - "h": 6, - "w": 12, + "h": 7, + "w": 6, "x": 0, - "y": 94 + "y": 101 }, "targets": [ { @@ -2710,10 +3074,10 @@ "uid": "atlas-vm" }, "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 94 + "h": 7, + "w": 6, + "x": 6, + "y": 101 }, "targets": [ { @@ -2793,9 +3157,9 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 0, - "y": 100 + "w": 6, + "x": 12, + "y": 101 }, "targets": [ { @@ -2960,9 +3324,9 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 100 + "w": 6, + "x": 18, + "y": 101 }, "targets": [ { diff --git a/services/monitoring/grafana-dashboard-gitops.yaml b/services/monitoring/grafana-dashboard-gitops.yaml index caef98c9..349a7bda 100644 --- a/services/monitoring/grafana-dashboard-gitops.yaml +++ b/services/monitoring/grafana-dashboard-gitops.yaml @@ -45,11 +45,11 @@ data: "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "blue", + "color": "dark-blue", "value": 1 } ] @@ -106,15 +106,15 @@ data: "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 99 }, { - "color": "blue", + "color": "dark-blue", "value": 100 } ] @@ -171,11 +171,11 @@ data: "mode": "absolute", "steps": [ { - "color": "blue", + "color": "dark-blue", "value": null }, { - "color": "red", + "color": "dark-red", "value": 1 } ] @@ -231,15 +231,15 @@ data: "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 99 }, { - "color": "blue", + "color": "dark-blue", "value": 100 } ] @@ -296,11 +296,11 @@ data: "mode": "absolute", "steps": [ { - "color": "blue", + "color": "dark-blue", "value": null }, { - "color": "red", + "color": "dark-red", "value": 1 } ] @@ -364,11 +364,11 @@ data: "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "blue", + "color": "dark-blue", "value": 1 } ] @@ -403,11 +403,11 @@ data: "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "blue", + "color": "dark-blue", "value": 1 } ] @@ -515,11 +515,11 @@ data: "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "blue", + "color": "dark-blue", "value": 1 } ] @@ -583,11 +583,11 @@ data: "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "blue", + "color": "dark-blue", "value": 1 } ] @@ -651,11 +651,11 @@ data: "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "blue", + "color": "dark-blue", "value": 1 } ] diff --git a/services/monitoring/grafana-dashboard-jobs.yaml b/services/monitoring/grafana-dashboard-jobs.yaml index b785beec..1a87c9a6 100644 --- a/services/monitoring/grafana-dashboard-jobs.yaml +++ b/services/monitoring/grafana-dashboard-jobs.yaml @@ -994,7 +994,7 @@ data: { "id": 501, "type": "row", - "title": "Failure Trends By Check", + "title": "Check Failure Rates By Suite", "gridPos": { "h": 1, "w": 24, @@ -1006,7 +1006,7 @@ data: { "id": 130, "type": "timeseries", - "title": "Failure Trend: Tests", + "title": "Tests Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1019,17 +1019,41 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1043,12 +1067,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 131, "type": "timeseries", - "title": "Failure Trend: Coverage", + "title": "Coverage Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1061,17 +1085,41 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1085,12 +1133,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 132, "type": "timeseries", - "title": "Failure Trend: LOC", + "title": "LOC Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1103,17 +1151,41 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1127,12 +1199,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 133, "type": "timeseries", - "title": "Failure Trend: Style", + "title": "Style Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1145,17 +1217,41 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1169,12 +1265,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 134, "type": "timeseries", - "title": "Failure Trend: Gate Glue", + "title": "Gate Glue Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1187,17 +1283,41 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1211,12 +1331,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 135, "type": "timeseries", - "title": "Failure Trend: SonarQube", + "title": "SonarQube Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1229,17 +1349,41 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1253,12 +1397,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 136, "type": "timeseries", - "title": "Failure Trend: Supply Chain", + "title": "Supply Chain Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1271,17 +1415,41 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1295,14 +1463,14 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." } ] }, { "id": 502, "type": "row", - "title": "Success Trends By Check", + "title": "Check Healthy Rates By Suite", "gridPos": { "h": 1, "w": 24, @@ -1314,7 +1482,7 @@ data: { "id": 138, "type": "timeseries", - "title": "Success Trend: Tests", + "title": "Tests Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1327,17 +1495,45 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1351,12 +1547,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 139, "type": "timeseries", - "title": "Success Trend: Coverage", + "title": "Coverage Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1369,17 +1565,45 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1393,12 +1617,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 140, "type": "timeseries", - "title": "Success Trend: LOC", + "title": "LOC Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1411,17 +1635,45 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1435,12 +1687,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 141, "type": "timeseries", - "title": "Success Trend: Style", + "title": "Style Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1453,17 +1705,45 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1477,12 +1757,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 142, "type": "timeseries", - "title": "Success Trend: Gate Glue", + "title": "Gate Glue Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1495,17 +1775,45 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1519,12 +1827,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 143, "type": "timeseries", - "title": "Success Trend: SonarQube", + "title": "SonarQube Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1537,17 +1845,45 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1561,12 +1897,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 144, "type": "timeseries", - "title": "Success Trend: Supply Chain", + "title": "Supply Chain Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1579,17 +1915,45 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1603,7 +1967,7 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." } ] }, @@ -1628,14 +1992,14 @@ data: "uid": "atlas-vm" }, "gridPos": { - "h": 10, - "w": 24, + "h": 8, + "w": 12, "x": 0, - "y": 63 + "y": 74 }, "targets": [ { - "expr": "(topk(12, sum by (suite, test) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",status=\"failed\"}))) or on() vector(0)", + "expr": "(topk(12, ((sum by (suite, test) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",status=\"failed\"})) or on(suite, test) (sum by (suite, test) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"failed\"}[$__interval])))))) or on() vector(0)", "refId": "A", "legendFormat": "{{suite}} - {{test}}" } @@ -1775,13 +2139,13 @@ data: }, "gridPos": { "h": 8, - "w": 24, - "x": 0, + "w": 12, + "x": 12, "y": 74 }, "targets": [ { - "expr": "sort_desc((topk by (suite) (1, sum by (suite, test) (increase(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",status=\"failed\",exported_job=\"platform-quality-ci\"}[30d])))) or on() vector(0))", + "expr": "sort_desc((topk by (suite) (1, ((sum by (suite, test) (sum_over_time(platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",status=\"failed\"}[30d:1h]))) or on(suite, test) (sum by (suite, test) (increase(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"failed\"}[30d])))))) or on() vector(0))", "refId": "A", "legendFormat": "{{suite}} \u00b7 {{test}}", "instant": true @@ -1974,17 +2338,17 @@ data: "targets": [ { "refId": "A", - "expr": "sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"passed\"}) or on() vector(0)", + "expr": "((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"passed\"})) or on(suite) (sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"passed\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))", "legendFormat": "{{suite}} passed" }, { "refId": "B", - "expr": "sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"failed\"}) or on() vector(0)", + "expr": "((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"failed\"})) or on(suite) (sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"failed\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))", "legendFormat": "{{suite}} failed" }, { "refId": "C", - "expr": "sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"skipped\"}) or on() vector(0)", + "expr": "((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"skipped\"})) or on(suite) (sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"skipped\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))", "legendFormat": "{{suite}} skipped" } ], @@ -2129,7 +2493,7 @@ data: }, "targets": [ { - "expr": "avg by (suite) (platform_quality:test_case_pass_rate:percent_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\"})", + "expr": "((100 * (((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"passed\"})) or on(suite) (sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"passed\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))) / clamp_min(((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=~\"passed|failed|error|skipped\"}) or on(suite) sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=~\"passed|failed|error|skipped\"}[$__interval])))), 1)) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -2312,7 +2676,7 @@ data: "h": 7, "w": 6, "x": 0, - "y": 81 + "y": 94 }, "targets": [ { @@ -2394,7 +2758,7 @@ data: "h": 7, "w": 6, "x": 6, - "y": 81 + "y": 94 }, "targets": [ { @@ -2476,7 +2840,7 @@ data: "h": 7, "w": 6, "x": 12, - "y": 81 + "y": 94 }, "targets": [ { @@ -2558,7 +2922,7 @@ data: "h": 7, "w": 6, "x": 18, - "y": 81 + "y": 94 }, "targets": [ { @@ -2637,10 +3001,10 @@ data: "uid": "atlas-vm" }, "gridPos": { - "h": 6, - "w": 12, + "h": 7, + "w": 6, "x": 0, - "y": 94 + "y": 101 }, "targets": [ { @@ -2719,10 +3083,10 @@ data: "uid": "atlas-vm" }, "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 94 + "h": 7, + "w": 6, + "x": 6, + "y": 101 }, "targets": [ { @@ -2802,9 +3166,9 @@ data: }, "gridPos": { "h": 7, - "w": 12, - "x": 0, - "y": 100 + "w": 6, + "x": 12, + "y": 101 }, "targets": [ { @@ -2969,9 +3333,9 @@ data: }, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 100 + "w": 6, + "x": 18, + "y": 101 }, "targets": [ { diff --git a/services/monitoring/grafana-dashboard-mail.yaml b/services/monitoring/grafana-dashboard-mail.yaml index 4c011a89..9a10afab 100644 --- a/services/monitoring/grafana-dashboard-mail.yaml +++ b/services/monitoring/grafana-dashboard-mail.yaml @@ -47,7 +47,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -108,7 +108,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -173,19 +173,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 5 }, { - "color": "orange", + "color": "dark-orange", "value": 8 }, { - "color": "red", + "color": "dark-red", "value": 10 } ] @@ -263,19 +263,19 @@ data: "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "orange", + "color": "dark-orange", "value": 90 }, { - "color": "yellow", + "color": "dark-yellow", "value": 95 }, { - "color": "green", + "color": "dark-green", "value": 98 } ] @@ -332,19 +332,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 70 }, { - "color": "orange", + "color": "dark-orange", "value": 85 }, { - "color": "red", + "color": "dark-red", "value": 95 } ] @@ -405,7 +405,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -466,7 +466,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -527,7 +527,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] diff --git a/services/monitoring/grafana-dashboard-network.yaml b/services/monitoring/grafana-dashboard-network.yaml index a87600f3..a87cf866 100644 --- a/services/monitoring/grafana-dashboard-network.yaml +++ b/services/monitoring/grafana-dashboard-network.yaml @@ -43,19 +43,19 @@ data: "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "orange", + "color": "dark-orange", "value": 0.995 }, { - "color": "yellow", + "color": "dark-yellow", "value": 0.999 }, { - "color": "green", + "color": "dark-green", "value": 0.9995 } ] @@ -112,19 +112,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 1 }, { - "color": "orange", + "color": "dark-orange", "value": 2 }, { - "color": "red", + "color": "dark-red", "value": 4 } ] @@ -181,19 +181,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 1 }, { - "color": "orange", + "color": "dark-orange", "value": 2 }, { - "color": "red", + "color": "dark-red", "value": 4 } ] @@ -250,19 +250,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 200 }, { - "color": "orange", + "color": "dark-orange", "value": 350 }, { - "color": "red", + "color": "dark-red", "value": 500 } ] @@ -323,7 +323,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -383,7 +383,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -443,7 +443,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] diff --git a/services/monitoring/grafana-dashboard-nodes.yaml b/services/monitoring/grafana-dashboard-nodes.yaml index 0131c74e..05895e21 100644 --- a/services/monitoring/grafana-dashboard-nodes.yaml +++ b/services/monitoring/grafana-dashboard-nodes.yaml @@ -47,7 +47,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -108,7 +108,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -169,7 +169,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -225,19 +225,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 0.05 }, { - "color": "orange", + "color": "dark-orange", "value": 0.2 }, { - "color": "red", + "color": "dark-red", "value": 0.5 } ] @@ -294,19 +294,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 250 }, { - "color": "orange", + "color": "dark-orange", "value": 400 }, { - "color": "red", + "color": "dark-red", "value": 600 } ] @@ -363,19 +363,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 100 }, { - "color": "red", + "color": "dark-red", "value": 200 } ] diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 3a1434eb..b5d8d42b 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -44,11 +44,11 @@ data: "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "green", + "color": "dark-green", "value": 3 } ] @@ -99,19 +99,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 1 }, { - "color": "orange", + "color": "dark-orange", "value": 2 }, { - "color": "red", + "color": "dark-red", "value": 3 } ] @@ -174,19 +174,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 1 }, { - "color": "orange", + "color": "dark-orange", "value": 2 }, { - "color": "red", + "color": "dark-red", "value": 3 } ] @@ -250,23 +250,23 @@ data: "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "orange", + "color": "dark-orange", "value": 0.99 }, { - "color": "yellow", + "color": "dark-yellow", "value": 0.999 }, { - "color": "green", + "color": "dark-green", "value": 0.9999 }, { - "color": "blue", + "color": "dark-blue", "value": 0.99999 } ] @@ -324,19 +324,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 1 }, { - "color": "orange", + "color": "dark-orange", "value": 2 }, { - "color": "red", + "color": "dark-red", "value": 3 } ] @@ -399,19 +399,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 1 }, { - "color": "orange", + "color": "dark-orange", "value": 2 }, { - "color": "red", + "color": "dark-red", "value": 3 } ] @@ -472,19 +472,19 @@ data: "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "orange", + "color": "dark-orange", "value": 18 }, { - "color": "yellow", + "color": "dark-yellow", "value": 19 }, { - "color": "green", + "color": "dark-green", "value": 20 } ] @@ -537,19 +537,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 75 }, { - "color": "red", + "color": "dark-red", "value": 91.5 } ] @@ -614,19 +614,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 75 }, { - "color": "red", + "color": "dark-red", "value": 91.5 } ] @@ -695,7 +695,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -764,7 +764,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -827,19 +827,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 75 }, { - "color": "red", + "color": "dark-red", "value": 91.5 } ] @@ -902,19 +902,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 75 }, { - "color": "red", + "color": "dark-red", "value": 91.5 } ] @@ -981,7 +981,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -1048,7 +1048,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -1117,7 +1117,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -1215,7 +1215,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -1362,7 +1362,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -1460,7 +1460,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -1945,11 +1945,11 @@ data: "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "blue", + "color": "dark-blue", "value": 100 } ] @@ -1981,15 +1981,15 @@ data: "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 99 }, { - "color": "blue", + "color": "dark-blue", "value": 100 } ] @@ -2009,11 +2009,11 @@ data: "mode": "absolute", "steps": [ { - "color": "blue", + "color": "dark-blue", "value": null }, { - "color": "red", + "color": "dark-red", "value": 1 } ] @@ -2100,19 +2100,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 6 }, { - "color": "orange", + "color": "dark-orange", "value": 24 }, { - "color": "red", + "color": "dark-red", "value": 48 } ] @@ -2198,7 +2198,7 @@ data: "id": "color", "value": { "mode": "fixed", - "fixedColor": "green" + "fixedColor": "dark-green" } } ] @@ -2213,7 +2213,7 @@ data: "id": "color", "value": { "mode": "fixed", - "fixedColor": "red" + "fixedColor": "dark-red" } } ] @@ -2393,7 +2393,7 @@ data: "id": "color", "value": { "mode": "fixed", - "fixedColor": "green" + "fixedColor": "dark-green" } } ] @@ -2408,7 +2408,7 @@ data: "id": "color", "value": { "mode": "fixed", - "fixedColor": "red" + "fixedColor": "dark-red" } } ] @@ -2500,7 +2500,7 @@ data: "id": "color", "value": { "mode": "fixed", - "fixedColor": "green" + "fixedColor": "dark-green" } } ] @@ -2515,7 +2515,7 @@ data: "id": "color", "value": { "mode": "fixed", - "fixedColor": "red" + "fixedColor": "dark-red" } } ] @@ -2592,19 +2592,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 20 }, { - "color": "orange", + "color": "dark-orange", "value": 40 }, { - "color": "red", + "color": "dark-red", "value": 50 } ] @@ -2677,7 +2677,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -2748,19 +2748,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 5 }, { - "color": "orange", + "color": "dark-orange", "value": 8 }, { - "color": "red", + "color": "dark-red", "value": 10 } ] @@ -2845,19 +2845,19 @@ data: "mode": "absolute", "steps": [ { - "color": "red", + "color": "dark-red", "value": null }, { - "color": "orange", + "color": "dark-orange", "value": 90 }, { - "color": "yellow", + "color": "dark-yellow", "value": 95 }, { - "color": "green", + "color": "dark-green", "value": 98 } ] @@ -2921,19 +2921,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 70 }, { - "color": "orange", + "color": "dark-orange", "value": 85 }, { - "color": "red", + "color": "dark-red", "value": 95 } ] @@ -3003,7 +3003,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -3066,7 +3066,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -3550,19 +3550,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 75 }, { - "color": "red", + "color": "dark-red", "value": 100 } ] diff --git a/services/monitoring/grafana-dashboard-pods.yaml b/services/monitoring/grafana-dashboard-pods.yaml index 419b2839..3cd54a77 100644 --- a/services/monitoring/grafana-dashboard-pods.yaml +++ b/services/monitoring/grafana-dashboard-pods.yaml @@ -43,11 +43,11 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "red", + "color": "dark-red", "value": 1 } ] @@ -103,11 +103,11 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "red", + "color": "dark-red", "value": 1 } ] @@ -163,11 +163,11 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "red", + "color": "dark-red", "value": 1 } ] @@ -223,11 +223,11 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "red", + "color": "dark-red", "value": 1 } ] @@ -466,19 +466,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 75 }, { - "color": "red", + "color": "dark-red", "value": 100 } ] diff --git a/services/monitoring/grafana-dashboard-power.yaml b/services/monitoring/grafana-dashboard-power.yaml index 762cb733..08fb6f37 100644 --- a/services/monitoring/grafana-dashboard-power.yaml +++ b/services/monitoring/grafana-dashboard-power.yaml @@ -79,7 +79,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -327,7 +327,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -546,15 +546,15 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 7 }, { - "color": "red", + "color": "dark-red", "value": 9 } ] diff --git a/services/monitoring/grafana-dashboard-storage.yaml b/services/monitoring/grafana-dashboard-storage.yaml index d25e922a..21912d61 100644 --- a/services/monitoring/grafana-dashboard-storage.yaml +++ b/services/monitoring/grafana-dashboard-storage.yaml @@ -43,19 +43,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 75 }, { - "color": "red", + "color": "dark-red", "value": 91.5 } ] @@ -111,19 +111,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 75 }, { - "color": "red", + "color": "dark-red", "value": 91.5 } ] @@ -183,7 +183,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -243,7 +243,7 @@ data: "value": null }, { - "color": "green", + "color": "dark-green", "value": 1 } ] @@ -449,19 +449,19 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 50 }, { - "color": "orange", + "color": "dark-orange", "value": 75 }, { - "color": "red", + "color": "dark-red", "value": 91.5 } ] @@ -517,15 +517,15 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "dark-green", "value": null }, { - "color": "yellow", + "color": "dark-yellow", "value": 3600 }, { - "color": "red", + "color": "dark-red", "value": 10800 } ] diff --git a/services/monitoring/grafana-dashboard-testing.yaml b/services/monitoring/grafana-dashboard-testing.yaml index 321d9ea5..84cf9ca7 100644 --- a/services/monitoring/grafana-dashboard-testing.yaml +++ b/services/monitoring/grafana-dashboard-testing.yaml @@ -994,7 +994,7 @@ data: { "id": 501, "type": "row", - "title": "Failure Trends By Check", + "title": "Check Failure Rates By Suite", "gridPos": { "h": 1, "w": 24, @@ -1006,7 +1006,7 @@ data: { "id": 130, "type": "timeseries", - "title": "Failure Trend: Tests", + "title": "Tests Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1019,17 +1019,41 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1043,12 +1067,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 131, "type": "timeseries", - "title": "Failure Trend: Coverage", + "title": "Coverage Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1061,17 +1085,41 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1085,12 +1133,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 132, "type": "timeseries", - "title": "Failure Trend: LOC", + "title": "LOC Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1103,17 +1151,41 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1127,12 +1199,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 133, "type": "timeseries", - "title": "Failure Trend: Style", + "title": "Style Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1145,17 +1217,41 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1169,12 +1265,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 134, "type": "timeseries", - "title": "Failure Trend: Gate Glue", + "title": "Gate Glue Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1187,17 +1283,41 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1211,12 +1331,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 135, "type": "timeseries", - "title": "Failure Trend: SonarQube", + "title": "SonarQube Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1229,17 +1349,41 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1253,12 +1397,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." }, { "id": 136, "type": "timeseries", - "title": "Failure Trend: Supply Chain", + "title": "Supply Chain Failure Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1271,17 +1415,41 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 1 + }, + { + "color": "dark-orange", + "value": 3 + }, + { + "color": "dark-red", + "value": 5 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1295,14 +1463,14 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current bad-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." } ] }, { "id": 502, "type": "row", - "title": "Success Trends By Check", + "title": "Check Healthy Rates By Suite", "gridPos": { "h": 1, "w": 24, @@ -1314,7 +1482,7 @@ data: { "id": 138, "type": "timeseries", - "title": "Success Trend: Tests", + "title": "Tests Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1327,17 +1495,45 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1351,12 +1547,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 139, "type": "timeseries", - "title": "Success Trend: Coverage", + "title": "Coverage Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1369,17 +1565,45 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1393,12 +1617,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 140, "type": "timeseries", - "title": "Success Trend: LOC", + "title": "LOC Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1411,17 +1635,45 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1435,12 +1687,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 141, "type": "timeseries", - "title": "Success Trend: Style", + "title": "Style Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1453,17 +1705,45 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1477,12 +1757,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 142, "type": "timeseries", - "title": "Success Trend: Gate Glue", + "title": "Gate Glue Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1495,17 +1775,45 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1519,12 +1827,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 143, "type": "timeseries", - "title": "Success Trend: SonarQube", + "title": "SonarQube Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1537,17 +1845,45 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1561,12 +1897,12 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." }, { "id": 144, "type": "timeseries", - "title": "Success Trend: Supply Chain", + "title": "Supply Chain Healthy Rate", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1579,17 +1915,45 @@ data: }, "targets": [ { - "expr": "(sum by (suite) (increase({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result=~\"ok|passed|success\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))", + "expr": "(100 * (sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0)))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))), 1)) and on(suite) ((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))) > 0)", "refId": "A", "legendFormat": "{{suite}}" } ], "fieldConfig": { "defaults": { - "unit": "none", + "unit": "percent", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, "min": 0, + "max": 100, "custom": { - "spanNulls": true + "spanNulls": true, + "showPoints": "never", + "lineWidth": 2 } }, "overrides": [] @@ -1603,7 +1967,7 @@ data: "mode": "multi" } }, - "description": "One line per selected suite, counting check events in this state during each bucket. Intervals without runs stay at zero rather than being treated as failures." + "description": "Current acceptable-state percentage for this check family, evaluated over time. Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." } ] }, @@ -1628,14 +1992,14 @@ data: "uid": "atlas-vm" }, "gridPos": { - "h": 10, - "w": 24, + "h": 8, + "w": 12, "x": 0, - "y": 63 + "y": 74 }, "targets": [ { - "expr": "(topk(12, sum by (suite, test) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",status=\"failed\"}))) or on() vector(0)", + "expr": "(topk(12, ((sum by (suite, test) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",status=\"failed\"})) or on(suite, test) (sum by (suite, test) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"failed\"}[$__interval])))))) or on() vector(0)", "refId": "A", "legendFormat": "{{suite}} - {{test}}" } @@ -1775,13 +2139,13 @@ data: }, "gridPos": { "h": 8, - "w": 24, - "x": 0, + "w": 12, + "x": 12, "y": 74 }, "targets": [ { - "expr": "sort_desc((topk by (suite) (1, sum by (suite, test) (increase(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",status=\"failed\",exported_job=\"platform-quality-ci\"}[30d])))) or on() vector(0))", + "expr": "sort_desc((topk by (suite) (1, ((sum by (suite, test) (sum_over_time(platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",status=\"failed\"}[30d:1h]))) or on(suite, test) (sum by (suite, test) (increase(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"failed\"}[30d])))))) or on() vector(0))", "refId": "A", "legendFormat": "{{suite}} \u00b7 {{test}}", "instant": true @@ -1974,17 +2338,17 @@ data: "targets": [ { "refId": "A", - "expr": "sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"passed\"}) or on() vector(0)", + "expr": "((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"passed\"})) or on(suite) (sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"passed\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))", "legendFormat": "{{suite}} passed" }, { "refId": "B", - "expr": "sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"failed\"}) or on() vector(0)", + "expr": "((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"failed\"})) or on(suite) (sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"failed\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))", "legendFormat": "{{suite}} failed" }, { "refId": "C", - "expr": "sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"skipped\"}) or on() vector(0)", + "expr": "((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"skipped\"})) or on(suite) (sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"skipped\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))", "legendFormat": "{{suite}} skipped" } ], @@ -2129,7 +2493,7 @@ data: }, "targets": [ { - "expr": "avg by (suite) (platform_quality:test_case_pass_rate:percent_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\"})", + "expr": "((100 * (((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=\"passed\"})) or on(suite) (sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=\"passed\"}[$__interval]))) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))) / clamp_min(((sum by (suite) (platform_quality:test_case_status:count_1h{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",status=~\"passed|failed|error|skipped\"}) or on(suite) sum by (suite) (max_over_time(platform_quality_gate_test_case_result{suite=~\"${suite:regex}\",branch=~\"${branch:regex}\",test=~\"${test:regex}\",test!=\"__no_test_cases__\",exported_job=\"platform-quality-ci\",status=~\"passed|failed|error|skipped\"}[$__interval])))), 1)) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -2312,7 +2676,7 @@ data: "h": 7, "w": 6, "x": 0, - "y": 81 + "y": 94 }, "targets": [ { @@ -2394,7 +2758,7 @@ data: "h": 7, "w": 6, "x": 6, - "y": 81 + "y": 94 }, "targets": [ { @@ -2476,7 +2840,7 @@ data: "h": 7, "w": 6, "x": 12, - "y": 81 + "y": 94 }, "targets": [ { @@ -2558,7 +2922,7 @@ data: "h": 7, "w": 6, "x": 18, - "y": 81 + "y": 94 }, "targets": [ { @@ -2637,10 +3001,10 @@ data: "uid": "atlas-vm" }, "gridPos": { - "h": 6, - "w": 12, + "h": 7, + "w": 6, "x": 0, - "y": 94 + "y": 101 }, "targets": [ { @@ -2719,10 +3083,10 @@ data: "uid": "atlas-vm" }, "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 94 + "h": 7, + "w": 6, + "x": 6, + "y": 101 }, "targets": [ { @@ -2802,9 +3166,9 @@ data: }, "gridPos": { "h": 7, - "w": 12, - "x": 0, - "y": 100 + "w": 6, + "x": 12, + "y": 101 }, "targets": [ { @@ -2969,9 +3333,9 @@ data: }, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 100 + "w": 6, + "x": 18, + "y": 101 }, "targets": [ { diff --git a/services/monitoring/vmalert-atlas-availability.yaml b/services/monitoring/vmalert-atlas-availability.yaml index 3e82baf3..b80525eb 100644 --- a/services/monitoring/vmalert-atlas-availability.yaml +++ b/services/monitoring/vmalert-atlas-availability.yaml @@ -147,7 +147,7 @@ data: - record: platform_quality:test_case_status:count_1h expr: | sum by (suite, branch, test, status) ( - max_over_time(platform_quality_gate_test_case_result{job="platform-quality-ci",test!="__no_test_cases__"}[1h]) + max_over_time(platform_quality_gate_test_case_result{exported_job="platform-quality-ci",test!="__no_test_cases__"}[1h]) ) labels: rollup: hourly @@ -155,13 +155,13 @@ data: expr: | 100 * ( sum by (suite, branch, test) ( - max_over_time(platform_quality_gate_test_case_result{job="platform-quality-ci",test!="__no_test_cases__",status="passed"}[1h]) + max_over_time(platform_quality_gate_test_case_result{exported_job="platform-quality-ci",test!="__no_test_cases__",status="passed"}[1h]) ) ) / clamp_min( sum by (suite, branch, test) ( - max_over_time(platform_quality_gate_test_case_result{job="platform-quality-ci",test!="__no_test_cases__",status=~"passed|failed|error|skipped"}[1h]) + max_over_time(platform_quality_gate_test_case_result{exported_job="platform-quality-ci",test!="__no_test_cases__",status=~"passed|failed|error|skipped"}[1h]) ), 1 ) @@ -196,7 +196,7 @@ spec: labels: app: vmalert-atlas-availability annotations: - bstein.dev/rules-revision: "2026-05-15-platform-quality-rollups-v1" + bstein.dev/rules-revision: "2026-05-15-platform-quality-rollups-v2" spec: serviceAccountName: vmalert-atlas-availability affinity: