monitoring(testing): surface current gate health

2026-05-20 11:01:28 -03:00 · 2026-05-20 11:01:28 -03:00 · fe37f12e32
commit fe37f12e32
parent f9641a22b8
7 changed files with 686 additions and 273 deletions
--- a/scripts/dashboards_render_atlas.py
+++ b/scripts/dashboards_render_atlas.py
@ -803,10 +803,24 @@ PLATFORM_TEST_CHECK_ROLLUP_OK_FLAGS = (
    f'clamp_max(max by (suite, check) (({PLATFORM_TEST_CHECK_ROLLUP_OK_SELECTOR}) > 0), 1) '
    f'unless on(suite, check) ({PLATFORM_TEST_CHECK_ROLLUP_FAILED_FLAGS})'
 )
-PLATFORM_TEST_CURRENT_GATE_HEALTH_BY_SUITE = (
+PLATFORM_TEST_CURRENT_GATE_CHECK_HEALTH_BY_SUITE = (
    f'(100 * sum by (suite) ({PLATFORM_TEST_CHECK_ROLLUP_OK_FLAGS}) '
    f'/ clamp_min(sum by (suite) ({PLATFORM_TEST_CHECK_ROLLUP_SEEN_FLAGS}), 1))'
 )
+PLATFORM_TEST_CATEGORY_HEALTH_BY_SUITE = (
+    'min by (suite) (platform_quality:test_case_health_rate:percent_1h{'
+    f'suite=~"{PLATFORM_TEST_SUITE_CANONICAL_MATCHER}",branch!="",'
+    f'branch=~"main|master|origin/main|origin/master",test!="",'
+    f'test!="__no_test_cases__",category=~"{PLATFORM_TEST_CATEGORY_REGEX}"'
+    "})"
+)
+PLATFORM_TEST_CURRENT_GATE_HEALTH_BY_SUITE = (
+    f'min by (suite) (({PLATFORM_TEST_CURRENT_GATE_CHECK_HEALTH_BY_SUITE}) '
+    f'or ({PLATFORM_TEST_CATEGORY_HEALTH_BY_SUITE}))'
+)
+PLATFORM_TEST_CURRENT_GATE_HEALTH = (
+    f"(avg(({PLATFORM_TEST_CURRENT_GATE_HEALTH_BY_SUITE})) or on() vector(0))"
+)
 PVC_BACKUP_AGE_HOURS_BY_PVC = "sort_desc(max by (namespace, pvc) (pvc_backup_age_hours or on(namespace, pvc) ((1 - pvc_backup_health) * 999)))"
 ANANKE_SELECTOR = 'job="ananke-power"'
 ANANKE_UPS_DB_NAME = "Pyrphoros"
@ -1742,6 +1756,7 @@ OVERVIEW_PANEL_DESCRIPTIONS = {
    "Enclosure Climate History": "Temperature, humidity, and VPD over time; smooth movement is healthy, sharp swings need attention.",
    "Fan Intensity History": "Fan levels from Off to 10; warmer colors mean stronger cooling response and more thermal pressure.",
    "Flux Source": "Git branch Flux is applying; this should normally be the intended production branch.",
    "Current Gate Health": "Current gate-check health across suites; skipped or not-applicable checks count as healthy, failures lower it.",
    "CI Run Success (24h)": "Percent of published quality-gate CI runs that completed successfully in 24h; this is automation health, not raw test pass rate.",
    "Failed Runs (24h)": "Published quality-gate runs that failed in 24h; zero is good, any value needs a look.",
    "Suites With Runs (24h)": "Configured suites with at least one published quality-gate run in 24h; full count means the dashboard is fresh.",
@ -1750,7 +1765,7 @@ OVERVIEW_PANEL_DESCRIPTIONS = {
    "GitOps Health": "Flux readiness and suspension health over time; blue is perfect, warmer colors mean drift or pause.",
    "One-off Job Pods (age hours)": "Temporary job pods by age; low or empty is good, old pods usually need cleanup.",
    "Ariadne Run Volume": "Ariadne automation attempts and failures; attempts show activity, failures show work to investigate.",
-    "Test Category Pass Rate": "Pass rate by test category across all suites; blue means clean categories, warmer colors show problem areas.",
+    "Test Category Health": "Current category health across suites; skipped tests count as healthy, failures lower the lane.",
    "Jenkins Last Success (h, newest first)": "Age of recent Jenkins successes; lower is fresher and better.",
    "Jenkins Last Failure (h, newest first)": "Age of recent Jenkins failures; lower means a failure happened more recently.",
    "PVC Backup Health / Age": "Restic backup age by PVC; lower is better, very old backups mean restore risk.",
@ -1778,19 +1793,21 @@ OVERVIEW_PANEL_DESCRIPTIONS = {
 TESTING_PANEL_DESCRIPTIONS = {
    "Current Gate Health (%)": "Average latest required gate checks passing across selected suites; this is the current quality state.",
    "CI Run Success Rate (24h)": "Percent of selected quality-gate CI runs that completed successfully in 24h; this is run health, not individual test pass rate.",
    "CI Run Success Rate (30d)": "Percent of selected quality-gate CI runs that completed successfully in 30d; higher means more stable automation.",
    "Failed Runs (24h)": "Selected quality-gate runs that failed in 24h; zero is good and anything else needs a look.",
    "CI Runs (24h)": "Selected quality-gate CI run count in 24h; zero means the dashboard may be stale.",
    "Suite Freshness (24h)": "Percent of selected suites with at least one quality-gate CI run in 24h; 100% means inputs are fresh.",
    "Avg Coverage (%)": "Average latest line coverage for selected suites; higher means better test protection.",
    "Suites with LOC >500": "Selected suites with oversized source files; zero is good for maintainability.",
-    "Latest Gate Checks Passing by Suite": "Latest required gate checks passing by suite; this includes tests plus coverage, LOC, style, and other gates.",
+    "Latest Gate Health by Suite": "Latest required gate health by suite; skipped and not-applicable results are healthy, failures lower it.",
    "CI Run Success by Suite (24h)": "24h CI run success rate by suite; lower rows mean recent jobs failed, aborted, or could not complete cleanly.",
    "Coverage by Suite (Latest, gate 95)": "Latest suite coverage; 95%+ is acceptable and 100% is strongest.",
    "Files <=500 LOC by Suite (Latest)": "Percent of source files within the 500-line limit; higher is easier to maintain.",
    "CI Runs And Test Result History": "Recent CI run, coverage, LOC, and raw test-result trends for selected suites.",
    "CI Run Success by Suite (7d rolling)": "Seven-day rolling CI run success rate by suite; this is run completion history, not raw test pass history.",
-    "Test Category Pass Rate History": "Pass rate by test category; use the Suite filter to focus on one project.",
+    "Test Category Health History": "Health by test category; skipped tests count as healthy, failures lower the lane.",
    "Daily Run Volume (Selected Scope)": "Rolling daily counts of published quality-gate runs; volume explains confidence.",
    "Coverage History by Suite": "Coverage over time by suite; rising lines mean better test protection.",
    "Files <=500 LOC History by Suite": "LOC compliance over time; blue lanes mean files stay within the size limit.",
@ -2184,9 +2201,8 @@ def build_overview():
        ],
    }
    overview_avg_coverage = f"(avg(({QUALITY_GATE_COVERAGE_BY_SUITE})) or on() vector(0))"
-    overview_loc_clean_suites = f"(sum(({QUALITY_GATE_SMELL_INFRACTIONS_BY_SUITE}) == bool 0) or on() vector(0))"
+    overview_category_health = (
-    overview_category_pass_rate = (
+        'avg by (category) (platform_quality:test_case_health_rate:percent_1h{'
-        'avg by (category) (platform_quality:test_case_pass_rate:percent_1h{'
        f'suite=~"{PLATFORM_TEST_SUITE_CANONICAL_MATCHER}",branch!="",branch=~"main|master|origin/main|origin/master",'
        f'test!="",test!="__no_test_cases__",category=~"{PLATFORM_TEST_OVERVIEW_CATEGORY_REGEX}"'
        "})"
@ -2449,11 +2465,11 @@ def build_overview():
    flux_source["options"]["text"] = {"titleSize": 10, "valueSize": 14}
    panels.append(flux_source)
    for panel_id, title, expr, y_pos, unit, decimals, thresholds, links in [
-        (151, "CI Run Success (24h)", TEST_SUCCESS_RATE_24H, 9, "percent", 1, test_success_thresholds, "atlas-testing"),
+        (151, "Current Gate Health", PLATFORM_TEST_CURRENT_GATE_HEALTH, 9, "percent", 1, test_success_thresholds, "atlas-testing"),
-        (152, "Failed Runs (24h)", TEST_FAILURES_24H_TOTAL, 11, "none", 0, failure_count_thresholds, "atlas-testing"),
+        (152, "CI Run Success (24h)", TEST_SUCCESS_RATE_24H, 11, "percent", 1, test_success_thresholds, "atlas-testing"),
-        (153, "Suites With Runs (24h)", PLATFORM_TEST_ACTIVE_SUITES_24H, 13, "none", 0, perfect_count_thresholds, "atlas-testing"),
+        (153, "Failed Runs (24h)", TEST_FAILURES_24H_TOTAL, 13, "none", 0, failure_count_thresholds, "atlas-testing"),
-        (154, "Avg Coverage", overview_avg_coverage, 15, "percent", 1, test_success_thresholds, "atlas-testing"),
+        (154, "Suites With Runs (24h)", PLATFORM_TEST_ACTIVE_SUITES_24H, 15, "none", 0, perfect_count_thresholds, "atlas-testing"),
-        (155, "LOC Clean Suites", overview_loc_clean_suites, 17, "none", 0, perfect_count_thresholds, "atlas-testing"),
+        (155, "Avg Coverage", overview_avg_coverage, 17, "percent", 1, test_success_thresholds, "atlas-testing"),
    ]:
        rail_panel = stat_panel(
            panel_id,
@ -2522,8 +2538,8 @@ def build_overview():
    panels.append(
        state_timeline_panel(
            46,
-            "Test Category Pass Rate",
+            "Test Category Health",
-            overview_category_pass_rate,
+            overview_category_health,
            {"h": 6, "w": 6, "x": 15, "y": 13},
            unit="percent",
            min_value=0,
@ -2531,7 +2547,7 @@ def build_overview():
            legend="{{category}}",
            thresholds=test_success_thresholds,
            links=overview_link("atlas-testing"),
-            description="Pass rate by major test category across all suites over the last 24 hours. Blue is clean; warmer colors show categories needing attention.",
+            description="Health by major test category across all suites over the last 24 hours. Skipped tests are healthy; failures and errors lower the lane.",
        )
    )
    panels[-1]["options"]["legend"] = {"displayMode": "list", "placement": "bottom", "showLegend": False}
@ -3933,10 +3949,26 @@ def build_jobs_dashboard():
    current_gate_seen_checks = (
        f"sum by (suite) ({current_gate_seen_vector})"
    )
-    current_gate_health_by_suite = (
+    current_gate_check_health_by_suite = (
        f"((100 * ({current_gate_ok_checks}) / clamp_min(({current_gate_seen_checks}), 1)) "
        f"or on(suite) ({selected_suite_missing}))"
    )
+    current_category_health_by_suite = (
+        f'min by (suite) (platform_quality:test_case_health_rate:percent_1h{{suite=~"{suite_var}",'
+        f'branch!="",branch=~"{branch_var}",test!="",test!="__no_test_cases__",'
+        f'category=~"{PLATFORM_TEST_CATEGORY_REGEX}"}})'
+    )
+    current_gate_health_by_suite = (
+        f"((min by (suite) (({current_gate_check_health_by_suite}) "
+        f"or ({current_category_health_by_suite}))) or on(suite) ({selected_suite_missing}))"
+    )
+    current_gate_health = (
+        f"(avg(clamp_min(({current_gate_health_by_suite}), 0)) or on() vector(0))"
+    )
+    suite_freshness_24h = (
+        f"100 * (sum(({runs_by_suite_24h}) > bool 0) or on() vector(0)) "
+        f"/ clamp_min(count(({selected_suite_universe})), 1)"
+    )
    success_history_runs = f'sum by (suite) ({platform_runs_increase(runs_selector, "7d")})'
    success_history_by_suite = (
        f'(100 * sum by (suite) ({platform_runs_increase(runs_success_selector, "7d")}) '
@ -4094,7 +4126,7 @@ def build_jobs_dashboard():
        f'branch!="",branch=~"{branch_var}",test!="",test=~"{test_var}",test!="__no_test_cases__"}})'
    )
    category_pass_rate_history = (
-        f'avg by (category) (platform_quality:test_case_pass_rate:percent_1h{{suite=~"{suite_var}",'
+        f'avg by (category) (platform_quality:test_case_health_rate:percent_1h{{suite=~"{suite_var}",'
        f'branch!="",branch=~"{branch_var}",test!="",test!="__no_test_cases__",'
        f'category=~"{PLATFORM_TEST_CATEGORY_REGEX}"}})'
    )
@ -4195,6 +4227,18 @@ def build_jobs_dashboard():
        ],
    }
    panels.append(
        stat_panel(
            156,
            "Current Gate Health (%)",
            current_gate_health,
            {"h": 5, "w": 3, "x": 0, "y": 0},
            unit="percent",
            decimals=2,
            instant=True,
            thresholds=success_thresholds,
        )
    )
    panels.append(
        stat_panel(
            2,
@ -4244,6 +4288,18 @@ def build_jobs_dashboard():
            },
        )
    )
    panels.append(
        stat_panel(
            157,
            "Suite Freshness (24h)",
            suite_freshness_24h,
            {"h": 5, "w": 3, "x": 15, "y": 0},
            unit="percent",
            decimals=0,
            instant=True,
            thresholds=success_thresholds,
        )
    )
    panels.append(
        stat_panel(
            6,
@ -4271,7 +4327,7 @@ def build_jobs_dashboard():
    panels.append(
        bargauge_panel(
            8,
-            "Latest Gate Checks Passing by Suite",
+            "Latest Gate Health by Suite",
            current_gate_health_by_suite,
            {"h": 8, "w": 8, "x": 0, "y": 5},
            unit="percent",
@ -4286,8 +4342,8 @@ def build_jobs_dashboard():
        {"type": "value", "options": {"-1": {"text": "missing"}}}
    ]
    panels[-1]["description"] = (
-        "Latest pass percentage across required gate checks in the daily freshness window. "
+        "Current health by suite from required gate checks, capped by category-level test health. "
-        "100% means tests and supporting gates recently passed; raw per-test history is tracked separately."
+        "Skipped and not-applicable results are healthy; failures and errors lower the value."
    )
    reliability_suite_panel = bargauge_panel(
        9,
@ -4502,14 +4558,14 @@ def build_jobs_dashboard():
    panels.append(selected_pass_rate_panel)
    category_pass_rate_panel = state_timeline_panel(
        153,
-        "Test Category Pass Rate History",
+        "Test Category Health History",
        category_pass_rate_history,
        {"h": 8, "w": 12, "x": 12, "y": 21},
        thresholds=success_thresholds,
        legend="{{category}}",
        description=(
-            "Pass rate by test category from current per-test metrics. Use the Suite filter to focus one "
+            "Health by test category from current per-test metrics. Use the Suite filter to focus one "
-            "project; no data means that suite has not published category-aware results yet."
+            "project; skipped tests are healthy, while failures and errors lower the lane."
        ),
    )
    category_pass_rate_panel["links"] = jenkins_suite_links()
@ -4734,12 +4790,14 @@ def build_jobs_dashboard():
    # and legend before the operator asks for them.
    panel_by_id = {panel["id"]: panel for panel in panels}
    visible_layout = {
-        2: {"h": 4, "w": 4, "x": 0, "y": 0},
+        156: {"h": 4, "w": 3, "x": 0, "y": 0},
-        3: {"h": 4, "w": 4, "x": 4, "y": 0},
+        2: {"h": 4, "w": 3, "x": 3, "y": 0},
-        4: {"h": 4, "w": 4, "x": 8, "y": 0},
+        3: {"h": 4, "w": 3, "x": 6, "y": 0},
-        5: {"h": 4, "w": 4, "x": 12, "y": 0},
+        4: {"h": 4, "w": 3, "x": 9, "y": 0},
-        6: {"h": 4, "w": 4, "x": 16, "y": 0},
+        5: {"h": 4, "w": 3, "x": 12, "y": 0},
-        7: {"h": 4, "w": 4, "x": 20, "y": 0},
+        157: {"h": 4, "w": 3, "x": 15, "y": 0},
+        6: {"h": 4, "w": 3, "x": 18, "y": 0},
+        7: {"h": 4, "w": 3, "x": 21, "y": 0},
        8: {"h": 7, "w": 6, "x": 0, "y": 4},
        9: {"h": 7, "w": 6, "x": 6, "y": 4},
        17: {"h": 7, "w": 6, "x": 12, "y": 4},
--- a/scripts/tests/test_dashboards_render_atlas.py
+++ b/scripts/tests/test_dashboards_render_atlas.py
@ -72,14 +72,14 @@ def test_overview_uses_readable_quality_power_and_gitops_panels():
    ]
    assert "atlas-jobs" not in repr(dashboard)
    assert "Platform Test Success Rate" not in panels_by_title
-    assert panels_by_title["Test Category Pass Rate"]["type"] == "state-timeline"
+    assert panels_by_title["Test Category Health"]["type"] == "state-timeline"
-    assert panels_by_title["Test Category Pass Rate"]["gridPos"] == {"h": 6, "w": 6, "x": 15, "y": 13}
+    assert panels_by_title["Test Category Health"]["gridPos"] == {"h": 6, "w": 6, "x": 15, "y": 13}
-    assert panels_by_title["Test Category Pass Rate"]["targets"][0]["legendFormat"] == "{{category}}"
+    assert panels_by_title["Test Category Health"]["targets"][0]["legendFormat"] == "{{category}}"
-    assert "${overview_suite:regex}" not in panels_by_title["Test Category Pass Rate"]["targets"][0]["expr"]
+    assert "${overview_suite:regex}" not in panels_by_title["Test Category Health"]["targets"][0]["expr"]
-    assert mod.PLATFORM_TEST_SUITE_CANONICAL_MATCHER in panels_by_title["Test Category Pass Rate"]["targets"][0]["expr"]
+    assert mod.PLATFORM_TEST_SUITE_CANONICAL_MATCHER in panels_by_title["Test Category Health"]["targets"][0]["expr"]
-    assert "platform_quality:test_case_pass_rate:percent_1h" in panels_by_title["Test Category Pass Rate"]["targets"][0]["expr"]
+    assert "platform_quality:test_case_health_rate:percent_1h" in panels_by_title["Test Category Health"]["targets"][0]["expr"]
-    assert panels_by_title["Test Category Pass Rate"]["timeFrom"] == "24h"
+    assert panels_by_title["Test Category Health"]["timeFrom"] == "24h"
-    assert f'category=~"{mod.PLATFORM_TEST_OVERVIEW_CATEGORY_REGEX}"' in panels_by_title["Test Category Pass Rate"]["targets"][0]["expr"]
+    assert f'category=~"{mod.PLATFORM_TEST_OVERVIEW_CATEGORY_REGEX}"' in panels_by_title["Test Category Health"]["targets"][0]["expr"]
    assert "manual" not in mod.PLATFORM_TEST_OVERVIEW_CATEGORY_REGEX
    assert "unit" not in mod.PLATFORM_TEST_OVERVIEW_CATEGORY_REGEX
    assert panels_by_title["UPS History (Power Draw)"]["gridPos"] == {"h": 6, "w": 6, "x": 3, "y": 7}
@ -124,25 +124,28 @@ def test_overview_uses_readable_quality_power_and_gitops_panels():
    assert panels_by_title["Flux Source"]["type"] == "stat"
    assert panels_by_title["Flux Source"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 7}
    assert panels_by_title["Flux Source"]["targets"][0]["legendFormat"] == "{{branch}}"
-    assert panels_by_title["CI Run Success (24h)"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 9}
+    assert panels_by_title["Current Gate Health"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 9}
-    assert panels_by_title["Suites With Runs (24h)"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 13}
+    assert "platform_quality:test_case_health_rate:percent_1h" in panels_by_title["Current Gate Health"]["targets"][0]["expr"]
+    assert panels_by_title["CI Run Success (24h)"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 11}
+    assert panels_by_title["Suites With Runs (24h)"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 15}
    suites_reporting_expr = panels_by_title["Suites With Runs (24h)"]["targets"][0]["expr"]
    assert "> bool 0" in suites_reporting_expr
    assert mod.PLATFORM_TEST_SUITE_CANONICAL_MATCHER in suites_reporting_expr
    assert "bstein-home" not in suites_reporting_expr
    assert "published quality-gate run" in panels_by_title["Suites With Runs (24h)"]["description"]
-    assert panels_by_title["LOC Clean Suites"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 17}
+    assert panels_by_title["Avg Coverage"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 17}
+    assert "LOC Clean Suites" not in panels_by_title
    assert panels_by_title["GitOps Health"]["type"] == "state-timeline"
    assert panels_by_title["GitOps Health"]["gridPos"] == {"h": 6, "w": 6, "x": 15, "y": 7}
    gitops_expr = panels_by_title["GitOps Health"]["targets"][0]["expr"]
    assert "Kustomizations Not Suspended" in gitops_expr
    assert "HelmReleases Not Suspended" in gitops_expr
-    assert panels_by_title["Test Category Pass Rate"]["type"] == "state-timeline"
+    assert panels_by_title["Test Category Health"]["type"] == "state-timeline"
-    assert panels_by_title["Test Category Pass Rate"]["options"]["legend"]["showLegend"] is False
+    assert panels_by_title["Test Category Health"]["options"]["legend"]["showLegend"] is False
-    assert panels_by_title["Test Category Pass Rate"]["options"]["mergeValues"] is False
+    assert panels_by_title["Test Category Health"]["options"]["mergeValues"] is False
-    assert panels_by_title["Test Category Pass Rate"]["options"]["showValue"] == "auto"
+    assert panels_by_title["Test Category Health"]["options"]["showValue"] == "auto"
-    assert panels_by_title["Test Category Pass Rate"]["options"]["rowHeight"] == 0.9
+    assert panels_by_title["Test Category Health"]["options"]["rowHeight"] == 0.9
-    assert panels_by_title["Test Category Pass Rate"]["targets"][0]["legendFormat"] == "{{category}}"
+    assert panels_by_title["Test Category Health"]["targets"][0]["legendFormat"] == "{{category}}"
    assert not any(variable["name"] == "overview_suite" for variable in dashboard["templating"]["list"])
    pvc_backup_expr = panels_by_title["PVC Backup Health / Age"]["targets"][0]["expr"]
@ -214,7 +217,10 @@ def test_jobs_dashboard_separates_current_gate_health_from_reliability():
    dashboard = mod.build_jobs_dashboard()
    panels_by_title = {panel["title"]: panel for panel in flatten_panels(dashboard["panels"])}
-    assert "Latest Gate Checks Passing by Suite" in panels_by_title
+    assert "Current Gate Health (%)" in panels_by_title
+    assert "Suite Freshness (24h)" in panels_by_title
+    assert "Latest Gate Health by Suite" in panels_by_title
+    assert "Latest Gate Checks Passing by Suite" not in panels_by_title
    assert "CI Run Success by Suite (24h)" in panels_by_title
    assert "CI Run Success by Suite (7d rolling)" in panels_by_title
    assert "Daily Run Volume (Selected Scope)" in panels_by_title
@ -227,9 +233,10 @@ def test_jobs_dashboard_separates_current_gate_health_from_reliability():
    assert "Failures by Suite (24h)" not in panels_by_title
    assert "Success Rate by Suite (24h)" not in panels_by_title
-    current_gate_expr = panels_by_title["Latest Gate Checks Passing by Suite"]["targets"][0]["expr"]
+    current_gate_expr = panels_by_title["Latest Gate Health by Suite"]["targets"][0]["expr"]
    assert 'check)' in current_gate_expr
    assert "platform_quality:check_status:present_1h" in current_gate_expr
+    assert "platform_quality:test_case_health_rate:percent_1h" in current_gate_expr
    assert '.*_quality_gate_checks_total' not in current_gate_expr
    assert "last_over_time" not in current_gate_expr
    assert 'label_replace' not in current_gate_expr
@ -237,7 +244,10 @@ def test_jobs_dashboard_separates_current_gate_health_from_reliability():
    assert 'status!~"ok|passed|success|not_applicable|skipped|na|n/a"' in current_gate_expr
    assert "unless on(suite, check)" in current_gate_expr
    assert "tlast_over_time" not in current_gate_expr
-    assert panels_by_title["Latest Gate Checks Passing by Suite"]["gridPos"]["w"] == 6
+    assert panels_by_title["Current Gate Health (%)"]["gridPos"] == {"h": 4, "w": 3, "x": 0, "y": 0}
+    assert "platform_quality:test_case_health_rate:percent_1h" in panels_by_title["Current Gate Health (%)"]["targets"][0]["expr"]
+    assert panels_by_title["Suite Freshness (24h)"]["gridPos"] == {"h": 4, "w": 3, "x": 15, "y": 0}
+    assert panels_by_title["Latest Gate Health by Suite"]["gridPos"]["w"] == 6
    assert panels_by_title["CI Run Success by Suite (24h)"]["gridPos"]["w"] == 6
    assert panels_by_title["Coverage by Suite (Latest, gate 95)"]["gridPos"] == {"h": 7, "w": 6, "x": 12, "y": 4}
    assert panels_by_title["Files <=500 LOC by Suite (Latest)"]["gridPos"] == {"h": 7, "w": 6, "x": 18, "y": 4}
@ -255,13 +265,14 @@ def test_jobs_dashboard_separates_current_gate_health_from_reliability():
    rolling_panel = panels_by_title["CI Run Success by Suite (7d rolling)"]
    assert rolling_panel["type"] == "state-timeline"
    assert "[7d:1m]" in rolling_panel["targets"][0]["expr"]
-    category_panel = panels_by_title["Test Category Pass Rate History"]
+    category_panel = panels_by_title["Test Category Health History"]
    assert category_panel["type"] == "state-timeline"
    assert "category" in category_panel["targets"][0]["expr"]
+    assert "platform_quality:test_case_health_rate:percent_1h" in category_panel["targets"][0]["expr"]
    assert f'category=~"{mod.PLATFORM_TEST_CATEGORY_REGEX}"' in category_panel["targets"][0]["expr"]
    assert "installer" not in mod.PLATFORM_TEST_CATEGORY_REGEX
    assert "Use the Suite filter" in category_panel["description"]
-    assert "category-aware results" in category_panel["description"]
+    assert "skipped tests are healthy" in category_panel["description"]
    coverage_panel = panels_by_title["Coverage History by Suite"]
    loc_panel = panels_by_title["Files <=500 LOC History by Suite"]
@ -311,9 +322,9 @@ def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint():
        for child in row.get("panels", [])
    }
-    assert len(panels) == 16
+    assert len(panels) == 18
-    assert len(visible_query_panels) == 10
+    assert len(visible_query_panels) == 12
-    assert sum(len(panel.get("targets", [])) for panel in visible_query_panels) == 10
+    assert sum(len(panel.get("targets", [])) for panel in visible_query_panels) == 12
    assert all(
        panel["title"] != "Coverage Gap to 95% by Suite"
        for panel in visible_query_panels
@ -330,7 +341,7 @@ def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint():
    assert "Coverage Failure Rate" in nested_panels_by_title
    assert "Supply Chain Healthy Rate" in nested_panels_by_title
-    assert "Test Category Pass Rate History" in nested_panels_by_title
+    assert "Test Category Health History" in nested_panels_by_title
    assert "Selected Test Pass Rate History" in nested_panels_by_title
    assert "Coverage Metrics Present by Suite" in nested_panels_by_title
    assert "SonarQube API Up" in nested_panels_by_title
--- a/services/monitoring/dashboards/atlas-overview.json
+++ b/services/monitoring/dashboards/atlas-overview.json
@ -2114,7 +2114,7 @@
    {
      "id": 151,
      "type": "stat",
-      "title": "CI Run Success (24h)",
+      "title": "Current Gate Health",
      "datasource": {
        "type": "prometheus",
        "uid": "atlas-vm"
@ -2125,6 +2125,92 @@
        "x": 21,
        "y": 9
      },
      "targets": [
        {
          "expr": "(avg((min by (suite) (((100 * sum by (suite) (clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan_iac|typhon|bstein_home|data_prepper\",branch=~\"main|master|origin/main|origin/master\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"})) > 0), 1) unless on(suite, check) (clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan_iac|typhon|bstein_home|data_prepper\",branch=~\"main|master|origin/main|origin/master\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"})) > 0), 1))) / clamp_min(sum by (suite) (clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan_iac|typhon|bstein_home|data_prepper\",branch=~\"main|master|origin/main|origin/master\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status!=\"\"})) > 0), 1)), 1))) or (min by (suite) (platform_quality:test_case_health_rate:percent_1h{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan_iac|typhon|bstein_home|data_prepper\",branch!=\"\",branch=~\"main|master|origin/main|origin/master\",test!=\"\",test!=\"__no_test_cases__\",category=~\"api|chaos|compatibility|component|contract|e2e|integration|manual|performance|regression|reliability|security|smoke|system|ui|unit\"}))))) or on() vector(0))",
          "refId": "A",
          "instant": true
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "dark-red",
                "value": null
              },
              {
                "color": "dark-orange",
                "value": 70
              },
              {
                "color": "dark-yellow",
                "value": 85
              },
              {
                "color": "dark-green",
                "value": 95
              },
              {
                "color": "dark-blue",
                "value": 100
              }
            ]
          },
          "unit": "percent",
          "custom": {
            "displayMode": "auto"
          },
          "decimals": 1
        },
        "overrides": []
      },
      "options": {
        "colorMode": "value",
        "graphMode": "none",
        "justifyMode": "center",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "value",
        "text": {
          "titleSize": 10,
          "valueSize": 19
        }
      },
      "links": [
        {
          "title": "Open atlas-testing dashboard",
          "url": "/d/atlas-testing",
          "targetBlank": true
        }
      ],
      "description": "Current gate-check health across suites; skipped or not-applicable checks count as healthy, failures lower it."
    },
    {
      "id": 152,
      "type": "stat",
      "title": "CI Run Success (24h)",
      "datasource": {
        "type": "prometheus",
        "uid": "atlas-vm"
      },
      "gridPos": {
        "h": 2,
        "w": 3,
        "x": 21,
        "y": 11
      },
      "targets": [
        {
          "expr": "100 * ((sum(increase((max without(instance, job) (platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|pegasus-health|pegasus_health|soteria|titan_iac|titan-iac|typhon|bstein_home|bstein-home|data_prepper|data-prepper\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}))[24h:1m])) or on() vector(0))) / clamp_min(((sum(increase((max without(instance, job) (platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|pegasus-health|pegasus_health|soteria|titan_iac|titan-iac|typhon|bstein_home|bstein-home|data_prepper|data-prepper\",exported_job=\"platform-quality-ci\"}))[24h:1m])) or on() vector(0))), 1)",
@ -2198,7 +2284,7 @@
      "description": "Percent of published quality-gate CI runs that completed successfully in 24h; this is automation health, not raw test pass rate."
    },
    {
-      "id": 152,
+      "id": 153,
      "type": "stat",
      "title": "Failed Runs (24h)",
      "datasource": {
@ -2209,7 +2295,7 @@
        "h": 2,
        "w": 3,
        "x": 21,
-        "y": 11
+        "y": 13
      },
      "targets": [
        {
@ -2280,7 +2366,7 @@
      "description": "Published quality-gate runs that failed in 24h; zero is good, any value needs a look."
    },
    {
-      "id": 153,
+      "id": 154,
      "type": "stat",
      "title": "Suites With Runs (24h)",
      "datasource": {
@ -2291,7 +2377,7 @@
        "h": 2,
        "w": 3,
        "x": 21,
-        "y": 13
+        "y": 15
      },
      "targets": [
        {
@ -2362,7 +2448,7 @@
      "description": "Configured suites with at least one published quality-gate run in 24h; full count means the dashboard is fresh."
    },
    {
-      "id": 154,
+      "id": 155,
      "type": "stat",
      "title": "Avg Coverage",
      "datasource": {
@ -2373,7 +2459,7 @@
        "h": 2,
        "w": 3,
        "x": 21,
-        "y": 15
+        "y": 17
      },
      "targets": [
        {
@ -2447,88 +2533,6 @@
      ],
      "description": "Average latest line coverage across suites; higher means code is better protected by tests."
    },
    {
      "id": 155,
      "type": "stat",
      "title": "LOC Clean Suites",
      "datasource": {
        "type": "prometheus",
        "uid": "atlas-vm"
      },
      "gridPos": {
        "h": 2,
        "w": 3,
        "x": 21,
        "y": 17
      },
      "targets": [
        {
          "expr": "(sum((max by (suite) ((last_over_time(platform_quality_gate_source_lines_over_500_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan_iac|typhon|bstein_home|data_prepper\",exported_job=\"platform-quality-ci\"}[30d])) and (topk by (suite) (1, tlast_over_time(platform_quality_gate_source_lines_over_500_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan_iac|typhon|bstein_home|data_prepper\",exported_job=\"platform-quality-ci\"}[30d]))))) == bool 0) or on() vector(0))",
          "refId": "A",
          "instant": true
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "dark-red",
                "value": null
              },
              {
                "color": "dark-yellow",
                "value": 9
              },
              {
                "color": "dark-green",
                "value": 10
              },
              {
                "color": "dark-blue",
                "value": 11
              }
            ]
          },
          "unit": "none",
          "custom": {
            "displayMode": "auto"
          },
          "decimals": 0
        },
        "overrides": []
      },
      "options": {
        "colorMode": "value",
        "graphMode": "none",
        "justifyMode": "center",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "value",
        "text": {
          "titleSize": 10,
          "valueSize": 19
        }
      },
      "links": [
        {
          "title": "Open atlas-testing dashboard",
          "url": "/d/atlas-testing",
          "targetBlank": true
        }
      ],
      "description": "Suites with no source files over 500 LOC; full count is good for maintainability."
    },
    {
      "id": 150,
      "type": "state-timeline",
@ -2795,8 +2799,8 @@
    {
      "id": 46,
      "type": "state-timeline",
-      "title": "Test Category Pass Rate",
+      "title": "Test Category Health",
-      "description": "Pass rate by major test category across all suites over the last 24 hours. Blue is clean; warmer colors show categories needing attention.",
+      "description": "Health by major test category across all suites over the last 24 hours. Skipped tests are healthy; failures and errors lower the lane.",
      "datasource": {
        "type": "prometheus",
        "uid": "atlas-vm"
@ -2809,7 +2813,7 @@
      },
      "targets": [
        {
-          "expr": "avg by (category) (platform_quality:test_case_pass_rate:percent_1h{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan_iac|typhon|bstein_home|data_prepper\",branch!=\"\",branch=~\"main|master|origin/main|origin/master\",test!=\"\",test!=\"__no_test_cases__\",category=~\"api|chaos|compatibility|component|contract|e2e|integration|performance|regression|reliability|security|smoke|system|ui\"})",
+          "expr": "avg by (category) (platform_quality:test_case_health_rate:percent_1h{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan_iac|typhon|bstein_home|data_prepper\",branch!=\"\",branch=~\"main|master|origin/main|origin/master\",test!=\"\",test!=\"__no_test_cases__\",category=~\"api|chaos|compatibility|component|contract|e2e|integration|performance|regression|reliability|security|smoke|system|ui\"})",
          "refId": "A",
          "legendFormat": "{{category}}"
        }
--- a/services/monitoring/dashboards/atlas-testing.json
+++ b/services/monitoring/dashboards/atlas-testing.json
@ -4,6 +4,81 @@
  "folderUid": "atlas-public",
  "editable": false,
  "panels": [
    {
      "id": 156,
      "type": "stat",
      "title": "Current Gate Health (%)",
      "datasource": {
        "type": "prometheus",
        "uid": "atlas-vm"
      },
      "gridPos": {
        "h": 4,
        "w": 3,
        "x": 0,
        "y": 0
      },
      "targets": [
        {
          "expr": "(avg(clamp_min((((min by (suite) ((((100 * (sum by (suite) (((clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"})) > 0), 1)) unless on(suite, check) (clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"})) > 0), 1))))) / clamp_min((sum by (suite) (clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status!=\"\"})) > 0), 1))), 1)) or on(suite) ((((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))) - 1)))) or (min by (suite) (platform_quality:test_case_health_rate:percent_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",test!=\"\",test!=\"__no_test_cases__\",category=~\"api|chaos|compatibility|component|contract|e2e|integration|manual|performance|regression|reliability|security|smoke|system|ui|unit\"})))) or on(suite) ((((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) 
(max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))) - 1)))), 0)) or on() vector(0))",
          "refId": "A",
          "instant": true
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "dark-red",
                "value": null
              },
              {
                "color": "dark-orange",
                "value": 90
              },
              {
                "color": "dark-yellow",
                "value": 93
              },
              {
                "color": "dark-green",
                "value": 95
              },
              {
                "color": "dark-blue",
                "value": 100
              }
            ]
          },
          "unit": "percent",
          "custom": {
            "displayMode": "auto"
          },
          "decimals": 2
        },
        "overrides": []
      },
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "center",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "value"
      },
      "description": "Average latest required gate checks passing across selected suites; this is the current quality state."
    },
    {
      "id": 2,
      "type": "stat",
@ -14,8 +89,8 @@
      },
      "gridPos": {
        "h": 4,
-        "w": 4,
+        "w": 3,
-        "x": 0,
+        "x": 3,
        "y": 0
      },
      "targets": [
@ -89,8 +164,8 @@
      },
      "gridPos": {
        "h": 4,
-        "w": 4,
+        "w": 3,
-        "x": 4,
+        "x": 6,
        "y": 0
      },
      "targets": [
@ -164,8 +239,8 @@
      },
      "gridPos": {
        "h": 4,
-        "w": 4,
+        "w": 3,
-        "x": 8,
+        "x": 9,
        "y": 0
      },
      "targets": [
@ -238,7 +313,7 @@
      },
      "gridPos": {
        "h": 4,
-        "w": 4,
+        "w": 3,
        "x": 12,
        "y": 0
      },
@ -290,6 +365,81 @@
      },
      "description": "Selected quality-gate CI run count in 24h; zero means the dashboard may be stale."
    },
    {
      "id": 157,
      "type": "stat",
      "title": "Suite Freshness (24h)",
      "datasource": {
        "type": "prometheus",
        "uid": "atlas-vm"
      },
      "gridPos": {
        "h": 4,
        "w": 3,
        "x": 15,
        "y": 0
      },
      "targets": [
        {
          "expr": "100 * (sum((sum by (suite) (increase((max without(instance, job) (platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}))[24h:1m]))) > bool 0) or on() vector(0)) / clamp_min(count(((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))), 1)",
          "refId": "A",
          "instant": true
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "dark-red",
                "value": null
              },
              {
                "color": "dark-orange",
                "value": 90
              },
              {
                "color": "dark-yellow",
                "value": 93
              },
              {
                "color": "dark-green",
                "value": 95
              },
              {
                "color": "dark-blue",
                "value": 100
              }
            ]
          },
          "unit": "percent",
          "custom": {
            "displayMode": "auto"
          },
          "decimals": 0
        },
        "overrides": []
      },
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "center",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "value"
      },
      "description": "Percent of selected suites with at least one quality-gate CI run in 24h; 100% means inputs are fresh."
    },
    {
      "id": 6,
      "type": "stat",
@ -300,8 +450,8 @@
      },
      "gridPos": {
        "h": 4,
-        "w": 4,
+        "w": 3,
-        "x": 16,
+        "x": 18,
        "y": 0
      },
      "targets": [
@ -375,8 +525,8 @@
      },
      "gridPos": {
        "h": 4,
-        "w": 4,
+        "w": 3,
-        "x": 20,
+        "x": 21,
        "y": 0
      },
      "targets": [
@ -442,7 +592,7 @@
    {
      "id": 8,
      "type": "bargauge",
-      "title": "Latest Gate Checks Passing by Suite",
+      "title": "Latest Gate Health by Suite",
      "datasource": {
        "type": "prometheus",
        "uid": "atlas-vm"
@ -455,7 +605,7 @@
      },
      "targets": [
        {
-          "expr": "sort(((100 * (sum by (suite) (((clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"})) > 0), 1)) unless on(suite, check) (clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"})) > 0), 1))))) / clamp_min((sum by (suite) (clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status!=\"\"})) > 0), 1))), 1)) or on(suite) ((((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))) - 1))))",
+          "expr": "sort(((min by (suite) ((((100 * (sum by (suite) (((clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"})) > 0), 1)) unless on(suite, check) (clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"})) > 0), 1))))) / clamp_min((sum by (suite) (clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status!=\"\"})) > 0), 1))), 1)) or on(suite) ((((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))) - 1)))) or (min by (suite) (platform_quality:test_case_health_rate:percent_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",test!=\"\",test!=\"__no_test_cases__\",category=~\"api|chaos|compatibility|component|contract|e2e|integration|manual|performance|regression|reliability|security|smoke|system|ui|unit\"})))) or on(suite) ((((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) 
(max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))) - 1))))",
          "refId": "A",
          "legendFormat": "{{suite}}",
          "instant": true
@ -530,7 +680,7 @@
          }
        }
      ],
-      "description": "Latest pass percentage across required gate checks in the daily freshness window. 100% means tests and supporting gates recently passed; raw per-test history is tracked separately."
+      "description": "Current health by suite from required gate checks, capped by category-level test health. Skipped and not-applicable results are healthy; failures and errors lower the value."
    },
    {
      "id": 9,
@ -902,8 +1052,8 @@
        {
          "id": 153,
          "type": "state-timeline",
-          "title": "Test Category Pass Rate History",
+          "title": "Test Category Health History",
-          "description": "Pass rate by test category from current per-test metrics. Use the Suite filter to focus one project; no data means that suite has not published category-aware results yet.",
+          "description": "Health by test category from current per-test metrics. Use the Suite filter to focus one project; skipped tests are healthy, while failures and errors lower the lane.",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
@ -916,7 +1066,7 @@
          },
          "targets": [
            {
-              "expr": "avg by (category) (platform_quality:test_case_pass_rate:percent_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",test!=\"\",test!=\"__no_test_cases__\",category=~\"api|chaos|compatibility|component|contract|e2e|integration|manual|performance|regression|reliability|security|smoke|system|ui|unit\"})",
+              "expr": "avg by (category) (platform_quality:test_case_health_rate:percent_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",test!=\"\",test!=\"__no_test_cases__\",category=~\"api|chaos|compatibility|component|contract|e2e|integration|manual|performance|regression|reliability|security|smoke|system|ui|unit\"})",
              "refId": "A",
              "legendFormat": "{{category}}"
            }
--- a/services/monitoring/grafana-dashboard-overview.yaml
+++ b/services/monitoring/grafana-dashboard-overview.yaml
@ -2123,7 +2123,7 @@ data:
        {
          "id": 151,
          "type": "stat",
-          "title": "CI Run Success (24h)",
+          "title": "Current Gate Health",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
@ -2134,6 +2134,92 @@ data:
            "x": 21,
            "y": 9
          },
          "targets": [
            {
              "expr": "(avg((min by (suite) (((100 * sum by (suite) (clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan_iac|typhon|bstein_home|data_prepper\",branch=~\"main|master|origin/main|origin/master\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"})) > 0), 1) unless on(suite, check) (clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan_iac|typhon|bstein_home|data_prepper\",branch=~\"main|master|origin/main|origin/master\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"})) > 0), 1))) / clamp_min(sum by (suite) (clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan_iac|typhon|bstein_home|data_prepper\",branch=~\"main|master|origin/main|origin/master\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status!=\"\"})) > 0), 1)), 1))) or (min by (suite) (platform_quality:test_case_health_rate:percent_1h{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan_iac|typhon|bstein_home|data_prepper\",branch!=\"\",branch=~\"main|master|origin/main|origin/master\",test!=\"\",test!=\"__no_test_cases__\",category=~\"api|chaos|compatibility|component|contract|e2e|integration|manual|performance|regression|reliability|security|smoke|system|ui|unit\"}))))) or on() vector(0))",
              "refId": "A",
              "instant": true
            }
          ],
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "thresholds"
              },
              "mappings": [],
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {
                    "color": "dark-red",
                    "value": null
                  },
                  {
                    "color": "dark-orange",
                    "value": 70
                  },
                  {
                    "color": "dark-yellow",
                    "value": 85
                  },
                  {
                    "color": "dark-green",
                    "value": 95
                  },
                  {
                    "color": "dark-blue",
                    "value": 100
                  }
                ]
              },
              "unit": "percent",
              "custom": {
                "displayMode": "auto"
              },
              "decimals": 1
            },
            "overrides": []
          },
          "options": {
            "colorMode": "value",
            "graphMode": "none",
            "justifyMode": "center",
            "reduceOptions": {
              "calcs": [
                "lastNotNull"
              ],
              "fields": "",
              "values": false
            },
            "textMode": "value",
            "text": {
              "titleSize": 10,
              "valueSize": 19
            }
          },
          "links": [
            {
              "title": "Open atlas-testing dashboard",
              "url": "/d/atlas-testing",
              "targetBlank": true
            }
          ],
          "description": "Current gate-check health across suites; skipped or not-applicable checks count as healthy, failures lower it."
        },
        {
          "id": 152,
          "type": "stat",
          "title": "CI Run Success (24h)",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 2,
            "w": 3,
            "x": 21,
            "y": 11
          },
          "targets": [
            {
              "expr": "100 * ((sum(increase((max without(instance, job) (platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|pegasus-health|pegasus_health|soteria|titan_iac|titan-iac|typhon|bstein_home|bstein-home|data_prepper|data-prepper\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}))[24h:1m])) or on() vector(0))) / clamp_min(((sum(increase((max without(instance, job) (platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|pegasus-health|pegasus_health|soteria|titan_iac|titan-iac|typhon|bstein_home|bstein-home|data_prepper|data-prepper\",exported_job=\"platform-quality-ci\"}))[24h:1m])) or on() vector(0))), 1)",
@ -2207,7 +2293,7 @@ data:
          "description": "Percent of published quality-gate CI runs that completed successfully in 24h; this is automation health, not raw test pass rate."
        },
        {
-          "id": 152,
+          "id": 153,
          "type": "stat",
          "title": "Failed Runs (24h)",
          "datasource": {
@ -2218,7 +2304,7 @@ data:
            "h": 2,
            "w": 3,
            "x": 21,
-            "y": 11
+            "y": 13
          },
          "targets": [
            {
@ -2289,7 +2375,7 @@ data:
          "description": "Published quality-gate runs that failed in 24h; zero is good, any value needs a look."
        },
        {
-          "id": 153,
+          "id": 154,
          "type": "stat",
          "title": "Suites With Runs (24h)",
          "datasource": {
@ -2300,7 +2386,7 @@ data:
            "h": 2,
            "w": 3,
            "x": 21,
-            "y": 13
+            "y": 15
          },
          "targets": [
            {
@ -2371,7 +2457,7 @@ data:
          "description": "Configured suites with at least one published quality-gate run in 24h; full count means the dashboard is fresh."
        },
        {
-          "id": 154,
+          "id": 155,
          "type": "stat",
          "title": "Avg Coverage",
          "datasource": {
@ -2382,7 +2468,7 @@ data:
            "h": 2,
            "w": 3,
            "x": 21,
-            "y": 15
+            "y": 17
          },
          "targets": [
            {
@ -2456,88 +2542,6 @@ data:
          ],
          "description": "Average latest line coverage across suites; higher means code is better protected by tests."
        },
        {
          "id": 155,
          "type": "stat",
          "title": "LOC Clean Suites",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 2,
            "w": 3,
            "x": 21,
            "y": 17
          },
          "targets": [
            {
              "expr": "(sum((max by (suite) ((last_over_time(platform_quality_gate_source_lines_over_500_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan_iac|typhon|bstein_home|data_prepper\",exported_job=\"platform-quality-ci\"}[30d])) and (topk by (suite) (1, tlast_over_time(platform_quality_gate_source_lines_over_500_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan_iac|typhon|bstein_home|data_prepper\",exported_job=\"platform-quality-ci\"}[30d]))))) == bool 0) or on() vector(0))",
              "refId": "A",
              "instant": true
            }
          ],
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "thresholds"
              },
              "mappings": [],
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {
                    "color": "dark-red",
                    "value": null
                  },
                  {
                    "color": "dark-yellow",
                    "value": 9
                  },
                  {
                    "color": "dark-green",
                    "value": 10
                  },
                  {
                    "color": "dark-blue",
                    "value": 11
                  }
                ]
              },
              "unit": "none",
              "custom": {
                "displayMode": "auto"
              },
              "decimals": 0
            },
            "overrides": []
          },
          "options": {
            "colorMode": "value",
            "graphMode": "none",
            "justifyMode": "center",
            "reduceOptions": {
              "calcs": [
                "lastNotNull"
              ],
              "fields": "",
              "values": false
            },
            "textMode": "value",
            "text": {
              "titleSize": 10,
              "valueSize": 19
            }
          },
          "links": [
            {
              "title": "Open atlas-testing dashboard",
              "url": "/d/atlas-testing",
              "targetBlank": true
            }
          ],
          "description": "Suites with no source files over 500 LOC; full count is good for maintainability."
        },
        {
          "id": 150,
          "type": "state-timeline",
@ -2804,8 +2808,8 @@ data:
        {
          "id": 46,
          "type": "state-timeline",
-          "title": "Test Category Pass Rate",
+          "title": "Test Category Health",
-          "description": "Pass rate by major test category across all suites over the last 24 hours. Blue is clean; warmer colors show categories needing attention.",
+          "description": "Health by major test category across all suites over the last 24 hours. Skipped tests are healthy; failures and errors lower the lane.",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
@ -2818,7 +2822,7 @@ data:
          },
          "targets": [
            {
-              "expr": "avg by (category) (platform_quality:test_case_pass_rate:percent_1h{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan_iac|typhon|bstein_home|data_prepper\",branch!=\"\",branch=~\"main|master|origin/main|origin/master\",test!=\"\",test!=\"__no_test_cases__\",category=~\"api|chaos|compatibility|component|contract|e2e|integration|performance|regression|reliability|security|smoke|system|ui\"})",
+              "expr": "avg by (category) (platform_quality:test_case_health_rate:percent_1h{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan_iac|typhon|bstein_home|data_prepper\",branch!=\"\",branch=~\"main|master|origin/main|origin/master\",test!=\"\",test!=\"__no_test_cases__\",category=~\"api|chaos|compatibility|component|contract|e2e|integration|performance|regression|reliability|security|smoke|system|ui\"})",
              "refId": "A",
              "legendFormat": "{{category}}"
            }
--- a/services/monitoring/grafana-dashboard-testing.yaml
+++ b/services/monitoring/grafana-dashboard-testing.yaml
@ -13,6 +13,81 @@ data:
      "folderUid": "atlas-public",
      "editable": false,
      "panels": [
        {
          "id": 156,
          "type": "stat",
          "title": "Current Gate Health (%)",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 4,
            "w": 3,
            "x": 0,
            "y": 0
          },
          "targets": [
            {
              "expr": "(avg(clamp_min((((min by (suite) (label_replace((((100 * (sum by (suite) (((clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"})) > 0), 1)) unless on(suite, check) (clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"})) > 0), 1))))) / clamp_min((sum by (suite) (clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status!=\"\"})) > 0), 1))), 1)) or on(suite) ((((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))) - 1)))), \"health_source\", \"checks\", \"suite\", \".*\") or label_replace((min by (suite) (platform_quality:test_case_health_rate:percent_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",test!=\"\",test!=\"__no_test_cases__\",category=~\"api|chaos|compatibility|component|contract|e2e|integration|manual|performance|regression|reliability|security|smoke|system|ui|unit\"})), \"health_source\", \"tests\", \"suite\", \".*\"))) or on(suite) ((((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))) - 1)))), 0)) or on() vector(0))",
              "refId": "A",
              "instant": true
            }
          ],
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "thresholds"
              },
              "mappings": [],
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {
                    "color": "dark-red",
                    "value": null
                  },
                  {
                    "color": "dark-orange",
                    "value": 90
                  },
                  {
                    "color": "dark-yellow",
                    "value": 93
                  },
                  {
                    "color": "dark-green",
                    "value": 95
                  },
                  {
                    "color": "dark-blue",
                    "value": 100
                  }
                ]
              },
              "unit": "percent",
              "custom": {
                "displayMode": "auto"
              },
              "decimals": 2
            },
            "overrides": []
          },
          "options": {
            "colorMode": "value",
            "graphMode": "area",
            "justifyMode": "center",
            "reduceOptions": {
              "calcs": [
                "lastNotNull"
              ],
              "fields": "",
              "values": false
            },
            "textMode": "value"
          },
          "description": "Average current gate health across selected suites: latest required gate checks passing, capped by category-level test health. Suites with no check data count as 0%; this is the current quality state."
        },
        {
          "id": 2,
          "type": "stat",
@ -23,8 +98,8 @@ data:
          },
          "gridPos": {
            "h": 4,
-            "w": 4,
+            "w": 3,
-            "x": 0,
+            "x": 3,
            "y": 0
          },
          "targets": [
@ -98,8 +173,8 @@ data:
          },
          "gridPos": {
            "h": 4,
-            "w": 4,
+            "w": 3,
-            "x": 4,
+            "x": 6,
            "y": 0
          },
          "targets": [
@ -173,8 +248,8 @@ data:
          },
          "gridPos": {
            "h": 4,
-            "w": 4,
+            "w": 3,
-            "x": 8,
+            "x": 9,
            "y": 0
          },
          "targets": [
@ -247,7 +322,7 @@ data:
          },
          "gridPos": {
            "h": 4,
-            "w": 4,
+            "w": 3,
            "x": 12,
            "y": 0
          },
@ -299,6 +374,81 @@ data:
          },
          "description": "Selected quality-gate CI run count in 24h; zero means the dashboard may be stale."
        },
        {
          "id": 157,
          "type": "stat",
          "title": "Suite Freshness (24h)",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 4,
            "w": 3,
            "x": 15,
            "y": 0
          },
          "targets": [
            {
              "expr": "100 * (sum((sum by (suite) (increase((max without(instance, job) (platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}))[24h:1m]))) > bool 0) or on() vector(0)) / clamp_min(count(((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0))), 1)",
              "refId": "A",
              "instant": true
            }
          ],
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "thresholds"
              },
              "mappings": [],
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {
                    "color": "dark-red",
                    "value": null
                  },
                  {
                    "color": "dark-orange",
                    "value": 90
                  },
                  {
                    "color": "dark-yellow",
                    "value": 93
                  },
                  {
                    "color": "dark-green",
                    "value": 95
                  },
                  {
                    "color": "dark-blue",
                    "value": 100
                  }
                ]
              },
              "unit": "percent",
              "custom": {
                "displayMode": "auto"
              },
              "decimals": 0
            },
            "overrides": []
          },
          "options": {
            "colorMode": "value",
            "graphMode": "area",
            "justifyMode": "center",
            "reduceOptions": {
              "calcs": [
                "lastNotNull"
              ],
              "fields": "",
              "values": false
            },
            "textMode": "value"
          },
          "description": "Percent of selected suites with at least one quality-gate CI run in 24h; 100% means inputs are fresh."
        },
        {
          "id": 6,
          "type": "stat",
@ -309,8 +459,8 @@ data:
          },
          "gridPos": {
            "h": 4,
-            "w": 4,
+            "w": 3,
-            "x": 16,
+            "x": 18,
            "y": 0
          },
          "targets": [
@ -384,8 +534,8 @@ data:
          },
          "gridPos": {
            "h": 4,
-            "w": 4,
+            "w": 3,
-            "x": 20,
+            "x": 21,
            "y": 0
          },
          "targets": [
@ -451,7 +601,7 @@ data:
        {
          "id": 8,
          "type": "bargauge",
-          "title": "Latest Gate Checks Passing by Suite",
+          "title": "Latest Gate Health by Suite",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
@ -464,7 +614,7 @@ data:
          },
          "targets": [
            {
-              "expr": "sort(((100 * (sum by (suite) (((clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"})) > 0), 1)) unless on(suite, check) (clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"})) > 0), 1))))) / clamp_min((sum by (suite) (clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status!=\"\"})) > 0), 1))), 1)) or on(suite) ((((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))) - 1))))",
+              "expr": "sort(((min by (suite) (label_replace((((100 * (sum by (suite) (((clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"})) > 0), 1)) unless on(suite, check) (clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"})) > 0), 1))))) / clamp_min((sum by (suite) (clamp_max(max by (suite, check) ((sum by (suite, branch, check, status) (platform_quality:check_status:present_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",check=~\"tests|coverage|loc|style|docs_naming|gate_glue|sonarqube|supply_chain\",status!=\"\"})) > 0), 1))), 1)) or on(suite) ((((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))) - 1)))), \"health_source\", \"checks\", \"suite\", \".*\") or label_replace((min by (suite) (platform_quality:test_case_health_rate:percent_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",test!=\"\",test!=\"__no_test_cases__\",category=~\"api|chaos|compatibility|component|contract|e2e|integration|manual|performance|regression|reliability|security|smoke|system|ui|unit\"})), \"health_source\", \"tests\", \"suite\", \".*\"))) or on(suite) ((((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))) - 1))))",
              "refId": "A",
              "legendFormat": "{{suite}}",
              "instant": true
@ -539,7 +689,7 @@ data:
              }
            }
          ],
-          "description": "Latest pass percentage across required gate checks in the daily freshness window. 100% means tests and supporting gates recently passed; raw per-test history is tracked separately."
+          "description": "Current health by suite from required gate checks, capped by category-level test health. Skipped and not-applicable results are healthy; failures and errors lower the value."
        },
        {
          "id": 9,
@ -911,8 +1061,8 @@ data:
            {
              "id": 153,
              "type": "state-timeline",
-              "title": "Test Category Pass Rate History",
+              "title": "Test Category Health History",
-              "description": "Pass rate by test category from current per-test metrics. Use the Suite filter to focus one project; no data means that suite has not published category-aware results yet.",
+              "description": "Health by test category from current per-test metrics. Use the Suite filter to focus one project; skipped tests are healthy, while failures and errors lower the lane.",
              "datasource": {
                "type": "prometheus",
                "uid": "atlas-vm"
@ -925,7 +1075,7 @@ data:
              },
              "targets": [
                {
-                  "expr": "avg by (category) (platform_quality:test_case_pass_rate:percent_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",test!=\"\",test!=\"__no_test_cases__\",category=~\"api|chaos|compatibility|component|contract|e2e|integration|manual|performance|regression|reliability|security|smoke|system|ui|unit\"})",
+                  "expr": "avg by (category) (platform_quality:test_case_health_rate:percent_1h{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",test!=\"\",test!=\"__no_test_cases__\",category=~\"api|chaos|compatibility|component|contract|e2e|integration|manual|performance|regression|reliability|security|smoke|system|ui|unit\"})",
                  "refId": "A",
                  "legendFormat": "{{category}}"
                }
--- a/services/monitoring/vmalert-atlas-availability.yaml
+++ b/services/monitoring/vmalert-atlas-availability.yaml
@ -191,6 +191,42 @@ data:
              )
            labels:
              rollup: hourly
          - record: platform_quality:test_case_health_rate:percent_1h
            expr: |
              100 * (
                (
                  sum by (suite, branch, test, category) (
                    platform_quality_gate_test_case_result{exported_job="platform-quality-ci",branch!="",test!="",test!="__no_test_cases__",category=~"api|chaos|compatibility|component|contract|e2e|integration|manual|performance|regression|reliability|security|smoke|system|ui|unit",status=~"passed|skipped|not_applicable|na|n/a"}
                    or label_replace(
                      platform_quality_gate_test_case_result{exported_job="platform-quality-ci",branch!="",test!="",test!="__no_test_cases__",category="",status=~"passed|skipped|not_applicable|na|n/a"},
                      "category", "uncategorized", "__name__", ".*"
                    )
                  )
                  or on(suite, branch, test, category)
                  (
                    0 * sum by (suite, branch, test, category) (
                      platform_quality_gate_test_case_result{exported_job="platform-quality-ci",branch!="",test!="",test!="__no_test_cases__",category=~"api|chaos|compatibility|component|contract|e2e|integration|manual|performance|regression|reliability|security|smoke|system|ui|unit",status=~"passed|failed|error|skipped|not_applicable|na|n/a"}
                      or label_replace(
                        platform_quality_gate_test_case_result{exported_job="platform-quality-ci",branch!="",test!="",test!="__no_test_cases__",category="",status=~"passed|failed|error|skipped|not_applicable|na|n/a"},
                        "category", "uncategorized", "__name__", ".*"
                      )
                    )
                  )
                )
              )
              /
              clamp_min(
                sum by (suite, branch, test, category) (
                  platform_quality_gate_test_case_result{exported_job="platform-quality-ci",branch!="",test!="",test!="__no_test_cases__",category=~"api|chaos|compatibility|component|contract|e2e|integration|manual|performance|regression|reliability|security|smoke|system|ui|unit",status=~"passed|failed|error|skipped|not_applicable|na|n/a"}
                  or label_replace(
                    platform_quality_gate_test_case_result{exported_job="platform-quality-ci",branch!="",test!="",test!="__no_test_cases__",category="",status=~"passed|failed|error|skipped|not_applicable|na|n/a"},
                    "category", "uncategorized", "__name__", ".*"
                  )
                ),
                1
              )
            labels:
              rollup: hourly
          - record: platform_quality:check_status:present_1h
            expr: |
              sum by (suite, branch, check, status) (