From 3102862ee99556dbd75c632551e54287dca1bafc Mon Sep 17 00:00:00 2001
From: jenkins <jenkins@bstein.dev>
Date: Mon, 18 May 2026 21:04:14 -0300
Subject: [PATCH] monitoring(testing): clarify CI run and test history labels

---
 scripts/dashboards_render_atlas.py             | 18 +++++++++---------
 .../monitoring/dashboards/atlas-overview.json  |  2 +-
 .../monitoring/dashboards/atlas-testing.json   | 10 +++++-----
 .../monitoring/grafana-dashboard-overview.yaml |  2 +-
 .../monitoring/grafana-dashboard-testing.yaml  | 10 +++++-----
 5 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py
index 11f91b98..dac81afe 100644
--- a/scripts/dashboards_render_atlas.py
+++ b/scripts/dashboards_render_atlas.py
@@ -1641,7 +1641,7 @@ OVERVIEW_PANEL_DESCRIPTIONS = {
     "Enclosure Climate History": "Temperature, humidity, and VPD over time; smooth movement is healthy, sharp swings need attention.",
     "Fan Intensity History": "Fan levels from Off to 10; warmer colors mean stronger cooling response and more thermal pressure.",
     "Flux Source": "Git branch Flux is applying; this should normally be the intended production branch.",
-    "CI Run Success (24h)": "Percent of published quality-gate CI runs that completed successfully in 24h; this is run health, not individual test pass rate.",
+    "CI Run Success (24h)": "Percent of published quality-gate CI runs that completed successfully in 24h; this is automation health, not raw test pass rate.",
     "Failed Runs (24h)": "Published quality-gate runs that failed in 24h; zero is good, any value needs a look.",
     "Suites With Runs (24h)": "Configured suites with at least one published quality-gate run in 24h; full count means the dashboard is fresh.",
     "Avg Coverage": "Average latest line coverage across suites; higher means code is better protected by tests.",
@@ -1683,12 +1683,12 @@ TESTING_PANEL_DESCRIPTIONS = {
     "CI Runs (24h)": "Selected quality-gate CI run count in 24h; zero means the dashboard may be stale.",
     "Avg Coverage (%)": "Average latest line coverage for selected suites; higher means better test protection.",
     "Suites with LOC >500": "Selected suites with oversized source files; zero is good for maintainability.",
-    "Latest Gate Checks Passing by Suite": "Latest required gate checks passing by suite in the daily freshness window; this includes tests, coverage, LOC, style, and related gates.",
+    "Latest Gate Checks Passing by Suite": "Latest required gate checks passing by suite; this includes tests plus coverage, LOC, style, and other gates.",
     "CI Run Success by Suite (24h)": "24h CI run success rate by suite; lower rows mean recent jobs failed, aborted, or could not complete cleanly.",
     "Coverage by Suite (Latest, gate 95)": "Latest suite coverage; 95%+ is acceptable and 100% is strongest.",
     "Files <=500 LOC by Suite (Latest)": "Percent of source files within the 500-line limit; higher is easier to maintain.",
-    "CI Runs And Test History": "Recent CI run, coverage, LOC, and test-category trends for selected suites.",
-    "CI Run Success by Suite (7d rolling)": "Seven-day rolling CI run success rate by suite; blue lanes mean recent runs are completing cleanly.",
+    "CI Runs And Test Result History": "Recent CI run, coverage, LOC, and raw test-result trends for selected suites.",
+    "CI Run Success by Suite (7d rolling)": "Seven-day rolling CI run success rate by suite; this is run completion history, not raw test pass history.",
     "Test Category Pass Rate History": "Pass rate by test category; use the Suite filter to focus on one project.",
     "Daily Run Volume (Selected Scope)": "Rolling daily counts of published quality-gate runs; volume explains confidence.",
     "Coverage History by Suite": "Coverage over time by suite; rising lines mean better test protection.",
@@ -4184,7 +4184,7 @@ def build_jobs_dashboard():
     ]
     panels[-1]["description"] = (
         "Latest pass percentage across required gate checks in the daily freshness window. "
-        "100% means tests and supporting gates recently passed; missing means no fresh gate data."
+        "100% means tests and supporting gates recently passed; raw per-test history is tracked separately."
     )
     reliability_suite_panel = bargauge_panel(
         9,
@@ -4199,8 +4199,8 @@ def build_jobs_dashboard():
         decimals=2,
     )
     reliability_suite_panel["description"] = (
-        "24h CI run success rate. This can stay low after failed, aborted, or debug runs even "
-        "when the latest gate checks are green."
+        "24h CI run success rate. This is whether automation finished cleanly, so it can stay low "
+        "after failed or aborted runs even when tests and latest gate checks are green."
     )
     reliability_suite_panel["fieldConfig"]["defaults"]["mappings"] = [
         {"type": "value", "options": {"-1": {"text": "no runs"}}}
@@ -4214,7 +4214,7 @@ def build_jobs_dashboard():
         thresholds=success_thresholds,
         description=(
             "Seven-day rolling CI run success rate per suite. Each suite gets its own lane, "
-            "so failed or aborted runs lower the lane color without creating unreadable 0/100 spikes."
+            "so failed or aborted runs lower the lane color without implying raw test failures."
         ),
     )
     panels.append(history_panel)
@@ -4680,7 +4680,7 @@ def build_jobs_dashboard():
 
     compact_panels.extend(
         [
-            row_panel(500, "CI Runs And Test History", 11, panels=children([11, 153, 12, 13, 14])),
+            row_panel(500, "CI Runs And Test Result History", 11, panels=children([11, 153, 12, 13, 14])),
             row_panel(
                 501,
                 "Check Failure Rates By Suite",
diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json
index cdbdd90f..24b24518 100644
--- a/services/monitoring/dashboards/atlas-overview.json
+++ b/services/monitoring/dashboards/atlas-overview.json
@@ -2195,7 +2195,7 @@
           "targetBlank": true
         }
       ],
-      "description": "Percent of published quality-gate CI runs that completed successfully in 24h; this is run health, not individual test pass rate."
+      "description": "Percent of published quality-gate CI runs that completed successfully in 24h; this is automation health, not raw test pass rate."
     },
     {
       "id": 152,
diff --git a/services/monitoring/dashboards/atlas-testing.json b/services/monitoring/dashboards/atlas-testing.json
index 0aa447bb..848ee4b7 100644
--- a/services/monitoring/dashboards/atlas-testing.json
+++ b/services/monitoring/dashboards/atlas-testing.json
@@ -530,7 +530,7 @@
           }
         }
       ],
-      "description": "Latest pass percentage across required gate checks in the daily freshness window. 100% means tests and supporting gates recently passed; missing means no fresh gate data."
+      "description": "Latest pass percentage across required gate checks in the daily freshness window. 100% means tests and supporting gates recently passed; raw per-test history is tracked separately."
     },
     {
       "id": 9,
@@ -623,7 +623,7 @@
           }
         }
       ],
-      "description": "24h CI run success rate. This can stay low after failed, aborted, or debug runs even when the latest gate checks are green."
+      "description": "24h CI run success rate. This is whether automation finished cleanly, so it can stay low after failed or aborted runs even when tests and latest gate checks are green."
     },
     {
       "id": 17,
@@ -814,7 +814,7 @@
     {
       "id": 500,
       "type": "row",
-      "title": "CI Runs And Test History",
+      "title": "CI Runs And Test Result History",
       "gridPos": {
         "h": 1,
         "w": 24,
@@ -827,7 +827,7 @@
           "id": 11,
           "type": "state-timeline",
           "title": "CI Run Success by Suite (7d rolling)",
-          "description": "Seven-day rolling CI run success rate per suite. Each suite gets its own lane, so failed or aborted runs lower the lane color without creating unreadable 0/100 spikes.",
+          "description": "Seven-day rolling CI run success rate per suite. Each suite gets its own lane, so failed or aborted runs lower the lane color without implying raw test failures.",
           "datasource": {
             "type": "prometheus",
             "uid": "atlas-vm"
@@ -1299,7 +1299,7 @@
           }
         }
       ],
-      "description": "Recent CI run, coverage, LOC, and test-category trends for selected suites."
+      "description": "Recent CI run, coverage, LOC, and raw test-result trends for selected suites."
     },
     {
       "id": 501,
diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml
index df603bbc..77f49e72 100644
--- a/services/monitoring/grafana-dashboard-overview.yaml
+++ b/services/monitoring/grafana-dashboard-overview.yaml
@@ -2204,7 +2204,7 @@ data:
               "targetBlank": true
             }
           ],
-          "description": "Percent of published quality-gate CI runs that completed successfully in 24h; this is run health, not individual test pass rate."
+          "description": "Percent of published quality-gate CI runs that completed successfully in 24h; this is automation health, not raw test pass rate."
         },
         {
           "id": 152,
diff --git a/services/monitoring/grafana-dashboard-testing.yaml b/services/monitoring/grafana-dashboard-testing.yaml
index 6e8498f2..e82ebc0c 100644
--- a/services/monitoring/grafana-dashboard-testing.yaml
+++ b/services/monitoring/grafana-dashboard-testing.yaml
@@ -539,7 +539,7 @@ data:
               }
             }
           ],
-          "description": "Latest pass percentage across required gate checks in the daily freshness window. 100% means tests and supporting gates recently passed; missing means no fresh gate data."
+          "description": "Latest pass percentage across required gate checks in the daily freshness window. 100% means tests and supporting gates recently passed; raw per-test history is tracked separately."
         },
         {
           "id": 9,
@@ -632,7 +632,7 @@ data:
               }
             }
           ],
-          "description": "24h CI run success rate. This can stay low after failed, aborted, or debug runs even when the latest gate checks are green."
+          "description": "24h CI run success rate. This is whether automation finished cleanly, so it can stay low after failed or aborted runs even when tests and latest gate checks are green."
         },
         {
           "id": 17,
@@ -823,7 +823,7 @@ data:
         {
           "id": 500,
           "type": "row",
-          "title": "CI Runs And Test History",
+          "title": "CI Runs And Test Result History",
           "gridPos": {
             "h": 1,
             "w": 24,
@@ -836,7 +836,7 @@ data:
               "id": 11,
               "type": "state-timeline",
               "title": "CI Run Success by Suite (7d rolling)",
-              "description": "Seven-day rolling CI run success rate per suite. Each suite gets its own lane, so failed or aborted runs lower the lane color without creating unreadable 0/100 spikes.",
+              "description": "Seven-day rolling CI run success rate per suite. Each suite gets its own lane, so failed or aborted runs lower the lane color without implying raw test failures.",
               "datasource": {
                 "type": "prometheus",
                 "uid": "atlas-vm"
@@ -1308,7 +1308,7 @@ data:
               }
             }
           ],
-          "description": "Recent CI run, coverage, LOC, and test-category trends for selected suites."
+          "description": "Recent CI run, coverage, LOC, and raw test-result trends for selected suites."
         },
         {
           "id": 501,