diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 8032f0cc..aecdcb31 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -1618,9 +1618,9 @@ OVERVIEW_PANEL_DESCRIPTIONS = { "Enclosure Climate History": "Temperature, humidity, and VPD over time; smooth movement is healthy, sharp swings need attention.", "Fan Intensity History": "Fan levels from Off to 10; warmer colors mean stronger cooling response and more thermal pressure.", "Flux Source": "Git branch Flux is applying; this should normally be the intended production branch.", - "Run Reliability (24h)": "Percent of CI runs that completed successfully in 24h; higher is better for release confidence.", - "Failed Runs (24h)": "CI runs that failed in 24h; zero is good, any value means recent test signal is noisy.", - "Suites Reporting (24h)": "Suites with at least one CI run in 24h; full count means the dashboard signal is fresh.", + "Run Reliability (24h)": "Percent of published quality-gate runs that passed in 24h; higher means fresher healthy test signal.", + "Failed Runs (24h)": "Published quality-gate runs that failed in 24h; zero is good, any value needs a look.", + "Suites Reporting (24h)": "Suites with at least one published quality-gate run in 24h; higher means fresher dashboard signal.", "Avg Coverage": "Average latest line coverage across suites; higher means code is better protected by tests.", "LOC Clean Suites": "Suites with no source files over 500 LOC; full count is good for maintainability.", "GitOps Health": "Flux readiness and suspension health over time; blue is perfect, warmer colors mean drift or pause.", @@ -1654,20 +1654,20 @@ OVERVIEW_PANEL_DESCRIPTIONS = { TESTING_PANEL_DESCRIPTIONS = { - "Run Reliability (24h)": "Percent of selected CI runs that finished successfully in 24h; higher is better.", - "Run Reliability (30d)": "Percent of selected CI runs that finished successfully in 30d; higher shows stable automation.", - "Failed Runs (24h)": "Selected CI runs that failed in 24h; zero is good and anything else needs a look.", - "Runs (24h)": "Selected CI run count in 24h; zero means the dashboard may be stale.", + "Run Reliability (24h)": "Percent of selected quality-gate runs that passed in 24h; higher means fresher healthy test signal.", + "Run Reliability (30d)": "Percent of selected quality-gate runs that passed in 30d; higher means more stable test automation.", + "Failed Runs (24h)": "Selected quality-gate runs that failed in 24h; zero is good and anything else needs a look.", + "Runs (24h)": "Selected quality-gate run count in 24h; zero means the dashboard may be stale.", "Avg Coverage (%)": "Average latest line coverage for selected suites; higher means better test protection.", "Suites with LOC >500": "Selected suites with oversized source files; zero is good for maintainability.", "Current Gate Health by Suite": "Latest gate pass percent per suite; 100% means all required checks currently pass.", - "Run Reliability by Suite (24h)": "24h run success by suite; lower rows are worse and can lag after failed/debug runs.", + "Run Reliability by Suite (24h)": "24h quality-gate pass rate by suite; lower rows are worse and can lag after failed/debug runs.", "Coverage by Suite (Latest, gate 95)": "Latest suite coverage; 95%+ is acceptable and 100% is strongest.", "Files <=500 LOC by Suite (Latest)": "Percent of source files within the 500-line limit; higher is easier to maintain.", "Reliability And Run History": "Recent run, coverage, LOC, and category trends for selected suites.", - "Run Reliability by Suite (7d rolling)": "Seven-day rolling run success by suite; blue lanes mean stable CI.", + "Run Reliability by Suite (7d rolling)": "Seven-day rolling quality-gate pass rate by suite; blue lanes mean stable tests.", "Test Category Pass Rate History": "Pass rate by test category; use the Suite filter to focus on one project.", - "Daily Run Volume (Selected Scope)": "Rolling daily counts of successful and failed runs; volume explains confidence.", + "Daily Run Volume (Selected Scope)": "Rolling daily counts of published quality-gate runs; volume explains confidence.", "Coverage History by Suite": "Coverage over time by suite; rising lines mean better test protection.", "Files <=500 LOC History by Suite": "LOC compliance over time; blue lanes mean files stay within the size limit.", "Check Failure Rates By Suite": "Failure percent by check family; blue is zero failures, warmer colors show blockers.", @@ -4122,7 +4122,7 @@ def build_jobs_dashboard(): decimals=2, ) reliability_suite_panel["description"] = ( - "Rolling CI run success rate. This can stay low after failed/debug runs even when " + "Rolling quality-gate pass rate. This can stay low after failed/debug runs even when " "Current Gate Health is green." ) reliability_suite_panel["fieldConfig"]["defaults"]["mappings"] = [ @@ -4136,7 +4136,7 @@ def build_jobs_dashboard(): {"h": 8, "w": 24, "x": 0, "y": 13}, thresholds=success_thresholds, description=( - "Seven-day rolling run success rate per suite. Each suite gets its own lane, " + "Seven-day rolling quality-gate pass rate per suite. Each suite gets its own lane, " "so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes." ), ) @@ -4157,7 +4157,7 @@ def build_jobs_dashboard(): legend_calcs=[], ) run_volume_panel["description"] = ( - "Twenty-four-hour rolling run counts for the selected suite/branch scope. " + "Twenty-four-hour rolling quality-gate run counts for the selected suite/branch scope. " "This is volume, not a pass-rate percentage." ) run_volume_panel["fieldConfig"]["defaults"]["min"] = 0 @@ -4251,8 +4251,8 @@ def build_jobs_dashboard(): max_value=None, legend="{{suite}} - {{test}}", description=( - "Top tests inside each hourly bucket. A test can show only 1-2 here while the 30d panel " - "shows a larger total from earlier hours." + "Top failing tests inside each hourly bucket. Short current bars can still belong to tests " + "with larger long-window totals." ), ) ) diff --git a/scripts/tests/test_dashboards_render_atlas.py b/scripts/tests/test_dashboards_render_atlas.py index 1f8d210b..057f20dd 100644 --- a/scripts/tests/test_dashboards_render_atlas.py +++ b/scripts/tests/test_dashboards_render_atlas.py @@ -122,7 +122,7 @@ def test_overview_uses_readable_quality_power_and_gitops_panels(): assert panels_by_title["Suites Reporting (24h)"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 13} suites_reporting_expr = panels_by_title["Suites Reporting (24h)"]["targets"][0]["expr"] assert "> bool 0" in suites_reporting_expr - assert "Suites with at least one CI run" in panels_by_title["Suites Reporting (24h)"]["description"] + assert "published quality-gate run" in panels_by_title["Suites Reporting (24h)"]["description"] assert panels_by_title["LOC Clean Suites"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 17} assert panels_by_title["GitOps Health"]["type"] == "state-timeline" assert panels_by_title["GitOps Health"]["gridPos"] == {"h": 6, "w": 6, "x": 15, "y": 7} diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index e8645980..40e55004 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -2195,7 +2195,7 @@ "targetBlank": true } ], - "description": "Percent of CI runs that completed successfully in 24h; higher is better for release confidence." + "description": "Percent of published quality-gate runs that passed in 24h; higher means fresher healthy test signal." }, { "id": 152, @@ -2277,7 +2277,7 @@ "targetBlank": true } ], - "description": "CI runs that failed in 24h; zero is good, any value means recent test signal is noisy." + "description": "Published quality-gate runs that failed in 24h; zero is good, any value needs a look." }, { "id": 153, @@ -2359,7 +2359,7 @@ "targetBlank": true } ], - "description": "Suites with at least one CI run in 24h; full count means the dashboard signal is fresh." + "description": "Suites with at least one published quality-gate run in 24h; higher means fresher dashboard signal." }, { "id": 154, diff --git a/services/monitoring/dashboards/atlas-testing.json b/services/monitoring/dashboards/atlas-testing.json index 4417cf5c..6a7c3822 100644 --- a/services/monitoring/dashboards/atlas-testing.json +++ b/services/monitoring/dashboards/atlas-testing.json @@ -77,7 +77,7 @@ }, "textMode": "value" }, - "description": "Percent of selected CI runs that finished successfully in 24h; higher is better." + "description": "Percent of selected quality-gate runs that passed in 24h; higher means fresher healthy test signal." }, { "id": 3, @@ -152,7 +152,7 @@ }, "textMode": "value" }, - "description": "Percent of selected CI runs that finished successfully in 30d; higher shows stable automation." + "description": "Percent of selected quality-gate runs that passed in 30d; higher means more stable test automation." }, { "id": 4, @@ -226,7 +226,7 @@ }, "textMode": "value" }, - "description": "Selected CI runs that failed in 24h; zero is good and anything else needs a look." + "description": "Selected quality-gate runs that failed in 24h; zero is good and anything else needs a look." }, { "id": 5, @@ -288,7 +288,7 @@ }, "textMode": "value" }, - "description": "Selected CI run count in 24h; zero means the dashboard may be stale." + "description": "Selected quality-gate run count in 24h; zero means the dashboard may be stale." }, { "id": 6, @@ -623,7 +623,7 @@ } } ], - "description": "Rolling CI run success rate. This can stay low after failed/debug runs even when Current Gate Health is green." + "description": "Rolling quality-gate pass rate. This can stay low after failed/debug runs even when Current Gate Health is green." }, { "id": 17, @@ -827,7 +827,7 @@ "id": 11, "type": "state-timeline", "title": "Run Reliability by Suite (7d rolling)", - "description": "Seven-day rolling run success rate per suite. Each suite gets its own lane, so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes.", + "description": "Seven-day rolling quality-gate pass rate per suite. Each suite gets its own lane, so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes.", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1144,7 +1144,7 @@ "mode": "multi" } }, - "description": "Twenty-four-hour rolling run counts for the selected suite/branch scope. This is volume, not a pass-rate percentage." + "description": "Twenty-four-hour rolling quality-gate run counts for the selected suite/branch scope. This is volume, not a pass-rate percentage." }, { "id": 13, @@ -2411,7 +2411,7 @@ "id": 145, "type": "state-timeline", "title": "Problematic Tests Over Time (Top failures)", - "description": "Top tests inside each hourly bucket. A test can show only 1-2 here while the 30d panel shows a larger total from earlier hours.", + "description": "Top failing tests inside each hourly bucket. Short current bars can still belong to tests with larger long-window totals.", "datasource": { "type": "prometheus", "uid": "atlas-vm" diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index ab532468..fc01a2db 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -2204,7 +2204,7 @@ data: "targetBlank": true } ], - "description": "Percent of CI runs that completed successfully in 24h; higher is better for release confidence." + "description": "Percent of published quality-gate runs that passed in 24h; higher means fresher healthy test signal." }, { "id": 152, @@ -2286,7 +2286,7 @@ data: "targetBlank": true } ], - "description": "CI runs that failed in 24h; zero is good, any value means recent test signal is noisy." + "description": "Published quality-gate runs that failed in 24h; zero is good, any value needs a look." }, { "id": 153, @@ -2368,7 +2368,7 @@ data: "targetBlank": true } ], - "description": "Suites with at least one CI run in 24h; full count means the dashboard signal is fresh." + "description": "Suites with at least one published quality-gate run in 24h; higher means fresher dashboard signal." }, { "id": 154, diff --git a/services/monitoring/grafana-dashboard-testing.yaml b/services/monitoring/grafana-dashboard-testing.yaml index 8fddc01c..2813bf27 100644 --- a/services/monitoring/grafana-dashboard-testing.yaml +++ b/services/monitoring/grafana-dashboard-testing.yaml @@ -86,7 +86,7 @@ data: }, "textMode": "value" }, - "description": "Percent of selected CI runs that finished successfully in 24h; higher is better." + "description": "Percent of selected quality-gate runs that passed in 24h; higher means fresher healthy test signal." }, { "id": 3, @@ -161,7 +161,7 @@ data: }, "textMode": "value" }, - "description": "Percent of selected CI runs that finished successfully in 30d; higher shows stable automation." + "description": "Percent of selected quality-gate runs that passed in 30d; higher means more stable test automation." }, { "id": 4, @@ -235,7 +235,7 @@ data: }, "textMode": "value" }, - "description": "Selected CI runs that failed in 24h; zero is good and anything else needs a look." + "description": "Selected quality-gate runs that failed in 24h; zero is good and anything else needs a look." }, { "id": 5, @@ -297,7 +297,7 @@ data: }, "textMode": "value" }, - "description": "Selected CI run count in 24h; zero means the dashboard may be stale." + "description": "Selected quality-gate run count in 24h; zero means the dashboard may be stale." }, { "id": 6, @@ -632,7 +632,7 @@ data: } } ], - "description": "Rolling CI run success rate. This can stay low after failed/debug runs even when Current Gate Health is green." + "description": "Rolling quality-gate pass rate. This can stay low after failed/debug runs even when Current Gate Health is green." }, { "id": 17, @@ -836,7 +836,7 @@ data: "id": 11, "type": "state-timeline", "title": "Run Reliability by Suite (7d rolling)", - "description": "Seven-day rolling run success rate per suite. Each suite gets its own lane, so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes.", + "description": "Seven-day rolling quality-gate pass rate per suite. Each suite gets its own lane, so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes.", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1153,7 +1153,7 @@ data: "mode": "multi" } }, - "description": "Twenty-four-hour rolling run counts for the selected suite/branch scope. This is volume, not a pass-rate percentage." + "description": "Twenty-four-hour rolling quality-gate run counts for the selected suite/branch scope. This is volume, not a pass-rate percentage." }, { "id": 13, @@ -2420,7 +2420,7 @@ data: "id": 145, "type": "state-timeline", "title": "Problematic Tests Over Time (Top failures)", - "description": "Top tests inside each hourly bucket. A test can show only 1-2 here while the 30d panel shows a larger total from earlier hours.", + "description": "Top failing tests inside each hourly bucket. Short current bars can still belong to tests with larger long-window totals.", "datasource": { "type": "prometheus", "uid": "atlas-vm"