monitoring: clarify quality gate dashboard tooltips

This commit is contained in:
jenkins 2026-05-16 17:03:58 -03:00
parent ad86195436
commit 588cc3aa14
6 changed files with 38 additions and 38 deletions

View File

@ -1618,9 +1618,9 @@ OVERVIEW_PANEL_DESCRIPTIONS = {
"Enclosure Climate History": "Temperature, humidity, and VPD over time; smooth movement is healthy, sharp swings need attention.",
"Fan Intensity History": "Fan levels from Off to 10; warmer colors mean stronger cooling response and more thermal pressure.",
"Flux Source": "Git branch Flux is applying; this should normally be the intended production branch.",
"Run Reliability (24h)": "Percent of CI runs that completed successfully in 24h; higher is better for release confidence.",
"Failed Runs (24h)": "CI runs that failed in 24h; zero is good, any value means recent test signal is noisy.",
"Suites Reporting (24h)": "Suites with at least one CI run in 24h; full count means the dashboard signal is fresh.",
"Run Reliability (24h)": "Percent of published quality-gate runs that passed in 24h; higher means fresher healthy test signal.",
"Failed Runs (24h)": "Published quality-gate runs that failed in 24h; zero is good, any value needs a look.",
"Suites Reporting (24h)": "Suites with at least one published quality-gate run in 24h; higher means fresher dashboard signal.",
"Avg Coverage": "Average latest line coverage across suites; higher means code is better protected by tests.",
"LOC Clean Suites": "Suites with no source files over 500 LOC; full count is good for maintainability.",
"GitOps Health": "Flux readiness and suspension health over time; blue is perfect, warmer colors mean drift or pause.",
@ -1654,20 +1654,20 @@ OVERVIEW_PANEL_DESCRIPTIONS = {
TESTING_PANEL_DESCRIPTIONS = {
"Run Reliability (24h)": "Percent of selected CI runs that finished successfully in 24h; higher is better.",
"Run Reliability (30d)": "Percent of selected CI runs that finished successfully in 30d; higher shows stable automation.",
"Failed Runs (24h)": "Selected CI runs that failed in 24h; zero is good and anything else needs a look.",
"Runs (24h)": "Selected CI run count in 24h; zero means the dashboard may be stale.",
"Run Reliability (24h)": "Percent of selected quality-gate runs that passed in 24h; higher means fresher healthy test signal.",
"Run Reliability (30d)": "Percent of selected quality-gate runs that passed in 30d; higher means more stable test automation.",
"Failed Runs (24h)": "Selected quality-gate runs that failed in 24h; zero is good and anything else needs a look.",
"Runs (24h)": "Selected quality-gate run count in 24h; zero means the dashboard may be stale.",
"Avg Coverage (%)": "Average latest line coverage for selected suites; higher means better test protection.",
"Suites with LOC >500": "Selected suites with oversized source files; zero is good for maintainability.",
"Current Gate Health by Suite": "Latest gate pass percent per suite; 100% means all required checks currently pass.",
"Run Reliability by Suite (24h)": "24h run success by suite; lower rows are worse and can lag after failed/debug runs.",
"Run Reliability by Suite (24h)": "24h quality-gate pass rate by suite; lower rows are worse and can lag after failed/debug runs.",
"Coverage by Suite (Latest, gate 95)": "Latest suite coverage; 95%+ is acceptable and 100% is strongest.",
"Files <=500 LOC by Suite (Latest)": "Percent of source files within the 500-line limit; higher is easier to maintain.",
"Reliability And Run History": "Recent run, coverage, LOC, and category trends for selected suites.",
"Run Reliability by Suite (7d rolling)": "Seven-day rolling run success by suite; blue lanes mean stable CI.",
"Run Reliability by Suite (7d rolling)": "Seven-day rolling quality-gate pass rate by suite; blue lanes mean stable tests.",
"Test Category Pass Rate History": "Pass rate by test category; use the Suite filter to focus on one project.",
"Daily Run Volume (Selected Scope)": "Rolling daily counts of successful and failed runs; volume explains confidence.",
"Daily Run Volume (Selected Scope)": "Rolling daily counts of published quality-gate runs; volume explains confidence.",
"Coverage History by Suite": "Coverage over time by suite; rising lines mean better test protection.",
"Files <=500 LOC History by Suite": "LOC compliance over time; blue lanes mean files stay within the size limit.",
"Check Failure Rates By Suite": "Failure percent by check family; blue is zero failures, warmer colors show blockers.",
@ -4122,7 +4122,7 @@ def build_jobs_dashboard():
decimals=2,
)
reliability_suite_panel["description"] = (
"Rolling CI run success rate. This can stay low after failed/debug runs even when "
"Rolling quality-gate pass rate. This can stay low after failed/debug runs even when "
"Current Gate Health is green."
)
reliability_suite_panel["fieldConfig"]["defaults"]["mappings"] = [
@ -4136,7 +4136,7 @@ def build_jobs_dashboard():
{"h": 8, "w": 24, "x": 0, "y": 13},
thresholds=success_thresholds,
description=(
"Seven-day rolling run success rate per suite. Each suite gets its own lane, "
"Seven-day rolling quality-gate pass rate per suite. Each suite gets its own lane, "
"so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes."
),
)
@ -4157,7 +4157,7 @@ def build_jobs_dashboard():
legend_calcs=[],
)
run_volume_panel["description"] = (
"Twenty-four-hour rolling run counts for the selected suite/branch scope. "
"Twenty-four-hour rolling quality-gate run counts for the selected suite/branch scope. "
"This is volume, not a pass-rate percentage."
)
run_volume_panel["fieldConfig"]["defaults"]["min"] = 0
@ -4251,8 +4251,8 @@ def build_jobs_dashboard():
max_value=None,
legend="{{suite}} - {{test}}",
description=(
"Top tests inside each hourly bucket. A test can show only 1-2 here while the 30d panel "
"shows a larger total from earlier hours."
"Top failing tests inside each hourly bucket. Short current bars can still belong to tests "
"with larger long-window totals."
),
)
)

View File

@ -122,7 +122,7 @@ def test_overview_uses_readable_quality_power_and_gitops_panels():
assert panels_by_title["Suites Reporting (24h)"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 13}
suites_reporting_expr = panels_by_title["Suites Reporting (24h)"]["targets"][0]["expr"]
assert "> bool 0" in suites_reporting_expr
assert "Suites with at least one CI run" in panels_by_title["Suites Reporting (24h)"]["description"]
assert "published quality-gate run" in panels_by_title["Suites Reporting (24h)"]["description"]
assert panels_by_title["LOC Clean Suites"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 17}
assert panels_by_title["GitOps Health"]["type"] == "state-timeline"
assert panels_by_title["GitOps Health"]["gridPos"] == {"h": 6, "w": 6, "x": 15, "y": 7}

View File

@ -2195,7 +2195,7 @@
"targetBlank": true
}
],
"description": "Percent of CI runs that completed successfully in 24h; higher is better for release confidence."
"description": "Percent of published quality-gate runs that passed in 24h; higher means fresher healthy test signal."
},
{
"id": 152,
@ -2277,7 +2277,7 @@
"targetBlank": true
}
],
"description": "CI runs that failed in 24h; zero is good, any value means recent test signal is noisy."
"description": "Published quality-gate runs that failed in 24h; zero is good, any value needs a look."
},
{
"id": 153,
@ -2359,7 +2359,7 @@
"targetBlank": true
}
],
"description": "Suites with at least one CI run in 24h; full count means the dashboard signal is fresh."
"description": "Suites with at least one published quality-gate run in 24h; higher means fresher dashboard signal."
},
{
"id": 154,

View File

@ -77,7 +77,7 @@
},
"textMode": "value"
},
"description": "Percent of selected CI runs that finished successfully in 24h; higher is better."
"description": "Percent of selected quality-gate runs that passed in 24h; higher means fresher healthy test signal."
},
{
"id": 3,
@ -152,7 +152,7 @@
},
"textMode": "value"
},
"description": "Percent of selected CI runs that finished successfully in 30d; higher shows stable automation."
"description": "Percent of selected quality-gate runs that passed in 30d; higher means more stable test automation."
},
{
"id": 4,
@ -226,7 +226,7 @@
},
"textMode": "value"
},
"description": "Selected CI runs that failed in 24h; zero is good and anything else needs a look."
"description": "Selected quality-gate runs that failed in 24h; zero is good and anything else needs a look."
},
{
"id": 5,
@ -288,7 +288,7 @@
},
"textMode": "value"
},
"description": "Selected CI run count in 24h; zero means the dashboard may be stale."
"description": "Selected quality-gate run count in 24h; zero means the dashboard may be stale."
},
{
"id": 6,
@ -623,7 +623,7 @@
}
}
],
"description": "Rolling CI run success rate. This can stay low after failed/debug runs even when Current Gate Health is green."
"description": "Rolling quality-gate pass rate. This can stay low after failed/debug runs even when Current Gate Health is green."
},
{
"id": 17,
@ -827,7 +827,7 @@
"id": 11,
"type": "state-timeline",
"title": "Run Reliability by Suite (7d rolling)",
"description": "Seven-day rolling run success rate per suite. Each suite gets its own lane, so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes.",
"description": "Seven-day rolling quality-gate pass rate per suite. Each suite gets its own lane, so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes.",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -1144,7 +1144,7 @@
"mode": "multi"
}
},
"description": "Twenty-four-hour rolling run counts for the selected suite/branch scope. This is volume, not a pass-rate percentage."
"description": "Twenty-four-hour rolling quality-gate run counts for the selected suite/branch scope. This is volume, not a pass-rate percentage."
},
{
"id": 13,
@ -2411,7 +2411,7 @@
"id": 145,
"type": "state-timeline",
"title": "Problematic Tests Over Time (Top failures)",
"description": "Top tests inside each hourly bucket. A test can show only 1-2 here while the 30d panel shows a larger total from earlier hours.",
"description": "Top failing tests inside each hourly bucket. Short current bars can still belong to tests with larger long-window totals.",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"

View File

@ -2204,7 +2204,7 @@ data:
"targetBlank": true
}
],
"description": "Percent of CI runs that completed successfully in 24h; higher is better for release confidence."
"description": "Percent of published quality-gate runs that passed in 24h; higher means fresher healthy test signal."
},
{
"id": 152,
@ -2286,7 +2286,7 @@ data:
"targetBlank": true
}
],
"description": "CI runs that failed in 24h; zero is good, any value means recent test signal is noisy."
"description": "Published quality-gate runs that failed in 24h; zero is good, any value needs a look."
},
{
"id": 153,
@ -2368,7 +2368,7 @@ data:
"targetBlank": true
}
],
"description": "Suites with at least one CI run in 24h; full count means the dashboard signal is fresh."
"description": "Suites with at least one published quality-gate run in 24h; higher means fresher dashboard signal."
},
{
"id": 154,

View File

@ -86,7 +86,7 @@ data:
},
"textMode": "value"
},
"description": "Percent of selected CI runs that finished successfully in 24h; higher is better."
"description": "Percent of selected quality-gate runs that passed in 24h; higher means fresher healthy test signal."
},
{
"id": 3,
@ -161,7 +161,7 @@ data:
},
"textMode": "value"
},
"description": "Percent of selected CI runs that finished successfully in 30d; higher shows stable automation."
"description": "Percent of selected quality-gate runs that passed in 30d; higher means more stable test automation."
},
{
"id": 4,
@ -235,7 +235,7 @@ data:
},
"textMode": "value"
},
"description": "Selected CI runs that failed in 24h; zero is good and anything else needs a look."
"description": "Selected quality-gate runs that failed in 24h; zero is good and anything else needs a look."
},
{
"id": 5,
@ -297,7 +297,7 @@ data:
},
"textMode": "value"
},
"description": "Selected CI run count in 24h; zero means the dashboard may be stale."
"description": "Selected quality-gate run count in 24h; zero means the dashboard may be stale."
},
{
"id": 6,
@ -632,7 +632,7 @@ data:
}
}
],
"description": "Rolling CI run success rate. This can stay low after failed/debug runs even when Current Gate Health is green."
"description": "Rolling quality-gate pass rate. This can stay low after failed/debug runs even when Current Gate Health is green."
},
{
"id": 17,
@ -836,7 +836,7 @@ data:
"id": 11,
"type": "state-timeline",
"title": "Run Reliability by Suite (7d rolling)",
"description": "Seven-day rolling run success rate per suite. Each suite gets its own lane, so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes.",
"description": "Seven-day rolling quality-gate pass rate per suite. Each suite gets its own lane, so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes.",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -1153,7 +1153,7 @@ data:
"mode": "multi"
}
},
"description": "Twenty-four-hour rolling run counts for the selected suite/branch scope. This is volume, not a pass-rate percentage."
"description": "Twenty-four-hour rolling quality-gate run counts for the selected suite/branch scope. This is volume, not a pass-rate percentage."
},
{
"id": 13,
@ -2420,7 +2420,7 @@ data:
"id": 145,
"type": "state-timeline",
"title": "Problematic Tests Over Time (Top failures)",
"description": "Top tests inside each hourly bucket. A test can show only 1-2 here while the 30d panel shows a larger total from earlier hours.",
"description": "Top failing tests inside each hourly bucket. Short current bars can still belong to tests with larger long-window totals.",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"