monitoring: clarify quality-gate dashboard tooltips
This commit is contained in:
parent
ad86195436
commit
588cc3aa14
@ -1618,9 +1618,9 @@ OVERVIEW_PANEL_DESCRIPTIONS = {
|
||||
"Enclosure Climate History": "Temperature, humidity, and VPD over time; smooth movement is healthy, sharp swings need attention.",
|
||||
"Fan Intensity History": "Fan levels from Off to 10; warmer colors mean stronger cooling response and more thermal pressure.",
|
||||
"Flux Source": "Git branch Flux is applying; this should normally be the intended production branch.",
|
||||
"Run Reliability (24h)": "Percent of CI runs that completed successfully in 24h; higher is better for release confidence.",
|
||||
"Failed Runs (24h)": "CI runs that failed in 24h; zero is good, any value means recent test signal is noisy.",
|
||||
"Suites Reporting (24h)": "Suites with at least one CI run in 24h; full count means the dashboard signal is fresh.",
|
||||
"Run Reliability (24h)": "Percent of published quality-gate runs that passed in 24h; higher means fresher healthy test signal.",
|
||||
"Failed Runs (24h)": "Published quality-gate runs that failed in 24h; zero is good, any value needs a look.",
|
||||
"Suites Reporting (24h)": "Suites with at least one published quality-gate run in 24h; higher means fresher dashboard signal.",
|
||||
"Avg Coverage": "Average latest line coverage across suites; higher means code is better protected by tests.",
|
||||
"LOC Clean Suites": "Suites with no source files over 500 LOC; full count is good for maintainability.",
|
||||
"GitOps Health": "Flux readiness and suspension health over time; blue is perfect, warmer colors mean drift or pause.",
|
||||
@ -1654,20 +1654,20 @@ OVERVIEW_PANEL_DESCRIPTIONS = {
|
||||
|
||||
|
||||
TESTING_PANEL_DESCRIPTIONS = {
|
||||
"Run Reliability (24h)": "Percent of selected CI runs that finished successfully in 24h; higher is better.",
|
||||
"Run Reliability (30d)": "Percent of selected CI runs that finished successfully in 30d; higher shows stable automation.",
|
||||
"Failed Runs (24h)": "Selected CI runs that failed in 24h; zero is good and anything else needs a look.",
|
||||
"Runs (24h)": "Selected CI run count in 24h; zero means the dashboard may be stale.",
|
||||
"Run Reliability (24h)": "Percent of selected quality-gate runs that passed in 24h; higher means fresher healthy test signal.",
|
||||
"Run Reliability (30d)": "Percent of selected quality-gate runs that passed in 30d; higher means more stable test automation.",
|
||||
"Failed Runs (24h)": "Selected quality-gate runs that failed in 24h; zero is good and anything else needs a look.",
|
||||
"Runs (24h)": "Selected quality-gate run count in 24h; zero means the dashboard may be stale.",
|
||||
"Avg Coverage (%)": "Average latest line coverage for selected suites; higher means better test protection.",
|
||||
"Suites with LOC >500": "Selected suites with oversized source files; zero is good for maintainability.",
|
||||
"Current Gate Health by Suite": "Latest gate pass percent per suite; 100% means all required checks currently pass.",
|
||||
"Run Reliability by Suite (24h)": "24h run success by suite; lower rows are worse and can lag after failed/debug runs.",
|
||||
"Run Reliability by Suite (24h)": "24h quality-gate pass rate by suite; lower rows are worse and can lag after failed/debug runs.",
|
||||
"Coverage by Suite (Latest, gate 95)": "Latest suite coverage; 95%+ is acceptable and 100% is strongest.",
|
||||
"Files <=500 LOC by Suite (Latest)": "Percent of source files within the 500-line limit; higher is easier to maintain.",
|
||||
"Reliability And Run History": "Recent run, coverage, LOC, and category trends for selected suites.",
|
||||
"Run Reliability by Suite (7d rolling)": "Seven-day rolling run success by suite; blue lanes mean stable CI.",
|
||||
"Run Reliability by Suite (7d rolling)": "Seven-day rolling quality-gate pass rate by suite; blue lanes mean stable tests.",
|
||||
"Test Category Pass Rate History": "Pass rate by test category; use the Suite filter to focus on one project.",
|
||||
"Daily Run Volume (Selected Scope)": "Rolling daily counts of successful and failed runs; volume explains confidence.",
|
||||
"Daily Run Volume (Selected Scope)": "Rolling daily counts of published quality-gate runs; volume explains confidence.",
|
||||
"Coverage History by Suite": "Coverage over time by suite; rising lines mean better test protection.",
|
||||
"Files <=500 LOC History by Suite": "LOC compliance over time; blue lanes mean files stay within the size limit.",
|
||||
"Check Failure Rates By Suite": "Failure percent by check family; blue is zero failures, warmer colors show blockers.",
|
||||
@ -4122,7 +4122,7 @@ def build_jobs_dashboard():
|
||||
decimals=2,
|
||||
)
|
||||
reliability_suite_panel["description"] = (
|
||||
"Rolling CI run success rate. This can stay low after failed/debug runs even when "
|
||||
"Rolling quality-gate pass rate. This can stay low after failed/debug runs even when "
|
||||
"Current Gate Health is green."
|
||||
)
|
||||
reliability_suite_panel["fieldConfig"]["defaults"]["mappings"] = [
|
||||
@ -4136,7 +4136,7 @@ def build_jobs_dashboard():
|
||||
{"h": 8, "w": 24, "x": 0, "y": 13},
|
||||
thresholds=success_thresholds,
|
||||
description=(
|
||||
"Seven-day rolling run success rate per suite. Each suite gets its own lane, "
|
||||
"Seven-day rolling quality-gate pass rate per suite. Each suite gets its own lane, "
|
||||
"so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes."
|
||||
),
|
||||
)
|
||||
@ -4157,7 +4157,7 @@ def build_jobs_dashboard():
|
||||
legend_calcs=[],
|
||||
)
|
||||
run_volume_panel["description"] = (
|
||||
"Twenty-four-hour rolling run counts for the selected suite/branch scope. "
|
||||
"Twenty-four-hour rolling quality-gate run counts for the selected suite/branch scope. "
|
||||
"This is volume, not a pass-rate percentage."
|
||||
)
|
||||
run_volume_panel["fieldConfig"]["defaults"]["min"] = 0
|
||||
@ -4251,8 +4251,8 @@ def build_jobs_dashboard():
|
||||
max_value=None,
|
||||
legend="{{suite}} - {{test}}",
|
||||
description=(
|
||||
"Top tests inside each hourly bucket. A test can show only 1-2 here while the 30d panel "
|
||||
"shows a larger total from earlier hours."
|
||||
"Top failing tests inside each hourly bucket. Short current bars can still belong to tests "
|
||||
"with larger long-window totals."
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
@ -122,7 +122,7 @@ def test_overview_uses_readable_quality_power_and_gitops_panels():
|
||||
assert panels_by_title["Suites Reporting (24h)"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 13}
|
||||
suites_reporting_expr = panels_by_title["Suites Reporting (24h)"]["targets"][0]["expr"]
|
||||
assert "> bool 0" in suites_reporting_expr
|
||||
assert "Suites with at least one CI run" in panels_by_title["Suites Reporting (24h)"]["description"]
|
||||
assert "published quality-gate run" in panels_by_title["Suites Reporting (24h)"]["description"]
|
||||
assert panels_by_title["LOC Clean Suites"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 17}
|
||||
assert panels_by_title["GitOps Health"]["type"] == "state-timeline"
|
||||
assert panels_by_title["GitOps Health"]["gridPos"] == {"h": 6, "w": 6, "x": 15, "y": 7}
|
||||
|
||||
@ -2195,7 +2195,7 @@
|
||||
"targetBlank": true
|
||||
}
|
||||
],
|
||||
"description": "Percent of CI runs that completed successfully in 24h; higher is better for release confidence."
|
||||
"description": "Percent of published quality-gate runs that passed in 24h; higher means fresher healthy test signal."
|
||||
},
|
||||
{
|
||||
"id": 152,
|
||||
@ -2277,7 +2277,7 @@
|
||||
"targetBlank": true
|
||||
}
|
||||
],
|
||||
"description": "CI runs that failed in 24h; zero is good, any value means recent test signal is noisy."
|
||||
"description": "Published quality-gate runs that failed in 24h; zero is good, any value needs a look."
|
||||
},
|
||||
{
|
||||
"id": 153,
|
||||
@ -2359,7 +2359,7 @@
|
||||
"targetBlank": true
|
||||
}
|
||||
],
|
||||
"description": "Suites with at least one CI run in 24h; full count means the dashboard signal is fresh."
|
||||
"description": "Suites with at least one published quality-gate run in 24h; higher means fresher dashboard signal."
|
||||
},
|
||||
{
|
||||
"id": 154,
|
||||
|
||||
@ -77,7 +77,7 @@
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"description": "Percent of selected CI runs that finished successfully in 24h; higher is better."
|
||||
"description": "Percent of selected quality-gate runs that passed in 24h; higher means fresher healthy test signal."
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
@ -152,7 +152,7 @@
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"description": "Percent of selected CI runs that finished successfully in 30d; higher shows stable automation."
|
||||
"description": "Percent of selected quality-gate runs that passed in 30d; higher means more stable test automation."
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
@ -226,7 +226,7 @@
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"description": "Selected CI runs that failed in 24h; zero is good and anything else needs a look."
|
||||
"description": "Selected quality-gate runs that failed in 24h; zero is good and anything else needs a look."
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
@ -288,7 +288,7 @@
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"description": "Selected CI run count in 24h; zero means the dashboard may be stale."
|
||||
"description": "Selected quality-gate run count in 24h; zero means the dashboard may be stale."
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
@ -623,7 +623,7 @@
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Rolling CI run success rate. This can stay low after failed/debug runs even when Current Gate Health is green."
|
||||
"description": "Rolling quality-gate pass rate. This can stay low after failed/debug runs even when Current Gate Health is green."
|
||||
},
|
||||
{
|
||||
"id": 17,
|
||||
@ -827,7 +827,7 @@
|
||||
"id": 11,
|
||||
"type": "state-timeline",
|
||||
"title": "Run Reliability by Suite (7d rolling)",
|
||||
"description": "Seven-day rolling run success rate per suite. Each suite gets its own lane, so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes.",
|
||||
"description": "Seven-day rolling quality-gate pass rate per suite. Each suite gets its own lane, so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes.",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -1144,7 +1144,7 @@
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"description": "Twenty-four-hour rolling run counts for the selected suite/branch scope. This is volume, not a pass-rate percentage."
|
||||
"description": "Twenty-four-hour rolling quality-gate run counts for the selected suite/branch scope. This is volume, not a pass-rate percentage."
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
@ -2411,7 +2411,7 @@
|
||||
"id": 145,
|
||||
"type": "state-timeline",
|
||||
"title": "Problematic Tests Over Time (Top failures)",
|
||||
"description": "Top tests inside each hourly bucket. A test can show only 1-2 here while the 30d panel shows a larger total from earlier hours.",
|
||||
"description": "Top failing tests inside each hourly bucket. Short current bars can still belong to tests with larger long-window totals.",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
|
||||
@ -2204,7 +2204,7 @@ data:
|
||||
"targetBlank": true
|
||||
}
|
||||
],
|
||||
"description": "Percent of CI runs that completed successfully in 24h; higher is better for release confidence."
|
||||
"description": "Percent of published quality-gate runs that passed in 24h; higher means fresher healthy test signal."
|
||||
},
|
||||
{
|
||||
"id": 152,
|
||||
@ -2286,7 +2286,7 @@ data:
|
||||
"targetBlank": true
|
||||
}
|
||||
],
|
||||
"description": "CI runs that failed in 24h; zero is good, any value means recent test signal is noisy."
|
||||
"description": "Published quality-gate runs that failed in 24h; zero is good, any value needs a look."
|
||||
},
|
||||
{
|
||||
"id": 153,
|
||||
@ -2368,7 +2368,7 @@ data:
|
||||
"targetBlank": true
|
||||
}
|
||||
],
|
||||
"description": "Suites with at least one CI run in 24h; full count means the dashboard signal is fresh."
|
||||
"description": "Suites with at least one published quality-gate run in 24h; higher means fresher dashboard signal."
|
||||
},
|
||||
{
|
||||
"id": 154,
|
||||
|
||||
@ -86,7 +86,7 @@ data:
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"description": "Percent of selected CI runs that finished successfully in 24h; higher is better."
|
||||
"description": "Percent of selected quality-gate runs that passed in 24h; higher means fresher healthy test signal."
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
@ -161,7 +161,7 @@ data:
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"description": "Percent of selected CI runs that finished successfully in 30d; higher shows stable automation."
|
||||
"description": "Percent of selected quality-gate runs that passed in 30d; higher means more stable test automation."
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
@ -235,7 +235,7 @@ data:
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"description": "Selected CI runs that failed in 24h; zero is good and anything else needs a look."
|
||||
"description": "Selected quality-gate runs that failed in 24h; zero is good and anything else needs a look."
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
@ -297,7 +297,7 @@ data:
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"description": "Selected CI run count in 24h; zero means the dashboard may be stale."
|
||||
"description": "Selected quality-gate run count in 24h; zero means the dashboard may be stale."
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
@ -632,7 +632,7 @@ data:
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Rolling CI run success rate. This can stay low after failed/debug runs even when Current Gate Health is green."
|
||||
"description": "Rolling quality-gate pass rate. This can stay low after failed/debug runs even when Current Gate Health is green."
|
||||
},
|
||||
{
|
||||
"id": 17,
|
||||
@ -836,7 +836,7 @@ data:
|
||||
"id": 11,
|
||||
"type": "state-timeline",
|
||||
"title": "Run Reliability by Suite (7d rolling)",
|
||||
"description": "Seven-day rolling run success rate per suite. Each suite gets its own lane, so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes.",
|
||||
"description": "Seven-day rolling quality-gate pass rate per suite. Each suite gets its own lane, so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes.",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -1153,7 +1153,7 @@ data:
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"description": "Twenty-four-hour rolling run counts for the selected suite/branch scope. This is volume, not a pass-rate percentage."
|
||||
"description": "Twenty-four-hour rolling quality-gate run counts for the selected suite/branch scope. This is volume, not a pass-rate percentage."
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
@ -2420,7 +2420,7 @@ data:
|
||||
"id": 145,
|
||||
"type": "state-timeline",
|
||||
"title": "Problematic Tests Over Time (Top failures)",
|
||||
"description": "Top tests inside each hourly bucket. A test can show only 1-2 here while the 30d panel shows a larger total from earlier hours.",
|
||||
"description": "Top failing tests inside each hourly bucket. Short current bars can still belong to tests with larger long-window totals.",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user