monitoring(testing): clarify CI run health labels
This commit is contained in:
parent
17628a060f
commit
0de90d622a
@ -1628,7 +1628,7 @@ OVERVIEW_PANEL_DESCRIPTIONS = {
|
||||
"Enclosure Climate History": "Temperature, humidity, and VPD over time; smooth movement is healthy, sharp swings need attention.",
|
||||
"Fan Intensity History": "Fan levels from Off to 10; warmer colors mean stronger cooling response and more thermal pressure.",
|
||||
"Flux Source": "Git branch Flux is applying; this should normally be the intended production branch.",
|
||||
"Run Reliability (24h)": "Percent of published quality-gate runs that passed in 24h; higher means fresher healthy test signal.",
|
||||
"CI Run Success (24h)": "Percent of published quality-gate CI runs that completed successfully in 24h; this is run health, not individual test pass rate.",
|
||||
"Failed Runs (24h)": "Published quality-gate runs that failed in 24h; zero is good, any value needs a look.",
|
||||
"Suites With Runs (24h)": "Configured suites with at least one published quality-gate run in 24h; full count means the dashboard is fresh.",
|
||||
"Avg Coverage": "Average latest line coverage across suites; higher means code is better protected by tests.",
|
||||
@ -1664,18 +1664,18 @@ OVERVIEW_PANEL_DESCRIPTIONS = {
|
||||
|
||||
|
||||
TESTING_PANEL_DESCRIPTIONS = {
|
||||
"Run Reliability (24h)": "Percent of selected quality-gate runs that passed in 24h; higher means fresher healthy test signal.",
|
||||
"Run Reliability (30d)": "Percent of selected quality-gate runs that passed in 30d; higher means more stable test automation.",
|
||||
"CI Run Success Rate (24h)": "Percent of selected quality-gate CI runs that completed successfully in 24h; this is run health, not individual test pass rate.",
|
||||
"CI Run Success Rate (30d)": "Percent of selected quality-gate CI runs that completed successfully in 30d; higher means more stable automation.",
|
||||
"Failed Runs (24h)": "Selected quality-gate runs that failed in 24h; zero is good and anything else needs a look.",
|
||||
"Runs (24h)": "Selected quality-gate run count in 24h; zero means the dashboard may be stale.",
|
||||
"CI Runs (24h)": "Selected quality-gate CI run count in 24h; zero means the dashboard may be stale.",
|
||||
"Avg Coverage (%)": "Average latest line coverage for selected suites; higher means better test protection.",
|
||||
"Suites with LOC >500": "Selected suites with oversized source files; zero is good for maintainability.",
|
||||
"Current Gate Health by Suite": "Latest gate pass percent per suite from the daily freshness window; 100% means required checks recently passed.",
|
||||
"Run Reliability by Suite (24h)": "24h quality-gate pass rate by suite; lower rows are worse and can lag after failed/debug runs.",
|
||||
"Latest Gate Checks Passing by Suite": "Latest required gate checks passing by suite in the daily freshness window; this includes tests, coverage, LOC, style, and related gates.",
|
||||
"CI Run Success by Suite (24h)": "24h CI run success rate by suite; lower rows mean recent jobs failed, aborted, or could not complete cleanly.",
|
||||
"Coverage by Suite (Latest, gate 95)": "Latest suite coverage; 95%+ is acceptable and 100% is strongest.",
|
||||
"Files <=500 LOC by Suite (Latest)": "Percent of source files within the 500-line limit; higher is easier to maintain.",
|
||||
"Reliability And Run History": "Recent run, coverage, LOC, and category trends for selected suites.",
|
||||
"Run Reliability by Suite (7d rolling)": "Seven-day rolling quality-gate pass rate by suite; blue lanes mean stable tests.",
|
||||
"CI Runs And Test History": "Recent CI run, coverage, LOC, and test-category trends for selected suites.",
|
||||
"CI Run Success by Suite (7d rolling)": "Seven-day rolling CI run success rate by suite; blue lanes mean recent runs are completing cleanly.",
|
||||
"Test Category Pass Rate History": "Pass rate by test category; use the Suite filter to focus on one project.",
|
||||
"Daily Run Volume (Selected Scope)": "Rolling daily counts of published quality-gate runs; volume explains confidence.",
|
||||
"Coverage History by Suite": "Coverage over time by suite; rising lines mean better test protection.",
|
||||
@ -2336,7 +2336,7 @@ def build_overview():
|
||||
flux_source["options"]["text"] = {"titleSize": 10, "valueSize": 14}
|
||||
panels.append(flux_source)
|
||||
for panel_id, title, expr, y_pos, unit, decimals, thresholds, links in [
|
||||
(151, "Run Reliability (24h)", TEST_SUCCESS_RATE_24H, 9, "percent", 1, test_success_thresholds, "atlas-testing"),
|
||||
(151, "CI Run Success (24h)", TEST_SUCCESS_RATE_24H, 9, "percent", 1, test_success_thresholds, "atlas-testing"),
|
||||
(152, "Failed Runs (24h)", TEST_FAILURES_24H_TOTAL, 11, "none", 0, failure_count_thresholds, "atlas-testing"),
|
||||
(153, "Suites With Runs (24h)", PLATFORM_TEST_ACTIVE_SUITES_24H, 13, "none", 0, perfect_count_thresholds, "atlas-testing"),
|
||||
(154, "Avg Coverage", overview_avg_coverage, 15, "percent", 1, test_success_thresholds, "atlas-testing"),
|
||||
@ -4079,7 +4079,7 @@ def build_jobs_dashboard():
|
||||
panels.append(
|
||||
stat_panel(
|
||||
2,
|
||||
"Run Reliability (24h)",
|
||||
"CI Run Success Rate (24h)",
|
||||
success_rate_24h,
|
||||
{"h": 5, "w": 4, "x": 0, "y": 0},
|
||||
unit="percent",
|
||||
@ -4091,7 +4091,7 @@ def build_jobs_dashboard():
|
||||
panels.append(
|
||||
stat_panel(
|
||||
3,
|
||||
"Run Reliability (30d)",
|
||||
"CI Run Success Rate (30d)",
|
||||
success_rate_30d,
|
||||
{"h": 5, "w": 4, "x": 4, "y": 0},
|
||||
unit="percent",
|
||||
@ -4114,7 +4114,7 @@ def build_jobs_dashboard():
|
||||
panels.append(
|
||||
stat_panel(
|
||||
5,
|
||||
"Runs (24h)",
|
||||
"CI Runs (24h)",
|
||||
runs_24h,
|
||||
{"h": 5, "w": 4, "x": 12, "y": 0},
|
||||
unit="none",
|
||||
@ -4152,7 +4152,7 @@ def build_jobs_dashboard():
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
8,
|
||||
"Current Gate Health by Suite",
|
||||
"Latest Gate Checks Passing by Suite",
|
||||
current_gate_health_by_suite,
|
||||
{"h": 8, "w": 8, "x": 0, "y": 5},
|
||||
unit="percent",
|
||||
@ -4167,12 +4167,12 @@ def build_jobs_dashboard():
|
||||
{"type": "value", "options": {"-1": {"text": "missing"}}}
|
||||
]
|
||||
panels[-1]["description"] = (
|
||||
"Latest pass percentage across required gate dimensions in the daily freshness window. "
|
||||
"100% is clean; missing means the suite has not published recent gate data."
|
||||
"Latest pass percentage across required gate checks in the daily freshness window. "
|
||||
"100% means tests and supporting gates recently passed; missing means no fresh gate data."
|
||||
)
|
||||
reliability_suite_panel = bargauge_panel(
|
||||
9,
|
||||
"Run Reliability by Suite (24h)",
|
||||
"CI Run Success by Suite (24h)",
|
||||
success_rate_by_suite_24h,
|
||||
{"h": 8, "w": 8, "x": 8, "y": 5},
|
||||
unit="percent",
|
||||
@ -4183,8 +4183,8 @@ def build_jobs_dashboard():
|
||||
decimals=2,
|
||||
)
|
||||
reliability_suite_panel["description"] = (
|
||||
"Rolling quality-gate pass rate. This can stay low after failed/debug runs even when "
|
||||
"Current Gate Health is green."
|
||||
"24h CI run success rate. This can stay low after failed, aborted, or debug runs even "
|
||||
"when the latest gate checks are green."
|
||||
)
|
||||
reliability_suite_panel["fieldConfig"]["defaults"]["mappings"] = [
|
||||
{"type": "value", "options": {"-1": {"text": "no runs"}}}
|
||||
@ -4192,13 +4192,13 @@ def build_jobs_dashboard():
|
||||
panels.append(reliability_suite_panel)
|
||||
history_panel = state_timeline_panel(
|
||||
11,
|
||||
"Run Reliability by Suite (7d rolling)",
|
||||
"CI Run Success by Suite (7d rolling)",
|
||||
success_history_by_suite,
|
||||
{"h": 8, "w": 24, "x": 0, "y": 13},
|
||||
thresholds=success_thresholds,
|
||||
description=(
|
||||
"Seven-day rolling quality-gate pass rate per suite. Each suite gets its own lane, "
|
||||
"so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes."
|
||||
"Seven-day rolling CI run success rate per suite. Each suite gets its own lane, "
|
||||
"so failed or aborted runs lower the lane color without creating unreadable 0/100 spikes."
|
||||
),
|
||||
)
|
||||
panels.append(history_panel)
|
||||
@ -4664,7 +4664,7 @@ def build_jobs_dashboard():
|
||||
|
||||
compact_panels.extend(
|
||||
[
|
||||
row_panel(500, "Reliability And Run History", 11, panels=children([11, 153, 12, 13, 14])),
|
||||
row_panel(500, "CI Runs And Test History", 11, panels=children([11, 153, 12, 13, 14])),
|
||||
row_panel(
|
||||
501,
|
||||
"Check Failure Rates By Suite",
|
||||
|
||||
@ -2114,7 +2114,7 @@
|
||||
{
|
||||
"id": 151,
|
||||
"type": "stat",
|
||||
"title": "Run Reliability (24h)",
|
||||
"title": "CI Run Success (24h)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -2195,7 +2195,7 @@
|
||||
"targetBlank": true
|
||||
}
|
||||
],
|
||||
"description": "Percent of published quality-gate runs that passed in 24h; higher means fresher healthy test signal."
|
||||
"description": "Percent of published quality-gate CI runs that completed successfully in 24h; this is run health, not individual test pass rate."
|
||||
},
|
||||
{
|
||||
"id": 152,
|
||||
|
||||
@ -7,7 +7,7 @@
|
||||
{
|
||||
"id": 2,
|
||||
"type": "stat",
|
||||
"title": "Run Reliability (24h)",
|
||||
"title": "CI Run Success Rate (24h)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -77,12 +77,12 @@
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"description": "Percent of selected quality-gate runs that passed in 24h; higher means fresher healthy test signal."
|
||||
"description": "Percent of selected quality-gate CI runs that completed successfully in 24h; this is run health, not individual test pass rate."
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"type": "stat",
|
||||
"title": "Run Reliability (30d)",
|
||||
"title": "CI Run Success Rate (30d)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -152,7 +152,7 @@
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"description": "Percent of selected quality-gate runs that passed in 30d; higher means more stable test automation."
|
||||
"description": "Percent of selected quality-gate CI runs that completed successfully in 30d; higher means more stable automation."
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
@ -231,7 +231,7 @@
|
||||
{
|
||||
"id": 5,
|
||||
"type": "stat",
|
||||
"title": "Runs (24h)",
|
||||
"title": "CI Runs (24h)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -288,7 +288,7 @@
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"description": "Selected quality-gate run count in 24h; zero means the dashboard may be stale."
|
||||
"description": "Selected quality-gate CI run count in 24h; zero means the dashboard may be stale."
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
@ -442,7 +442,7 @@
|
||||
{
|
||||
"id": 8,
|
||||
"type": "bargauge",
|
||||
"title": "Current Gate Health by Suite",
|
||||
"title": "Latest Gate Checks Passing by Suite",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -530,12 +530,12 @@
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Latest pass percentage across required gate dimensions in the daily freshness window. 100% is clean; missing means the suite has not published recent gate data."
|
||||
"description": "Latest pass percentage across required gate checks in the daily freshness window. 100% means tests and supporting gates recently passed; missing means no fresh gate data."
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"type": "bargauge",
|
||||
"title": "Run Reliability by Suite (24h)",
|
||||
"title": "CI Run Success by Suite (24h)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -623,7 +623,7 @@
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Rolling quality-gate pass rate. This can stay low after failed/debug runs even when Current Gate Health is green."
|
||||
"description": "24h CI run success rate. This can stay low after failed, aborted, or debug runs even when the latest gate checks are green."
|
||||
},
|
||||
{
|
||||
"id": 17,
|
||||
@ -814,7 +814,7 @@
|
||||
{
|
||||
"id": 500,
|
||||
"type": "row",
|
||||
"title": "Reliability And Run History",
|
||||
"title": "CI Runs And Test History",
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
@ -826,8 +826,8 @@
|
||||
{
|
||||
"id": 11,
|
||||
"type": "state-timeline",
|
||||
"title": "Run Reliability by Suite (7d rolling)",
|
||||
"description": "Seven-day rolling quality-gate pass rate per suite. Each suite gets its own lane, so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes.",
|
||||
"title": "CI Run Success by Suite (7d rolling)",
|
||||
"description": "Seven-day rolling CI run success rate per suite. Each suite gets its own lane, so failed or aborted runs lower the lane color without creating unreadable 0/100 spikes.",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -1299,7 +1299,7 @@
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Recent run, coverage, LOC, and category trends for selected suites."
|
||||
"description": "Recent CI run, coverage, LOC, and test-category trends for selected suites."
|
||||
},
|
||||
{
|
||||
"id": 501,
|
||||
|
||||
@ -2123,7 +2123,7 @@ data:
|
||||
{
|
||||
"id": 151,
|
||||
"type": "stat",
|
||||
"title": "Run Reliability (24h)",
|
||||
"title": "CI Run Success (24h)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -2204,7 +2204,7 @@ data:
|
||||
"targetBlank": true
|
||||
}
|
||||
],
|
||||
"description": "Percent of published quality-gate runs that passed in 24h; higher means fresher healthy test signal."
|
||||
"description": "Percent of published quality-gate CI runs that completed successfully in 24h; this is run health, not individual test pass rate."
|
||||
},
|
||||
{
|
||||
"id": 152,
|
||||
|
||||
@ -16,7 +16,7 @@ data:
|
||||
{
|
||||
"id": 2,
|
||||
"type": "stat",
|
||||
"title": "Run Reliability (24h)",
|
||||
"title": "CI Run Success Rate (24h)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -86,12 +86,12 @@ data:
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"description": "Percent of selected quality-gate runs that passed in 24h; higher means fresher healthy test signal."
|
||||
"description": "Percent of selected quality-gate CI runs that completed successfully in 24h; this is run health, not individual test pass rate."
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"type": "stat",
|
||||
"title": "Run Reliability (30d)",
|
||||
"title": "CI Run Success Rate (30d)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -161,7 +161,7 @@ data:
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"description": "Percent of selected quality-gate runs that passed in 30d; higher means more stable test automation."
|
||||
"description": "Percent of selected quality-gate CI runs that completed successfully in 30d; higher means more stable automation."
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
@ -240,7 +240,7 @@ data:
|
||||
{
|
||||
"id": 5,
|
||||
"type": "stat",
|
||||
"title": "Runs (24h)",
|
||||
"title": "CI Runs (24h)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -297,7 +297,7 @@ data:
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"description": "Selected quality-gate run count in 24h; zero means the dashboard may be stale."
|
||||
"description": "Selected quality-gate CI run count in 24h; zero means the dashboard may be stale."
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
@ -451,7 +451,7 @@ data:
|
||||
{
|
||||
"id": 8,
|
||||
"type": "bargauge",
|
||||
"title": "Current Gate Health by Suite",
|
||||
"title": "Latest Gate Checks Passing by Suite",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -539,12 +539,12 @@ data:
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Latest pass percentage across required gate dimensions in the daily freshness window. 100% is clean; missing means the suite has not published recent gate data."
|
||||
"description": "Latest pass percentage across required gate checks in the daily freshness window. 100% means tests and supporting gates recently passed; missing means no fresh gate data."
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"type": "bargauge",
|
||||
"title": "Run Reliability by Suite (24h)",
|
||||
"title": "CI Run Success by Suite (24h)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -632,7 +632,7 @@ data:
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Rolling quality-gate pass rate. This can stay low after failed/debug runs even when Current Gate Health is green."
|
||||
"description": "24h CI run success rate. This can stay low after failed, aborted, or debug runs even when the latest gate checks are green."
|
||||
},
|
||||
{
|
||||
"id": 17,
|
||||
@ -823,7 +823,7 @@ data:
|
||||
{
|
||||
"id": 500,
|
||||
"type": "row",
|
||||
"title": "Reliability And Run History",
|
||||
"title": "CI Runs And Test History",
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
@ -835,8 +835,8 @@ data:
|
||||
{
|
||||
"id": 11,
|
||||
"type": "state-timeline",
|
||||
"title": "Run Reliability by Suite (7d rolling)",
|
||||
"description": "Seven-day rolling quality-gate pass rate per suite. Each suite gets its own lane, so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes.",
|
||||
"title": "CI Run Success by Suite (7d rolling)",
|
||||
"description": "Seven-day rolling CI run success rate per suite. Each suite gets its own lane, so failed or aborted runs lower the lane color without creating unreadable 0/100 spikes.",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -1308,7 +1308,7 @@ data:
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Recent run, coverage, LOC, and category trends for selected suites."
|
||||
"description": "Recent CI run, coverage, LOC, and test-category trends for selected suites."
|
||||
},
|
||||
{
|
||||
"id": 501,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user