monitoring(testing): derive gate health from raw checks
This commit is contained in:
parent
813d057c6d
commit
ba9b72312a
@ -627,6 +627,49 @@ PLATFORM_TEST_ACTIVE_SUITES_24H = (
|
||||
PLATFORM_TEST_POINT_WINDOW = "1h"
|
||||
PLATFORM_TEST_FRESH_WINDOW = "30h"
|
||||
PLATFORM_TEST_LATEST_WINDOW = "30d"
|
||||
|
||||
|
||||
def platform_check_status_expr(
    suite_matcher: str,
    *,
    branch_matcher: str = 'branch!=""',
    check_matcher: str = 'check!=""',
    status_matcher: str = 'status!=""',
    window: str = PLATFORM_TEST_FRESH_WINDOW,
) -> str:
    """Return recent check gauges normalized to a status label.

    Builds a PromQL expression over every series whose metric name matches
    ``.*_quality_gate_checks_total``, filtered by ``suite_matcher``,
    ``PLATFORM_TEST_EXPORT_FILTER`` and ``check_matcher``.  Four alternatives
    are combined with ``or`` and then summed by ``(suite, branch, check, status)``:

    1. series that carry a ``status`` label matching ``status_matcher``;
    2. series that carry only a ``result`` label, whose value is copied into
       ``status`` with ``label_replace``;
    3. and 4. the same two shapes for series exported with ``branch=""``,
       whose branch is taken from ``platform_quality_gate_build_info`` via
       ``* on (suite) group_left(branch)``.

    Args:
        suite_matcher: PromQL label matcher(s) selecting the suite(s).
        branch_matcher: PromQL label matcher(s) for the branch; also applied
            to the build-info series used to infer a missing branch.
        check_matcher: PromQL label matcher(s) for the check name.
        status_matcher: PromQL label matcher(s) on ``status``.  The same
            matcher(s), renamed to ``result``, are applied to series that only
            carry a ``result`` label.  The selected series must actually carry
            the label (a ``!=""`` guard is always appended).
        window: Range passed to ``last_over_time`` for every selector.

    Returns:
        The PromQL expression as a string.
    """
    # Function-scope import: the file's import block is not part of this block.
    import re

    # Rename only the *label name* of each status matcher.  A plain
    # str.replace(..., 1) would leave later matchers in a compound
    # status_matcher untouched (yielding `status=~...,status=""`, which can
    # never match) and would also rewrite labels that merely contain "status".
    result_matcher = re.sub(
        r'(^|,)(\s*)status(?=\s*(?:=~|!~|!=|=))',
        r'\1\2result',
        status_matcher,
    )

    # PromQL treats a missing label as "", so a negative matcher such as
    # status!~"ok|passed" also selects series that have no status label at
    # all (for example result-only series that passed).  Require the label to
    # be present so each branch only sees the series shape it is meant for.
    status_labeled = f'{status_matcher},status!=""'
    result_labeled = f'{result_matcher},result!="",status=""'

    base = (
        f'__name__=~".*_quality_gate_checks_total",{suite_matcher},'
        f'{PLATFORM_TEST_EXPORT_FILTER},{check_matcher}'
    )
    # One series per (suite, branch) seen in the window; used to attach a
    # branch label to check series that were exported with branch="".
    # NOTE(review): if a suite reports several matching branches inside the
    # window, "on (suite) group_left(branch)" may fail with a many-to-many
    # match error - confirm against real data.
    # NOTE(review): the join multiplies the check value by the build-info
    # value; this presumably relies on build-info samples being 1 - confirm.
    build_info = (
        'max by (suite, branch) (last_over_time(platform_quality_gate_build_info'
        f'{{{suite_matcher},{PLATFORM_TEST_EXPORT_FILTER},{branch_matcher}}}[{window}]))'
    )
    with_status = (
        f'last_over_time({{{base},{branch_matcher},{status_labeled}}}[{window}])'
    )
    with_result = (
        f'label_replace(last_over_time({{{base},{branch_matcher},{result_labeled}}}'
        f'[{window}]), "status", "$1", "result", "(.*)")'
    )
    inferred_status = (
        f'(last_over_time({{{base},branch="",{status_labeled}}}[{window}]) '
        f'* on (suite) group_left(branch) ({build_info}))'
    )
    inferred_result = (
        f'(label_replace(last_over_time({{{base},branch="",{result_labeled}}}'
        f'[{window}]), "status", "$1", "result", "(.*)") '
        f'* on (suite) group_left(branch) ({build_info}))'
    )
    return (
        "sum by (suite, branch, check, status) ("
        f"{with_status} or {with_result} or {inferred_status} or {inferred_result}"
        ")"
    )
|
||||
|
||||
|
||||
PLATFORM_TEST_SUCCESS_RATE_SUITE_TARGETS = [
|
||||
{
|
||||
"refId": chr(ord("A") + index),
|
||||
@ -679,18 +722,26 @@ PLATFORM_TEST_CHECKS_SELECTOR = (
|
||||
f"{PLATFORM_TEST_EXPORT_FILTER}"
|
||||
)
|
||||
PLATFORM_TEST_CHECK_ROLLUP_MATCHERS = (
|
||||
f'suite=~"{PLATFORM_TEST_SUITE_CANONICAL_MATCHER}",branch!="",check!=""'
|
||||
f'suite=~"{PLATFORM_TEST_SUITE_CANONICAL_MATCHER}"'
|
||||
)
|
||||
# This recording rule already folds recent scrape freshness into the current sample.
|
||||
# Do not wrap it in a multi-hour range for "latest" panels, or old failures linger.
|
||||
PLATFORM_TEST_CHECK_ROLLUP_SELECTOR = (
|
||||
f'platform_quality:check_status:present_1h{{{PLATFORM_TEST_CHECK_ROLLUP_MATCHERS},status!=""}}'
|
||||
PLATFORM_TEST_PRIMARY_BRANCH_MATCHER = 'branch=~"main|master|origin/main|origin/master"'
|
||||
PLATFORM_TEST_CHECK_ROLLUP_SELECTOR = platform_check_status_expr(
|
||||
PLATFORM_TEST_CHECK_ROLLUP_MATCHERS,
|
||||
branch_matcher=PLATFORM_TEST_PRIMARY_BRANCH_MATCHER,
|
||||
)
|
||||
PLATFORM_TEST_CHECK_ROLLUP_OK_SELECTOR = (
|
||||
f'platform_quality:check_status:present_1h{{{PLATFORM_TEST_CHECK_ROLLUP_MATCHERS},status=~"{PLATFORM_TEST_NON_FAILURE_STATUS}"}}'
|
||||
platform_check_status_expr(
|
||||
PLATFORM_TEST_CHECK_ROLLUP_MATCHERS,
|
||||
branch_matcher=PLATFORM_TEST_PRIMARY_BRANCH_MATCHER,
|
||||
status_matcher=f'status=~"{PLATFORM_TEST_NON_FAILURE_STATUS}"',
|
||||
)
|
||||
)
|
||||
PLATFORM_TEST_CHECK_ROLLUP_FAILED_SELECTOR = (
|
||||
f'platform_quality:check_status:present_1h{{{PLATFORM_TEST_CHECK_ROLLUP_MATCHERS},status!~"{PLATFORM_TEST_NON_FAILURE_STATUS}"}}'
|
||||
platform_check_status_expr(
|
||||
PLATFORM_TEST_CHECK_ROLLUP_MATCHERS,
|
||||
branch_matcher=PLATFORM_TEST_PRIMARY_BRANCH_MATCHER,
|
||||
status_matcher=f'status!~"{PLATFORM_TEST_NON_FAILURE_STATUS}"',
|
||||
)
|
||||
)
|
||||
PLATFORM_TEST_CHECK_ROLLUP_SEEN_FLAGS = (
|
||||
f'clamp_max(max by (suite, check) (({PLATFORM_TEST_CHECK_ROLLUP_SELECTOR}) > 0), 1)'
|
||||
@ -3776,9 +3827,11 @@ def build_jobs_dashboard():
|
||||
check_matcher: str = 'check!=""',
|
||||
status_matcher: str = 'status!=""',
|
||||
) -> str:
|
||||
selector = (
|
||||
f'platform_quality:check_status:present_1h{{suite=~"{suite_var}",branch!="",'
|
||||
f'branch=~"{branch_var}",{check_matcher},{status_matcher}}}'
|
||||
selector = platform_check_status_expr(
|
||||
f'suite=~"{suite_var}"',
|
||||
branch_matcher=f'branch!="",branch=~"{branch_var}"',
|
||||
check_matcher=check_matcher,
|
||||
status_matcher=status_matcher,
|
||||
)
|
||||
return selector
|
||||
|
||||
|
||||
@ -227,8 +227,11 @@ def test_jobs_dashboard_separates_current_gate_health_from_reliability():
|
||||
|
||||
current_gate_expr = panels_by_title["Latest Gate Checks Passing by Suite"]["targets"][0]["expr"]
|
||||
assert 'check)' in current_gate_expr
|
||||
assert "platform_quality:check_status:present_1h" in current_gate_expr
|
||||
assert "platform_quality_gate_checks_total" not in current_gate_expr
|
||||
assert "platform_quality:check_status:present_1h" not in current_gate_expr
|
||||
assert '.*_quality_gate_checks_total' in current_gate_expr
|
||||
assert "last_over_time" in current_gate_expr
|
||||
assert 'label_replace' in current_gate_expr
|
||||
assert 'result=~"ok|passed|success|not_applicable|skipped|na|n/a"' in current_gate_expr
|
||||
assert 'status=~"ok|passed|success|not_applicable|skipped|na|n/a"' in current_gate_expr
|
||||
assert 'status!~"ok|passed|success|not_applicable|skipped|na|n/a"' in current_gate_expr
|
||||
assert "unless on(suite, check)" in current_gate_expr
|
||||
@ -337,8 +340,10 @@ def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint():
|
||||
assert failure_rate_panel["fieldConfig"]["defaults"]["max"] == 100
|
||||
assert failure_rate_panel["fieldConfig"]["defaults"]["thresholds"]["steps"][0]["color"] == "dark-blue"
|
||||
assert "increase(" not in failure_rate_panel["targets"][0]["expr"]
|
||||
assert "platform_quality:check_status:present_1h" in failure_rate_panel["targets"][0]["expr"]
|
||||
assert "platform_quality_gate_checks_total" not in failure_rate_panel["targets"][0]["expr"]
|
||||
assert "platform_quality:check_status:present_1h" not in failure_rate_panel["targets"][0]["expr"]
|
||||
assert '.*_quality_gate_checks_total' in failure_rate_panel["targets"][0]["expr"]
|
||||
assert "last_over_time" in failure_rate_panel["targets"][0]["expr"]
|
||||
assert 'label_replace' in failure_rate_panel["targets"][0]["expr"]
|
||||
assert "0 *" in failure_rate_panel["targets"][0]["expr"]
|
||||
assert "and on(suite)" not in failure_rate_panel["targets"][0]["expr"]
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user