monitoring: tune testing dashboard and gate rollups

This commit is contained in:
jenkins 2026-05-15 14:26:06 -03:00
parent 0c11a64d25
commit 6adbe457c4
8 changed files with 2853 additions and 2903 deletions

View File

@ -31,6 +31,7 @@
"id": "KSV-0014", "id": "KSV-0014",
"targets": [ "targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml", "infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
"infrastructure/core/node-prefer-noschedule-cronjob.yaml",
"infrastructure/core/ntp-sync-daemonset.yaml", "infrastructure/core/ntp-sync-daemonset.yaml",
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml", "infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml", "infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
@ -98,6 +99,7 @@
"services/keycloak/oneoffs/ldap-federation-job.yaml", "services/keycloak/oneoffs/ldap-federation-job.yaml",
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml", "services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml", "services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
"services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml", "services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml", "services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml", "services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
@ -152,6 +154,7 @@
"services/monitoring/platform-quality-gateway-deployment.yaml", "services/monitoring/platform-quality-gateway-deployment.yaml",
"services/monitoring/platform-quality-suite-probe-cronjob.yaml", "services/monitoring/platform-quality-suite-probe-cronjob.yaml",
"services/monitoring/postmark-exporter-deployment.yaml", "services/monitoring/postmark-exporter-deployment.yaml",
"services/monitoring/vmalert-atlas-availability.yaml",
"services/monitoring/vault-sync-deployment.yaml", "services/monitoring/vault-sync-deployment.yaml",
"services/nextcloud-mail-sync/cronjob.yaml", "services/nextcloud-mail-sync/cronjob.yaml",
"services/nextcloud/collabora.yaml", "services/nextcloud/collabora.yaml",
@ -247,6 +250,7 @@
"targets": [ "targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml", "infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
"infrastructure/core/coredns-deployment.yaml", "infrastructure/core/coredns-deployment.yaml",
"infrastructure/core/node-prefer-noschedule-cronjob.yaml",
"infrastructure/core/ntp-sync-daemonset.yaml", "infrastructure/core/ntp-sync-daemonset.yaml",
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml", "infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml", "infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
@ -310,6 +314,7 @@
"services/keycloak/oneoffs/ldap-federation-job.yaml", "services/keycloak/oneoffs/ldap-federation-job.yaml",
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml", "services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml", "services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
"services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml", "services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml", "services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml", "services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
@ -364,6 +369,7 @@
"services/monitoring/platform-quality-gateway-deployment.yaml", "services/monitoring/platform-quality-gateway-deployment.yaml",
"services/monitoring/platform-quality-suite-probe-cronjob.yaml", "services/monitoring/platform-quality-suite-probe-cronjob.yaml",
"services/monitoring/postmark-exporter-deployment.yaml", "services/monitoring/postmark-exporter-deployment.yaml",
"services/monitoring/vmalert-atlas-availability.yaml",
"services/monitoring/vault-sync-deployment.yaml", "services/monitoring/vault-sync-deployment.yaml",
"services/nextcloud/collabora.yaml", "services/nextcloud/collabora.yaml",
"services/oauth2-proxy/deployment.yaml", "services/oauth2-proxy/deployment.yaml",

View File

@ -3346,9 +3346,10 @@ def build_jobs_dashboard():
f'/ clamp_min(sum by (suite) (max by (suite, check) (({{{checks_selector}}} > bool 0))), 1)) ' f'/ clamp_min(sum by (suite) (max by (suite, check) (({{{checks_selector}}} > bool 0))), 1)) '
f'or on(suite) ({selected_suite_zero})' f'or on(suite) ({selected_suite_zero})'
) )
success_history_runs = f'sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[$__interval]))'
success_history_by_suite = ( success_history_by_suite = (
f'100 * (sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_success_selector}}}[$__interval])) ' f'(100 * sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_success_selector}}}[$__interval])) '
f'/ clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[$__interval]))), 1))' f'/ ({success_history_runs})) and on(suite) (({success_history_runs}) > 0)'
) )
coverage_by_suite = ( coverage_by_suite = (
f'(max by (suite) ({{{coverage_metric_selector}}})) ' f'(max by (suite) ({{{coverage_metric_selector}}})) '
@ -3357,7 +3358,6 @@ def build_jobs_dashboard():
coverage_with_missing = ( coverage_with_missing = (
f"({coverage_by_suite}) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[30d])))) - 1)" f"({coverage_by_suite}) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[30d])))) - 1)"
) )
coverage_gap = f"clamp_min(95 - ({coverage_by_suite}), 0)"
smell_by_suite = f'max by (suite) (platform_quality_gate_source_lines_over_500_total{{{smell_selector}}})' smell_by_suite = f'max by (suite) (platform_quality_gate_source_lines_over_500_total{{{smell_selector}}})'
loc_files_by_suite = f'max by (suite) (platform_quality_gate_source_files_total{{{smell_selector}}})' loc_files_by_suite = f'max by (suite) (platform_quality_gate_source_files_total{{{smell_selector}}})'
smell_with_missing = ( smell_with_missing = (
@ -3384,6 +3384,14 @@ def build_jobs_dashboard():
f"or on(suite) (100 * (1 - clamp_max(({loc_violations_history}), 1))) " f"or on(suite) (100 * (1 - clamp_max(({loc_violations_history}), 1))) "
f"or on(suite) ({selected_suite_zero})" f"or on(suite) ({selected_suite_zero})"
) )
coverage_loc_compliance_history = (
"min by (suite) ("
f'label_replace(max_over_time(platform_quality_gate_workspace_line_coverage_percent{{{workspace_coverage_selector}}}[$__interval]), '
'"dimension", "coverage", "__name__", ".*") '
"or "
f'label_replace(({loc_limit_compliance_history}), "dimension", "loc", "__name__", ".*")'
")"
)
average_coverage = f"(avg(({coverage_by_suite})) or on() vector(0))" average_coverage = f"(avg(({coverage_by_suite})) or on() vector(0))"
suites_loc_violating = f'(sum((({smell_by_suite}) > bool 0)) or on() vector(0))' suites_loc_violating = f'(sum((({smell_by_suite}) > bool 0)) or on() vector(0))'
@ -3397,39 +3405,36 @@ def build_jobs_dashboard():
def _check_state_series(regex: str, failed: bool) -> str: def _check_state_series(regex: str, failed: bool) -> str:
state = f'result!~"{non_failure}"' if failed else f'result=~"{success}"' state = f'result!~"{non_failure}"' if failed else f'result=~"{success}"'
core = ( core = f'sum by (suite) (increase({{{checks_selector},check=~"{regex}",{state}}}[$__interval]))'
f'sum by (suite) (max_over_time(({{{checks_selector},check=~"{regex}",{state}}})[$__interval]))'
)
return f'({core}) or on(suite) ({selected_suite_zero})' return f'({core}) or on(suite) ({selected_suite_zero})'
problematic_tests_history_core = ( problematic_tests_history_core = (
f'topk(12, sum by (suite, test, jenkins_job) (increase(platform_quality_gate_test_case_result{{suite=~"{suite_var}",branch=~"{branch_var}",test!="__no_test_cases__",status="failed",{exported}}}[$__interval])))' f'topk(12, sum by (suite, test) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test!="__no_test_cases__",status="failed"}}))'
) )
problematic_tests_history = f"({problematic_tests_history_core}) or on() vector(0)" problematic_tests_history = f"({problematic_tests_history_core}) or on() vector(0)"
worst_test_per_suite_core = ( worst_test_per_suite_core = (
f'topk by (suite) (1, sum by (suite, test, jenkins_job) (increase(platform_quality_gate_test_case_result{{suite=~"{suite_var}",branch=~"{branch_var}",test!="__no_test_cases__",status="failed",{exported}}}[30d])))' f'topk by (suite) (1, sum by (suite, test) (increase(platform_quality_gate_test_case_result{{suite=~"{suite_var}",branch=~"{branch_var}",test!="__no_test_cases__",status="failed",{exported}}}[30d])))'
) )
worst_test_per_suite = f"({worst_test_per_suite_core}) or on() vector(0)" worst_test_per_suite = f"({worst_test_per_suite_core}) or on() vector(0)"
selected_test_pass_fail = [ selected_test_pass_fail = [
{ {
"refId": "A", "refId": "A",
"expr": f'sum by (suite, test, status, jenkins_job, build_number) (max_over_time(platform_quality_gate_test_case_result{{{test_case_selector},status="passed"}}[$__interval])) or on() vector(0)', "expr": f'sum by (suite) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__",status="passed"}}) or on() vector(0)',
"legendFormat": "passed · {{suite}} · #{{build_number}}", "legendFormat": "{{suite}} passed",
}, },
{ {
"refId": "B", "refId": "B",
"expr": f'sum by (suite, test, status, jenkins_job, build_number) (max_over_time(platform_quality_gate_test_case_result{{{test_case_selector},status="failed"}}[$__interval])) or on() vector(0)', "expr": f'sum by (suite) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__",status="failed"}}) or on() vector(0)',
"legendFormat": "failed · {{suite}} · #{{build_number}}", "legendFormat": "{{suite}} failed",
}, },
{ {
"refId": "C", "refId": "C",
"expr": f'sum by (suite, test, status, jenkins_job, build_number) (max_over_time(platform_quality_gate_test_case_result{{{test_case_selector},status="skipped"}}[$__interval])) or on() vector(0)', "expr": f'sum by (suite) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__",status="skipped"}}) or on() vector(0)',
"legendFormat": "skipped · {{suite}} · #{{build_number}}", "legendFormat": "{{suite}} skipped",
}, },
] ]
selected_test_pass_rate = ( selected_test_pass_rate = (
f'100 * (sum by (suite, test) (max_over_time(platform_quality_gate_test_case_result{{{test_case_selector},status="passed"}}[$__interval]))) ' f'avg by (suite) (platform_quality:test_case_pass_rate:percent_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__"}})'
f'/ clamp_min((sum by (suite, test) (max_over_time(platform_quality_gate_test_case_result{{{test_case_selector},status=~"passed|failed|error|skipped"}}[$__interval]))), 1)'
) )
recent_branch_evidence = ( recent_branch_evidence = (
f'sort_desc(count by (suite, branch) (max_over_time(platform_quality_gate_build_info{{{build_info_selector}}}[30d])))' f'sort_desc(count by (suite, branch) (max_over_time(platform_quality_gate_build_info{{{build_info_selector}}}[30d])))'
@ -3442,70 +3447,70 @@ def build_jobs_dashboard():
missing = f"(({suite_universe}) unless on(suite) {presence_expr})" missing = f"(({suite_universe}) unless on(suite) {presence_expr})"
return f"({missing}) or on(suite) (0 * ({suite_universe}))" return f"({missing}) or on(suite) (0 * ({suite_universe}))"
missing_tests_by_suite = _missing_suite_series( def _present_suite_percent(presence_expr: str) -> str:
present = f"(({suite_universe}) and on(suite) {presence_expr})"
return f"(100 * ({present})) or on(suite) (0 * ({suite_universe}))"
present_tests_by_suite = _present_suite_percent(
f'count by (suite) ({{__name__=~".*_quality_gate_tests_total",{exported}}})' f'count by (suite) ({{__name__=~".*_quality_gate_tests_total",{exported}}})'
) )
missing_checks_by_suite = _missing_suite_series( present_checks_by_suite = _present_suite_percent(
f'count by (suite) ({{__name__=~".*_quality_gate_checks_total",{exported}}})' f'count by (suite) ({{__name__=~".*_quality_gate_checks_total",{exported}}})'
) )
missing_coverage_by_suite = _missing_suite_series( present_coverage_by_suite = _present_suite_percent(
f"count by (suite) (platform_quality_gate_workspace_line_coverage_percent{{{exported}}})" f"count by (suite) (platform_quality_gate_workspace_line_coverage_percent{{{exported}}})"
) )
missing_loc_by_suite = _missing_suite_series( present_loc_by_suite = _present_suite_percent(
f"count by (suite) (platform_quality_gate_source_lines_over_500_total{{{exported}}}) " f"count by (suite) (platform_quality_gate_source_lines_over_500_total{{{exported}}}) "
f"and on(suite) count by (suite) (platform_quality_gate_source_files_total{{{exported}}})" f"and on(suite) count by (suite) (platform_quality_gate_source_files_total{{{exported}}})"
) )
missing_test_case_by_suite = _missing_suite_series( present_test_case_by_suite = _present_suite_percent(
f"count by (suite) (platform_quality_gate_test_case_result{{{exported}}})" f"count by (suite) (platform_quality_gate_test_case_result{{{exported}}})"
) )
placeholder_test_case_by_suite = _missing_suite_series( real_test_case_by_suite = _present_suite_percent(
f'count by (suite) (platform_quality_gate_test_case_result{{{exported},test!="__no_test_cases__"}})' f'count by (suite) (platform_quality_gate_test_case_result{{{exported},test!="__no_test_cases__"}})'
) )
dark_red = "dark-red"
dark_orange = "dark-orange"
dark_yellow = "dark-yellow"
dark_green = "dark-green"
dark_blue = "dark-blue"
success_thresholds = { success_thresholds = {
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{"color": "red", "value": None}, {"color": dark_red, "value": None},
{"color": "orange", "value": 90}, {"color": dark_orange, "value": 90},
{"color": "yellow", "value": 93}, {"color": dark_yellow, "value": 93},
{"color": "green", "value": 95}, {"color": dark_green, "value": 95},
{"color": "blue", "value": 100}, {"color": dark_blue, "value": 100},
], ],
} }
coverage_thresholds = success_thresholds coverage_thresholds = success_thresholds
failures_thresholds = { failures_thresholds = {
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{"color": "green", "value": None}, {"color": dark_green, "value": None},
{"color": "yellow", "value": 1}, {"color": dark_yellow, "value": 1},
{"color": "orange", "value": 3}, {"color": dark_orange, "value": 3},
{"color": "red", "value": 5}, {"color": dark_red, "value": 5},
],
}
coverage_gap_thresholds = {
"mode": "absolute",
"steps": [
{"color": "green", "value": None},
{"color": "yellow", "value": 1},
{"color": "orange", "value": 5},
{"color": "red", "value": 10},
], ],
} }
smell_thresholds = { smell_thresholds = {
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{"color": "red", "value": None}, {"color": dark_red, "value": None},
{"color": "green", "value": 0}, {"color": dark_green, "value": 0},
{"color": "yellow", "value": 1}, {"color": dark_yellow, "value": 1},
{"color": "orange", "value": 3}, {"color": dark_orange, "value": 3},
{"color": "red", "value": 5}, {"color": dark_red, "value": 5},
], ],
} }
missing_thresholds = { missing_thresholds = {
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{"color": "green", "value": None}, {"color": dark_green, "value": None},
{"color": "red", "value": 1}, {"color": dark_red, "value": 1},
], ],
} }
@ -3554,7 +3559,7 @@ def build_jobs_dashboard():
instant=True, instant=True,
thresholds={ thresholds={
"mode": "absolute", "mode": "absolute",
"steps": [{"color": "red", "value": None}, {"color": "green", "value": 1}], "steps": [{"color": dark_red, "value": None}, {"color": dark_green, "value": 1}],
}, },
) )
) )
@ -3620,21 +3625,6 @@ def build_jobs_dashboard():
{"type": "value", "options": {"-1": {"text": "no runs"}}} {"type": "value", "options": {"-1": {"text": "no runs"}}}
] ]
panels.append(reliability_suite_panel) panels.append(reliability_suite_panel)
coverage_gap_panel = bargauge_panel(
10,
"Coverage Gap to 95% by Suite",
coverage_gap,
{"h": 8, "w": 8, "x": 16, "y": 5},
unit="percent",
instant=True,
legend="{{suite}}",
sort_order="desc",
thresholds=coverage_gap_thresholds,
decimals=2,
)
coverage_gap_panel["description"] = "Gap from the 95% target. 0 means the suite is at or above target."
panels.append(coverage_gap_panel)
history_panel = timeseries_panel( history_panel = timeseries_panel(
11, 11,
"Run Reliability History by Suite", "Run Reliability History by Suite",
@ -3676,15 +3666,10 @@ def build_jobs_dashboard():
"expr": f'sum(increase(platform_quality_gate_runs_total{{{runs_failure_selector}}}[$__interval])) or on() vector(0)', "expr": f'sum(increase(platform_quality_gate_runs_total{{{runs_failure_selector}}}[$__interval])) or on() vector(0)',
"legendFormat": "Failure", "legendFormat": "Failure",
}, },
{
"refId": "C",
"expr": f'sum(increase(platform_quality_gate_runs_total{{{runs_selector}}}[$__interval])) or on() vector(0)',
"legendFormat": "Total",
},
], ],
legend_display="list", legend_display="list",
legend_placement="bottom", legend_placement="bottom",
legend_calcs=["lastNotNull", "sum"], legend_calcs=[],
) )
) )
panels.append( panels.append(
@ -3697,18 +3682,13 @@ def build_jobs_dashboard():
targets=[ targets=[
{ {
"refId": "A", "refId": "A",
"expr": f'max_over_time(platform_quality_gate_workspace_line_coverage_percent{{{workspace_coverage_selector}}}[$__interval])', "expr": coverage_loc_compliance_history,
"legendFormat": "{{suite}} coverage %", "legendFormat": "{{suite}}",
},
{
"refId": "B",
"expr": loc_limit_compliance_history,
"legendFormat": "{{suite}} files <=500 LOC %",
}, },
], ],
legend_display="list", legend_display="list",
legend_placement="bottom", legend_placement="bottom",
legend_calcs=["lastNotNull", "max"], legend_calcs=[],
) )
) )
run_mix_panel = pie_panel( run_mix_panel = pie_panel(
@ -3742,12 +3722,14 @@ def build_jobs_dashboard():
legend="{{suite}}", legend="{{suite}}",
legend_display="list", legend_display="list",
legend_placement="bottom", legend_placement="bottom",
legend_calcs=["lastNotNull", "max"], legend_calcs=[],
) )
panel["description"] = ( panel["description"] = (
"One line per selected suite. 1 means this check dimension was in that state during the bucket; " "One line per selected suite, counting check events in this state during each bucket. "
"0 means the suite reported the dimension and it was not in that state." "Intervals without runs stay at zero rather than being treated as failures."
) )
panel["fieldConfig"]["defaults"]["min"] = 0
panel["fieldConfig"]["defaults"].setdefault("custom", {})["spanNulls"] = True
panels.append(panel) panels.append(panel)
for index, (label, regex) in enumerate(check_dimensions[4:]): for index, (label, regex) in enumerate(check_dimensions[4:]):
panel = timeseries_panel( panel = timeseries_panel(
@ -3759,12 +3741,14 @@ def build_jobs_dashboard():
legend="{{suite}}", legend="{{suite}}",
legend_display="list", legend_display="list",
legend_placement="bottom", legend_placement="bottom",
legend_calcs=["lastNotNull", "max"], legend_calcs=[],
) )
panel["description"] = ( panel["description"] = (
"One line per selected suite. 1 means this check dimension was in that state during the bucket; " "One line per selected suite, counting check events in this state during each bucket. "
"0 means the suite reported the dimension and it was not in that state." "Intervals without runs stay at zero rather than being treated as failures."
) )
panel["fieldConfig"]["defaults"]["min"] = 0
panel["fieldConfig"]["defaults"].setdefault("custom", {})["spanNulls"] = True
panels.append(panel) panels.append(panel)
_append_check_trends(130, "Failure Trend", True, 29) _append_check_trends(130, "Failure Trend", True, 29)
@ -3776,10 +3760,10 @@ def build_jobs_dashboard():
problematic_tests_history, problematic_tests_history,
{"h": 8, "w": 12, "x": 0, "y": 57}, {"h": 8, "w": 12, "x": 0, "y": 57},
unit="none", unit="none",
legend="{{suite}} · {{test}}", legend="{{suite}} - {{test}}",
legend_display="list", legend_display="list",
legend_placement="bottom", legend_placement="right",
legend_calcs=["lastNotNull", "max", "sum"], legend_calcs=[],
links=jenkins_suite_links(), links=jenkins_suite_links(),
data_links=jenkins_latest_artifact_data_links(), data_links=jenkins_latest_artifact_data_links(),
) )
@ -3810,7 +3794,7 @@ def build_jobs_dashboard():
targets=selected_test_pass_fail, targets=selected_test_pass_fail,
legend_display="list", legend_display="list",
legend_placement="bottom", legend_placement="bottom",
legend_calcs=["lastNotNull", "sum"], legend_calcs=[],
links=jenkins_suite_links(), links=jenkins_suite_links(),
data_links=jenkins_artifact_data_links(), data_links=jenkins_artifact_data_links(),
) )
@ -3821,10 +3805,10 @@ def build_jobs_dashboard():
selected_test_pass_rate, selected_test_pass_rate,
{"h": 8, "w": 12, "x": 12, "y": 65}, {"h": 8, "w": 12, "x": 12, "y": 65},
unit="percent", unit="percent",
legend="{{suite}} · {{test}}", legend="{{suite}}",
legend_display="list", legend_display="list",
legend_placement="bottom", legend_placement="bottom",
legend_calcs=["lastNotNull", "min"], legend_calcs=[],
links=jenkins_suite_links(), links=jenkins_suite_links(),
data_links=jenkins_artifact_data_links(), data_links=jenkins_artifact_data_links(),
) )
@ -3871,56 +3855,56 @@ def build_jobs_dashboard():
panels.append( panels.append(
bargauge_panel( bargauge_panel(
27, 27,
"Missing Tests Metrics by Suite", "Tests Metrics Present by Suite",
missing_tests_by_suite, present_tests_by_suite,
{"h": 7, "w": 6, "x": 0, "y": 81}, {"h": 7, "w": 6, "x": 0, "y": 81},
unit="none", unit="percent",
instant=True, instant=True,
legend="{{suite}}", legend="{{suite}}",
sort_order="desc", sort_order="desc",
thresholds=missing_thresholds, thresholds=success_thresholds,
decimals=0, decimals=0,
) )
) )
panels.append( panels.append(
bargauge_panel( bargauge_panel(
28, 28,
"Missing Checks Metrics by Suite", "Checks Metrics Present by Suite",
missing_checks_by_suite, present_checks_by_suite,
{"h": 7, "w": 6, "x": 6, "y": 81}, {"h": 7, "w": 6, "x": 6, "y": 81},
unit="none", unit="percent",
instant=True, instant=True,
legend="{{suite}}", legend="{{suite}}",
sort_order="desc", sort_order="desc",
thresholds=missing_thresholds, thresholds=success_thresholds,
decimals=0, decimals=0,
) )
) )
panels.append( panels.append(
bargauge_panel( bargauge_panel(
29, 29,
"Missing Coverage Metrics by Suite", "Coverage Metrics Present by Suite",
missing_coverage_by_suite, present_coverage_by_suite,
{"h": 7, "w": 6, "x": 12, "y": 81}, {"h": 7, "w": 6, "x": 12, "y": 81},
unit="none", unit="percent",
instant=True, instant=True,
legend="{{suite}}", legend="{{suite}}",
sort_order="desc", sort_order="desc",
thresholds=missing_thresholds, thresholds=success_thresholds,
decimals=0, decimals=0,
) )
) )
panels.append( panels.append(
bargauge_panel( bargauge_panel(
30, 30,
"Missing LOC Compliance Metrics by Suite", "LOC Compliance Metrics Present by Suite",
missing_loc_by_suite, present_loc_by_suite,
{"h": 7, "w": 6, "x": 18, "y": 81}, {"h": 7, "w": 6, "x": 18, "y": 81},
unit="none", unit="percent",
instant=True, instant=True,
legend="{{suite}}", legend="{{suite}}",
sort_order="desc", sort_order="desc",
thresholds=missing_thresholds, thresholds=success_thresholds,
decimals=0, decimals=0,
) )
) )
@ -3935,8 +3919,8 @@ def build_jobs_dashboard():
thresholds={ thresholds={
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{"color": "red", "value": None}, {"color": dark_red, "value": None},
{"color": "green", "value": 1}, {"color": dark_green, "value": 1},
], ],
}, },
) )
@ -3988,28 +3972,28 @@ def build_jobs_dashboard():
panels.append( panels.append(
bargauge_panel( bargauge_panel(
148, 148,
"Missing Test-Case Metrics by Suite", "Test-Case Metrics Present by Suite",
missing_test_case_by_suite, present_test_case_by_suite,
{"h": 6, "w": 12, "x": 0, "y": 94}, {"h": 6, "w": 12, "x": 0, "y": 94},
unit="none", unit="percent",
instant=True, instant=True,
legend="{{suite}}", legend="{{suite}}",
sort_order="desc", sort_order="desc",
thresholds=missing_thresholds, thresholds=success_thresholds,
decimals=0, decimals=0,
) )
) )
panels.append( panels.append(
bargauge_panel( bargauge_panel(
151, 151,
"No Real Test Cases by Suite", "Real Test Cases Present by Suite",
placeholder_test_case_by_suite, real_test_case_by_suite,
{"h": 6, "w": 12, "x": 12, "y": 94}, {"h": 6, "w": 12, "x": 12, "y": 94},
unit="none", unit="percent",
instant=True, instant=True,
legend="{{suite}}", legend="{{suite}}",
sort_order="desc", sort_order="desc",
thresholds=missing_thresholds, thresholds=success_thresholds,
decimals=0, decimals=0,
) )
) )
@ -4055,9 +4039,8 @@ def build_jobs_dashboard():
5: {"h": 4, "w": 4, "x": 12, "y": 0}, 5: {"h": 4, "w": 4, "x": 12, "y": 0},
6: {"h": 4, "w": 4, "x": 16, "y": 0}, 6: {"h": 4, "w": 4, "x": 16, "y": 0},
7: {"h": 4, "w": 4, "x": 20, "y": 0}, 7: {"h": 4, "w": 4, "x": 20, "y": 0},
8: {"h": 7, "w": 8, "x": 0, "y": 4}, 8: {"h": 7, "w": 12, "x": 0, "y": 4},
9: {"h": 7, "w": 8, "x": 8, "y": 4}, 9: {"h": 7, "w": 12, "x": 12, "y": 4},
10: {"h": 7, "w": 8, "x": 16, "y": 4},
17: {"h": 7, "w": 12, "x": 0, "y": 11}, 17: {"h": 7, "w": 12, "x": 0, "y": 11},
18: {"h": 7, "w": 12, "x": 12, "y": 11}, 18: {"h": 7, "w": 12, "x": 12, "y": 11},
} }
@ -4070,6 +4053,24 @@ def build_jobs_dashboard():
def children(ids): def children(ids):
return [panel_by_id[panel_id] for panel_id in ids] return [panel_by_id[panel_id] for panel_id in ids]
row_layout = {
11: {"h": 8, "w": 12, "x": 0, "y": 19},
12: {"h": 8, "w": 12, "x": 12, "y": 19},
13: {"h": 8, "w": 12, "x": 0, "y": 27},
14: {"h": 8, "w": 12, "x": 12, "y": 27},
145: {"h": 10, "w": 24, "x": 0, "y": 63},
147: {"h": 8, "w": 24, "x": 0, "y": 74},
146: {"h": 8, "w": 12, "x": 0, "y": 83},
152: {"h": 8, "w": 12, "x": 12, "y": 83},
31: {"h": 6, "w": 4, "x": 0, "y": 111},
32: {"h": 6, "w": 4, "x": 4, "y": 111},
33: {"h": 6, "w": 4, "x": 8, "y": 111},
34: {"h": 6, "w": 6, "x": 12, "y": 111},
35: {"h": 6, "w": 6, "x": 18, "y": 111},
}
for panel_id, grid in row_layout.items():
panel_by_id[panel_id]["gridPos"] = grid
compact_panels.extend( compact_panels.extend(
[ [
row_panel(500, "Reliability And Run History", 18, panels=children([11, 12, 13, 14])), row_panel(500, "Reliability And Run History", 18, panels=children([11, 12, 13, 14])),
@ -4093,9 +4094,15 @@ def build_jobs_dashboard():
), ),
row_panel( row_panel(
504, 504,
"Telemetry Completeness, SonarQube, And Branches", "Telemetry Completeness And Branches",
22, 22,
panels=children([27, 28, 29, 30, 31, 32, 33, 34, 35, 148, 151, 149, 150]), panels=children([27, 28, 29, 30, 148, 151, 149, 150]),
),
row_panel(
505,
"SonarQube Project Health",
23,
panels=children([31, 32, 33, 34, 35]),
), ),
] ]
) )

View File

@ -133,8 +133,8 @@ def test_jobs_dashboard_bar_gauges_use_solid_threshold_colors():
) )
threshold_steps = reliability_panel["fieldConfig"]["defaults"]["thresholds"]["steps"] threshold_steps = reliability_panel["fieldConfig"]["defaults"]["thresholds"]["steps"]
assert {"color": "yellow", "value": 93} in threshold_steps assert {"color": "dark-yellow", "value": 93} in threshold_steps
assert {"color": "blue", "value": 100} in threshold_steps assert {"color": "dark-blue", "value": 100} in threshold_steps
def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint(): def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint():
@ -150,19 +150,24 @@ def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint():
} }
assert len(panels) == 16 assert len(panels) == 16
assert len(visible_query_panels) == 11 assert len(visible_query_panels) == 10
assert sum(len(panel.get("targets", [])) for panel in visible_query_panels) == 11 assert sum(len(panel.get("targets", [])) for panel in visible_query_panels) == 10
assert all(
panel["title"] != "Coverage Gap to 95% by Suite"
for panel in visible_query_panels
)
assert [row["title"] for row in rows] == [ assert [row["title"] for row in rows] == [
"Reliability And Run History", "Reliability And Run History",
"Failure Trends By Check", "Failure Trends By Check",
"Success Trends By Check", "Success Trends By Check",
"Test Drilldowns And Problem Tests", "Test Drilldowns And Problem Tests",
"Telemetry Completeness, SonarQube, And Branches", "Telemetry Completeness And Branches",
"SonarQube Project Health",
] ]
assert all(row["collapsed"] for row in rows) assert all(row["collapsed"] for row in rows)
assert "Failure Trend: Coverage" in nested_panels_by_title assert "Failure Trend: Coverage" in nested_panels_by_title
assert "Success Trend: Supply Chain" in nested_panels_by_title assert "Success Trend: Supply Chain" in nested_panels_by_title
assert "Selected Test Pass Rate History" in nested_panels_by_title assert "Selected Test Pass Rate History" in nested_panels_by_title
assert "Missing Coverage Metrics by Suite" in nested_panels_by_title assert "Coverage Metrics Present by Suite" in nested_panels_by_title
assert "SonarQube API Up" in nested_panels_by_title assert "SonarQube API Up" in nested_panels_by_title

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -139,6 +139,34 @@ data:
labels: labels:
scope: atlas scope: atlas
rollup: yearly rollup: yearly
platform-quality.yaml: |
groups:
- name: platform.quality
interval: 15m
rules:
- record: platform_quality:test_case_status:count_1h
expr: |
sum by (suite, branch, test, status) (
max_over_time(platform_quality_gate_test_case_result{job="platform-quality-ci",test!="__no_test_cases__"}[1h])
)
labels:
rollup: hourly
- record: platform_quality:test_case_pass_rate:percent_1h
expr: |
100 * (
sum by (suite, branch, test) (
max_over_time(platform_quality_gate_test_case_result{job="platform-quality-ci",test!="__no_test_cases__",status="passed"}[1h])
)
)
/
clamp_min(
sum by (suite, branch, test) (
max_over_time(platform_quality_gate_test_case_result{job="platform-quality-ci",test!="__no_test_cases__",status=~"passed|failed|error|skipped"}[1h])
),
1
)
labels:
rollup: hourly
--- ---
@ -168,7 +196,7 @@ spec:
labels: labels:
app: vmalert-atlas-availability app: vmalert-atlas-availability
annotations: annotations:
bstein.dev/rules-revision: "2026-05-10-availability-rollup-v6" bstein.dev/rules-revision: "2026-05-15-platform-quality-rollups-v1"
spec: spec:
serviceAccountName: vmalert-atlas-availability serviceAccountName: vmalert-atlas-availability
affinity: affinity: