monitoring: tune testing dashboard and gate rollups
This commit is contained in:
parent
0c11a64d25
commit
6adbe457c4
@ -31,6 +31,7 @@
|
||||
"id": "KSV-0014",
|
||||
"targets": [
|
||||
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
|
||||
"infrastructure/core/node-prefer-noschedule-cronjob.yaml",
|
||||
"infrastructure/core/ntp-sync-daemonset.yaml",
|
||||
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
|
||||
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
|
||||
@ -98,6 +99,7 @@
|
||||
"services/keycloak/oneoffs/ldap-federation-job.yaml",
|
||||
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
|
||||
@ -152,6 +154,7 @@
|
||||
"services/monitoring/platform-quality-gateway-deployment.yaml",
|
||||
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
|
||||
"services/monitoring/postmark-exporter-deployment.yaml",
|
||||
"services/monitoring/vmalert-atlas-availability.yaml",
|
||||
"services/monitoring/vault-sync-deployment.yaml",
|
||||
"services/nextcloud-mail-sync/cronjob.yaml",
|
||||
"services/nextcloud/collabora.yaml",
|
||||
@ -247,6 +250,7 @@
|
||||
"targets": [
|
||||
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
|
||||
"infrastructure/core/coredns-deployment.yaml",
|
||||
"infrastructure/core/node-prefer-noschedule-cronjob.yaml",
|
||||
"infrastructure/core/ntp-sync-daemonset.yaml",
|
||||
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
|
||||
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
|
||||
@ -310,6 +314,7 @@
|
||||
"services/keycloak/oneoffs/ldap-federation-job.yaml",
|
||||
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
|
||||
@ -364,6 +369,7 @@
|
||||
"services/monitoring/platform-quality-gateway-deployment.yaml",
|
||||
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
|
||||
"services/monitoring/postmark-exporter-deployment.yaml",
|
||||
"services/monitoring/vmalert-atlas-availability.yaml",
|
||||
"services/monitoring/vault-sync-deployment.yaml",
|
||||
"services/nextcloud/collabora.yaml",
|
||||
"services/oauth2-proxy/deployment.yaml",
|
||||
|
||||
@ -3346,9 +3346,10 @@ def build_jobs_dashboard():
|
||||
f'/ clamp_min(sum by (suite) (max by (suite, check) (({{{checks_selector}}} > bool 0))), 1)) '
|
||||
f'or on(suite) ({selected_suite_zero})'
|
||||
)
|
||||
success_history_runs = f'sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[$__interval]))'
|
||||
success_history_by_suite = (
|
||||
f'100 * (sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_success_selector}}}[$__interval])) '
|
||||
f'/ clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[$__interval]))), 1))'
|
||||
f'(100 * sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_success_selector}}}[$__interval])) '
|
||||
f'/ ({success_history_runs})) and on(suite) (({success_history_runs}) > 0)'
|
||||
)
|
||||
coverage_by_suite = (
|
||||
f'(max by (suite) ({{{coverage_metric_selector}}})) '
|
||||
@ -3357,7 +3358,6 @@ def build_jobs_dashboard():
|
||||
coverage_with_missing = (
|
||||
f"({coverage_by_suite}) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[30d])))) - 1)"
|
||||
)
|
||||
coverage_gap = f"clamp_min(95 - ({coverage_by_suite}), 0)"
|
||||
smell_by_suite = f'max by (suite) (platform_quality_gate_source_lines_over_500_total{{{smell_selector}}})'
|
||||
loc_files_by_suite = f'max by (suite) (platform_quality_gate_source_files_total{{{smell_selector}}})'
|
||||
smell_with_missing = (
|
||||
@ -3384,6 +3384,14 @@ def build_jobs_dashboard():
|
||||
f"or on(suite) (100 * (1 - clamp_max(({loc_violations_history}), 1))) "
|
||||
f"or on(suite) ({selected_suite_zero})"
|
||||
)
|
||||
coverage_loc_compliance_history = (
|
||||
"min by (suite) ("
|
||||
f'label_replace(max_over_time(platform_quality_gate_workspace_line_coverage_percent{{{workspace_coverage_selector}}}[$__interval]), '
|
||||
'"dimension", "coverage", "__name__", ".*") '
|
||||
"or "
|
||||
f'label_replace(({loc_limit_compliance_history}), "dimension", "loc", "__name__", ".*")'
|
||||
")"
|
||||
)
|
||||
average_coverage = f"(avg(({coverage_by_suite})) or on() vector(0))"
|
||||
suites_loc_violating = f'(sum((({smell_by_suite}) > bool 0)) or on() vector(0))'
|
||||
|
||||
@ -3397,39 +3405,36 @@ def build_jobs_dashboard():
|
||||
|
||||
def _check_state_series(regex: str, failed: bool) -> str:
|
||||
state = f'result!~"{non_failure}"' if failed else f'result=~"{success}"'
|
||||
core = (
|
||||
f'sum by (suite) (max_over_time(({{{checks_selector},check=~"{regex}",{state}}})[$__interval]))'
|
||||
)
|
||||
core = f'sum by (suite) (increase({{{checks_selector},check=~"{regex}",{state}}}[$__interval]))'
|
||||
return f'({core}) or on(suite) ({selected_suite_zero})'
|
||||
|
||||
problematic_tests_history_core = (
|
||||
f'topk(12, sum by (suite, test, jenkins_job) (increase(platform_quality_gate_test_case_result{{suite=~"{suite_var}",branch=~"{branch_var}",test!="__no_test_cases__",status="failed",{exported}}}[$__interval])))'
|
||||
f'topk(12, sum by (suite, test) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test!="__no_test_cases__",status="failed"}}))'
|
||||
)
|
||||
problematic_tests_history = f"({problematic_tests_history_core}) or on() vector(0)"
|
||||
worst_test_per_suite_core = (
|
||||
f'topk by (suite) (1, sum by (suite, test, jenkins_job) (increase(platform_quality_gate_test_case_result{{suite=~"{suite_var}",branch=~"{branch_var}",test!="__no_test_cases__",status="failed",{exported}}}[30d])))'
|
||||
f'topk by (suite) (1, sum by (suite, test) (increase(platform_quality_gate_test_case_result{{suite=~"{suite_var}",branch=~"{branch_var}",test!="__no_test_cases__",status="failed",{exported}}}[30d])))'
|
||||
)
|
||||
worst_test_per_suite = f"({worst_test_per_suite_core}) or on() vector(0)"
|
||||
selected_test_pass_fail = [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": f'sum by (suite, test, status, jenkins_job, build_number) (max_over_time(platform_quality_gate_test_case_result{{{test_case_selector},status="passed"}}[$__interval])) or on() vector(0)',
|
||||
"legendFormat": "passed · {{suite}} · #{{build_number}}",
|
||||
"expr": f'sum by (suite) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__",status="passed"}}) or on() vector(0)',
|
||||
"legendFormat": "{{suite}} passed",
|
||||
},
|
||||
{
|
||||
"refId": "B",
|
||||
"expr": f'sum by (suite, test, status, jenkins_job, build_number) (max_over_time(platform_quality_gate_test_case_result{{{test_case_selector},status="failed"}}[$__interval])) or on() vector(0)',
|
||||
"legendFormat": "failed · {{suite}} · #{{build_number}}",
|
||||
"expr": f'sum by (suite) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__",status="failed"}}) or on() vector(0)',
|
||||
"legendFormat": "{{suite}} failed",
|
||||
},
|
||||
{
|
||||
"refId": "C",
|
||||
"expr": f'sum by (suite, test, status, jenkins_job, build_number) (max_over_time(platform_quality_gate_test_case_result{{{test_case_selector},status="skipped"}}[$__interval])) or on() vector(0)',
|
||||
"legendFormat": "skipped · {{suite}} · #{{build_number}}",
|
||||
"expr": f'sum by (suite) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__",status="skipped"}}) or on() vector(0)',
|
||||
"legendFormat": "{{suite}} skipped",
|
||||
},
|
||||
]
|
||||
selected_test_pass_rate = (
|
||||
f'100 * (sum by (suite, test) (max_over_time(platform_quality_gate_test_case_result{{{test_case_selector},status="passed"}}[$__interval]))) '
|
||||
f'/ clamp_min((sum by (suite, test) (max_over_time(platform_quality_gate_test_case_result{{{test_case_selector},status=~"passed|failed|error|skipped"}}[$__interval]))), 1)'
|
||||
f'avg by (suite) (platform_quality:test_case_pass_rate:percent_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__"}})'
|
||||
)
|
||||
recent_branch_evidence = (
|
||||
f'sort_desc(count by (suite, branch) (max_over_time(platform_quality_gate_build_info{{{build_info_selector}}}[30d])))'
|
||||
@ -3442,70 +3447,70 @@ def build_jobs_dashboard():
|
||||
missing = f"(({suite_universe}) unless on(suite) {presence_expr})"
|
||||
return f"({missing}) or on(suite) (0 * ({suite_universe}))"
|
||||
|
||||
missing_tests_by_suite = _missing_suite_series(
|
||||
def _present_suite_percent(presence_expr: str) -> str:
|
||||
present = f"(({suite_universe}) and on(suite) {presence_expr})"
|
||||
return f"(100 * ({present})) or on(suite) (0 * ({suite_universe}))"
|
||||
|
||||
present_tests_by_suite = _present_suite_percent(
|
||||
f'count by (suite) ({{__name__=~".*_quality_gate_tests_total",{exported}}})'
|
||||
)
|
||||
missing_checks_by_suite = _missing_suite_series(
|
||||
present_checks_by_suite = _present_suite_percent(
|
||||
f'count by (suite) ({{__name__=~".*_quality_gate_checks_total",{exported}}})'
|
||||
)
|
||||
missing_coverage_by_suite = _missing_suite_series(
|
||||
present_coverage_by_suite = _present_suite_percent(
|
||||
f"count by (suite) (platform_quality_gate_workspace_line_coverage_percent{{{exported}}})"
|
||||
)
|
||||
missing_loc_by_suite = _missing_suite_series(
|
||||
present_loc_by_suite = _present_suite_percent(
|
||||
f"count by (suite) (platform_quality_gate_source_lines_over_500_total{{{exported}}}) "
|
||||
f"and on(suite) count by (suite) (platform_quality_gate_source_files_total{{{exported}}})"
|
||||
)
|
||||
missing_test_case_by_suite = _missing_suite_series(
|
||||
present_test_case_by_suite = _present_suite_percent(
|
||||
f"count by (suite) (platform_quality_gate_test_case_result{{{exported}}})"
|
||||
)
|
||||
placeholder_test_case_by_suite = _missing_suite_series(
|
||||
real_test_case_by_suite = _present_suite_percent(
|
||||
f'count by (suite) (platform_quality_gate_test_case_result{{{exported},test!="__no_test_cases__"}})'
|
||||
)
|
||||
|
||||
dark_red = "dark-red"
|
||||
dark_orange = "dark-orange"
|
||||
dark_yellow = "dark-yellow"
|
||||
dark_green = "dark-green"
|
||||
dark_blue = "dark-blue"
|
||||
success_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "red", "value": None},
|
||||
{"color": "orange", "value": 90},
|
||||
{"color": "yellow", "value": 93},
|
||||
{"color": "green", "value": 95},
|
||||
{"color": "blue", "value": 100},
|
||||
{"color": dark_red, "value": None},
|
||||
{"color": dark_orange, "value": 90},
|
||||
{"color": dark_yellow, "value": 93},
|
||||
{"color": dark_green, "value": 95},
|
||||
{"color": dark_blue, "value": 100},
|
||||
],
|
||||
}
|
||||
coverage_thresholds = success_thresholds
|
||||
failures_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": None},
|
||||
{"color": "yellow", "value": 1},
|
||||
{"color": "orange", "value": 3},
|
||||
{"color": "red", "value": 5},
|
||||
],
|
||||
}
|
||||
coverage_gap_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": None},
|
||||
{"color": "yellow", "value": 1},
|
||||
{"color": "orange", "value": 5},
|
||||
{"color": "red", "value": 10},
|
||||
{"color": dark_green, "value": None},
|
||||
{"color": dark_yellow, "value": 1},
|
||||
{"color": dark_orange, "value": 3},
|
||||
{"color": dark_red, "value": 5},
|
||||
],
|
||||
}
|
||||
smell_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "red", "value": None},
|
||||
{"color": "green", "value": 0},
|
||||
{"color": "yellow", "value": 1},
|
||||
{"color": "orange", "value": 3},
|
||||
{"color": "red", "value": 5},
|
||||
{"color": dark_red, "value": None},
|
||||
{"color": dark_green, "value": 0},
|
||||
{"color": dark_yellow, "value": 1},
|
||||
{"color": dark_orange, "value": 3},
|
||||
{"color": dark_red, "value": 5},
|
||||
],
|
||||
}
|
||||
missing_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": None},
|
||||
{"color": "red", "value": 1},
|
||||
{"color": dark_green, "value": None},
|
||||
{"color": dark_red, "value": 1},
|
||||
],
|
||||
}
|
||||
|
||||
@ -3554,7 +3559,7 @@ def build_jobs_dashboard():
|
||||
instant=True,
|
||||
thresholds={
|
||||
"mode": "absolute",
|
||||
"steps": [{"color": "red", "value": None}, {"color": "green", "value": 1}],
|
||||
"steps": [{"color": dark_red, "value": None}, {"color": dark_green, "value": 1}],
|
||||
},
|
||||
)
|
||||
)
|
||||
@ -3620,21 +3625,6 @@ def build_jobs_dashboard():
|
||||
{"type": "value", "options": {"-1": {"text": "no runs"}}}
|
||||
]
|
||||
panels.append(reliability_suite_panel)
|
||||
coverage_gap_panel = bargauge_panel(
|
||||
10,
|
||||
"Coverage Gap to 95% by Suite",
|
||||
coverage_gap,
|
||||
{"h": 8, "w": 8, "x": 16, "y": 5},
|
||||
unit="percent",
|
||||
instant=True,
|
||||
legend="{{suite}}",
|
||||
sort_order="desc",
|
||||
thresholds=coverage_gap_thresholds,
|
||||
decimals=2,
|
||||
)
|
||||
coverage_gap_panel["description"] = "Gap from the 95% target. 0 means the suite is at or above target."
|
||||
panels.append(coverage_gap_panel)
|
||||
|
||||
history_panel = timeseries_panel(
|
||||
11,
|
||||
"Run Reliability History by Suite",
|
||||
@ -3676,15 +3666,10 @@ def build_jobs_dashboard():
|
||||
"expr": f'sum(increase(platform_quality_gate_runs_total{{{runs_failure_selector}}}[$__interval])) or on() vector(0)',
|
||||
"legendFormat": "Failure",
|
||||
},
|
||||
{
|
||||
"refId": "C",
|
||||
"expr": f'sum(increase(platform_quality_gate_runs_total{{{runs_selector}}}[$__interval])) or on() vector(0)',
|
||||
"legendFormat": "Total",
|
||||
},
|
||||
],
|
||||
legend_display="list",
|
||||
legend_placement="bottom",
|
||||
legend_calcs=["lastNotNull", "sum"],
|
||||
legend_calcs=[],
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
@ -3697,18 +3682,13 @@ def build_jobs_dashboard():
|
||||
targets=[
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": f'max_over_time(platform_quality_gate_workspace_line_coverage_percent{{{workspace_coverage_selector}}}[$__interval])',
|
||||
"legendFormat": "{{suite}} coverage %",
|
||||
},
|
||||
{
|
||||
"refId": "B",
|
||||
"expr": loc_limit_compliance_history,
|
||||
"legendFormat": "{{suite}} files <=500 LOC %",
|
||||
"expr": coverage_loc_compliance_history,
|
||||
"legendFormat": "{{suite}}",
|
||||
},
|
||||
],
|
||||
legend_display="list",
|
||||
legend_placement="bottom",
|
||||
legend_calcs=["lastNotNull", "max"],
|
||||
legend_calcs=[],
|
||||
)
|
||||
)
|
||||
run_mix_panel = pie_panel(
|
||||
@ -3742,12 +3722,14 @@ def build_jobs_dashboard():
|
||||
legend="{{suite}}",
|
||||
legend_display="list",
|
||||
legend_placement="bottom",
|
||||
legend_calcs=["lastNotNull", "max"],
|
||||
legend_calcs=[],
|
||||
)
|
||||
panel["description"] = (
|
||||
"One line per selected suite. 1 means this check dimension was in that state during the bucket; "
|
||||
"0 means the suite reported the dimension and it was not in that state."
|
||||
"One line per selected suite, counting check events in this state during each bucket. "
|
||||
"Intervals without runs stay at zero rather than being treated as failures."
|
||||
)
|
||||
panel["fieldConfig"]["defaults"]["min"] = 0
|
||||
panel["fieldConfig"]["defaults"].setdefault("custom", {})["spanNulls"] = True
|
||||
panels.append(panel)
|
||||
for index, (label, regex) in enumerate(check_dimensions[4:]):
|
||||
panel = timeseries_panel(
|
||||
@ -3759,12 +3741,14 @@ def build_jobs_dashboard():
|
||||
legend="{{suite}}",
|
||||
legend_display="list",
|
||||
legend_placement="bottom",
|
||||
legend_calcs=["lastNotNull", "max"],
|
||||
legend_calcs=[],
|
||||
)
|
||||
panel["description"] = (
|
||||
"One line per selected suite. 1 means this check dimension was in that state during the bucket; "
|
||||
"0 means the suite reported the dimension and it was not in that state."
|
||||
"One line per selected suite, counting check events in this state during each bucket. "
|
||||
"Intervals without runs stay at zero rather than being treated as failures."
|
||||
)
|
||||
panel["fieldConfig"]["defaults"]["min"] = 0
|
||||
panel["fieldConfig"]["defaults"].setdefault("custom", {})["spanNulls"] = True
|
||||
panels.append(panel)
|
||||
|
||||
_append_check_trends(130, "Failure Trend", True, 29)
|
||||
@ -3776,10 +3760,10 @@ def build_jobs_dashboard():
|
||||
problematic_tests_history,
|
||||
{"h": 8, "w": 12, "x": 0, "y": 57},
|
||||
unit="none",
|
||||
legend="{{suite}} · {{test}}",
|
||||
legend="{{suite}} - {{test}}",
|
||||
legend_display="list",
|
||||
legend_placement="bottom",
|
||||
legend_calcs=["lastNotNull", "max", "sum"],
|
||||
legend_placement="right",
|
||||
legend_calcs=[],
|
||||
links=jenkins_suite_links(),
|
||||
data_links=jenkins_latest_artifact_data_links(),
|
||||
)
|
||||
@ -3810,7 +3794,7 @@ def build_jobs_dashboard():
|
||||
targets=selected_test_pass_fail,
|
||||
legend_display="list",
|
||||
legend_placement="bottom",
|
||||
legend_calcs=["lastNotNull", "sum"],
|
||||
legend_calcs=[],
|
||||
links=jenkins_suite_links(),
|
||||
data_links=jenkins_artifact_data_links(),
|
||||
)
|
||||
@ -3821,10 +3805,10 @@ def build_jobs_dashboard():
|
||||
selected_test_pass_rate,
|
||||
{"h": 8, "w": 12, "x": 12, "y": 65},
|
||||
unit="percent",
|
||||
legend="{{suite}} · {{test}}",
|
||||
legend="{{suite}}",
|
||||
legend_display="list",
|
||||
legend_placement="bottom",
|
||||
legend_calcs=["lastNotNull", "min"],
|
||||
legend_calcs=[],
|
||||
links=jenkins_suite_links(),
|
||||
data_links=jenkins_artifact_data_links(),
|
||||
)
|
||||
@ -3871,56 +3855,56 @@ def build_jobs_dashboard():
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
27,
|
||||
"Missing Tests Metrics by Suite",
|
||||
missing_tests_by_suite,
|
||||
"Tests Metrics Present by Suite",
|
||||
present_tests_by_suite,
|
||||
{"h": 7, "w": 6, "x": 0, "y": 81},
|
||||
unit="none",
|
||||
unit="percent",
|
||||
instant=True,
|
||||
legend="{{suite}}",
|
||||
sort_order="desc",
|
||||
thresholds=missing_thresholds,
|
||||
thresholds=success_thresholds,
|
||||
decimals=0,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
28,
|
||||
"Missing Checks Metrics by Suite",
|
||||
missing_checks_by_suite,
|
||||
"Checks Metrics Present by Suite",
|
||||
present_checks_by_suite,
|
||||
{"h": 7, "w": 6, "x": 6, "y": 81},
|
||||
unit="none",
|
||||
unit="percent",
|
||||
instant=True,
|
||||
legend="{{suite}}",
|
||||
sort_order="desc",
|
||||
thresholds=missing_thresholds,
|
||||
thresholds=success_thresholds,
|
||||
decimals=0,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
29,
|
||||
"Missing Coverage Metrics by Suite",
|
||||
missing_coverage_by_suite,
|
||||
"Coverage Metrics Present by Suite",
|
||||
present_coverage_by_suite,
|
||||
{"h": 7, "w": 6, "x": 12, "y": 81},
|
||||
unit="none",
|
||||
unit="percent",
|
||||
instant=True,
|
||||
legend="{{suite}}",
|
||||
sort_order="desc",
|
||||
thresholds=missing_thresholds,
|
||||
thresholds=success_thresholds,
|
||||
decimals=0,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
30,
|
||||
"Missing LOC Compliance Metrics by Suite",
|
||||
missing_loc_by_suite,
|
||||
"LOC Compliance Metrics Present by Suite",
|
||||
present_loc_by_suite,
|
||||
{"h": 7, "w": 6, "x": 18, "y": 81},
|
||||
unit="none",
|
||||
unit="percent",
|
||||
instant=True,
|
||||
legend="{{suite}}",
|
||||
sort_order="desc",
|
||||
thresholds=missing_thresholds,
|
||||
thresholds=success_thresholds,
|
||||
decimals=0,
|
||||
)
|
||||
)
|
||||
@ -3935,8 +3919,8 @@ def build_jobs_dashboard():
|
||||
thresholds={
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "red", "value": None},
|
||||
{"color": "green", "value": 1},
|
||||
{"color": dark_red, "value": None},
|
||||
{"color": dark_green, "value": 1},
|
||||
],
|
||||
},
|
||||
)
|
||||
@ -3988,28 +3972,28 @@ def build_jobs_dashboard():
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
148,
|
||||
"Missing Test-Case Metrics by Suite",
|
||||
missing_test_case_by_suite,
|
||||
"Test-Case Metrics Present by Suite",
|
||||
present_test_case_by_suite,
|
||||
{"h": 6, "w": 12, "x": 0, "y": 94},
|
||||
unit="none",
|
||||
unit="percent",
|
||||
instant=True,
|
||||
legend="{{suite}}",
|
||||
sort_order="desc",
|
||||
thresholds=missing_thresholds,
|
||||
thresholds=success_thresholds,
|
||||
decimals=0,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
151,
|
||||
"No Real Test Cases by Suite",
|
||||
placeholder_test_case_by_suite,
|
||||
"Real Test Cases Present by Suite",
|
||||
real_test_case_by_suite,
|
||||
{"h": 6, "w": 12, "x": 12, "y": 94},
|
||||
unit="none",
|
||||
unit="percent",
|
||||
instant=True,
|
||||
legend="{{suite}}",
|
||||
sort_order="desc",
|
||||
thresholds=missing_thresholds,
|
||||
thresholds=success_thresholds,
|
||||
decimals=0,
|
||||
)
|
||||
)
|
||||
@ -4055,9 +4039,8 @@ def build_jobs_dashboard():
|
||||
5: {"h": 4, "w": 4, "x": 12, "y": 0},
|
||||
6: {"h": 4, "w": 4, "x": 16, "y": 0},
|
||||
7: {"h": 4, "w": 4, "x": 20, "y": 0},
|
||||
8: {"h": 7, "w": 8, "x": 0, "y": 4},
|
||||
9: {"h": 7, "w": 8, "x": 8, "y": 4},
|
||||
10: {"h": 7, "w": 8, "x": 16, "y": 4},
|
||||
8: {"h": 7, "w": 12, "x": 0, "y": 4},
|
||||
9: {"h": 7, "w": 12, "x": 12, "y": 4},
|
||||
17: {"h": 7, "w": 12, "x": 0, "y": 11},
|
||||
18: {"h": 7, "w": 12, "x": 12, "y": 11},
|
||||
}
|
||||
@ -4070,6 +4053,24 @@ def build_jobs_dashboard():
|
||||
def children(ids):
|
||||
return [panel_by_id[panel_id] for panel_id in ids]
|
||||
|
||||
row_layout = {
|
||||
11: {"h": 8, "w": 12, "x": 0, "y": 19},
|
||||
12: {"h": 8, "w": 12, "x": 12, "y": 19},
|
||||
13: {"h": 8, "w": 12, "x": 0, "y": 27},
|
||||
14: {"h": 8, "w": 12, "x": 12, "y": 27},
|
||||
145: {"h": 10, "w": 24, "x": 0, "y": 63},
|
||||
147: {"h": 8, "w": 24, "x": 0, "y": 74},
|
||||
146: {"h": 8, "w": 12, "x": 0, "y": 83},
|
||||
152: {"h": 8, "w": 12, "x": 12, "y": 83},
|
||||
31: {"h": 6, "w": 4, "x": 0, "y": 111},
|
||||
32: {"h": 6, "w": 4, "x": 4, "y": 111},
|
||||
33: {"h": 6, "w": 4, "x": 8, "y": 111},
|
||||
34: {"h": 6, "w": 6, "x": 12, "y": 111},
|
||||
35: {"h": 6, "w": 6, "x": 18, "y": 111},
|
||||
}
|
||||
for panel_id, grid in row_layout.items():
|
||||
panel_by_id[panel_id]["gridPos"] = grid
|
||||
|
||||
compact_panels.extend(
|
||||
[
|
||||
row_panel(500, "Reliability And Run History", 18, panels=children([11, 12, 13, 14])),
|
||||
@ -4093,9 +4094,15 @@ def build_jobs_dashboard():
|
||||
),
|
||||
row_panel(
|
||||
504,
|
||||
"Telemetry Completeness, SonarQube, And Branches",
|
||||
"Telemetry Completeness And Branches",
|
||||
22,
|
||||
panels=children([27, 28, 29, 30, 31, 32, 33, 34, 35, 148, 151, 149, 150]),
|
||||
panels=children([27, 28, 29, 30, 148, 151, 149, 150]),
|
||||
),
|
||||
row_panel(
|
||||
505,
|
||||
"SonarQube Project Health",
|
||||
23,
|
||||
panels=children([31, 32, 33, 34, 35]),
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
@ -133,8 +133,8 @@ def test_jobs_dashboard_bar_gauges_use_solid_threshold_colors():
|
||||
)
|
||||
threshold_steps = reliability_panel["fieldConfig"]["defaults"]["thresholds"]["steps"]
|
||||
|
||||
assert {"color": "yellow", "value": 93} in threshold_steps
|
||||
assert {"color": "blue", "value": 100} in threshold_steps
|
||||
assert {"color": "dark-yellow", "value": 93} in threshold_steps
|
||||
assert {"color": "dark-blue", "value": 100} in threshold_steps
|
||||
|
||||
|
||||
def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint():
|
||||
@ -150,19 +150,24 @@ def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint():
|
||||
}
|
||||
|
||||
assert len(panels) == 16
|
||||
assert len(visible_query_panels) == 11
|
||||
assert sum(len(panel.get("targets", [])) for panel in visible_query_panels) == 11
|
||||
assert len(visible_query_panels) == 10
|
||||
assert sum(len(panel.get("targets", [])) for panel in visible_query_panels) == 10
|
||||
assert all(
|
||||
panel["title"] != "Coverage Gap to 95% by Suite"
|
||||
for panel in visible_query_panels
|
||||
)
|
||||
assert [row["title"] for row in rows] == [
|
||||
"Reliability And Run History",
|
||||
"Failure Trends By Check",
|
||||
"Success Trends By Check",
|
||||
"Test Drilldowns And Problem Tests",
|
||||
"Telemetry Completeness, SonarQube, And Branches",
|
||||
"Telemetry Completeness And Branches",
|
||||
"SonarQube Project Health",
|
||||
]
|
||||
assert all(row["collapsed"] for row in rows)
|
||||
|
||||
assert "Failure Trend: Coverage" in nested_panels_by_title
|
||||
assert "Success Trend: Supply Chain" in nested_panels_by_title
|
||||
assert "Selected Test Pass Rate History" in nested_panels_by_title
|
||||
assert "Missing Coverage Metrics by Suite" in nested_panels_by_title
|
||||
assert "Coverage Metrics Present by Suite" in nested_panels_by_title
|
||||
assert "SonarQube API Up" in nested_panels_by_title
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -139,6 +139,34 @@ data:
|
||||
labels:
|
||||
scope: atlas
|
||||
rollup: yearly
|
||||
platform-quality.yaml: |
|
||||
groups:
|
||||
- name: platform.quality
|
||||
interval: 15m
|
||||
rules:
|
||||
- record: platform_quality:test_case_status:count_1h
|
||||
expr: |
|
||||
sum by (suite, branch, test, status) (
|
||||
max_over_time(platform_quality_gate_test_case_result{job="platform-quality-ci",test!="__no_test_cases__"}[1h])
|
||||
)
|
||||
labels:
|
||||
rollup: hourly
|
||||
- record: platform_quality:test_case_pass_rate:percent_1h
|
||||
expr: |
|
||||
100 * (
|
||||
sum by (suite, branch, test) (
|
||||
max_over_time(platform_quality_gate_test_case_result{job="platform-quality-ci",test!="__no_test_cases__",status="passed"}[1h])
|
||||
)
|
||||
)
|
||||
/
|
||||
clamp_min(
|
||||
sum by (suite, branch, test) (
|
||||
max_over_time(platform_quality_gate_test_case_result{job="platform-quality-ci",test!="__no_test_cases__",status=~"passed|failed|error|skipped"}[1h])
|
||||
),
|
||||
1
|
||||
)
|
||||
labels:
|
||||
rollup: hourly
|
||||
|
||||
---
|
||||
|
||||
@ -168,7 +196,7 @@ spec:
|
||||
labels:
|
||||
app: vmalert-atlas-availability
|
||||
annotations:
|
||||
bstein.dev/rules-revision: "2026-05-10-availability-rollup-v6"
|
||||
bstein.dev/rules-revision: "2026-05-15-platform-quality-rollups-v1"
|
||||
spec:
|
||||
serviceAccountName: vmalert-atlas-availability
|
||||
affinity:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user