monitoring: tune testing dashboard and gate rollups
This commit is contained in:
parent
0c11a64d25
commit
6adbe457c4
@ -31,6 +31,7 @@
|
|||||||
"id": "KSV-0014",
|
"id": "KSV-0014",
|
||||||
"targets": [
|
"targets": [
|
||||||
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
|
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
|
||||||
|
"infrastructure/core/node-prefer-noschedule-cronjob.yaml",
|
||||||
"infrastructure/core/ntp-sync-daemonset.yaml",
|
"infrastructure/core/ntp-sync-daemonset.yaml",
|
||||||
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
|
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
|
||||||
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
|
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
|
||||||
@ -98,6 +99,7 @@
|
|||||||
"services/keycloak/oneoffs/ldap-federation-job.yaml",
|
"services/keycloak/oneoffs/ldap-federation-job.yaml",
|
||||||
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
|
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
|
||||||
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
|
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
|
||||||
|
"services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml",
|
||||||
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
|
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
|
||||||
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
|
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
|
||||||
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
|
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
|
||||||
@ -152,6 +154,7 @@
|
|||||||
"services/monitoring/platform-quality-gateway-deployment.yaml",
|
"services/monitoring/platform-quality-gateway-deployment.yaml",
|
||||||
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
|
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
|
||||||
"services/monitoring/postmark-exporter-deployment.yaml",
|
"services/monitoring/postmark-exporter-deployment.yaml",
|
||||||
|
"services/monitoring/vmalert-atlas-availability.yaml",
|
||||||
"services/monitoring/vault-sync-deployment.yaml",
|
"services/monitoring/vault-sync-deployment.yaml",
|
||||||
"services/nextcloud-mail-sync/cronjob.yaml",
|
"services/nextcloud-mail-sync/cronjob.yaml",
|
||||||
"services/nextcloud/collabora.yaml",
|
"services/nextcloud/collabora.yaml",
|
||||||
@ -247,6 +250,7 @@
|
|||||||
"targets": [
|
"targets": [
|
||||||
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
|
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
|
||||||
"infrastructure/core/coredns-deployment.yaml",
|
"infrastructure/core/coredns-deployment.yaml",
|
||||||
|
"infrastructure/core/node-prefer-noschedule-cronjob.yaml",
|
||||||
"infrastructure/core/ntp-sync-daemonset.yaml",
|
"infrastructure/core/ntp-sync-daemonset.yaml",
|
||||||
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
|
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
|
||||||
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
|
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
|
||||||
@ -310,6 +314,7 @@
|
|||||||
"services/keycloak/oneoffs/ldap-federation-job.yaml",
|
"services/keycloak/oneoffs/ldap-federation-job.yaml",
|
||||||
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
|
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
|
||||||
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
|
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
|
||||||
|
"services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml",
|
||||||
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
|
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
|
||||||
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
|
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
|
||||||
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
|
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
|
||||||
@ -364,6 +369,7 @@
|
|||||||
"services/monitoring/platform-quality-gateway-deployment.yaml",
|
"services/monitoring/platform-quality-gateway-deployment.yaml",
|
||||||
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
|
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
|
||||||
"services/monitoring/postmark-exporter-deployment.yaml",
|
"services/monitoring/postmark-exporter-deployment.yaml",
|
||||||
|
"services/monitoring/vmalert-atlas-availability.yaml",
|
||||||
"services/monitoring/vault-sync-deployment.yaml",
|
"services/monitoring/vault-sync-deployment.yaml",
|
||||||
"services/nextcloud/collabora.yaml",
|
"services/nextcloud/collabora.yaml",
|
||||||
"services/oauth2-proxy/deployment.yaml",
|
"services/oauth2-proxy/deployment.yaml",
|
||||||
|
|||||||
@ -3346,9 +3346,10 @@ def build_jobs_dashboard():
|
|||||||
f'/ clamp_min(sum by (suite) (max by (suite, check) (({{{checks_selector}}} > bool 0))), 1)) '
|
f'/ clamp_min(sum by (suite) (max by (suite, check) (({{{checks_selector}}} > bool 0))), 1)) '
|
||||||
f'or on(suite) ({selected_suite_zero})'
|
f'or on(suite) ({selected_suite_zero})'
|
||||||
)
|
)
|
||||||
|
success_history_runs = f'sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[$__interval]))'
|
||||||
success_history_by_suite = (
|
success_history_by_suite = (
|
||||||
f'100 * (sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_success_selector}}}[$__interval])) '
|
f'(100 * sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_success_selector}}}[$__interval])) '
|
||||||
f'/ clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[$__interval]))), 1))'
|
f'/ ({success_history_runs})) and on(suite) (({success_history_runs}) > 0)'
|
||||||
)
|
)
|
||||||
coverage_by_suite = (
|
coverage_by_suite = (
|
||||||
f'(max by (suite) ({{{coverage_metric_selector}}})) '
|
f'(max by (suite) ({{{coverage_metric_selector}}})) '
|
||||||
@ -3357,7 +3358,6 @@ def build_jobs_dashboard():
|
|||||||
coverage_with_missing = (
|
coverage_with_missing = (
|
||||||
f"({coverage_by_suite}) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[30d])))) - 1)"
|
f"({coverage_by_suite}) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[30d])))) - 1)"
|
||||||
)
|
)
|
||||||
coverage_gap = f"clamp_min(95 - ({coverage_by_suite}), 0)"
|
|
||||||
smell_by_suite = f'max by (suite) (platform_quality_gate_source_lines_over_500_total{{{smell_selector}}})'
|
smell_by_suite = f'max by (suite) (platform_quality_gate_source_lines_over_500_total{{{smell_selector}}})'
|
||||||
loc_files_by_suite = f'max by (suite) (platform_quality_gate_source_files_total{{{smell_selector}}})'
|
loc_files_by_suite = f'max by (suite) (platform_quality_gate_source_files_total{{{smell_selector}}})'
|
||||||
smell_with_missing = (
|
smell_with_missing = (
|
||||||
@ -3384,6 +3384,14 @@ def build_jobs_dashboard():
|
|||||||
f"or on(suite) (100 * (1 - clamp_max(({loc_violations_history}), 1))) "
|
f"or on(suite) (100 * (1 - clamp_max(({loc_violations_history}), 1))) "
|
||||||
f"or on(suite) ({selected_suite_zero})"
|
f"or on(suite) ({selected_suite_zero})"
|
||||||
)
|
)
|
||||||
|
coverage_loc_compliance_history = (
|
||||||
|
"min by (suite) ("
|
||||||
|
f'label_replace(max_over_time(platform_quality_gate_workspace_line_coverage_percent{{{workspace_coverage_selector}}}[$__interval]), '
|
||||||
|
'"dimension", "coverage", "__name__", ".*") '
|
||||||
|
"or "
|
||||||
|
f'label_replace(({loc_limit_compliance_history}), "dimension", "loc", "__name__", ".*")'
|
||||||
|
")"
|
||||||
|
)
|
||||||
average_coverage = f"(avg(({coverage_by_suite})) or on() vector(0))"
|
average_coverage = f"(avg(({coverage_by_suite})) or on() vector(0))"
|
||||||
suites_loc_violating = f'(sum((({smell_by_suite}) > bool 0)) or on() vector(0))'
|
suites_loc_violating = f'(sum((({smell_by_suite}) > bool 0)) or on() vector(0))'
|
||||||
|
|
||||||
@ -3397,39 +3405,36 @@ def build_jobs_dashboard():
|
|||||||
|
|
||||||
def _check_state_series(regex: str, failed: bool) -> str:
|
def _check_state_series(regex: str, failed: bool) -> str:
|
||||||
state = f'result!~"{non_failure}"' if failed else f'result=~"{success}"'
|
state = f'result!~"{non_failure}"' if failed else f'result=~"{success}"'
|
||||||
core = (
|
core = f'sum by (suite) (increase({{{checks_selector},check=~"{regex}",{state}}}[$__interval]))'
|
||||||
f'sum by (suite) (max_over_time(({{{checks_selector},check=~"{regex}",{state}}})[$__interval]))'
|
|
||||||
)
|
|
||||||
return f'({core}) or on(suite) ({selected_suite_zero})'
|
return f'({core}) or on(suite) ({selected_suite_zero})'
|
||||||
|
|
||||||
problematic_tests_history_core = (
|
problematic_tests_history_core = (
|
||||||
f'topk(12, sum by (suite, test, jenkins_job) (increase(platform_quality_gate_test_case_result{{suite=~"{suite_var}",branch=~"{branch_var}",test!="__no_test_cases__",status="failed",{exported}}}[$__interval])))'
|
f'topk(12, sum by (suite, test) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test!="__no_test_cases__",status="failed"}}))'
|
||||||
)
|
)
|
||||||
problematic_tests_history = f"({problematic_tests_history_core}) or on() vector(0)"
|
problematic_tests_history = f"({problematic_tests_history_core}) or on() vector(0)"
|
||||||
worst_test_per_suite_core = (
|
worst_test_per_suite_core = (
|
||||||
f'topk by (suite) (1, sum by (suite, test, jenkins_job) (increase(platform_quality_gate_test_case_result{{suite=~"{suite_var}",branch=~"{branch_var}",test!="__no_test_cases__",status="failed",{exported}}}[30d])))'
|
f'topk by (suite) (1, sum by (suite, test) (increase(platform_quality_gate_test_case_result{{suite=~"{suite_var}",branch=~"{branch_var}",test!="__no_test_cases__",status="failed",{exported}}}[30d])))'
|
||||||
)
|
)
|
||||||
worst_test_per_suite = f"({worst_test_per_suite_core}) or on() vector(0)"
|
worst_test_per_suite = f"({worst_test_per_suite_core}) or on() vector(0)"
|
||||||
selected_test_pass_fail = [
|
selected_test_pass_fail = [
|
||||||
{
|
{
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"expr": f'sum by (suite, test, status, jenkins_job, build_number) (max_over_time(platform_quality_gate_test_case_result{{{test_case_selector},status="passed"}}[$__interval])) or on() vector(0)',
|
"expr": f'sum by (suite) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__",status="passed"}}) or on() vector(0)',
|
||||||
"legendFormat": "passed · {{suite}} · #{{build_number}}",
|
"legendFormat": "{{suite}} passed",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"refId": "B",
|
"refId": "B",
|
||||||
"expr": f'sum by (suite, test, status, jenkins_job, build_number) (max_over_time(platform_quality_gate_test_case_result{{{test_case_selector},status="failed"}}[$__interval])) or on() vector(0)',
|
"expr": f'sum by (suite) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__",status="failed"}}) or on() vector(0)',
|
||||||
"legendFormat": "failed · {{suite}} · #{{build_number}}",
|
"legendFormat": "{{suite}} failed",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"refId": "C",
|
"refId": "C",
|
||||||
"expr": f'sum by (suite, test, status, jenkins_job, build_number) (max_over_time(platform_quality_gate_test_case_result{{{test_case_selector},status="skipped"}}[$__interval])) or on() vector(0)',
|
"expr": f'sum by (suite) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__",status="skipped"}}) or on() vector(0)',
|
||||||
"legendFormat": "skipped · {{suite}} · #{{build_number}}",
|
"legendFormat": "{{suite}} skipped",
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
selected_test_pass_rate = (
|
selected_test_pass_rate = (
|
||||||
f'100 * (sum by (suite, test) (max_over_time(platform_quality_gate_test_case_result{{{test_case_selector},status="passed"}}[$__interval]))) '
|
f'avg by (suite) (platform_quality:test_case_pass_rate:percent_1h{{suite=~"{suite_var}",branch=~"{branch_var}",test=~"{test_var}",test!="__no_test_cases__"}})'
|
||||||
f'/ clamp_min((sum by (suite, test) (max_over_time(platform_quality_gate_test_case_result{{{test_case_selector},status=~"passed|failed|error|skipped"}}[$__interval]))), 1)'
|
|
||||||
)
|
)
|
||||||
recent_branch_evidence = (
|
recent_branch_evidence = (
|
||||||
f'sort_desc(count by (suite, branch) (max_over_time(platform_quality_gate_build_info{{{build_info_selector}}}[30d])))'
|
f'sort_desc(count by (suite, branch) (max_over_time(platform_quality_gate_build_info{{{build_info_selector}}}[30d])))'
|
||||||
@ -3442,70 +3447,70 @@ def build_jobs_dashboard():
|
|||||||
missing = f"(({suite_universe}) unless on(suite) {presence_expr})"
|
missing = f"(({suite_universe}) unless on(suite) {presence_expr})"
|
||||||
return f"({missing}) or on(suite) (0 * ({suite_universe}))"
|
return f"({missing}) or on(suite) (0 * ({suite_universe}))"
|
||||||
|
|
||||||
missing_tests_by_suite = _missing_suite_series(
|
def _present_suite_percent(presence_expr: str) -> str:
|
||||||
|
present = f"(({suite_universe}) and on(suite) {presence_expr})"
|
||||||
|
return f"(100 * ({present})) or on(suite) (0 * ({suite_universe}))"
|
||||||
|
|
||||||
|
present_tests_by_suite = _present_suite_percent(
|
||||||
f'count by (suite) ({{__name__=~".*_quality_gate_tests_total",{exported}}})'
|
f'count by (suite) ({{__name__=~".*_quality_gate_tests_total",{exported}}})'
|
||||||
)
|
)
|
||||||
missing_checks_by_suite = _missing_suite_series(
|
present_checks_by_suite = _present_suite_percent(
|
||||||
f'count by (suite) ({{__name__=~".*_quality_gate_checks_total",{exported}}})'
|
f'count by (suite) ({{__name__=~".*_quality_gate_checks_total",{exported}}})'
|
||||||
)
|
)
|
||||||
missing_coverage_by_suite = _missing_suite_series(
|
present_coverage_by_suite = _present_suite_percent(
|
||||||
f"count by (suite) (platform_quality_gate_workspace_line_coverage_percent{{{exported}}})"
|
f"count by (suite) (platform_quality_gate_workspace_line_coverage_percent{{{exported}}})"
|
||||||
)
|
)
|
||||||
missing_loc_by_suite = _missing_suite_series(
|
present_loc_by_suite = _present_suite_percent(
|
||||||
f"count by (suite) (platform_quality_gate_source_lines_over_500_total{{{exported}}}) "
|
f"count by (suite) (platform_quality_gate_source_lines_over_500_total{{{exported}}}) "
|
||||||
f"and on(suite) count by (suite) (platform_quality_gate_source_files_total{{{exported}}})"
|
f"and on(suite) count by (suite) (platform_quality_gate_source_files_total{{{exported}}})"
|
||||||
)
|
)
|
||||||
missing_test_case_by_suite = _missing_suite_series(
|
present_test_case_by_suite = _present_suite_percent(
|
||||||
f"count by (suite) (platform_quality_gate_test_case_result{{{exported}}})"
|
f"count by (suite) (platform_quality_gate_test_case_result{{{exported}}})"
|
||||||
)
|
)
|
||||||
placeholder_test_case_by_suite = _missing_suite_series(
|
real_test_case_by_suite = _present_suite_percent(
|
||||||
f'count by (suite) (platform_quality_gate_test_case_result{{{exported},test!="__no_test_cases__"}})'
|
f'count by (suite) (platform_quality_gate_test_case_result{{{exported},test!="__no_test_cases__"}})'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
dark_red = "dark-red"
|
||||||
|
dark_orange = "dark-orange"
|
||||||
|
dark_yellow = "dark-yellow"
|
||||||
|
dark_green = "dark-green"
|
||||||
|
dark_blue = "dark-blue"
|
||||||
success_thresholds = {
|
success_thresholds = {
|
||||||
"mode": "absolute",
|
"mode": "absolute",
|
||||||
"steps": [
|
"steps": [
|
||||||
{"color": "red", "value": None},
|
{"color": dark_red, "value": None},
|
||||||
{"color": "orange", "value": 90},
|
{"color": dark_orange, "value": 90},
|
||||||
{"color": "yellow", "value": 93},
|
{"color": dark_yellow, "value": 93},
|
||||||
{"color": "green", "value": 95},
|
{"color": dark_green, "value": 95},
|
||||||
{"color": "blue", "value": 100},
|
{"color": dark_blue, "value": 100},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
coverage_thresholds = success_thresholds
|
coverage_thresholds = success_thresholds
|
||||||
failures_thresholds = {
|
failures_thresholds = {
|
||||||
"mode": "absolute",
|
"mode": "absolute",
|
||||||
"steps": [
|
"steps": [
|
||||||
{"color": "green", "value": None},
|
{"color": dark_green, "value": None},
|
||||||
{"color": "yellow", "value": 1},
|
{"color": dark_yellow, "value": 1},
|
||||||
{"color": "orange", "value": 3},
|
{"color": dark_orange, "value": 3},
|
||||||
{"color": "red", "value": 5},
|
{"color": dark_red, "value": 5},
|
||||||
],
|
|
||||||
}
|
|
||||||
coverage_gap_thresholds = {
|
|
||||||
"mode": "absolute",
|
|
||||||
"steps": [
|
|
||||||
{"color": "green", "value": None},
|
|
||||||
{"color": "yellow", "value": 1},
|
|
||||||
{"color": "orange", "value": 5},
|
|
||||||
{"color": "red", "value": 10},
|
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
smell_thresholds = {
|
smell_thresholds = {
|
||||||
"mode": "absolute",
|
"mode": "absolute",
|
||||||
"steps": [
|
"steps": [
|
||||||
{"color": "red", "value": None},
|
{"color": dark_red, "value": None},
|
||||||
{"color": "green", "value": 0},
|
{"color": dark_green, "value": 0},
|
||||||
{"color": "yellow", "value": 1},
|
{"color": dark_yellow, "value": 1},
|
||||||
{"color": "orange", "value": 3},
|
{"color": dark_orange, "value": 3},
|
||||||
{"color": "red", "value": 5},
|
{"color": dark_red, "value": 5},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
missing_thresholds = {
|
missing_thresholds = {
|
||||||
"mode": "absolute",
|
"mode": "absolute",
|
||||||
"steps": [
|
"steps": [
|
||||||
{"color": "green", "value": None},
|
{"color": dark_green, "value": None},
|
||||||
{"color": "red", "value": 1},
|
{"color": dark_red, "value": 1},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3554,7 +3559,7 @@ def build_jobs_dashboard():
|
|||||||
instant=True,
|
instant=True,
|
||||||
thresholds={
|
thresholds={
|
||||||
"mode": "absolute",
|
"mode": "absolute",
|
||||||
"steps": [{"color": "red", "value": None}, {"color": "green", "value": 1}],
|
"steps": [{"color": dark_red, "value": None}, {"color": dark_green, "value": 1}],
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@ -3620,21 +3625,6 @@ def build_jobs_dashboard():
|
|||||||
{"type": "value", "options": {"-1": {"text": "no runs"}}}
|
{"type": "value", "options": {"-1": {"text": "no runs"}}}
|
||||||
]
|
]
|
||||||
panels.append(reliability_suite_panel)
|
panels.append(reliability_suite_panel)
|
||||||
coverage_gap_panel = bargauge_panel(
|
|
||||||
10,
|
|
||||||
"Coverage Gap to 95% by Suite",
|
|
||||||
coverage_gap,
|
|
||||||
{"h": 8, "w": 8, "x": 16, "y": 5},
|
|
||||||
unit="percent",
|
|
||||||
instant=True,
|
|
||||||
legend="{{suite}}",
|
|
||||||
sort_order="desc",
|
|
||||||
thresholds=coverage_gap_thresholds,
|
|
||||||
decimals=2,
|
|
||||||
)
|
|
||||||
coverage_gap_panel["description"] = "Gap from the 95% target. 0 means the suite is at or above target."
|
|
||||||
panels.append(coverage_gap_panel)
|
|
||||||
|
|
||||||
history_panel = timeseries_panel(
|
history_panel = timeseries_panel(
|
||||||
11,
|
11,
|
||||||
"Run Reliability History by Suite",
|
"Run Reliability History by Suite",
|
||||||
@ -3676,15 +3666,10 @@ def build_jobs_dashboard():
|
|||||||
"expr": f'sum(increase(platform_quality_gate_runs_total{{{runs_failure_selector}}}[$__interval])) or on() vector(0)',
|
"expr": f'sum(increase(platform_quality_gate_runs_total{{{runs_failure_selector}}}[$__interval])) or on() vector(0)',
|
||||||
"legendFormat": "Failure",
|
"legendFormat": "Failure",
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"refId": "C",
|
|
||||||
"expr": f'sum(increase(platform_quality_gate_runs_total{{{runs_selector}}}[$__interval])) or on() vector(0)',
|
|
||||||
"legendFormat": "Total",
|
|
||||||
},
|
|
||||||
],
|
],
|
||||||
legend_display="list",
|
legend_display="list",
|
||||||
legend_placement="bottom",
|
legend_placement="bottom",
|
||||||
legend_calcs=["lastNotNull", "sum"],
|
legend_calcs=[],
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
@ -3697,18 +3682,13 @@ def build_jobs_dashboard():
|
|||||||
targets=[
|
targets=[
|
||||||
{
|
{
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"expr": f'max_over_time(platform_quality_gate_workspace_line_coverage_percent{{{workspace_coverage_selector}}}[$__interval])',
|
"expr": coverage_loc_compliance_history,
|
||||||
"legendFormat": "{{suite}} coverage %",
|
"legendFormat": "{{suite}}",
|
||||||
},
|
|
||||||
{
|
|
||||||
"refId": "B",
|
|
||||||
"expr": loc_limit_compliance_history,
|
|
||||||
"legendFormat": "{{suite}} files <=500 LOC %",
|
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
legend_display="list",
|
legend_display="list",
|
||||||
legend_placement="bottom",
|
legend_placement="bottom",
|
||||||
legend_calcs=["lastNotNull", "max"],
|
legend_calcs=[],
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
run_mix_panel = pie_panel(
|
run_mix_panel = pie_panel(
|
||||||
@ -3742,12 +3722,14 @@ def build_jobs_dashboard():
|
|||||||
legend="{{suite}}",
|
legend="{{suite}}",
|
||||||
legend_display="list",
|
legend_display="list",
|
||||||
legend_placement="bottom",
|
legend_placement="bottom",
|
||||||
legend_calcs=["lastNotNull", "max"],
|
legend_calcs=[],
|
||||||
)
|
)
|
||||||
panel["description"] = (
|
panel["description"] = (
|
||||||
"One line per selected suite. 1 means this check dimension was in that state during the bucket; "
|
"One line per selected suite, counting check events in this state during each bucket. "
|
||||||
"0 means the suite reported the dimension and it was not in that state."
|
"Intervals without runs stay at zero rather than being treated as failures."
|
||||||
)
|
)
|
||||||
|
panel["fieldConfig"]["defaults"]["min"] = 0
|
||||||
|
panel["fieldConfig"]["defaults"].setdefault("custom", {})["spanNulls"] = True
|
||||||
panels.append(panel)
|
panels.append(panel)
|
||||||
for index, (label, regex) in enumerate(check_dimensions[4:]):
|
for index, (label, regex) in enumerate(check_dimensions[4:]):
|
||||||
panel = timeseries_panel(
|
panel = timeseries_panel(
|
||||||
@ -3759,12 +3741,14 @@ def build_jobs_dashboard():
|
|||||||
legend="{{suite}}",
|
legend="{{suite}}",
|
||||||
legend_display="list",
|
legend_display="list",
|
||||||
legend_placement="bottom",
|
legend_placement="bottom",
|
||||||
legend_calcs=["lastNotNull", "max"],
|
legend_calcs=[],
|
||||||
)
|
)
|
||||||
panel["description"] = (
|
panel["description"] = (
|
||||||
"One line per selected suite. 1 means this check dimension was in that state during the bucket; "
|
"One line per selected suite, counting check events in this state during each bucket. "
|
||||||
"0 means the suite reported the dimension and it was not in that state."
|
"Intervals without runs stay at zero rather than being treated as failures."
|
||||||
)
|
)
|
||||||
|
panel["fieldConfig"]["defaults"]["min"] = 0
|
||||||
|
panel["fieldConfig"]["defaults"].setdefault("custom", {})["spanNulls"] = True
|
||||||
panels.append(panel)
|
panels.append(panel)
|
||||||
|
|
||||||
_append_check_trends(130, "Failure Trend", True, 29)
|
_append_check_trends(130, "Failure Trend", True, 29)
|
||||||
@ -3776,10 +3760,10 @@ def build_jobs_dashboard():
|
|||||||
problematic_tests_history,
|
problematic_tests_history,
|
||||||
{"h": 8, "w": 12, "x": 0, "y": 57},
|
{"h": 8, "w": 12, "x": 0, "y": 57},
|
||||||
unit="none",
|
unit="none",
|
||||||
legend="{{suite}} · {{test}}",
|
legend="{{suite}} - {{test}}",
|
||||||
legend_display="list",
|
legend_display="list",
|
||||||
legend_placement="bottom",
|
legend_placement="right",
|
||||||
legend_calcs=["lastNotNull", "max", "sum"],
|
legend_calcs=[],
|
||||||
links=jenkins_suite_links(),
|
links=jenkins_suite_links(),
|
||||||
data_links=jenkins_latest_artifact_data_links(),
|
data_links=jenkins_latest_artifact_data_links(),
|
||||||
)
|
)
|
||||||
@ -3810,7 +3794,7 @@ def build_jobs_dashboard():
|
|||||||
targets=selected_test_pass_fail,
|
targets=selected_test_pass_fail,
|
||||||
legend_display="list",
|
legend_display="list",
|
||||||
legend_placement="bottom",
|
legend_placement="bottom",
|
||||||
legend_calcs=["lastNotNull", "sum"],
|
legend_calcs=[],
|
||||||
links=jenkins_suite_links(),
|
links=jenkins_suite_links(),
|
||||||
data_links=jenkins_artifact_data_links(),
|
data_links=jenkins_artifact_data_links(),
|
||||||
)
|
)
|
||||||
@ -3821,10 +3805,10 @@ def build_jobs_dashboard():
|
|||||||
selected_test_pass_rate,
|
selected_test_pass_rate,
|
||||||
{"h": 8, "w": 12, "x": 12, "y": 65},
|
{"h": 8, "w": 12, "x": 12, "y": 65},
|
||||||
unit="percent",
|
unit="percent",
|
||||||
legend="{{suite}} · {{test}}",
|
legend="{{suite}}",
|
||||||
legend_display="list",
|
legend_display="list",
|
||||||
legend_placement="bottom",
|
legend_placement="bottom",
|
||||||
legend_calcs=["lastNotNull", "min"],
|
legend_calcs=[],
|
||||||
links=jenkins_suite_links(),
|
links=jenkins_suite_links(),
|
||||||
data_links=jenkins_artifact_data_links(),
|
data_links=jenkins_artifact_data_links(),
|
||||||
)
|
)
|
||||||
@ -3871,56 +3855,56 @@ def build_jobs_dashboard():
|
|||||||
panels.append(
|
panels.append(
|
||||||
bargauge_panel(
|
bargauge_panel(
|
||||||
27,
|
27,
|
||||||
"Missing Tests Metrics by Suite",
|
"Tests Metrics Present by Suite",
|
||||||
missing_tests_by_suite,
|
present_tests_by_suite,
|
||||||
{"h": 7, "w": 6, "x": 0, "y": 81},
|
{"h": 7, "w": 6, "x": 0, "y": 81},
|
||||||
unit="none",
|
unit="percent",
|
||||||
instant=True,
|
instant=True,
|
||||||
legend="{{suite}}",
|
legend="{{suite}}",
|
||||||
sort_order="desc",
|
sort_order="desc",
|
||||||
thresholds=missing_thresholds,
|
thresholds=success_thresholds,
|
||||||
decimals=0,
|
decimals=0,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
bargauge_panel(
|
bargauge_panel(
|
||||||
28,
|
28,
|
||||||
"Missing Checks Metrics by Suite",
|
"Checks Metrics Present by Suite",
|
||||||
missing_checks_by_suite,
|
present_checks_by_suite,
|
||||||
{"h": 7, "w": 6, "x": 6, "y": 81},
|
{"h": 7, "w": 6, "x": 6, "y": 81},
|
||||||
unit="none",
|
unit="percent",
|
||||||
instant=True,
|
instant=True,
|
||||||
legend="{{suite}}",
|
legend="{{suite}}",
|
||||||
sort_order="desc",
|
sort_order="desc",
|
||||||
thresholds=missing_thresholds,
|
thresholds=success_thresholds,
|
||||||
decimals=0,
|
decimals=0,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
bargauge_panel(
|
bargauge_panel(
|
||||||
29,
|
29,
|
||||||
"Missing Coverage Metrics by Suite",
|
"Coverage Metrics Present by Suite",
|
||||||
missing_coverage_by_suite,
|
present_coverage_by_suite,
|
||||||
{"h": 7, "w": 6, "x": 12, "y": 81},
|
{"h": 7, "w": 6, "x": 12, "y": 81},
|
||||||
unit="none",
|
unit="percent",
|
||||||
instant=True,
|
instant=True,
|
||||||
legend="{{suite}}",
|
legend="{{suite}}",
|
||||||
sort_order="desc",
|
sort_order="desc",
|
||||||
thresholds=missing_thresholds,
|
thresholds=success_thresholds,
|
||||||
decimals=0,
|
decimals=0,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
bargauge_panel(
|
bargauge_panel(
|
||||||
30,
|
30,
|
||||||
"Missing LOC Compliance Metrics by Suite",
|
"LOC Compliance Metrics Present by Suite",
|
||||||
missing_loc_by_suite,
|
present_loc_by_suite,
|
||||||
{"h": 7, "w": 6, "x": 18, "y": 81},
|
{"h": 7, "w": 6, "x": 18, "y": 81},
|
||||||
unit="none",
|
unit="percent",
|
||||||
instant=True,
|
instant=True,
|
||||||
legend="{{suite}}",
|
legend="{{suite}}",
|
||||||
sort_order="desc",
|
sort_order="desc",
|
||||||
thresholds=missing_thresholds,
|
thresholds=success_thresholds,
|
||||||
decimals=0,
|
decimals=0,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@ -3935,8 +3919,8 @@ def build_jobs_dashboard():
|
|||||||
thresholds={
|
thresholds={
|
||||||
"mode": "absolute",
|
"mode": "absolute",
|
||||||
"steps": [
|
"steps": [
|
||||||
{"color": "red", "value": None},
|
{"color": dark_red, "value": None},
|
||||||
{"color": "green", "value": 1},
|
{"color": dark_green, "value": 1},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@ -3988,28 +3972,28 @@ def build_jobs_dashboard():
|
|||||||
panels.append(
|
panels.append(
|
||||||
bargauge_panel(
|
bargauge_panel(
|
||||||
148,
|
148,
|
||||||
"Missing Test-Case Metrics by Suite",
|
"Test-Case Metrics Present by Suite",
|
||||||
missing_test_case_by_suite,
|
present_test_case_by_suite,
|
||||||
{"h": 6, "w": 12, "x": 0, "y": 94},
|
{"h": 6, "w": 12, "x": 0, "y": 94},
|
||||||
unit="none",
|
unit="percent",
|
||||||
instant=True,
|
instant=True,
|
||||||
legend="{{suite}}",
|
legend="{{suite}}",
|
||||||
sort_order="desc",
|
sort_order="desc",
|
||||||
thresholds=missing_thresholds,
|
thresholds=success_thresholds,
|
||||||
decimals=0,
|
decimals=0,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
bargauge_panel(
|
bargauge_panel(
|
||||||
151,
|
151,
|
||||||
"No Real Test Cases by Suite",
|
"Real Test Cases Present by Suite",
|
||||||
placeholder_test_case_by_suite,
|
real_test_case_by_suite,
|
||||||
{"h": 6, "w": 12, "x": 12, "y": 94},
|
{"h": 6, "w": 12, "x": 12, "y": 94},
|
||||||
unit="none",
|
unit="percent",
|
||||||
instant=True,
|
instant=True,
|
||||||
legend="{{suite}}",
|
legend="{{suite}}",
|
||||||
sort_order="desc",
|
sort_order="desc",
|
||||||
thresholds=missing_thresholds,
|
thresholds=success_thresholds,
|
||||||
decimals=0,
|
decimals=0,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@ -4055,9 +4039,8 @@ def build_jobs_dashboard():
|
|||||||
5: {"h": 4, "w": 4, "x": 12, "y": 0},
|
5: {"h": 4, "w": 4, "x": 12, "y": 0},
|
||||||
6: {"h": 4, "w": 4, "x": 16, "y": 0},
|
6: {"h": 4, "w": 4, "x": 16, "y": 0},
|
||||||
7: {"h": 4, "w": 4, "x": 20, "y": 0},
|
7: {"h": 4, "w": 4, "x": 20, "y": 0},
|
||||||
8: {"h": 7, "w": 8, "x": 0, "y": 4},
|
8: {"h": 7, "w": 12, "x": 0, "y": 4},
|
||||||
9: {"h": 7, "w": 8, "x": 8, "y": 4},
|
9: {"h": 7, "w": 12, "x": 12, "y": 4},
|
||||||
10: {"h": 7, "w": 8, "x": 16, "y": 4},
|
|
||||||
17: {"h": 7, "w": 12, "x": 0, "y": 11},
|
17: {"h": 7, "w": 12, "x": 0, "y": 11},
|
||||||
18: {"h": 7, "w": 12, "x": 12, "y": 11},
|
18: {"h": 7, "w": 12, "x": 12, "y": 11},
|
||||||
}
|
}
|
||||||
@ -4070,6 +4053,24 @@ def build_jobs_dashboard():
|
|||||||
def children(ids):
|
def children(ids):
|
||||||
return [panel_by_id[panel_id] for panel_id in ids]
|
return [panel_by_id[panel_id] for panel_id in ids]
|
||||||
|
|
||||||
|
row_layout = {
|
||||||
|
11: {"h": 8, "w": 12, "x": 0, "y": 19},
|
||||||
|
12: {"h": 8, "w": 12, "x": 12, "y": 19},
|
||||||
|
13: {"h": 8, "w": 12, "x": 0, "y": 27},
|
||||||
|
14: {"h": 8, "w": 12, "x": 12, "y": 27},
|
||||||
|
145: {"h": 10, "w": 24, "x": 0, "y": 63},
|
||||||
|
147: {"h": 8, "w": 24, "x": 0, "y": 74},
|
||||||
|
146: {"h": 8, "w": 12, "x": 0, "y": 83},
|
||||||
|
152: {"h": 8, "w": 12, "x": 12, "y": 83},
|
||||||
|
31: {"h": 6, "w": 4, "x": 0, "y": 111},
|
||||||
|
32: {"h": 6, "w": 4, "x": 4, "y": 111},
|
||||||
|
33: {"h": 6, "w": 4, "x": 8, "y": 111},
|
||||||
|
34: {"h": 6, "w": 6, "x": 12, "y": 111},
|
||||||
|
35: {"h": 6, "w": 6, "x": 18, "y": 111},
|
||||||
|
}
|
||||||
|
for panel_id, grid in row_layout.items():
|
||||||
|
panel_by_id[panel_id]["gridPos"] = grid
|
||||||
|
|
||||||
compact_panels.extend(
|
compact_panels.extend(
|
||||||
[
|
[
|
||||||
row_panel(500, "Reliability And Run History", 18, panels=children([11, 12, 13, 14])),
|
row_panel(500, "Reliability And Run History", 18, panels=children([11, 12, 13, 14])),
|
||||||
@ -4093,9 +4094,15 @@ def build_jobs_dashboard():
|
|||||||
),
|
),
|
||||||
row_panel(
|
row_panel(
|
||||||
504,
|
504,
|
||||||
"Telemetry Completeness, SonarQube, And Branches",
|
"Telemetry Completeness And Branches",
|
||||||
22,
|
22,
|
||||||
panels=children([27, 28, 29, 30, 31, 32, 33, 34, 35, 148, 151, 149, 150]),
|
panels=children([27, 28, 29, 30, 148, 151, 149, 150]),
|
||||||
|
),
|
||||||
|
row_panel(
|
||||||
|
505,
|
||||||
|
"SonarQube Project Health",
|
||||||
|
23,
|
||||||
|
panels=children([31, 32, 33, 34, 35]),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|||||||
@ -133,8 +133,8 @@ def test_jobs_dashboard_bar_gauges_use_solid_threshold_colors():
|
|||||||
)
|
)
|
||||||
threshold_steps = reliability_panel["fieldConfig"]["defaults"]["thresholds"]["steps"]
|
threshold_steps = reliability_panel["fieldConfig"]["defaults"]["thresholds"]["steps"]
|
||||||
|
|
||||||
assert {"color": "yellow", "value": 93} in threshold_steps
|
assert {"color": "dark-yellow", "value": 93} in threshold_steps
|
||||||
assert {"color": "blue", "value": 100} in threshold_steps
|
assert {"color": "dark-blue", "value": 100} in threshold_steps
|
||||||
|
|
||||||
|
|
||||||
def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint():
|
def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint():
|
||||||
@ -150,19 +150,24 @@ def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint():
|
|||||||
}
|
}
|
||||||
|
|
||||||
assert len(panels) == 16
|
assert len(panels) == 16
|
||||||
assert len(visible_query_panels) == 11
|
assert len(visible_query_panels) == 10
|
||||||
assert sum(len(panel.get("targets", [])) for panel in visible_query_panels) == 11
|
assert sum(len(panel.get("targets", [])) for panel in visible_query_panels) == 10
|
||||||
|
assert all(
|
||||||
|
panel["title"] != "Coverage Gap to 95% by Suite"
|
||||||
|
for panel in visible_query_panels
|
||||||
|
)
|
||||||
assert [row["title"] for row in rows] == [
|
assert [row["title"] for row in rows] == [
|
||||||
"Reliability And Run History",
|
"Reliability And Run History",
|
||||||
"Failure Trends By Check",
|
"Failure Trends By Check",
|
||||||
"Success Trends By Check",
|
"Success Trends By Check",
|
||||||
"Test Drilldowns And Problem Tests",
|
"Test Drilldowns And Problem Tests",
|
||||||
"Telemetry Completeness, SonarQube, And Branches",
|
"Telemetry Completeness And Branches",
|
||||||
|
"SonarQube Project Health",
|
||||||
]
|
]
|
||||||
assert all(row["collapsed"] for row in rows)
|
assert all(row["collapsed"] for row in rows)
|
||||||
|
|
||||||
assert "Failure Trend: Coverage" in nested_panels_by_title
|
assert "Failure Trend: Coverage" in nested_panels_by_title
|
||||||
assert "Success Trend: Supply Chain" in nested_panels_by_title
|
assert "Success Trend: Supply Chain" in nested_panels_by_title
|
||||||
assert "Selected Test Pass Rate History" in nested_panels_by_title
|
assert "Selected Test Pass Rate History" in nested_panels_by_title
|
||||||
assert "Missing Coverage Metrics by Suite" in nested_panels_by_title
|
assert "Coverage Metrics Present by Suite" in nested_panels_by_title
|
||||||
assert "SonarQube API Up" in nested_panels_by_title
|
assert "SonarQube API Up" in nested_panels_by_title
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -139,6 +139,34 @@ data:
|
|||||||
labels:
|
labels:
|
||||||
scope: atlas
|
scope: atlas
|
||||||
rollup: yearly
|
rollup: yearly
|
||||||
|
platform-quality.yaml: |
|
||||||
|
groups:
|
||||||
|
- name: platform.quality
|
||||||
|
interval: 15m
|
||||||
|
rules:
|
||||||
|
- record: platform_quality:test_case_status:count_1h
|
||||||
|
expr: |
|
||||||
|
sum by (suite, branch, test, status) (
|
||||||
|
max_over_time(platform_quality_gate_test_case_result{job="platform-quality-ci",test!="__no_test_cases__"}[1h])
|
||||||
|
)
|
||||||
|
labels:
|
||||||
|
rollup: hourly
|
||||||
|
- record: platform_quality:test_case_pass_rate:percent_1h
|
||||||
|
expr: |
|
||||||
|
100 * (
|
||||||
|
sum by (suite, branch, test) (
|
||||||
|
max_over_time(platform_quality_gate_test_case_result{job="platform-quality-ci",test!="__no_test_cases__",status="passed"}[1h])
|
||||||
|
)
|
||||||
|
)
|
||||||
|
/
|
||||||
|
clamp_min(
|
||||||
|
sum by (suite, branch, test) (
|
||||||
|
max_over_time(platform_quality_gate_test_case_result{job="platform-quality-ci",test!="__no_test_cases__",status=~"passed|failed|error|skipped"}[1h])
|
||||||
|
),
|
||||||
|
1
|
||||||
|
)
|
||||||
|
labels:
|
||||||
|
rollup: hourly
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@ -168,7 +196,7 @@ spec:
|
|||||||
labels:
|
labels:
|
||||||
app: vmalert-atlas-availability
|
app: vmalert-atlas-availability
|
||||||
annotations:
|
annotations:
|
||||||
bstein.dev/rules-revision: "2026-05-10-availability-rollup-v6"
|
bstein.dev/rules-revision: "2026-05-15-platform-quality-rollups-v1"
|
||||||
spec:
|
spec:
|
||||||
serviceAccountName: vmalert-atlas-availability
|
serviceAccountName: vmalert-atlas-availability
|
||||||
affinity:
|
affinity:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user