diff --git a/Jenkinsfile b/Jenkinsfile index 1ff6317..e4905c6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -88,9 +88,8 @@ spec: SEMVER = 'dev' COVERAGE_JSON = 'build/coverage.json' JUNIT_XML = 'build/junit.xml' - METRICS_PREFIX = 'ariadne_ci' - VM_IMPORT_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428/api/v1/import/prometheus' - REPO_NAME = 'metis' + SUITE_NAME = 'metis' + PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091' } options { disableConcurrentBuilds() diff --git a/scripts/publish_test_metrics.py b/scripts/publish_test_metrics.py index 4850ca2..af25cd8 100644 --- a/scripts/publish_test_metrics.py +++ b/scripts/publish_test_metrics.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 +"""Publish Metis quality-gate test metrics to Pushgateway (Prometheus ingest).""" + from __future__ import annotations import json @@ -31,18 +33,20 @@ def _load_junit(path: str) -> dict[str, int]: tree = ET.parse(path) root = tree.getroot() - def _as_int(node, name: str) -> int: + def _as_int(node: ET.Element, name: str) -> int: raw = node.attrib.get(name) or "0" try: return int(float(raw)) except ValueError: return 0 - suites = [] + suites: list[ET.Element] if root.tag == "testsuite": suites = [root] elif root.tag == "testsuites": suites = list(root.findall("testsuite")) + else: + suites = [] totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0} for suite in suites: @@ -53,7 +57,15 @@ def _load_junit(path: str) -> dict[str, int]: return totals -def _post_metrics(url: str, payload: str) -> None: +def _read_http(url: str) -> str: + try: + with urllib.request.urlopen(url, timeout=10) as resp: + return resp.read().decode("utf-8", errors="replace") + except Exception: + return "" + + +def _post_text(url: str, payload: str) -> None: req = urllib.request.Request( url, data=payload.encode("utf-8"), @@ -65,14 +77,36 @@ def _post_metrics(url: str, payload: str) -> None: raise RuntimeError(f"metrics push failed status={resp.status}") -def main() -> int: - vm_url = os.getenv("VM_IMPORT_URL", "").strip() - if not vm_url: - print("VM_IMPORT_URL not set; skipping metrics push") - return 0 +def _fetch_existing_counter(pushgateway_url: str, metric: str, labels: dict[str, str]) -> float: + text = _read_http(f"{pushgateway_url.rstrip('/')}/metrics") + if not text: + return 0.0 + for line in text.splitlines(): + if not line.startswith(metric + "{"): + continue + if any(f'{k}="{v}"' not in line for k, v in labels.items()): + continue + parts = line.split() + if len(parts) < 2: + continue + try: + return float(parts[1]) + except ValueError: + return 0.0 + return 0.0 + + +def main() -> int: coverage_path = os.getenv("COVERAGE_JSON", "build/coverage.json") junit_path = os.getenv("JUNIT_XML", "build/junit.xml") + pushgateway_url = os.getenv( + "PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091" + ).strip() + suite = os.getenv("SUITE_NAME", "metis") + branch = os.getenv("BRANCH_NAME", "") + build_number = os.getenv("BUILD_NUMBER", "") + commit = os.getenv("GIT_COMMIT", "") if not os.path.exists(coverage_path): raise RuntimeError(f"missing coverage file {coverage_path}") @@ -83,33 +117,72 @@ def main() -> int: totals = _load_junit(junit_path) passed = max(totals["tests"] - totals["failures"] - totals["errors"] - totals["skipped"], 0) + outcome = "ok" + if totals["tests"] <= 0 or totals["failures"] > 0 or totals["errors"] > 0: + outcome = "failed" + + job_name = "platform-quality-ci" + ok_count = _fetch_existing_counter( + pushgateway_url, + "platform_quality_gate_runs_total", + {"job": job_name, "suite": suite, "status": "ok"}, + ) + failed_count = _fetch_existing_counter( + pushgateway_url, + "platform_quality_gate_runs_total", + {"job": job_name, "suite": suite, "status": "failed"}, + ) + if outcome == "ok": + ok_count += 1 + else: + failed_count += 1 + labels = { - "job": os.getenv("CI_JOB_NAME", "metis"), - "branch": os.getenv("BRANCH_NAME", ""), - "build_number": os.getenv("BUILD_NUMBER", ""), - "commit": os.getenv("GIT_COMMIT", ""), - "repo": os.getenv("REPO_NAME", "metis"), + "suite": suite, + "branch": branch, + "build_number": build_number, + "commit": commit, } - - prefix = os.getenv("METRICS_PREFIX", "ariadne_ci") - lines = [ - f"{prefix}_coverage_percent{_label_str(labels)} {coverage:.3f}", - f"{prefix}_tests_total{_label_str({**labels, 'result': 'passed'})} {passed}", - f"{prefix}_tests_total{_label_str({**labels, 'result': 'failed'})} {totals['failures']}", - f"{prefix}_tests_total{_label_str({**labels, 'result': 'error'})} {totals['errors']}", - f"{prefix}_tests_total{_label_str({**labels, 'result': 'skipped'})} {totals['skipped']}", - f"{prefix}_build_info{_label_str(labels)} 1", + payload_lines = [ + "# TYPE platform_quality_gate_runs_total counter", + f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {ok_count:.0f}', + f'platform_quality_gate_runs_total{{suite="{suite}",status="failed"}} {failed_count:.0f}', + "# TYPE metis_quality_gate_tests_total gauge", + f'metis_quality_gate_tests_total{{suite="{suite}",result="passed"}} {passed}', + f'metis_quality_gate_tests_total{{suite="{suite}",result="failed"}} {totals["failures"]}', + f'metis_quality_gate_tests_total{{suite="{suite}",result="error"}} {totals["errors"]}', + f'metis_quality_gate_tests_total{{suite="{suite}",result="skipped"}} {totals["skipped"]}', + "# TYPE metis_quality_gate_coverage_percent gauge", + f'metis_quality_gate_coverage_percent{{suite="{suite}"}} {coverage:.3f}', + "# TYPE metis_quality_gate_build_info gauge", + f"metis_quality_gate_build_info{_label_str(labels)} 1", ] + payload = "\n".join(payload_lines) + "\n" + _post_text(f"{pushgateway_url.rstrip('/')}/metrics/job/{job_name}/suite/{suite}", payload) - payload = "\n".join(lines) + "\n" - _post_metrics(vm_url, payload) - print("metrics push complete") + print( + json.dumps( + { + "suite": suite, + "outcome": outcome, + "tests_total": totals["tests"], + "tests_passed": passed, + "tests_failed": totals["failures"], + "tests_errors": totals["errors"], + "tests_skipped": totals["skipped"], + "coverage_percent": round(coverage, 3), + "ok_counter": ok_count, + "failed_counter": failed_count, + }, + indent=2, + ) + ) return 0 if __name__ == "__main__": try: - sys.exit(main()) + raise SystemExit(main()) except Exception as exc: print(f"metrics push failed: {exc}") - sys.exit(1) + raise