diff --git a/Jenkinsfile b/Jenkinsfile
index d38096f..531b513 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -71,6 +71,8 @@ spec:
   environment {
     PIP_DISABLE_PIP_VERSION_CHECK = '1'
     PYTHONUNBUFFERED = '1'
+    SUITE_NAME = 'atlasbot'
+    PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
   }
   stages {
     stage('Checkout') {
@@ -115,7 +117,9 @@ spec:
             seq 1 10 | while read _; do
               docker info && break || sleep 2
             done
-            docker buildx create --name bstein-builder --driver docker-container --bootstrap --use
+            BUILDER_NAME="atlasbot-${BUILD_NUMBER}"
+            docker buildx rm "${BUILDER_NAME}" >/dev/null 2>&1 || true
+            docker buildx create --name "${BUILDER_NAME}" --driver docker-container --bootstrap --use
           '''
         }
       }
@@ -166,6 +170,100 @@ spec:
     }
   }
   post {
+    success {
+      container('tester') {
+        sh '''
+          set -euo pipefail
+          export QUALITY_STATUS=ok
+          python - <<'PY'
+import os
+import re
+import urllib.request
+
+suite = os.environ.get("SUITE_NAME", "atlasbot")
+status = os.environ.get("QUALITY_STATUS", "failed")
+gateway = os.environ.get("PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091").rstrip("/")
+text = urllib.request.urlopen(f"{gateway}/metrics", timeout=10).read().decode("utf-8", errors="replace")
+
+def counter(name: str) -> float:
+    # Current run-counter value for status name; 0.0 when no sample matches.
+    # Lookaheads keep the match independent of the order the labels are rendered in.
+    pattern = re.compile(
+        rf'^platform_quality_gate_runs_total\\{{(?=[^}}]*job="platform-quality-ci")(?=[^}}]*suite="{re.escape(suite)}")(?=[^}}]*status="{name}")[^}}]*\\}}\\s+([0-9]+(?:\\.[0-9]+)?)$',
+        re.M,
+    )
+    match = pattern.search(text)
+    return float(match.group(1)) if match else 0.0
+
+ok = counter("ok")
+failed = counter("failed")
+if status == "ok":
+    ok += 1
+else:
+    failed += 1
+payload = (
+    "# TYPE platform_quality_gate_runs_total counter\\n"
+    f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {int(ok)}\\n'
+    f'platform_quality_gate_runs_total{{suite="{suite}",status="failed"}} {int(failed)}\\n'
+)
+req = urllib.request.Request(
+    f"{gateway}/metrics/job/platform-quality-ci/suite/{suite}",
+    data=payload.encode("utf-8"),
+    method="POST",
+    headers={"Content-Type": "text/plain"},
+)
+urllib.request.urlopen(req, timeout=10).read()
+PY
+        '''
+      }
+    }
+    failure {
+      container('tester') {
+        sh '''
+          set -euo pipefail
+          export QUALITY_STATUS=failed
+          python - <<'PY'
+import os
+import re
+import urllib.request
+
+suite = os.environ.get("SUITE_NAME", "atlasbot")
+status = os.environ.get("QUALITY_STATUS", "failed")
+gateway = os.environ.get("PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091").rstrip("/")
+text = urllib.request.urlopen(f"{gateway}/metrics", timeout=10).read().decode("utf-8", errors="replace")
+
+def counter(name: str) -> float:
+    # Current run-counter value for status name; 0.0 when no sample matches.
+    # Lookaheads keep the match independent of the order the labels are rendered in.
+    pattern = re.compile(
+        rf'^platform_quality_gate_runs_total\\{{(?=[^}}]*job="platform-quality-ci")(?=[^}}]*suite="{re.escape(suite)}")(?=[^}}]*status="{name}")[^}}]*\\}}\\s+([0-9]+(?:\\.[0-9]+)?)$',
+        re.M,
+    )
+    match = pattern.search(text)
+    return float(match.group(1)) if match else 0.0
+
+ok = counter("ok")
+failed = counter("failed")
+if status == "ok":
+    ok += 1
+else:
+    failed += 1
+payload = (
+    "# TYPE platform_quality_gate_runs_total counter\\n"
+    f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {int(ok)}\\n'
+    f'platform_quality_gate_runs_total{{suite="{suite}",status="failed"}} {int(failed)}\\n'
+)
+req = urllib.request.Request(
+    f"{gateway}/metrics/job/platform-quality-ci/suite/{suite}",
+    data=payload.encode("utf-8"),
+    method="POST",
+    headers={"Content-Type": "text/plain"},
+)
+urllib.request.urlopen(req, timeout=10).read()
+PY
+        '''
+      }
+    }
     always {
       script {
         if (fileExists('build.env')) {
diff --git a/scripts/publish_test_metrics.py b/scripts/publish_test_metrics.py
index d58acd7..9a8a38d 100644
--- a/scripts/publish_test_metrics.py
+++ b/scripts/publish_test_metrics.py
@@ -1,59 +1,187 @@
+#!/usr/bin/env python3
+"""Publish Atlasbot CI test metrics to Pushgateway.
+
+Inputs:
+- JUnit XML file and coverage JSON file.
+
+Outputs:
+- platform_quality_gate_runs_total{suite="atlasbot",status="ok|failed"}
+- atlasbot_quality_gate_tests_total{suite="atlasbot",result=*}
+- atlasbot_quality_gate_coverage_percent{suite="atlasbot"}
+"""
+
+from __future__ import annotations
+
+import json
 import os
-import time
+import urllib.request
 import xml.etree.ElementTree as ET
-
-import httpx
-
-VM_URL = os.getenv("VM_PUSH_URL", "http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428/api/v1/import/prometheus")
-JUNIT_PATH = os.getenv("JUNIT_PATH", "build/junit.xml")
-COVERAGE_PATH = os.getenv("COVERAGE_PATH", "build/coverage.json")
-JOB = os.getenv("TEST_JOB", "atlasbot-tests")
+from pathlib import Path
 
 
-def _load_junit(path: str) -> dict[str, float]:
+def _as_int(node: ET.Element, name: str) -> int:
+    """Return attribute *name* of *node* as an int; 0 if missing or not a finite number."""
+    raw = node.attrib.get(name) or "0"
+    try:
+        return int(float(raw))
+    except (ValueError, OverflowError):
+        # int(float("inf")) raises OverflowError rather than ValueError.
+        return 0
+
+
+def _load_junit(path: Path) -> dict[str, int]:
+    """Sum the tests/failures/errors/skipped counters of a JUnit XML report.
+
+    A missing or malformed report yields all-zero totals, which ``main``
+    publishes as a failed run.
+    """
+    totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
+    if not path.exists():
+        return totals
+
-    tree = ET.parse(path)
-    root = tree.getroot()
-    tests = int(root.attrib.get("tests", "0"))
-    failures = int(root.attrib.get("failures", "0"))
-    errors = int(root.attrib.get("errors", "0"))
-    time_sec = float(root.attrib.get("time", "0"))
-    return {
-        "tests": tests,
-        "failures": failures,
-        "errors": errors,
-        "time": time_sec,
-    }
+    try:
+        root = ET.parse(path).getroot()
+    except ET.ParseError:
+        # A truncated report must still be published as a failed run.
+        return totals
+
+    suites: list[ET.Element]
+    if root.tag == "testsuite":
+        suites = [root]
+    elif root.tag == "testsuites":
+        suites = list(root.findall("testsuite"))
+    else:
+        suites = []
+
+    for suite in suites:
+        totals["tests"] += _as_int(suite, "tests")
+        totals["failures"] += _as_int(suite, "failures")
+        totals["errors"] += _as_int(suite, "errors")
+        totals["skipped"] += _as_int(suite, "skipped")
+    return totals
 
 
-def _load_coverage(path: str) -> float:
-    try:
-        import json
-
-        data = json.load(open(path))
-        total = data.get("summary", {}).get("percent_covered", 0)
-        return float(total) / 100.0
-    except Exception:
+def _load_coverage_percent(path: Path) -> float:
+    """Return ``summary.percent_covered`` from a coverage JSON report, unscaled.
+
+    Returns 0.0 when the file is missing, is not valid JSON, or has no numeric
+    ``percent_covered``.
+    """
+    if not path.exists():
         return 0.0
+    try:
+        payload = json.loads(path.read_text(encoding="utf-8"))
+    except ValueError:
+        # json.JSONDecodeError and UnicodeDecodeError both derive from ValueError.
+        return 0.0
+    summary = payload.get("summary") if isinstance(payload, dict) else None
+    percent = summary.get("percent_covered") if isinstance(summary, dict) else None
+    # bool is an int subclass; do not publish True/False as a percentage.
+    if isinstance(percent, (int, float)) and not isinstance(percent, bool):
+        return float(percent)
+    return 0.0
 
 
-def _format_metric(name: str, value: float) -> str:
-    return f"{name}{{job=\"{JOB}\"}} {value} {int(time.time())}"
+def _read_text(url: str) -> str:
+    """Fetch *url* and return the body as text, or "" on any failure (best effort)."""
+    try:
+        with urllib.request.urlopen(url, timeout=10) as resp:
+            return resp.read().decode("utf-8", errors="replace")
+    except Exception:
+        return ""
 
 
-def main() -> None:
-    junit = _load_junit(JUNIT_PATH)
-    coverage = _load_coverage(COVERAGE_PATH)
-    lines = [
-        _format_metric("atlasbot_tests_total", junit["tests"]),
-        _format_metric("atlasbot_tests_failed", junit["failures"]),
-        _format_metric("atlasbot_tests_errors", junit["errors"]),
-        _format_metric("atlasbot_tests_time_seconds", junit["time"]),
-        _format_metric("atlasbot_coverage_ratio", coverage),
-    ]
-    body = "\n".join(lines) + "\n"
-    httpx.post(VM_URL, content=body, timeout=10.0)
-    print("metrics push complete")
+def _counter(metrics: str, suite: str, status: str) -> float:
+    """Return the current run-counter value for *suite* and *status*.
+
+    Scans the text exposition in *metrics* line by line, so label order does
+    not matter. Returns 0.0 when no matching sample exists or it is unparseable.
+    """
+    for line in metrics.splitlines():
+        if not line.startswith("platform_quality_gate_runs_total{"):
+            continue
+        if 'job="platform-quality-ci"' not in line:
+            continue
+        if f'suite="{suite}"' not in line:
+            continue
+        if f'status="{status}"' not in line:
+            continue
+        # Read the value after the closing brace so spaces inside label
+        # values cannot shift the field that gets parsed.
+        fields = line.rpartition("}")[2].split()
+        if not fields:
+            continue
+        try:
+            return float(fields[0])
+        except ValueError:
+            return 0.0
+    return 0.0
+
+
+def _post_text(url: str, payload: str) -> None:
+    """POST *payload* to *url* as ``text/plain``.
+
+    Raises:
+        urllib.error.URLError: on connection failure; HTTP error statuses are
+            raised by ``urlopen`` as its ``HTTPError`` subclass.
+        RuntimeError: if a response with status >= 400 is returned anyway.
+    """
+    req = urllib.request.Request(
+        url,
+        data=payload.encode("utf-8"),
+        method="POST",
+        headers={"Content-Type": "text/plain"},
+    )
+    with urllib.request.urlopen(req, timeout=10) as resp:
+        if resp.status >= 400:
+            raise RuntimeError(f"push failed status={resp.status}")
+
+
+def main() -> int:
+    """Read the test reports, bump the run counter and push all metrics.
+
+    Configuration comes from SUITE_NAME, PUSHGATEWAY_URL, JUNIT_PATH and
+    COVERAGE_PATH. Returns 0; a failed push propagates as an exception.
+    """
+    suite = os.getenv("SUITE_NAME", "atlasbot")
+    pushgateway_url = os.getenv(
+        "PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091"
+    ).rstrip("/")
+
+    junit_path = Path(os.getenv("JUNIT_PATH", "build/junit.xml"))
+    coverage_path = Path(os.getenv("COVERAGE_PATH", "build/coverage.json"))
+
+    totals = _load_junit(junit_path)
+    coverage_pct = _load_coverage_percent(coverage_path)
+    passed = max(totals["tests"] - totals["failures"] - totals["errors"] - totals["skipped"], 0)
+    outcome = "ok" if totals["tests"] > 0 and totals["failures"] == 0 and totals["errors"] == 0 else "failed"
+
+    metrics = _read_text(f"{pushgateway_url}/metrics")
+    ok_count = _counter(metrics, suite, "ok")
+    failed_count = _counter(metrics, suite, "failed")
+    if outcome == "ok":
+        ok_count += 1
+    else:
+        failed_count += 1
+
+    payload = "\n".join(
+        [
+            "# TYPE platform_quality_gate_runs_total counter",
+            f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {ok_count:.0f}',
+            f'platform_quality_gate_runs_total{{suite="{suite}",status="failed"}} {failed_count:.0f}',
+            "# TYPE atlasbot_quality_gate_tests_total gauge",
+            f'atlasbot_quality_gate_tests_total{{suite="{suite}",result="passed"}} {passed}',
+            f'atlasbot_quality_gate_tests_total{{suite="{suite}",result="failed"}} {totals["failures"]}',
+            f'atlasbot_quality_gate_tests_total{{suite="{suite}",result="error"}} {totals["errors"]}',
+            f'atlasbot_quality_gate_tests_total{{suite="{suite}",result="skipped"}} {totals["skipped"]}',
+            "# TYPE atlasbot_quality_gate_coverage_percent gauge",
+            f'atlasbot_quality_gate_coverage_percent{{suite="{suite}"}} {coverage_pct:.3f}',
+        ]
+    ) + "\n"
+
+    _post_text(f"{pushgateway_url}/metrics/job/platform-quality-ci/suite/{suite}", payload)
+    return 0
 
 
 if __name__ == "__main__":
-    main()
+    raise SystemExit(main())