ci: publish atlasbot quality gate metrics to pushgateway

This commit is contained in:
Brad Stein 2026-04-10 05:18:44 -03:00
parent 01b27d5488
commit 25920c2b2c
2 changed files with 225 additions and 44 deletions

96
Jenkinsfile vendored
View File

@ -71,6 +71,8 @@ spec:
environment { environment {
PIP_DISABLE_PIP_VERSION_CHECK = '1' PIP_DISABLE_PIP_VERSION_CHECK = '1'
PYTHONUNBUFFERED = '1' PYTHONUNBUFFERED = '1'
SUITE_NAME = 'atlasbot'
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
} }
stages { stages {
stage('Checkout') { stage('Checkout') {
@ -115,7 +117,9 @@ spec:
seq 1 10 | while read _; do seq 1 10 | while read _; do
docker info && break || sleep 2 docker info && break || sleep 2
done done
docker buildx create --name bstein-builder --driver docker-container --bootstrap --use BUILDER_NAME="atlasbot-${BUILD_NUMBER}"
docker buildx rm "${BUILDER_NAME}" >/dev/null 2>&1 || true
docker buildx create --name "${BUILDER_NAME}" --driver docker-container --bootstrap --use
''' '''
} }
} }
@ -166,6 +170,96 @@ spec:
} }
} }
post { post {
success {
container('tester') {
sh '''
set -euo pipefail
export QUALITY_STATUS=ok
python - <<'PY'
# Inline quality-gate publisher for the Jenkins post step.
# It reads the current run counters for this suite from the Pushgateway,
# bumps the one matching QUALITY_STATUS, and pushes both counters back.
# NOTE: this script lives inside a Groovy single-quoted string, so any
# backslash that Python must see has to be written doubled.
import os
import urllib.request

suite = os.environ.get("SUITE_NAME", "atlasbot")
status = os.environ.get("QUALITY_STATUS", "failed")
gateway = os.environ.get("PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091").rstrip("/")
with urllib.request.urlopen(f"{gateway}/metrics", timeout=10) as resp:
    text = resp.read().decode("utf-8", errors="replace")


def counter(name: str) -> float:
    """Return the stored run counter for this suite and the given status.

    Labels are matched individually instead of with one ordered pattern:
    the gateway does not promise to print them in job/suite/status order,
    and an ordered pattern that never matches silently resets the counter.
    Returns 0.0 when no matching sample exists.
    """
    wanted = ('job="platform-quality-ci"', f'suite="{suite}"', f'status="{name}"')
    for line in text.splitlines():
        if not line.startswith("platform_quality_gate_runs_total{"):
            continue
        if not all(label in line for label in wanted):
            continue
        # The sample value is the first token after the closing brace.
        fields = line.rpartition("}")[2].split()
        if not fields:
            continue
        try:
            return float(fields[0])
        except ValueError:
            continue
    return 0.0


ok = counter("ok")
failed = counter("failed")
if status == "ok":
    ok += 1
else:
    failed += 1
payload = (
    "# TYPE platform_quality_gate_runs_total counter\\n"
    f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {int(ok)}\\n'
    f'platform_quality_gate_runs_total{{suite="{suite}",status="failed"}} {int(failed)}\\n'
)
req = urllib.request.Request(
    f"{gateway}/metrics/job/platform-quality-ci/suite/{suite}",
    data=payload.encode("utf-8"),
    method="POST",
    headers={"Content-Type": "text/plain"},
)
# urlopen raises on HTTP error statuses, which fails the step under set -e.
with urllib.request.urlopen(req, timeout=10) as resp:
    resp.read()
PY
'''
}
}
failure {
container('tester') {
sh '''
set -euo pipefail
export QUALITY_STATUS=failed
python - <<'PY'
# Inline quality-gate publisher for the Jenkins post step.
# It reads the current run counters for this suite from the Pushgateway,
# bumps the one matching QUALITY_STATUS, and pushes both counters back.
# NOTE: this script lives inside a Groovy single-quoted string, so any
# backslash that Python must see has to be written doubled.
import os
import urllib.request

suite = os.environ.get("SUITE_NAME", "atlasbot")
status = os.environ.get("QUALITY_STATUS", "failed")
gateway = os.environ.get("PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091").rstrip("/")
with urllib.request.urlopen(f"{gateway}/metrics", timeout=10) as resp:
    text = resp.read().decode("utf-8", errors="replace")


def counter(name: str) -> float:
    """Return the stored run counter for this suite and the given status.

    Labels are matched individually instead of with one ordered pattern:
    the gateway does not promise to print them in job/suite/status order,
    and an ordered pattern that never matches silently resets the counter.
    Returns 0.0 when no matching sample exists.
    """
    wanted = ('job="platform-quality-ci"', f'suite="{suite}"', f'status="{name}"')
    for line in text.splitlines():
        if not line.startswith("platform_quality_gate_runs_total{"):
            continue
        if not all(label in line for label in wanted):
            continue
        # The sample value is the first token after the closing brace.
        fields = line.rpartition("}")[2].split()
        if not fields:
            continue
        try:
            return float(fields[0])
        except ValueError:
            continue
    return 0.0


ok = counter("ok")
failed = counter("failed")
if status == "ok":
    ok += 1
else:
    failed += 1
payload = (
    "# TYPE platform_quality_gate_runs_total counter\\n"
    f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {int(ok)}\\n'
    f'platform_quality_gate_runs_total{{suite="{suite}",status="failed"}} {int(failed)}\\n'
)
req = urllib.request.Request(
    f"{gateway}/metrics/job/platform-quality-ci/suite/{suite}",
    data=payload.encode("utf-8"),
    method="POST",
    headers={"Content-Type": "text/plain"},
)
# urlopen raises on HTTP error statuses, which fails the step under set -e.
with urllib.request.urlopen(req, timeout=10) as resp:
    resp.read()
PY
'''
}
}
always { always {
script { script {
if (fileExists('build.env')) { if (fileExists('build.env')) {

View File

@ -1,59 +1,146 @@
#!/usr/bin/env python3
"""Publish Atlasbot CI test metrics to Pushgateway.
Inputs:
- JUnit XML file and coverage JSON file.
Outputs:
- platform_quality_gate_runs_total{suite="atlasbot",status="ok|failed"}
- atlasbot_quality_gate_tests_total{suite="atlasbot",result=*}
- atlasbot_quality_gate_coverage_percent{suite="atlasbot"}
"""
from __future__ import annotations
import json
import os import os
import time import urllib.request
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from pathlib import Path
import httpx
VM_URL = os.getenv("VM_PUSH_URL", "http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428/api/v1/import/prometheus")
JUNIT_PATH = os.getenv("JUNIT_PATH", "build/junit.xml")
COVERAGE_PATH = os.getenv("COVERAGE_PATH", "build/coverage.json")
JOB = os.getenv("TEST_JOB", "atlasbot-tests")
def _load_junit(path: str) -> dict[str, float]: def _as_int(node: ET.Element, name: str) -> int:
raw = node.attrib.get(name) or "0"
try:
return int(float(raw))
except ValueError:
return 0
def _load_junit(path: Path) -> dict[str, int]:
if not path.exists():
return {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
tree = ET.parse(path) tree = ET.parse(path)
root = tree.getroot() root = tree.getroot()
tests = int(root.attrib.get("tests", "0")) suites: list[ET.Element]
failures = int(root.attrib.get("failures", "0")) if root.tag == "testsuite":
errors = int(root.attrib.get("errors", "0")) suites = [root]
time_sec = float(root.attrib.get("time", "0")) elif root.tag == "testsuites":
return { suites = list(root.findall("testsuite"))
"tests": tests, else:
"failures": failures, suites = []
"errors": errors,
"time": time_sec, totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
} for suite in suites:
totals["tests"] += _as_int(suite, "tests")
totals["failures"] += _as_int(suite, "failures")
totals["errors"] += _as_int(suite, "errors")
totals["skipped"] += _as_int(suite, "skipped")
return totals
def _load_coverage(path: str) -> float: def _load_coverage_percent(path: Path) -> float:
try: if not path.exists():
import json
data = json.load(open(path))
total = data.get("summary", {}).get("percent_covered", 0)
return float(total) / 100.0
except Exception:
return 0.0 return 0.0
payload = json.loads(path.read_text(encoding="utf-8"))
summary = payload.get("summary") or {}
percent = summary.get("percent_covered")
if isinstance(percent, (int, float)):
return float(percent)
return 0.0
def _format_metric(name: str, value: float) -> str: def _read_text(url: str) -> str:
return f"{name}{{job=\"{JOB}\"}} {value} {int(time.time())}" try:
with urllib.request.urlopen(url, timeout=10) as resp:
return resp.read().decode("utf-8", errors="replace")
except Exception:
return ""
def main() -> None: def _counter(metrics: str, suite: str, status: str) -> float:
junit = _load_junit(JUNIT_PATH) for line in metrics.splitlines():
coverage = _load_coverage(COVERAGE_PATH) if not line.startswith("platform_quality_gate_runs_total{"):
lines = [ continue
_format_metric("atlasbot_tests_total", junit["tests"]), if f'job="platform-quality-ci"' not in line:
_format_metric("atlasbot_tests_failed", junit["failures"]), continue
_format_metric("atlasbot_tests_errors", junit["errors"]), if f'suite="{suite}"' not in line:
_format_metric("atlasbot_tests_time_seconds", junit["time"]), continue
_format_metric("atlasbot_coverage_ratio", coverage), if f'status="{status}"' not in line:
] continue
body = "\n".join(lines) + "\n" parts = line.split()
httpx.post(VM_URL, content=body, timeout=10.0) if len(parts) < 2:
print("metrics push complete") continue
try:
return float(parts[1])
except ValueError:
return 0.0
return 0.0
def _post_text(url: str, payload: str) -> None:
    """POST a plain-text payload to the given URL.

    Args:
        url: Full target URL.
        payload: Text body; sent UTF-8 encoded with Content-Type text/plain.

    Raises:
        RuntimeError: The server answered with an HTTP error status. The
            message carries the status code and any response body text.
        urllib.error.URLError: The request could not be completed at all
            (connection refused, DNS failure, timeout).
    """
    from urllib.error import HTTPError

    req = urllib.request.Request(
        url,
        data=payload.encode("utf-8"),
        method="POST",
        headers={"Content-Type": "text/plain"},
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            resp.read()
    except HTTPError as err:
        # urlopen raises for error statuses instead of returning the response,
        # so a status check on the returned object would never fire.
        detail = err.read().decode("utf-8", errors="replace").strip()
        raise RuntimeError(f"push failed status={err.code} {detail}".rstrip()) from err
def main() -> int:
    """Publish one quality-gate run for the configured suite.

    Configuration comes from the environment: SUITE_NAME, PUSHGATEWAY_URL,
    JUNIT_PATH and COVERAGE_PATH, each with a built-in default. The JUnit
    totals and coverage percentage are loaded, the existing ok/failed run
    counters are read back from the gateway, the counter matching this
    run's outcome is incremented, and everything is pushed in one request.

    Returns:
        0 once the push has been issued; errors from the push propagate.
    """
    suite = os.getenv("SUITE_NAME", "atlasbot")
    default_gateway = "http://platform-quality-gateway.monitoring.svc.cluster.local:9091"
    pushgateway_url = os.getenv("PUSHGATEWAY_URL", default_gateway).rstrip("/")
    junit_path = Path(os.getenv("JUNIT_PATH", "build/junit.xml"))
    coverage_path = Path(os.getenv("COVERAGE_PATH", "build/coverage.json"))

    totals = _load_junit(junit_path)
    coverage_pct = _load_coverage_percent(coverage_path)

    failures = totals["failures"]
    errors = totals["errors"]
    skipped = totals["skipped"]
    # Clamp at zero so inconsistent reports never yield a negative count.
    passed = max(totals["tests"] - failures - errors - skipped, 0)
    # A run with no tests at all is counted as failed.
    run_ok = totals["tests"] > 0 and failures == 0 and errors == 0

    scraped = _read_text(f"{pushgateway_url}/metrics")
    counts = {state: _counter(scraped, suite, state) for state in ("ok", "failed")}
    counts["ok" if run_ok else "failed"] += 1

    lines = [
        "# TYPE platform_quality_gate_runs_total counter",
        f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {counts["ok"]:.0f}',
        f'platform_quality_gate_runs_total{{suite="{suite}",status="failed"}} {counts["failed"]:.0f}',
        "# TYPE atlasbot_quality_gate_tests_total gauge",
        f'atlasbot_quality_gate_tests_total{{suite="{suite}",result="passed"}} {passed}',
        f'atlasbot_quality_gate_tests_total{{suite="{suite}",result="failed"}} {failures}',
        f'atlasbot_quality_gate_tests_total{{suite="{suite}",result="error"}} {errors}',
        f'atlasbot_quality_gate_tests_total{{suite="{suite}",result="skipped"}} {skipped}',
        "# TYPE atlasbot_quality_gate_coverage_percent gauge",
        f'atlasbot_quality_gate_coverage_percent{{suite="{suite}"}} {coverage_pct:.3f}',
    ]
    payload = "".join(f"{line}\n" for line in lines)

    target = f"{pushgateway_url}/metrics/job/platform-quality-ci/suite/{suite}"
    _post_text(target, payload)
    return 0
if __name__ == "__main__": if __name__ == "__main__":
main() raise SystemExit(main())