ci: publish atlasbot quality gate metrics to pushgateway

This commit is contained in:
Brad Stein 2026-04-10 05:18:44 -03:00
parent 01b27d5488
commit 25920c2b2c
2 changed files with 225 additions and 44 deletions

96
Jenkinsfile vendored
View File

@ -71,6 +71,8 @@ spec:
environment {
// Pipeline-wide environment variables, visible to every stage and post step.
// Skip pip's "newer version available" check on each invocation.
PIP_DISABLE_PIP_VERSION_CHECK = '1'
// Run Python with unbuffered stdout/stderr so output is not held back in buffers.
PYTHONUNBUFFERED = '1'
// Value of the `suite` label on published quality-gate metrics (read by the post steps).
SUITE_NAME = 'atlasbot'
// Base URL of the Pushgateway that the post steps read counters from and push to.
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
}
stages {
stage('Checkout') {
@ -115,7 +117,9 @@ spec:
seq 1 10 | while read _; do
docker info && break || sleep 2
done
docker buildx create --name bstein-builder --driver docker-container --bootstrap --use
BUILDER_NAME="atlasbot-${BUILD_NUMBER}"
docker buildx rm "${BUILDER_NAME}" >/dev/null 2>&1 || true
docker buildx create --name "${BUILDER_NAME}" --driver docker-container --bootstrap --use
'''
}
}
@ -166,6 +170,96 @@ spec:
}
}
post {
success {
    // Record a passing run: read the suite's current run counters from the
    // Pushgateway, bump the "ok" counter and push both counters back.
    container('tester') {
        sh '''
            set -euo pipefail
            export QUALITY_STATUS=ok
            # The heredoc body and its terminator must stay at column 0.
            python - <<'PY'
import os
import urllib.request

suite = os.environ.get("SUITE_NAME", "atlasbot")
status = os.environ.get("QUALITY_STATUS", "failed")
gateway = os.environ.get("PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091").rstrip("/")
text = urllib.request.urlopen(f"{gateway}/metrics", timeout=10).read().decode("utf-8", errors="replace")


def counter(name: str) -> float:
    """Return the current run counter for this suite and status, or 0.0."""
    # Match each label on its own instead of assuming a fixed label order:
    # exposition output normally sorts label names, which puts status before
    # suite, so an order-dependent pattern never matches.
    wanted = ('job="platform-quality-ci"', f'suite="{suite}"', f'status="{name}"')
    for line in text.splitlines():
        if not line.startswith("platform_quality_gate_runs_total{"):
            continue
        # The sample value follows the closing brace of the label set.
        labels, brace, sample = line.rpartition("}")
        if not brace or not all(label in labels for label in wanted):
            continue
        fields = sample.split()
        try:
            # float() also accepts exponent notation such as 1e+06.
            return float(fields[0])
        except (IndexError, ValueError):
            continue
    return 0.0


ok = counter("ok")
failed = counter("failed")
if status == "ok":
    ok += 1
else:
    failed += 1
payload = (
    "# TYPE platform_quality_gate_runs_total counter\\n"
    f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {int(ok)}\\n'
    f'platform_quality_gate_runs_total{{suite="{suite}",status="failed"}} {int(failed)}\\n'
)
req = urllib.request.Request(
    f"{gateway}/metrics/job/platform-quality-ci/suite/{suite}",
    data=payload.encode("utf-8"),
    method="POST",
    headers={"Content-Type": "text/plain"},
)
urllib.request.urlopen(req, timeout=10).read()
PY
        '''
    }
}
failure {
    // Record a failing run: read the suite's current run counters from the
    // Pushgateway, bump the "failed" counter and push both counters back.
    container('tester') {
        sh '''
            set -euo pipefail
            export QUALITY_STATUS=failed
            # The heredoc body and its terminator must stay at column 0.
            python - <<'PY'
import os
import urllib.request

suite = os.environ.get("SUITE_NAME", "atlasbot")
status = os.environ.get("QUALITY_STATUS", "failed")
gateway = os.environ.get("PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091").rstrip("/")
text = urllib.request.urlopen(f"{gateway}/metrics", timeout=10).read().decode("utf-8", errors="replace")


def counter(name: str) -> float:
    """Return the current run counter for this suite and status, or 0.0."""
    # Match each label on its own instead of assuming a fixed label order:
    # exposition output normally sorts label names, which puts status before
    # suite, so an order-dependent pattern never matches.
    wanted = ('job="platform-quality-ci"', f'suite="{suite}"', f'status="{name}"')
    for line in text.splitlines():
        if not line.startswith("platform_quality_gate_runs_total{"):
            continue
        # The sample value follows the closing brace of the label set.
        labels, brace, sample = line.rpartition("}")
        if not brace or not all(label in labels for label in wanted):
            continue
        fields = sample.split()
        try:
            # float() also accepts exponent notation such as 1e+06.
            return float(fields[0])
        except (IndexError, ValueError):
            continue
    return 0.0


ok = counter("ok")
failed = counter("failed")
if status == "ok":
    ok += 1
else:
    failed += 1
payload = (
    "# TYPE platform_quality_gate_runs_total counter\\n"
    f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {int(ok)}\\n'
    f'platform_quality_gate_runs_total{{suite="{suite}",status="failed"}} {int(failed)}\\n'
)
req = urllib.request.Request(
    f"{gateway}/metrics/job/platform-quality-ci/suite/{suite}",
    data=payload.encode("utf-8"),
    method="POST",
    headers={"Content-Type": "text/plain"},
)
urllib.request.urlopen(req, timeout=10).read()
PY
        '''
    }
}
always {
script {
if (fileExists('build.env')) {

View File

@ -1,59 +1,146 @@
#!/usr/bin/env python3
"""Publish Atlasbot CI test metrics to Pushgateway.
Inputs:
- JUnit XML file and coverage JSON file.
Outputs:
- platform_quality_gate_runs_total{suite="atlasbot",status="ok|failed"}
- atlasbot_quality_gate_tests_total{suite="atlasbot",result=*}
- atlasbot_quality_gate_coverage_percent{suite="atlasbot"}
"""
from __future__ import annotations
import json
import os
import time
import urllib.request
import xml.etree.ElementTree as ET
import httpx
VM_URL = os.getenv("VM_PUSH_URL", "http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428/api/v1/import/prometheus")
JUNIT_PATH = os.getenv("JUNIT_PATH", "build/junit.xml")
COVERAGE_PATH = os.getenv("COVERAGE_PATH", "build/coverage.json")
JOB = os.getenv("TEST_JOB", "atlasbot-tests")
from pathlib import Path
def _load_junit(path: str) -> dict[str, float]:
def _as_int(node: ET.Element, name: str) -> int:
    """Return attribute *name* of *node* as an int, or 0 if it is unusable.

    The value goes through ``float`` first so counts written as ``"3.0"`` are
    accepted. A missing or empty attribute, non-numeric text, ``nan`` and
    infinities all yield 0.
    """
    raw = node.attrib.get(name) or "0"
    try:
        return int(float(raw))
    except (ValueError, OverflowError):
        # ValueError: non-numeric text or NaN; OverflowError: +/- infinity.
        return 0
def _load_junit(path: Path) -> dict[str, int]:
    """Sum the test counters of a JUnit XML report.

    Accepts a ``<testsuite>`` root or a ``<testsuites>`` root, in which case
    its direct ``<testsuite>`` children are added up. A missing or unparseable
    report, or any other root element, yields all-zero totals.

    Returns:
        A dict with the integer keys ``tests``, ``failures``, ``errors`` and
        ``skipped``.
    """
    import sys

    totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
    if not path.exists():
        return totals
    try:
        root = ET.parse(path).getroot()
    except (ET.ParseError, OSError) as exc:
        # A truncated report (e.g. the test runner died mid-write) is treated
        # like a missing one instead of aborting the publish step.
        print(f"warning: could not parse JUnit report {path}: {exc}", file=sys.stderr)
        return totals

    suites: list[ET.Element]
    if root.tag == "testsuite":
        suites = [root]
    elif root.tag == "testsuites":
        suites = list(root.findall("testsuite"))
    else:
        suites = []

    for suite in suites:
        for key in totals:
            totals[key] += _as_int(suite, key)
    return totals
def _load_coverage(path: str) -> float:
try:
import json
data = json.load(open(path))
total = data.get("summary", {}).get("percent_covered", 0)
return float(total) / 100.0
except Exception:
def _load_coverage_percent(path: Path) -> float:
    """Return ``summary.percent_covered`` from a coverage JSON report.

    The value is returned as stored in the report (not divided by 100).
    Returns 0.0 when the file is missing or unreadable, is not valid JSON,
    lacks a ``summary`` object, or the field is not a number.
    """
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing or unreadable file; ValueError: bad JSON or encoding.
        return 0.0
    summary = payload.get("summary") if isinstance(payload, dict) else None
    percent = summary.get("percent_covered") if isinstance(summary, dict) else None
    # bool is a subclass of int; do not report True/False as 1.0/0.0 coverage.
    if isinstance(percent, (int, float)) and not isinstance(percent, bool):
        return float(percent)
    return 0.0
def _format_metric(name: str, value: float) -> str:
    """Render one sample line: metric name, a ``job`` label, value, timestamp.

    The ``job`` label carries the module-level ``JOB`` value; the timestamp is
    the current Unix time truncated to whole seconds.
    """
    timestamp = int(time.time())
    return f"{name}{{job=\"{JOB}\"}} {value} {timestamp}"
def _read_text(url: str) -> str:
    """Fetch *url* and return its body decoded as UTF-8, or "" on any failure.

    Undecodable bytes are replaced rather than raising. The fetch is
    best-effort, so every error maps to an empty string, but the reason is
    written to stderr so a failed read is visible in the build log.
    """
    import sys

    try:
        with urllib.request.urlopen(url, timeout=10) as resp:
            return resp.read().decode("utf-8", errors="replace")
    except Exception as exc:  # deliberate catch-all: reading is best-effort
        print(f"warning: could not read {url}: {exc}", file=sys.stderr)
        return ""
def main() -> None:
    """Push JUnit and coverage metrics to the import endpoint at ``VM_URL``.

    Builds one sample line per metric from the JUnit totals and the coverage
    ratio, then POSTs them in a single request.

    Raises:
        httpx.HTTPStatusError: if the endpoint answers with a 4xx/5xx status.
    """
    junit = _load_junit(JUNIT_PATH)
    coverage = _load_coverage(COVERAGE_PATH)
    lines = [
        _format_metric("atlasbot_tests_total", junit["tests"]),
        _format_metric("atlasbot_tests_failed", junit["failures"]),
        _format_metric("atlasbot_tests_errors", junit["errors"]),
        _format_metric("atlasbot_tests_time_seconds", junit["time"]),
        _format_metric("atlasbot_coverage_ratio", coverage),
    ]
    body = "\n".join(lines) + "\n"
    response = httpx.post(VM_URL, content=body, timeout=10.0)
    # Without this check a rejected push would still be reported as complete.
    response.raise_for_status()
    print("metrics push complete")
def _counter(metrics: str, suite: str, status: str) -> float:
    """Return the current ``platform_quality_gate_runs_total`` value.

    Scans exposition text *metrics* for the first sample of that metric whose
    label set contains ``job="platform-quality-ci"`` and the given *suite* and
    *status*, in any label order.

    Returns:
        The sample value as a float, or 0.0 if no sample matches or the
        matching sample's value is not numeric.
    """
    prefix = "platform_quality_gate_runs_total{"
    wanted = ('job="platform-quality-ci"', f'suite="{suite}"', f'status="{status}"')
    for line in metrics.splitlines():
        if not line.startswith(prefix):
            continue
        # Split at the closing brace, not on whitespace: a label value may
        # itself contain spaces, which would shift a plain split().
        labels, brace, sample = line[len(prefix):].rpartition("}")
        if not brace or not all(label in labels for label in wanted):
            continue
        fields = sample.split()
        if not fields:
            continue
        try:
            return float(fields[0])
        except ValueError:
            return 0.0
    return 0.0
def _post_text(url: str, payload: str) -> None:
    """POST *payload* to *url* as UTF-8 encoded ``text/plain``.

    Raises:
        RuntimeError: if the response status is 400 or above.
    """
    body = payload.encode("utf-8")
    request = urllib.request.Request(
        url,
        data=body,
        headers={"Content-Type": "text/plain"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=10) as response:
        if response.status < 400:
            return
        raise RuntimeError(f"push failed status={response.status}")
def main() -> int:
    """Publish this run's quality-gate metrics to the Pushgateway.

    Configuration comes from the environment, each with a default:
    ``SUITE_NAME``, ``PUSHGATEWAY_URL``, ``JUNIT_PATH`` and ``COVERAGE_PATH``.
    A run counts as "ok" only when at least one test ran and none failed or
    errored. The current run counters are read back from the gateway, the
    matching one is incremented, and counters, per-result test totals and
    coverage are pushed together.

    Returns:
        0, for use as the process exit status.
    """
    suite = os.getenv("SUITE_NAME", "atlasbot")
    default_gateway = "http://platform-quality-gateway.monitoring.svc.cluster.local:9091"
    pushgateway_url = os.getenv("PUSHGATEWAY_URL", default_gateway).rstrip("/")

    totals = _load_junit(Path(os.getenv("JUNIT_PATH", "build/junit.xml")))
    coverage_pct = _load_coverage_percent(Path(os.getenv("COVERAGE_PATH", "build/coverage.json")))

    tests = totals["tests"]
    failures = totals["failures"]
    errors = totals["errors"]
    skipped = totals["skipped"]
    passed = max(tests - failures - errors - skipped, 0)
    run_ok = tests > 0 and failures == 0 and errors == 0

    # Read the current counters first; the push carries the incremented values.
    scraped = _read_text(f"{pushgateway_url}/metrics")
    ok_count = _counter(scraped, suite, "ok")
    failed_count = _counter(scraped, suite, "failed")
    if run_ok:
        ok_count += 1
    else:
        failed_count += 1

    lines = [
        "# TYPE platform_quality_gate_runs_total counter",
        f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {ok_count:.0f}',
        f'platform_quality_gate_runs_total{{suite="{suite}",status="failed"}} {failed_count:.0f}',
        "# TYPE atlasbot_quality_gate_tests_total gauge",
        f'atlasbot_quality_gate_tests_total{{suite="{suite}",result="passed"}} {passed}',
        f'atlasbot_quality_gate_tests_total{{suite="{suite}",result="failed"}} {failures}',
        f'atlasbot_quality_gate_tests_total{{suite="{suite}",result="error"}} {errors}',
        f'atlasbot_quality_gate_tests_total{{suite="{suite}",result="skipped"}} {skipped}',
        "# TYPE atlasbot_quality_gate_coverage_percent gauge",
        f'atlasbot_quality_gate_coverage_percent{{suite="{suite}"}} {coverage_pct:.3f}',
    ]
    target = f"{pushgateway_url}/metrics/job/platform-quality-ci/suite/{suite}"
    _post_text(target, "\n".join(lines) + "\n")
    return 0
if __name__ == "__main__":
    # Call main() exactly once and use its return value as the exit status;
    # a second call would push the metrics twice.
    raise SystemExit(main())