#!/usr/bin/env python3
# ariadne/scripts/publish_test_metrics.py
"""Publish Ariadne test and quality-gate metrics to Pushgateway.
Inputs: build artifacts such as JUnit XML, Slipcover coverage JSON, optional
quality-gate JSON, and standard Jenkins metadata from the environment.
Outputs: Prometheus metric lines pushed to Pushgateway so Grafana can chart test
health, quality-gate drift, and build context even when a build fails early.
"""
from __future__ import annotations

import base64
import json
import os
from pathlib import Path
from typing import Any
import urllib.request
import xml.etree.ElementTree as ET


def _escape_label(value: str) -> str:
    """Escape a raw string for use as a Prometheus label value.

    Inputs: an arbitrary label string from build metadata.
    Outputs: the same text with every backslash, newline, and double quote
    replaced by its backslash-escaped form, so the result can be placed
    between the quotes of a `key="value"` label pair.
    """
    # A single translate pass maps each character independently, which gives
    # the same result as replacing backslashes first and the others after.
    escapes = str.maketrans({"\\": "\\\\", "\n": "\\n", '"': '\\"'})
    return value.translate(escapes)


def _label_str(labels: dict[str, str]) -> str:
    """Render a label dictionary as a Prometheus label set.

    Inputs: a dictionary of label keys and raw string values.
    Outputs: a `{key="value",...}` fragment built from the entries whose value
    is non-empty, or an empty string when no entry qualifies, so callers can
    append the result directly to a metric name.
    """
    rendered: list[str] = []
    for name, raw in labels.items():
        if not raw:
            # Empty values are left out entirely rather than emitted as key="".
            continue
        rendered.append(f'{name}="{_escape_label(raw)}"')
    if not rendered:
        return ""
    return "{" + ",".join(rendered) + "}"


def _load_coverage(path: Path) -> float | None:
"""Read the overall Slipcover percentage if the artifact exists.
Inputs: the expected coverage artifact path.
Outputs: the percent covered value, or `None` when the artifact is missing or
malformed so failed builds can still publish partial metrics.
"""
if not path.exists():
return None
payload = json.loads(path.read_text(encoding="utf-8"))
summary = payload.get("summary") or {}
percent = summary.get("percent_covered")
if isinstance(percent, (int, float)):
return float(percent)
2026-04-10 13:57:33 -03:00
return None
def _load_junit(path: Path) -> dict[str, int] | None:
"""Aggregate JUnit totals if the artifact exists.
2026-04-10 13:57:33 -03:00
Inputs: the expected JUnit XML path.
Outputs: test totals by outcome, or `None` when the artifact is absent so the
publisher can still emit build and gate status metrics.
"""
if not path.exists():
return None
tree = ET.parse(path)
root = tree.getroot()
def _as_int(node: ET.Element, name: str) -> int:
raw = node.attrib.get(name) or "0"
try:
return int(float(raw))
except ValueError:
return 0
suites: list[ET.Element]
if root.tag == "testsuite":
suites = [root]
elif root.tag == "testsuites":
suites = list(root.findall("testsuite"))
else:
suites = []
totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
for suite in suites:
totals["tests"] += _as_int(suite, "tests")
totals["failures"] += _as_int(suite, "failures")
totals["errors"] += _as_int(suite, "errors")
totals["skipped"] += _as_int(suite, "skipped")
return totals
2026-04-10 13:57:33 -03:00
def _load_quality_gate(path: Path) -> dict[str, Any] | None:
"""Load the optional quality-gate JSON report.
Inputs: the expected report path from `scripts/check_quality_gate.py`.
Outputs: the parsed report when present so metric publication can include
violation counts and per-file gate state.
"""
if not path.exists():
return None
payload = json.loads(path.read_text(encoding="utf-8"))
return payload if isinstance(payload, dict) else None
def _read_http(url: str) -> str:
    """Fetch a URL and return its body as text, best effort.

    Inputs: the URL to open (10 second timeout).
    Outputs: the response body decoded as UTF-8 with undecodable bytes
    replaced, or an empty string when opening or reading raises any
    `Exception`.
    """
    try:
        with urllib.request.urlopen(url, timeout=10) as response:
            raw = response.read()
        body = raw.decode("utf-8", errors="replace")
    except Exception:
        # Deliberately broad: callers treat "" as "nothing available".
        return ""
    return body


def _post_text(url: str, payload: str) -> None:
    """POST a text payload and fail loudly on an error status.

    Inputs: the destination URL and the text body to send.
    Outputs: nothing; the body is sent UTF-8 encoded with a `text/plain`
    content type and a 10 second timeout. Raises `RuntimeError` when the
    response status is 400 or above; exceptions raised by `urlopen` itself
    are not caught here.
    """
    body = payload.encode("utf-8")
    request = urllib.request.Request(
        url,
        data=body,
        headers={"Content-Type": "text/plain"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=10) as response:
        if response.status < 400:
            return
        raise RuntimeError(f"metrics push failed status={response.status}")


def _fetch_existing_counter(pushgateway_url: str, metric: str, labels: dict[str, str]) -> float:
    """Fetch the current counter value for a metric series.

    Inputs: the Pushgateway base URL plus the metric name and the labels the
    series must carry (a series with additional labels still matches).
    Outputs: the value of the first matching sample on the `/metrics` page, or
    `0.0` when the page cannot be read, no sample matches, or the matched value
    is not a number, so reruns can increment rather than overwrite the total.
    """
    text = _read_http(f"{pushgateway_url.rstrip('/')}/metrics")
    if not text:
        return 0.0
    # Label values are escaped in the exposition text, so escape the wanted
    # values the same way before comparing.
    wanted = [f'{key}="{_escape_label(value)}"' for key, value in labels.items()]
    prefix = metric + "{"
    for line in text.splitlines():
        if not line.startswith(prefix):
            continue
        # Split at the last closing brace: the value (and optional timestamp)
        # after it are numeric, whereas label values may contain spaces, so
        # splitting the whole line on whitespace would pick the wrong field.
        label_text, brace, tail = line[len(prefix):].rpartition("}")
        if not brace:
            continue
        # Anchor each pair on a preceding comma (or the start of the label
        # set) so `status="ok"` cannot match inside `quality_status="ok"`.
        anchored = "," + label_text
        if any("," + pair not in anchored for pair in wanted):
            continue
        fields = tail.split()
        if not fields:
            continue
        try:
            return float(fields[0])
        except ValueError:
            return 0.0
    return 0.0


def _metric_line(name: str, value: int | float, labels: dict[str, str] | None = None) -> str:
    """Format one sample line for the Pushgateway payload.

    Inputs: the metric name, its numeric value, and an optional label mapping.
    Outputs: `name`, then the label set rendered by `_label_str` (nothing when
    there are no usable labels), then a space and the value.
    """
    suffix = _label_str(labels if labels else {})
    return " ".join((name + suffix, format(value)))


def _gate_count(summary: dict[str, Any], key: str) -> int:
    """Return one quality-gate summary count as an int, or 0 when unusable."""
    try:
        return int(summary.get(key, 0))
    except (TypeError, ValueError, OverflowError):
        # For example an explicit null in the report: publish 0 rather than
        # abort the whole payload.
        return 0


def _file_metric_lines(suite: str, files: dict[str, Any]) -> list[str]:
    """Render per-file gate metrics with each metric's samples kept together."""
    # One list per metric family, each starting with its TYPE line, so the
    # samples of a family are emitted as a single contiguous group.
    line_counts = ["# TYPE ariadne_quality_gate_file_lines gauge"]
    missing_docs = ["# TYPE ariadne_quality_gate_file_missing_docstrings gauge"]
    coverages = ["# TYPE ariadne_quality_gate_file_coverage_percent gauge"]
    for path, data in sorted(files.items()):
        if not isinstance(data, dict):
            continue
        file_labels = {"suite": suite, "file": path}
        # bool is a subclass of int/float checks below; a JSON true/false would
        # otherwise be rendered as the invalid sample value "True"/"False".
        lines = data.get("lines")
        if isinstance(lines, int) and not isinstance(lines, bool):
            line_counts.append(_metric_line("ariadne_quality_gate_file_lines", lines, file_labels))
        missing = data.get("missing_docstrings")
        if isinstance(missing, int) and not isinstance(missing, bool):
            missing_docs.append(
                _metric_line("ariadne_quality_gate_file_missing_docstrings", missing, file_labels)
            )
        coverage_percent = data.get("coverage_percent")
        if isinstance(coverage_percent, (int, float)) and not isinstance(coverage_percent, bool):
            coverages.append(
                _metric_line(
                    "ariadne_quality_gate_file_coverage_percent",
                    round(float(coverage_percent), 3),
                    file_labels,
                )
            )
    return line_counts + missing_docs + coverages


def _build_payload() -> tuple[str, dict[str, Any]]:
    """Assemble the Pushgateway payload and a summary object.

    Inputs: environment variables (`COVERAGE_JSON`, `JUNIT_XML`,
    `QUALITY_GATE_JSON`, `PUSHGATEWAY_URL`, `SUITE_NAME`, `BRANCH_NAME`,
    `BUILD_NUMBER`, `GIT_COMMIT`, `QUALITY_STATUS`) and any available build
    artifacts. The current run counters are read back from Pushgateway.
    Outputs: metric lines ready for Pushgateway plus a summary dict that local
    tests and Jenkins logs can inspect.
    """
    coverage_path = Path(os.getenv("COVERAGE_JSON", "build/coverage.json"))
    junit_path = Path(os.getenv("JUNIT_XML", "build/junit.xml"))
    quality_gate_path = Path(os.getenv("QUALITY_GATE_JSON", "build/quality-gate.json"))
    pushgateway_url = os.getenv(
        "PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091"
    ).strip()
    suite = os.getenv("SUITE_NAME", "ariadne")
    branch = os.getenv("BRANCH_NAME", "")
    build_number = os.getenv("BUILD_NUMBER", "")
    commit = os.getenv("GIT_COMMIT", "")
    # Anything other than an explicit "ok" is counted as a failed run.
    outcome = os.getenv("QUALITY_STATUS", "failed").strip() or "failed"

    coverage = _load_coverage(coverage_path)
    totals = _load_junit(junit_path)
    quality_gate = _load_quality_gate(quality_gate_path)

    tests_total = totals["tests"] if totals else 0
    tests_failed = totals["failures"] if totals else 0
    tests_errors = totals["errors"] if totals else 0
    tests_skipped = totals["skipped"] if totals else 0
    # Clamp at zero in case the report's counts do not add up.
    tests_passed = max(tests_total - tests_failed - tests_errors - tests_skipped, 0)

    job_name = "platform-quality-ci"
    ok_count = _fetch_existing_counter(
        pushgateway_url,
        "platform_quality_gate_runs_total",
        {"job": job_name, "suite": suite, "status": "ok"},
    )
    failed_count = _fetch_existing_counter(
        pushgateway_url,
        "platform_quality_gate_runs_total",
        {"job": job_name, "suite": suite, "status": "failed"},
    )
    if outcome == "ok":
        ok_count += 1
    else:
        failed_count += 1

    build_labels = {
        "suite": suite,
        "branch": branch,
        "build_number": build_number,
        "commit": commit,
    }
    suite_labels = {"suite": suite}

    # Only trust `summary` / `files` when they have the expected mapping shape;
    # anything else is treated as absent instead of crashing the publisher.
    gate_report = quality_gate or {}
    summary = gate_report.get("summary")
    if not isinstance(summary, dict):
        summary = {}
    files = gate_report.get("files")
    if not isinstance(files, dict):
        files = {}

    artifacts = (
        ("coverage_json", coverage_path),
        ("junit_xml", junit_path),
        ("quality_gate_json", quality_gate_path),
    )
    test_results = (
        ("passed", tests_passed),
        ("failed", tests_failed),
        ("error", tests_errors),
        ("skipped", tests_skipped),
    )
    violation_keys = (
        ("line_count", "line_count_violations"),
        ("docstrings", "docstring_violations"),
        ("coverage", "coverage_violations"),
    )
    legacy_keys = (
        ("line_count", "legacy_line_count_files"),
        ("docstrings", "legacy_docstring_files"),
        ("coverage", "coverage_exemptions"),
    )

    metric_lines = [
        "# TYPE platform_quality_gate_runs_total counter",
        _metric_line("platform_quality_gate_runs_total", int(ok_count), {"suite": suite, "status": "ok"}),
        _metric_line("platform_quality_gate_runs_total", int(failed_count), {"suite": suite, "status": "failed"}),
        "# TYPE ariadne_quality_gate_status gauge",
        _metric_line("ariadne_quality_gate_status", 1 if outcome == "ok" else 0, suite_labels),
        "# TYPE ariadne_quality_gate_artifact_present gauge",
    ]
    metric_lines.extend(
        _metric_line(
            "ariadne_quality_gate_artifact_present",
            1 if artifact_path.exists() else 0,
            {"suite": suite, "artifact": artifact},
        )
        for artifact, artifact_path in artifacts
    )
    metric_lines.append("# TYPE ariadne_quality_gate_tests_total gauge")
    metric_lines.extend(
        _metric_line("ariadne_quality_gate_tests_total", count, {"suite": suite, "result": result_name})
        for result_name, count in test_results
    )
    metric_lines.append("# TYPE ariadne_quality_gate_coverage_percent gauge")
    metric_lines.append(
        _metric_line("ariadne_quality_gate_coverage_percent", round(coverage or 0.0, 3), suite_labels)
    )
    metric_lines.append("# TYPE ariadne_quality_gate_violation_total gauge")
    metric_lines.extend(
        _metric_line(
            "ariadne_quality_gate_violation_total",
            _gate_count(summary, key),
            {"suite": suite, "check": check},
        )
        for check, key in violation_keys
    )
    metric_lines.append("# TYPE ariadne_quality_gate_legacy_exception_total gauge")
    metric_lines.extend(
        _metric_line(
            "ariadne_quality_gate_legacy_exception_total",
            _gate_count(summary, key),
            {"suite": suite, "check": check},
        )
        for check, key in legacy_keys
    )
    metric_lines.append("# TYPE ariadne_quality_gate_build_info gauge")
    metric_lines.append(f"ariadne_quality_gate_build_info{_label_str(build_labels)} 1")

    if quality_gate:
        metric_lines.extend(_file_metric_lines(suite, files))

    payload = "\n".join(metric_lines) + "\n"
    result = {
        "suite": suite,
        "outcome": outcome,
        "tests_total": tests_total,
        "tests_passed": tests_passed,
        "tests_failed": tests_failed,
        "tests_errors": tests_errors,
        "tests_skipped": tests_skipped,
        "coverage_percent": round(coverage or 0.0, 3),
        "quality_gate_present": quality_gate is not None,
        "quality_gate_status": gate_report.get("status") or "missing",
        "quality_gate_summary": summary,
        "ok_counter": ok_count,
        "failed_counter": failed_count,
        "payload": payload,
        "pushgateway_url": pushgateway_url,
        "job_name": job_name,
    }
    return payload, result


def _grouping_segment(name: str, value: str) -> str:
    """Render one `name/value` grouping-key segment of a Pushgateway URL."""
    if value and value.isascii() and all(ch.isalnum() or ch in "-._~" for ch in value):
        # URL-safe as-is: identical to plain interpolation.
        return f"{name}/{value}"
    # Values with `/`, spaces, non-ASCII text, etc. cannot be placed in the
    # path directly; Pushgateway accepts them base64url-encoded behind an
    # `@base64` suffix on the label name.
    encoded = base64.urlsafe_b64encode(value.encode("utf-8")).decode("ascii")
    # An empty value is written as a single padding character so the path does
    # not contain an empty segment.
    return f"{name}@base64/{encoded or '='}"


def main() -> int:
    """Publish Ariadne quality metrics to Pushgateway.

    Inputs: environment variables plus any available build artifacts.
    Outputs: a POST to Pushgateway under the `job`/`suite` grouping key and a
    JSON summary (without the raw payload) printed to stdout so local runs and
    Jenkins logs can confirm exactly what was emitted. Returns 0; a failed
    push propagates as an exception from `_post_text`.
    """
    payload, result = _build_payload()
    base_url = result["pushgateway_url"].rstrip("/")
    grouping = "/".join(
        (
            _grouping_segment("job", result["job_name"]),
            _grouping_segment("suite", result["suite"]),
        )
    )
    _post_text(f"{base_url}/metrics/{grouping}", payload)
    printable = {key: value for key, value in result.items() if key != "payload"}
    print(json.dumps(printable, indent=2, sort_keys=True))
    return 0


if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)