pegasus/scripts/publish_test_metrics.py

#!/usr/bin/env python3
"""Publish Pegasus test-suite results to Prometheus via Pushgateway.

Inputs:
- Backend JUnit XML and frontend JUnit XML
- Backend/frontend coverage summaries

Outputs pushed:
- platform_quality_gate_runs_total{suite="pegasus",status="ok|failed"}
- pegasus_quality_gate_tests_total{suite="pegasus",result=*}
- pegasus_quality_gate_coverage_percent{suite="pegasus"}
"""

from __future__ import annotations

import json
import os
import urllib.request
import xml.etree.ElementTree as ET
from pathlib import Path


SOURCE_SUFFIXES = {".go", ".py", ".js", ".mjs", ".ts", ".tsx", ".json", ".yaml", ".yml", ".sh"}
SKIP_DIRS = {".git", ".venv", "venv", "node_modules", "build", "dist", "__pycache__", ".pytest_cache", "frontend-coverage"}


def _escape_label(value: str) -> str:
    return value.replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"')


def _label_str(labels: dict[str, str]) -> str:
    parts = [f'{key}="{_escape_label(val)}"' for key, val in labels.items() if val]
    return "{" + ",".join(parts) + "}" if parts else ""


def _read_text(path: Path) -> str:
    if not path.exists():
        return ""
    return path.read_text(encoding="utf-8")


def _as_int(node: ET.Element, name: str) -> int:
    raw = node.attrib.get(name) or "0"
    try:
        return int(float(raw))
    except ValueError:
        return 0


def _load_junit(path: Path) -> dict[str, int]:
    if not path.exists():
        return {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}

    tree = ET.parse(path)
    root = tree.getroot()
    suites: list[ET.Element]
    if root.tag == "testsuite":
        suites = [root]
    elif root.tag == "testsuites":
        suites = list(root.findall("testsuite"))
    else:
        suites = []

    totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
    for suite in suites:
        totals["tests"] += _as_int(suite, "tests")
        totals["failures"] += _as_int(suite, "failures")
        totals["errors"] += _as_int(suite, "errors")
        totals["skipped"] += _as_int(suite, "skipped")
    return totals


def _load_backend_coverage_percent(path: Path) -> float:
    if not path.exists():
        return 0.0
    try:
        return float(path.read_text(encoding="utf-8").strip())
    except ValueError:
        return 0.0


def _load_frontend_coverage_percent(path: Path) -> float:
    if not path.exists():
        return 0.0
    payload = json.loads(path.read_text(encoding="utf-8"))
    total = payload.get("total") or {}
    lines = total.get("lines") or {}
    pct = lines.get("pct")
    if isinstance(pct, (int, float)):
        return float(pct)
    return 0.0


def _read_test_exit_code(path: Path) -> int:
    if not path.exists():
        return 1
    raw = path.read_text(encoding="utf-8").strip()
    try:
        return int(raw)
    except ValueError:
        return 1


def _load_gate_summary(path: Path) -> dict[str, object]:
    if not path.exists():
        return {"ok": False, "issues": []}
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except Exception:
        return {"ok": False, "issues": []}


def _count_lines_over_limit(root: Path, *, max_lines: int = 500) -> int:
    count = 0
    for path in root.rglob("*"):
        if not path.is_file():
            continue
        if any(part in SKIP_DIRS for part in path.parts):
            continue
        if path.name != "Jenkinsfile" and path.suffix.lower() not in SOURCE_SUFFIXES:
            continue
        try:
            with path.open("r", encoding="utf-8", errors="ignore") as handle:
                lines = sum(1 for _ in handle)
        except OSError:
            continue
        if lines > max_lines:
            count += 1
    return count


def _fetch_existing_counter(pushgateway_url: str, metric: str, labels: dict[str, str]) -> float:
    text = _read_http(f"{pushgateway_url.rstrip('/')}/metrics")
    if not text:
        return 0.0

    for line in text.splitlines():
        if not line.startswith(metric + "{"):
            continue
        if any(f'{k}="{v}"' not in line for k, v in labels.items()):
            continue
        parts = line.split()
        if len(parts) < 2:
            continue
        try:
            return float(parts[1])
        except ValueError:
            return 0.0
    return 0.0


def _read_http(url: str) -> str:
    try:
        with urllib.request.urlopen(url, timeout=10) as resp:
            return resp.read().decode("utf-8", errors="replace")
    except Exception:
        return ""


def _post_text(url: str, payload: str) -> None:
    req = urllib.request.Request(
        url,
        data=payload.encode("utf-8"),
        method="POST",
        headers={"Content-Type": "text/plain"},
    )
    with urllib.request.urlopen(req, timeout=10) as resp:
        if resp.status >= 400:
            raise RuntimeError(f"push failed status={resp.status}")


def main() -> int:
    suite = os.getenv("SUITE_NAME", "pegasus")
    pushgateway_url = os.getenv(
        "PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091"
    )

    backend_junit = Path(os.getenv("BACKEND_JUNIT_XML", "build/junit-backend.xml"))
    frontend_junit = Path(os.getenv("FRONTEND_JUNIT_XML", "build/junit-frontend.xml"))
    backend_cov = Path(os.getenv("BACKEND_COVERAGE_PERCENT_FILE", "build/coverage-backend-percent.txt"))
    frontend_cov = Path(
        os.getenv("FRONTEND_COVERAGE_JSON", "build/frontend-coverage/coverage-summary.json")
    )
    backend_rc_file = Path(os.getenv("BACKEND_TEST_RC_FILE", "build/backend-test.rc"))
    frontend_rc_file = Path(os.getenv("FRONTEND_TEST_RC_FILE", "build/frontend-test.rc"))
    gate_summary = _load_gate_summary(Path(os.getenv("GATE_SUMMARY_FILE", "build/gate-summary.json")))
    repo_root = Path(__file__).resolve().parents[1]

    b = _load_junit(backend_junit)
    f = _load_junit(frontend_junit)
    totals = {
        "tests": b["tests"] + f["tests"],
        "failures": b["failures"] + f["failures"],
        "errors": b["errors"] + f["errors"],
        "skipped": b["skipped"] + f["skipped"],
    }
    passed = max(totals["tests"] - totals["failures"] - totals["errors"] - totals["skipped"], 0)

    backend_pct = _load_backend_coverage_percent(backend_cov)
    frontend_pct = _load_frontend_coverage_percent(frontend_cov)
    coverage_pct = (backend_pct + frontend_pct) / 2 if (backend_pct or frontend_pct) else 0.0
    over_500 = _count_lines_over_limit(repo_root)

    backend_rc = _read_test_exit_code(backend_rc_file)
    frontend_rc = _read_test_exit_code(frontend_rc_file)
    backend_suite_result = "passed" if backend_rc == 0 else "failed"
    frontend_suite_result = "passed" if frontend_rc == 0 else "failed"
    branch = os.getenv("BRANCH_NAME", "")
    build_number = os.getenv("BUILD_NUMBER", "")
    commit = os.getenv("GIT_COMMIT", "")

    labels = {
        "suite": suite,
        "branch": branch,
        "build_number": build_number,
        "commit": commit,
    }
    gate_ok = bool(gate_summary.get("ok"))
    gate_issues = gate_summary.get("issues") or []
    outcome = (
        "ok"
        if gate_ok
        and backend_rc == 0
        and frontend_rc == 0
        and totals["tests"] > 0
        and totals["failures"] == 0
        and totals["errors"] == 0
        else "failed"
    )

    job_name = "platform-quality-ci"
    ok_count = _fetch_existing_counter(
        pushgateway_url,
        "platform_quality_gate_runs_total",
        {"job": job_name, "suite": suite, "status": "ok"},
    )
    failed_count = _fetch_existing_counter(
        pushgateway_url,
        "platform_quality_gate_runs_total",
        {"job": job_name, "suite": suite, "status": "failed"},
    )

    if outcome == "ok":
        ok_count += 1
    else:
        failed_count += 1
    tests_check = "ok" if outcome == "ok" else "failed"
    coverage_check = "ok" if coverage_pct >= 95.0 else "failed"
    loc_check = "ok" if over_500 == 0 else "failed"
    gate_check = "ok" if gate_ok else "failed"

    payload_lines = [
        "# TYPE platform_quality_gate_runs_total counter",
        f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {ok_count:.0f}',
        f'platform_quality_gate_runs_total{{suite="{suite}",status="failed"}} {failed_count:.0f}',
        "# TYPE pegasus_test_suite_result gauge",
        f'pegasus_test_suite_result{{suite="backend",status="{backend_suite_result}"}} 1',
        f'pegasus_test_suite_result{{suite="frontend",status="{frontend_suite_result}"}} 1',
        "# TYPE pegasus_quality_gate_tests_total gauge",
        f'pegasus_quality_gate_tests_total{{suite="{suite}",result="passed"}} {passed}',
        f'pegasus_quality_gate_tests_total{{suite="{suite}",result="failed"}} {totals["failures"]}',
        f'pegasus_quality_gate_tests_total{{suite="{suite}",result="error"}} {totals["errors"]}',
        f'pegasus_quality_gate_tests_total{{suite="{suite}",result="skipped"}} {totals["skipped"]}',
        "# TYPE pegasus_quality_gate_coverage_percent gauge",
        f'pegasus_quality_gate_coverage_percent{{suite="{suite}"}} {coverage_pct:.3f}',
        "# TYPE pegasus_quality_gate_status gauge",
        f'pegasus_quality_gate_status{{suite="{suite}",result="{"ok" if gate_ok else "failed"}"}} 1',
        "# TYPE pegasus_quality_gate_issues_total gauge",
        f'pegasus_quality_gate_issues_total{{suite="{suite}"}} {len(gate_issues)}',
        "# TYPE platform_quality_gate_workspace_line_coverage_percent gauge",
        f'platform_quality_gate_workspace_line_coverage_percent{{suite="{suite}"}} {coverage_pct:.3f}',
        "# TYPE platform_quality_gate_source_lines_over_500_total gauge",
        f'platform_quality_gate_source_lines_over_500_total{{suite="{suite}"}} {over_500}',
        "# TYPE pegasus_quality_gate_checks_total gauge",
        f'pegasus_quality_gate_checks_total{{suite="{suite}",check="tests",result="{tests_check}"}} 1',
        f'pegasus_quality_gate_checks_total{{suite="{suite}",check="coverage",result="{coverage_check}"}} 1',
        f'pegasus_quality_gate_checks_total{{suite="{suite}",check="loc",result="{loc_check}"}} 1',
        f'pegasus_quality_gate_checks_total{{suite="{suite}",check="gate",result="{gate_check}"}} 1',
        "# TYPE pegasus_quality_gate_build_info gauge",
        f"pegasus_quality_gate_build_info{_label_str(labels)} 1",
    ]
    payload = "\n".join(payload_lines) + "\n"

    push_url = f"{pushgateway_url.rstrip('/')}/metrics/job/{job_name}/suite/{suite}"
    _post_text(push_url, payload)

    summary = {
        "suite": suite,
        "tests_total": totals["tests"],
        "tests_passed": passed,
        "tests_failed": totals["failures"],
        "tests_errors": totals["errors"],
        "tests_skipped": totals["skipped"],
        "coverage_percent": round(coverage_pct, 3),
        "source_lines_over_500": over_500,
        "outcome": outcome,
        "backend_rc": backend_rc,
        "frontend_rc": frontend_rc,
        "ok_counter": ok_count,
        "failed_counter": failed_count,
    }
    Path("build/metrics-summary.json").write_text(json.dumps(summary, indent=2), encoding="utf-8")

    print(json.dumps(summary, indent=2))
    return 0


if __name__ == "__main__":
    try:
        raise SystemExit(main())
    except Exception as exc:
        print(f"metrics push failed: {exc}")
        raise