ci: stabilize dind and publish quality metrics via pushgateway

This commit is contained in:
Brad Stein 2026-04-10 05:59:38 -03:00
parent 57d1b67d59
commit 06759399fb
2 changed files with 112 additions and 33 deletions

18
Jenkinsfile vendored
View File

@@ -61,8 +61,7 @@ spec:
- name: docker-config-writable - name: docker-config-writable
emptyDir: {} emptyDir: {}
- name: dind-storage - name: dind-storage
persistentVolumeClaim: emptyDir: {}
claimName: jenkins-dind-cache
- name: harbor-config - name: harbor-config
secret: secret:
secretName: harbor-robot-pipeline secretName: harbor-robot-pipeline
@@ -80,9 +79,6 @@ spec:
COVERAGE_MIN = '99' COVERAGE_MIN = '99'
COVERAGE_JSON = 'build/coverage.json' COVERAGE_JSON = 'build/coverage.json'
JUNIT_XML = 'build/junit.xml' JUNIT_XML = 'build/junit.xml'
METRICS_PREFIX = 'ariadne_ci'
VM_IMPORT_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428/api/v1/import/prometheus'
REPO_NAME = 'ariadne'
SUITE_NAME = 'ariadne' SUITE_NAME = 'ariadne'
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091' PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
} }
@@ -165,13 +161,23 @@ python -c "import json; payload=json.load(open('build/coverage.json', encoding='
container('builder') { container('builder') {
sh ''' sh '''
set -euo pipefail set -euo pipefail
ready=0
for i in $(seq 1 10); do for i in $(seq 1 10); do
if docker info >/dev/null 2>&1; then if docker info >/dev/null 2>&1; then
ready=1
break break
fi fi
sleep 2 sleep 2
done done
docker buildx use default || docker buildx create --name default --driver docker --use if [ "${ready}" -ne 1 ]; then
echo "docker daemon did not become ready on ${DOCKER_HOST}" >&2
docker version || true
exit 1
fi
BUILDER_NAME="ariadne-${BUILD_NUMBER}"
docker buildx rm "${BUILDER_NAME}" >/dev/null 2>&1 || true
docker buildx create --name "${BUILDER_NAME}" --driver docker-container --bootstrap --use
docker buildx inspect "${BUILDER_NAME}" --bootstrap
''' '''
} }
} }

View File

@@ -1,4 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
"""Publish Ariadne quality-gate test metrics to Pushgateway (Prometheus ingest)."""
from __future__ import annotations from __future__ import annotations
import json import json
@@ -31,18 +33,20 @@ def _load_junit(path: str) -> dict[str, int]:
tree = ET.parse(path) tree = ET.parse(path)
root = tree.getroot() root = tree.getroot()
def _as_int(node, name: str) -> int: def _as_int(node: ET.Element, name: str) -> int:
raw = node.attrib.get(name) or "0" raw = node.attrib.get(name) or "0"
try: try:
return int(float(raw)) return int(float(raw))
except ValueError: except ValueError:
return 0 return 0
suites = [] suites: list[ET.Element]
if root.tag == "testsuite": if root.tag == "testsuite":
suites = [root] suites = [root]
elif root.tag == "testsuites": elif root.tag == "testsuites":
suites = list(root.findall("testsuite")) suites = list(root.findall("testsuite"))
else:
suites = []
totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0} totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
for suite in suites: for suite in suites:
@@ -53,7 +57,15 @@ def _load_junit(path: str) -> dict[str, int]:
return totals return totals
def _post_metrics(url: str, payload: str) -> None: def _read_http(url: str) -> str:
try:
with urllib.request.urlopen(url, timeout=10) as resp:
return resp.read().decode("utf-8", errors="replace")
except Exception:
return ""
def _post_text(url: str, payload: str) -> None:
req = urllib.request.Request( req = urllib.request.Request(
url, url,
data=payload.encode("utf-8"), data=payload.encode("utf-8"),
@@ -65,14 +77,36 @@ def _post_metrics(url: str, payload: str) -> None:
raise RuntimeError(f"metrics push failed status={resp.status}") raise RuntimeError(f"metrics push failed status={resp.status}")
def main() -> int: def _fetch_existing_counter(pushgateway_url: str, metric: str, labels: dict[str, str]) -> float:
vm_url = os.getenv("VM_IMPORT_URL", "").strip() text = _read_http(f"{pushgateway_url.rstrip('/')}/metrics")
if not vm_url: if not text:
print("VM_IMPORT_URL not set; skipping metrics push") return 0.0
return 0
for line in text.splitlines():
if not line.startswith(metric + "{"):
continue
if any(f'{k}="{v}"' not in line for k, v in labels.items()):
continue
parts = line.split()
if len(parts) < 2:
continue
try:
return float(parts[1])
except ValueError:
return 0.0
return 0.0
def main() -> int:
coverage_path = os.getenv("COVERAGE_JSON", "build/coverage.json") coverage_path = os.getenv("COVERAGE_JSON", "build/coverage.json")
junit_path = os.getenv("JUNIT_XML", "build/junit.xml") junit_path = os.getenv("JUNIT_XML", "build/junit.xml")
pushgateway_url = os.getenv(
"PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091"
).strip()
suite = os.getenv("SUITE_NAME", "ariadne")
branch = os.getenv("BRANCH_NAME", "")
build_number = os.getenv("BUILD_NUMBER", "")
commit = os.getenv("GIT_COMMIT", "")
if not os.path.exists(coverage_path): if not os.path.exists(coverage_path):
raise RuntimeError(f"missing coverage file {coverage_path}") raise RuntimeError(f"missing coverage file {coverage_path}")
@@ -83,33 +117,72 @@ def main() -> int:
totals = _load_junit(junit_path) totals = _load_junit(junit_path)
passed = max(totals["tests"] - totals["failures"] - totals["errors"] - totals["skipped"], 0) passed = max(totals["tests"] - totals["failures"] - totals["errors"] - totals["skipped"], 0)
outcome = "ok"
if totals["tests"] <= 0 or totals["failures"] > 0 or totals["errors"] > 0:
outcome = "failed"
job_name = "platform-quality-ci"
ok_count = _fetch_existing_counter(
pushgateway_url,
"platform_quality_gate_runs_total",
{"job": job_name, "suite": suite, "status": "ok"},
)
failed_count = _fetch_existing_counter(
pushgateway_url,
"platform_quality_gate_runs_total",
{"job": job_name, "suite": suite, "status": "failed"},
)
if outcome == "ok":
ok_count += 1
else:
failed_count += 1
labels = { labels = {
"job": os.getenv("CI_JOB_NAME", "ariadne"), "suite": suite,
"branch": os.getenv("BRANCH_NAME", ""), "branch": branch,
"build_number": os.getenv("BUILD_NUMBER", ""), "build_number": build_number,
"commit": os.getenv("GIT_COMMIT", ""), "commit": commit,
"repo": os.getenv("REPO_NAME", "ariadne"),
} }
payload_lines = [
prefix = os.getenv("METRICS_PREFIX", "ariadne_ci") "# TYPE platform_quality_gate_runs_total counter",
lines = [ f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {ok_count:.0f}',
f"{prefix}_coverage_percent{_label_str(labels)} {coverage:.3f}", f'platform_quality_gate_runs_total{{suite="{suite}",status="failed"}} {failed_count:.0f}',
f"{prefix}_tests_total{_label_str({**labels, 'result': 'passed'})} {passed}", "# TYPE ariadne_quality_gate_tests_total gauge",
f"{prefix}_tests_total{_label_str({**labels, 'result': 'failed'})} {totals['failures']}", f'ariadne_quality_gate_tests_total{{suite="{suite}",result="passed"}} {passed}',
f"{prefix}_tests_total{_label_str({**labels, 'result': 'error'})} {totals['errors']}", f'ariadne_quality_gate_tests_total{{suite="{suite}",result="failed"}} {totals["failures"]}',
f"{prefix}_tests_total{_label_str({**labels, 'result': 'skipped'})} {totals['skipped']}", f'ariadne_quality_gate_tests_total{{suite="{suite}",result="error"}} {totals["errors"]}',
f"{prefix}_build_info{_label_str(labels)} 1", f'ariadne_quality_gate_tests_total{{suite="{suite}",result="skipped"}} {totals["skipped"]}',
"# TYPE ariadne_quality_gate_coverage_percent gauge",
f'ariadne_quality_gate_coverage_percent{{suite="{suite}"}} {coverage:.3f}',
"# TYPE ariadne_quality_gate_build_info gauge",
f"ariadne_quality_gate_build_info{_label_str(labels)} 1",
] ]
payload = "\n".join(payload_lines) + "\n"
_post_text(f"{pushgateway_url.rstrip('/')}/metrics/job/{job_name}/suite/{suite}", payload)
payload = "\n".join(lines) + "\n" print(
_post_metrics(vm_url, payload) json.dumps(
print("metrics push complete") {
"suite": suite,
"outcome": outcome,
"tests_total": totals["tests"],
"tests_passed": passed,
"tests_failed": totals["failures"],
"tests_errors": totals["errors"],
"tests_skipped": totals["skipped"],
"coverage_percent": round(coverage, 3),
"ok_counter": ok_count,
"failed_counter": failed_count,
},
indent=2,
)
)
return 0 return 0
if __name__ == "__main__": if __name__ == "__main__":
try: try:
sys.exit(main()) raise SystemExit(main())
except Exception as exc: except Exception as exc:
print(f"metrics push failed: {exc}") print(f"metrics push failed: {exc}")
sys.exit(1) raise