#!/usr/bin/env sh
#
# Platform quality suite probe.
#
# Requests the health endpoint of each suite and records the result as
# platform_quality_gate_runs_total{suite,status} counters in a Prometheus
# Pushgateway.
#
# Environment:
#   PUSHGATEWAY_URL       Base URL of the Pushgateway (no trailing slash).
#   HTTP_TIMEOUT_SECONDS  Value handed to curl's -m (max total time) option.

# Abort on unhandled command failures and on use of unset variables.
set -eu

PUSHGATEWAY_URL="${PUSHGATEWAY_URL:-http://platform-quality-gateway.monitoring.svc.cluster.local:9091}"
HTTP_TIMEOUT_SECONDS="${HTTP_TIMEOUT_SECONDS:-12}"
|
|
|
|
|
|
|
|
|
|
#######################################
# Print the current value of the
# platform_quality_gate_runs_total sample whose labels contain
# suite="<suite>" and status="<status>", read from
# ${PUSHGATEWAY_URL}/metrics.
#
# Globals:   PUSHGATEWAY_URL (read), HTTP_TIMEOUT_SECONDS (read, default 12)
# Arguments: $1 - suite label value
#            $2 - status label value
# Outputs:   exactly one integer on stdout; 0 when no sample matches or the
#            metrics page cannot be fetched
# Returns:   exit status of the final awk (0 in normal operation)
#######################################
fetch_counter() {
  # Best effort by design: a failed or timed-out fetch yields empty input, so
  # awk falls through to END and reports 0 instead of failing the probe.
  # -m bounds the request so an unresponsive Pushgateway cannot hang the run.
  #
  # The value is taken from the text after the closing brace of the label set
  # rather than from the second whitespace-separated field, so label values
  # containing spaces do not shift it. printf %d guarantees callers receive an
  # integer they can use in shell arithmetic.
  curl -fsS -m "${HTTP_TIMEOUT_SECONDS:-12}" "${PUSHGATEWAY_URL}/metrics" 2>/dev/null \
    | awk -v suite="$1" -v status="$2" '
      /^platform_quality_gate_runs_total\{/ {
        if (index($0, "suite=\"" suite "\"") && index($0, "status=\"" status "\"")) {
          sample = $0
          sub(/^.*[}][ \t]+/, "", sample)
          split(sample, parts, /[ \t]+/)
          value = parts[1]
          exit
        }
      }
      END { printf "%d\n", value + 0 }
    '
}
|
|
|
|
|
|
|
|
|
|
#######################################
# Increment the ok or failed run counter of a suite and push both counters
# to the Pushgateway group job=platform-quality-suite-probe, suite=<suite>.
#
# The current values are read back through fetch_counter, the one matching
# the outcome is incremented, and both are pushed again.
#
# Globals:   PUSHGATEWAY_URL (read), HTTP_TIMEOUT_SECONDS (read, default 12);
#            suite, outcome, ok_count, failed_count (written; POSIX sh has
#            no function-local variables)
# Arguments: $1 - suite name (used as label value and URL path segment)
#            $2 - outcome; "ok" bumps the ok counter, anything else bumps
#                 the failed counter
# Returns:   exit status of the curl push
#######################################
push_suite_counters() {
  suite="$1"
  outcome="$2"

  ok_count="$(fetch_counter "${suite}" "ok")"
  failed_count="$(fetch_counter "${suite}" "failed")"

  # Shell arithmetic aborts on anything that is not a plain integer, so treat
  # an empty or malformed reading as a fresh counter.
  case "${ok_count}" in
    '' | *[!0-9]*) ok_count=0 ;;
  esac
  case "${failed_count}" in
    '' | *[!0-9]*) failed_count=0 ;;
  esac

  if [ "${outcome}" = "ok" ]; then
    ok_count=$((ok_count + 1))
  else
    failed_count=$((failed_count + 1))
  fi

  # The payload is fed to curl on stdin (@-); -m bounds the push so an
  # unresponsive Pushgateway cannot hang the run.
  curl -fsS -m "${HTTP_TIMEOUT_SECONDS:-12}" --data-binary @- \
    "${PUSHGATEWAY_URL}/metrics/job/platform-quality-suite-probe/suite/${suite}" \
    >/dev/null <<METRICS
# TYPE platform_quality_gate_runs_total counter
platform_quality_gate_runs_total{suite="${suite}",status="ok"} ${ok_count}
platform_quality_gate_runs_total{suite="${suite}",status="failed"} ${failed_count}
METRICS
}
|
|
|
|
|
|
|
|
|
|
#######################################
# Probe one suite over HTTP, record the outcome through push_suite_counters
# and log a one-line result.
#
# A suite is "ok" only when the request completes (curl exits 0), the HTTP
# status equals the expected code and, if a body pattern is given, the
# response body matches it.
#
# Globals:   HTTP_TIMEOUT_SECONDS (read); suite, url, expected_code,
#            body_match, body_file, code, curl_status, outcome (written;
#            POSIX sh has no function-local variables)
# Arguments: $1 - suite name
#            $2 - URL to request
#            $3 - expected HTTP status code
#            $4 - optional grep pattern (basic regular expression) that the
#                 response body must match
# Outputs:   success line on stdout, failure line on stderr
# Returns:   0 when the suite is ok, 1 otherwise
#######################################
check_http_suite() {
  suite="$1"
  url="$2"
  expected_code="$3"
  body_match="${4:-}"

  outcome="failed"
  # Placeholder status for the failure log when no request could be made.
  code="000"

  if body_file="$(mktemp)"; then
    # Keep curl's exit status: a response that delivered the expected status
    # line but then timed out or was cut short must not be counted as ok.
    curl_status=0
    code="$(curl -ksS -m "${HTTP_TIMEOUT_SECONDS}" -o "${body_file}" -w '%{http_code}' "${url}")" \
      || curl_status=$?

    if [ "${curl_status}" -eq 0 ] && [ "${code}" = "${expected_code}" ]; then
      if [ -z "${body_match}" ] || grep -q -- "${body_match}" "${body_file}"; then
        outcome="ok"
      fi
    fi

    rm -f -- "${body_file}"
  else
    printf '[probe] suite=%s could not create a temporary file for the response body\n' "${suite}" >&2
  fi

  push_suite_counters "${suite}" "${outcome}"

  if [ "${outcome}" = "ok" ]; then
    printf '[probe] suite=%s outcome=ok url=%s\n' "${suite}" "${url}"
    return 0
  fi

  printf '[probe] suite=%s outcome=failed url=%s code=%s\n' "${suite}" "${url}" "${code}" >&2
  return 1
}
|
|
|
|
|
|
|
|
|
|
# Run every suite probe. A failing probe is only tallied here; the `||` keeps
# `set -e` from aborting the run so the remaining suites are still checked.
failures=0

check_http_suite "atlasbot" "http://atlasbot.comms.svc.cluster.local:8090/health" "200" '"status": "ok"' || failures=$((failures + 1))
check_http_suite "pegasus" "http://pegasus.jellyfin.svc.cluster.local/healthz" "200" || failures=$((failures + 1))
check_http_suite "bstein_home" "http://bstein-dev-home-backend.bstein-dev-home.svc.cluster.local/api/healthz" "200" || failures=$((failures + 1))

if [ "${failures}" -gt 0 ]; then
  printf '[probe] completed with %s suite failure(s)\n' "${failures}" >&2
else
  printf '[probe] completed with all suites passing\n'
fi

# Report failures through metrics, not Job failure retries.
exit 0
|