#!/usr/bin/env bash
#
# scripts/verify_jenkins_workspace_cleanup_rollout.sh
#
# Verifies a rollout of the Jenkins workspace cleanup configuration:
#   1. reconciles the Flux kustomization,
#   2. waits for the deployment rollout to finish,
#   3. checks the cleanup-related env vars on the deployment's first container,
#   4. scrapes /metrics through a temporary port-forward and confirms the
#      cleanup metrics are exported,
#   5. prints run-counter samples for the selected mode and recent log lines.
#
# Usage:
#   verify_jenkins_workspace_cleanup_rollout.sh [dry-run|active]
#
# Environment overrides (defaults in parentheses):
#   KUSTOMIZATION         Flux kustomization to reconcile (services-maintenance)
#   NAMESPACE             namespace of the deployment (maintenance)
#   DEPLOYMENT            deployment name (ariadne)
#   LOCAL_METRICS_PORT    local port for the port-forward (18080)
#   REMOTE_METRICS_PORT   target port of the port-forward (8080)
#   PORT_FORWARD_TIMEOUT  seconds to wait for /metrics to answer (15)
#
# Exit status:
#   0  all checks passed
#   2  bad usage, invalid setting, or a required command is missing
#   other non-zero: a verification step failed
set -euo pipefail

MODE="${1:-dry-run}"
if [[ "$MODE" != "dry-run" && "$MODE" != "active" ]]; then
  echo "usage: $0 [dry-run|active]" >&2
  exit 2
fi

# The expected DRY_RUN env value and the metric "mode" label both follow MODE.
EXPECTED_DRY_RUN="true"
PROM_MODE="dry_run"
if [[ "$MODE" == "active" ]]; then
  EXPECTED_DRY_RUN="false"
  PROM_MODE="delete"
fi

KUSTOMIZATION="${KUSTOMIZATION:-services-maintenance}"
NAMESPACE="${NAMESPACE:-maintenance}"
DEPLOYMENT="${DEPLOYMENT:-ariadne}"
LOCAL_METRICS_PORT="${LOCAL_METRICS_PORT:-18080}"
REMOTE_METRICS_PORT="${REMOTE_METRICS_PORT:-8080}"
PORT_FORWARD_TIMEOUT="${PORT_FORWARD_TIMEOUT:-15}"

if ! [[ "$PORT_FORWARD_TIMEOUT" =~ ^[0-9]+$ ]]; then
  echo "PORT_FORWARD_TIMEOUT must be a non-negative integer" >&2
  exit 2
fi

for cmd in flux kubectl curl grep mktemp; do
  if ! command -v "$cmd" >/dev/null 2>&1; then
    echo "missing required command: $cmd" >&2
    exit 2
  fi
done

echo "[1/5] reconcile Flux kustomization: ${KUSTOMIZATION}"
flux reconcile kustomization "$KUSTOMIZATION" --namespace flux-system --with-source

echo "[2/5] wait for deployment rollout"
kubectl -n "$NAMESPACE" rollout status "deployment/$DEPLOYMENT" --timeout=5m

echo "[3/5] verify ariadne env wiring"
# One NAME=value line per env entry of the first container in the pod template.
ENV_DUMP="$(kubectl -n "$NAMESPACE" get deployment "$DEPLOYMENT" -o jsonpath='{range .spec.template.spec.containers[0].env[*]}{.name}={.value}{"\n"}{end}')"

expected_env=(
  "ARIADNE_SCHEDULE_JENKINS_WORKSPACE_CLEANUP=45 */6 * * *"
  "JENKINS_WORKSPACE_NAMESPACE=jenkins"
  "JENKINS_WORKSPACE_PVC_PREFIX=pvc-workspace-"
  "JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS=24"
  "JENKINS_WORKSPACE_CLEANUP_DRY_RUN=${EXPECTED_DRY_RUN}"
  "JENKINS_WORKSPACE_CLEANUP_MAX_DELETIONS_PER_RUN=20"
)

env_failures=0
for expected in "${expected_env[@]}"; do
  # -x forces a whole-line match so e.g. "...HOURS=240" cannot satisfy "=24".
  # On success grep echoes the matching line as confirmation.
  if ! grep -Fx -- "$expected" <<<"$ENV_DUMP"; then
    env_name="${expected%%=*}"
    actual="$(grep -- "^${env_name}=" <<<"$ENV_DUMP" || true)"
    echo "env mismatch: expected '${expected}', found '${actual:-<not set>}'" >&2
    env_failures=$((env_failures + 1))
  fi
done
if ((env_failures > 0)); then
  echo "${env_failures} env check(s) failed" >&2
  exit 1
fi

echo "[4/5] scrape /metrics and confirm cleanup metrics are exported"
PF_PID=""
TMP_FILES=()

# Stop the background port-forward (if started) and remove temp files.
cleanup() {
  if [[ -n "${PF_PID:-}" ]]; then
    kill "$PF_PID" >/dev/null 2>&1 || true
    wait "$PF_PID" 2>/dev/null || true
  fi
  if ((${#TMP_FILES[@]} > 0)); then
    rm -f -- "${TMP_FILES[@]}"
  fi
}
# Install the trap before creating temp files so a failing mktemp cannot leak
# a file created by an earlier one.
trap cleanup EXIT

PF_LOG="$(mktemp)"
TMP_FILES+=("$PF_LOG")
METRICS_FILE="$(mktemp)"
TMP_FILES+=("$METRICS_FILE")

kubectl -n "$NAMESPACE" port-forward "deployment/$DEPLOYMENT" \
  "${LOCAL_METRICS_PORT}:${REMOTE_METRICS_PORT}" >"$PF_LOG" 2>&1 &
PF_PID=$!

# Poll instead of sleeping a fixed time: the forward may need longer than a
# couple of seconds, and a dead port-forward should be reported right away.
METRICS_URL="http://127.0.0.1:${LOCAL_METRICS_PORT}/metrics"
deadline=$((SECONDS + PORT_FORWARD_TIMEOUT))
until curl -fsS --max-time 5 "$METRICS_URL" >"$METRICS_FILE" 2>/dev/null; do
  if ! kill -0 "$PF_PID" 2>/dev/null; then
    echo "port-forward exited before ${METRICS_URL} became reachable:" >&2
    cat "$PF_LOG" >&2
    exit 1
  fi
  if ((SECONDS >= deadline)); then
    echo "timed out after ${PORT_FORWARD_TIMEOUT}s waiting for ${METRICS_URL}" >&2
    cat "$PF_LOG" >&2
    exit 1
  fi
  sleep 1
done

# If the port-forward is gone, the response came from some other listener on
# the local port and must not be trusted.
if ! kill -0 "$PF_PID" 2>/dev/null; then
  echo "port-forward is not running; refusing to trust ${METRICS_URL}:" >&2
  cat "$PF_LOG" >&2
  exit 1
fi

for metric in \
  ariadne_jenkins_workspace_cleanup_runs_total \
  ariadne_jenkins_workspace_cleanup_objects_total; do
  if ! grep -F -- "# HELP ${metric}" "$METRICS_FILE"; then
    echo "metric not exported: ${metric}" >&2
    exit 1
  fi
done

echo "[5/5] show recent cleanup signal"
# Match only sample lines (metric name followed by "{" or a space). The
# "# HELP"/"# TYPE" comment lines are always present at this point and would
# otherwise hide the "no sample yet" case.
RUN_SAMPLES="$(
  grep -E '^ariadne_jenkins_workspace_cleanup_runs_total(\{| )' "$METRICS_FILE" \
    | grep -F -- "mode=\"${PROM_MODE}\"" || true
)"
if [[ -n "$RUN_SAMPLES" ]]; then
  printf '%s\n' "$RUN_SAMPLES"
else
  echo "No run counter sample yet for mode=${PROM_MODE}; wait for schedule window and re-run." >&2
fi

echo "Recent cleanup logs (if any):"
# Best effort: missing log lines are not a verification failure.
kubectl -n "$NAMESPACE" logs "deployment/$DEPLOYMENT" --tail=500 \
  | grep -i "jenkins workspace cleanup" \
  | tail -n 20 || true

echo "verification complete for mode=${MODE}"