#!/usr/bin/env bash
#
# Cluster power recovery driver.
#
# Invocation: scripts/cluster_power_recovery.sh <mode> [options]
# The first positional argument selects the mode; everything after it is
# parsed as options. Commands are only executed when --execute is given
# (EXECUTE defaults to 0, i.e. dry-run).
#
# Optional inputs read by this preamble:
#   ANANKE_REPO_DIR                  overrides the repository root
#   <script dir>/bootstrap/recovery-config.env
#                                    sourced, when present, before any default
#                                    below is computed
#   <script dir>/kubeconfig          exported as KUBECONFIG when KUBECONFIG is
#                                    unset or empty
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="${ANANKE_REPO_DIR:-$(cd "${SCRIPT_DIR}/.." && pwd)}"
BOOTSTRAP_DIR="${SCRIPT_DIR}/bootstrap"
CONFIG_FILE="${BOOTSTRAP_DIR}/recovery-config.env"

# The config file is sourced first so that every "${VAR:-default}" below
# (and in the usage text) picks up its values.
if [[ -f "${CONFIG_FILE}" ]]; then
  # shellcheck disable=SC1090
  source "${CONFIG_FILE}"
fi

if [[ -z "${KUBECONFIG:-}" && -f "${SCRIPT_DIR}/kubeconfig" ]]; then
  export KUBECONFIG="${SCRIPT_DIR}/kubeconfig"
fi

#######################################
# Print the help text to stdout.
# The heredoc delimiter is unquoted on purpose: "(default: ...)" entries are
# expanded at call time so they reflect environment / config-file overrides.
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  cat <<USAGE
Usage: scripts/cluster_power_recovery.sh <mode> [options]

Modes:
  prepare | status | bootstrap-seed | harbor-seed | longhorn-seed |
  longhorn-unlock | flux-hold | shutdown | startup

Options:
  --execute                                   Actually run commands (default is dry-run)
  --shutdown-mode <mode>                      Shutdown behavior: host-poweroff or cluster-only (default: ${SHUTDOWN_MODE:-host-poweroff})
  --expected-flux-branch <branch>             Expected Flux source branch during startup checks (default: ${DEFAULT_FLUX_BRANCH:-main})
  --expected-flux-url <url>                   Expected Flux source URL during startup checks
  --allow-flux-source-mutation                Required to allow --force-flux-url during startup
  --force-flux-url <url>                      Startup: patch flux-system GitRepository URL to this value
  --force-flux-branch <branch>                Startup: patch flux-system GitRepository branch to this value
  --skip-etcd-snapshot                        Shutdown: skip etcd snapshot before shutdown
  --skip-drain                                Shutdown: skip worker drain during shutdown
  --skip-local-bootstrap                      Startup: skip local bootstrap fallback applies
  --skip-harbor-bootstrap                     Startup: skip Harbor recovery bootstrap stage
  --skip-harbor-seed                          Startup: skip bootstrap image seed/import stage
  --skip-helper-prewarm                       Prepare/Shutdown/Startup: skip node-helper prewarm
  --refresh-bootstrap-image-aliases           Remove bootstrap image aliases before import, to clear poisoned registry pulls
  --min-startup-battery <percent>             Minimum UPS percent required before bootstrap (default: ${MIN_STARTUP_BATTERY:-35})
  --ups-host <name>                           UPS identifier for upsc (default: ${UPS_HOST:-ups@localhost})
  --ups-battery-key <key>                     UPS battery key for upsc (default: ${UPS_BATTERY_KEY:-battery.charge})
  --recovery-state-file <path>                Recovery state file for outage-aware restart logic
  --replica-snapshot-file <path>              File used to persist workload replica snapshot across shutdown/startup
  --bootstrap-images-file <path>              Image list expected inside the bootstrap bundle
  --harbor-bundle-file <path>                 Bootstrap bundle on the control host
  --longhorn-unlock-bundle-file <path>        Longhorn-only bundle for Harbor-deadlock recovery
  --longhorn-unlock-images-file <path>        Longhorn-only image list for Harbor-deadlock recovery
  --longhorn-manager-cache-bundle-file <path> Single-image Longhorn manager cache repair archive
  --skip-longhorn-unlock-bundle-seed          Longhorn unlock: skip full Longhorn bundle seed and run surgical repairs only
  --bootstrap-bundle-arch <arch>              Node architecture expected by the bootstrap bundle (default: ${BOOTSTRAP_BUNDLE_ARCH:-arm64})
  --harbor-target-node <node>                 Node that should host Harbor during bootstrap (default: auto)
  --harbor-canary-node <node>                 Node used for Harbor pull canary (default: auto)
  --harbor-host-label-key <key>               Node label key used to pin Harbor bootstrap workloads (default: ${HARBOR_HOST_LABEL_KEY:-ananke.bstein.dev/harbor-bootstrap})
  --harbor-canary-image <image>               Harbor-backed image used for pull canary (default: ${HARBOR_CANARY_IMAGE:-registry.bstein.dev/bstein/kubectl:1.35.0})
  --node-helper-image <image>                 Privileged helper image used for host operations (default: ${NODE_HELPER_IMAGE:-registry.bstein.dev/bstein/ananke-node-helper:0.1.0})
  --bundle-http-port <port>                   Temporary HTTP port used to serve bootstrap bundles (default: ${BUNDLE_HTTP_PORT:-8877})
  --api-wait-timeout <seconds>                Startup: Kubernetes API wait timeout (default: 600)
  --drain-timeout <seconds>                   Worker drain timeout for normal shutdown (default: 180)
  --emergency-drain-timeout <seconds>         Worker drain timeout for emergency fallback (default: 45)
  --flux-ready-timeout <seconds>              Startup: max time to wait for Flux kustomizations Ready (default: ${FLUX_READY_TIMEOUT_SECONDS:-1200})
  --startup-checklist-timeout <seconds>       Startup: max time to wait for external service checklist (default: ${STARTUP_CHECKLIST_TIMEOUT_SECONDS:-900})
  --startup-workload-timeout <seconds>        Startup: max time to wait for workload readiness checks (default: ${STARTUP_WORKLOAD_TIMEOUT_SECONDS:-900})
  --startup-stability-window <seconds>        Startup: continuous healthy window required before success (default: ${STARTUP_STABILITY_WINDOW_SECONDS:-180})
  --startup-stability-timeout <seconds>       Startup: max time allowed to achieve the healthy window (default: ${STARTUP_STABILITY_TIMEOUT_SECONDS:-900})
  --require-ups-battery                       Hard-fail startup if UPS battery cannot be read
  -h, --help                                  Show help

Examples:
  scripts/cluster_power_recovery.sh prepare --execute
  scripts/cluster_power_recovery.sh bootstrap-seed --execute
  scripts/cluster_power_recovery.sh harbor-seed --execute
  scripts/cluster_power_recovery.sh longhorn-unlock --execute
  scripts/cluster_power_recovery.sh flux-hold --execute
  scripts/cluster_power_recovery.sh status
  scripts/cluster_power_recovery.sh shutdown --execute
  scripts/cluster_power_recovery.sh startup --execute --force-flux-branch main
USAGE
}

# The mode is the first positional argument. A missing mode or an explicit
# help flag prints the help text and exits successfully.
MODE="${1:-}"
if [[ -z "${MODE}" || "${MODE}" == "-h" || "${MODE}" == "--help" ]]; then
  usage
  exit 0
fi
shift || true

case "${MODE}" in
  prepare|status|bootstrap-seed|harbor-seed|longhorn-seed|longhorn-unlock|flux-hold|shutdown|startup)
    ;;
  *)
    echo "Unknown mode: ${MODE}" >&2
    usage
    exit 1
    ;;
esac

# ---------------------------------------------------------------------------
# Option defaults. "${VAR:-x}" entries honour a value already present in the
# environment or set by the sourced config file; plain literals are always
# reset here and can only be changed through the command-line options.
# ---------------------------------------------------------------------------
EXECUTE=0
SHUTDOWN_MODE="${SHUTDOWN_MODE:-host-poweroff}"
# Note: the expected branch default comes from DEFAULT_FLUX_BRANCH, not from a
# pre-set EXPECTED_FLUX_BRANCH.
EXPECTED_FLUX_BRANCH="${DEFAULT_FLUX_BRANCH:-main}"
EXPECTED_FLUX_URL="${EXPECTED_FLUX_URL:-ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git}"
ALLOW_FLUX_SOURCE_MUTATION=0
FORCE_FLUX_URL=""
FORCE_FLUX_BRANCH=""
SKIP_ETCD_SNAPSHOT=0
SKIP_DRAIN=0
SKIP_LOCAL_BOOTSTRAP=0
SKIP_HARBOR_BOOTSTRAP=0
SKIP_HARBOR_SEED=0
SKIP_HELPER_PREWARM=0
UPS_HOST="${UPS_HOST:-ups@localhost}"
UPS_BATTERY_KEY="${UPS_BATTERY_KEY:-battery.charge}"
MIN_STARTUP_BATTERY="${MIN_STARTUP_BATTERY:-35}"
REQUIRE_UPS_BATTERY="${REQUIRE_UPS_BATTERY:-0}"
DRAIN_TIMEOUT_SECONDS=180
EMERGENCY_DRAIN_TIMEOUT_SECONDS=45
API_WAIT_TIMEOUT_SECONDS=600
FLUX_READY_TIMEOUT_SECONDS="${FLUX_READY_TIMEOUT_SECONDS:-1200}"
FLUX_READY_POLL_SECONDS="${FLUX_READY_POLL_SECONDS:-10}"
STARTUP_CHECKLIST_TIMEOUT_SECONDS="${STARTUP_CHECKLIST_TIMEOUT_SECONDS:-900}"
STARTUP_CHECKLIST_POLL_SECONDS="${STARTUP_CHECKLIST_POLL_SECONDS:-10}"
STARTUP_WORKLOAD_TIMEOUT_SECONDS="${STARTUP_WORKLOAD_TIMEOUT_SECONDS:-900}"
STARTUP_WORKLOAD_POLL_SECONDS="${STARTUP_WORKLOAD_POLL_SECONDS:-10}"
# Startup stability and Flux recovery defaults.
# Each ': "${VAR:=value}"' assigns the value only when VAR is unset or empty,
# so anything supplied through the environment is kept as-is.

# Startup stability window timing.
: "${STARTUP_STABILITY_WINDOW_SECONDS:=180}"
: "${STARTUP_STABILITY_TIMEOUT_SECONDS:=900}"
: "${STARTUP_STABILITY_POLL_SECONDS:=10}"

# Startup filters; an empty default still leaves the variable defined, which
# matters under 'set -u'.
: "${STARTUP_IGNORE_PODS_REGEX:=}"
: "${STARTUP_IGNORE_WORKLOADS_REGEX:=}"
: "${STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX:=^(kube-system|kube-public|kube-node-lease|flux-system)$}"
: "${STARTUP_OPTIONAL_KUSTOMIZATIONS:=}"

# Comma-separated Flux object lists used by the recovery logic.
: "${RECOVERY_FLUX_OPTIONAL_KUSTOMIZATIONS:=ai-llm,bstein-dev-home-migrations,descheduler,finance,game-stream,gitops-ui,health,jellyfin,jenkins,longhorn-ui,mailu,nextcloud,nextcloud-mail-sync,outline,planka,quality,resource-guardrails,typhon,vaultwarden,veles,wallet-monero-temp,xmr-miner}"
: "${RECOVERY_FLUX_CRITICAL_KUSTOMIZATIONS:=core,helm,cert-manager,longhorn-adopt,longhorn,metallb,traefik,vault-csi,vault-injector,vault,postgres,harbor,gitea,keycloak,oauth2-proxy,openldap,openclaw,monitoring,bstein-dev-home,comms,crypto,logging,maintenance,monerod,sui-metrics}"
: "${RECOVERY_FLUX_CRITICAL_HELMRELEASES:=cert-manager/cert-manager,comms/othrys-element,comms/othrys-synapse,harbor/harbor,kube-system/secrets-store-csi-driver,logging/data-prepper,logging/fluent-bit,logging/opensearch,logging/opensearch-dashboards,logging/otel-collector,longhorn-system/longhorn,metallb-system/metallb,monitoring/alertmanager,monitoring/grafana,monitoring/kube-state-metrics,monitoring/node-exporter,monitoring/victoria-metrics-single,vault/vault-injector}"

# Snapshot file lives under ${HOME}/${STATE_SUBDIR}, with STATE_SUBDIR
# falling back to .local/share/ananke.
: "${RECOVERY_FLUX_OPTIONAL_SNAPSHOT_FILE:=${HOME}/${STATE_SUBDIR:-.local/share/ananke}/longhorn_unlock_optional_flux.tsv}"
: "${RECOVERY_FLUX_RESTART_KUSTOMIZE_CONTROLLER:=1}"
# Flux recovery tuning and startup service-check defaults.
# Each ': "${VAR:=value}"' assigns the value only when VAR is unset or empty,
# so anything supplied through the environment is kept as-is.

# Flux recovery switches (0/1) and apply timeout.
: "${RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION:=1}"
: "${RECOVERY_FLUX_APPLY_BOOTSTRAP_KUSTOMIZATION:=0}"
: "${RECOVERY_FLUX_ROOT_APPLY_TIMEOUT:=15m}"

# Retry counts and sleep intervals for suspend / patch verification.
: "${RECOVERY_FLUX_SUSPEND_VERIFY_ATTEMPTS:=6}"
: "${RECOVERY_FLUX_SUSPEND_VERIFY_SLEEP_SECONDS:=10}"
: "${RECOVERY_FLUX_FINAL_RESTART_KUSTOMIZE_CONTROLLER:=0}"
: "${RECOVERY_FLUX_PATCH_VERIFY_ATTEMPTS:=3}"
: "${RECOVERY_FLUX_PATCH_VERIFY_SLEEP_SECONDS:=1}"

# Final stability timing and auxiliary controller handling.
: "${RECOVERY_FLUX_FINAL_STABILITY_SECONDS:=45}"
: "${RECOVERY_FLUX_FINAL_STABILITY_POLL_SECONDS:=5}"
: "${RECOVERY_FLUX_FINAL_STABILITY_TIMEOUT_SECONDS:=300}"
: "${RECOVERY_FLUX_FINAL_STOP_AUX_CONTROLLERS:=1}"
: "${RECOVERY_FLUX_AUX_CONTROLLERS:=image-automation-controller,image-reflector-controller,notification-controller}"

# Field manager name passed to kubectl via --field-manager.
: "${RECOVERY_KUBECTL_FIELD_MANAGER:=ananke-recovery-hold}"

# Startup service / ingress checks; an empty default still leaves the
# variable defined, which matters under 'set -u'.
: "${STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS:=10}"
: "${STARTUP_SERVICE_CHECKLIST:=}"
: "${STARTUP_INCLUDE_INGRESS_CHECKS:=1}"
: "${STARTUP_INGRESS_ALLOWED_STATUSES:=200,301,302,307,308,401,403,404}"
: "${STARTUP_IGNORE_INGRESS_HOSTS_REGEX:=}"
: "${STARTUP_INGRESS_CHECK_TIMEOUT_SECONDS:=10}"
# Shutdown, mail-safeguard, state-path and bundle defaults.
# ': "${VAR:=value}"' assigns the value only when VAR is unset or empty;
# plain 'VAR=...' lines are always (re)computed here regardless of any
# value already present.

: "${SHUTDOWN_NAMESPACE_EXCLUDES_REGEX:=^(kube-system|kube-public|kube-node-lease|flux-system|traefik|metallb-system|cert-manager|longhorn-system|vault|postgres|maintenance)$}"
: "${REQUIRE_NONEMPTY_REPLICA_SNAPSHOT:=1}"

# Mail startup safeguards.
: "${STARTUP_REQUIRE_MAIL_SAFEGUARDS:=1}"
: "${MAIL_STARTUP_NAMESPACE:=mailu-mailserver}"
: "${MAIL_STARTUP_ENDPOINT_SERVICES:=mailu-front,mailu-postfix,mailu-dovecot}"
: "${MAIL_STARTUP_HOST:=mail.bstein.dev}"
: "${MAIL_STARTUP_TCP_PORTS:=25,465,587,993,995}"
: "${MAIL_STARTUP_TCP_TIMEOUT_SECONDS:=3}"

: "${BUNDLE_HTTP_PORT:=8877}"

# State directory and the files derived from it. These four are assigned
# unconditionally; only STATE_SUBDIR and HARBOR_BUNDLE_BASENAME influence them.
STATE_ROOT="$HOME/${STATE_SUBDIR:-.local/share/ananke}"
RECOVERY_STATE_FILE="$STATE_ROOT/cluster_power_recovery.state"
REPLICA_SNAPSHOT_FILE="$STATE_ROOT/desired_workload_replicas.tsv"
HARBOR_BUNDLE_FILE="$STATE_ROOT/bundles/${HARBOR_BUNDLE_BASENAME:-harbor-bootstrap-v2.14.1-arm64.tar.zst}"

# Image lists are resolved relative to BOOTSTRAP_DIR.
: "${BOOTSTRAP_IMAGES_FILE:=${BOOTSTRAP_DIR}/harbor-bootstrap-images.txt}"
: "${LONGHORN_UNLOCK_IMAGES_FILE:=${BOOTSTRAP_DIR}/longhorn-unlock-images.txt}"

# BOOTSTRAP_BUNDLE_ARCH only receives its own default further down, so the
# bundle paths carry an inline arm64 fallback.
: "${LONGHORN_UNLOCK_BUNDLE_FILE:=${STATE_ROOT}/bundles/longhorn-unlock-v1.8.2-${BOOTSTRAP_BUNDLE_ARCH:-arm64}.tar.zst}"
: "${LONGHORN_MANAGER_IMAGE:=registry.bstein.dev/infra/longhorn-manager:v1.8.2}"
: "${LONGHORN_MANAGER_CACHE_BUNDLE_FILE:=${STATE_ROOT}/bundles/longhorn-manager-v1.8.2-${BOOTSTRAP_BUNDLE_ARCH:-arm64}.tar}"
: "${LONGHORN_UNLOCK_SSH_KNOWN_HOSTS:=/tmp/ananke_longhorn_unlock_known_hosts}"
: "${BOOTSTRAP_BUNDLE_ARCH:=arm64}"

: "${RECOVERY_UNCORDON_DENYLIST:=titan-18,titan-22,titan-24}"
STALE_TERMINATING_POD_SECONDS="${STALE_TERMINATING_POD_SECONDS:-300}" RECOVERY_NODE_RUNTIME_RESTART_ENABLED="${RECOVERY_NODE_RUNTIME_RESTART_ENABLED:-1}" RECOVERY_NODE_RUNTIME_RESTART_DENYLIST="${RECOVERY_NODE_RUNTIME_RESTART_DENYLIST:-${RECOVERY_UNCORDON_DENYLIST}}" RECOVERY_NODE_RUNTIME_RESTART_MAX_NODES="${RECOVERY_NODE_RUNTIME_RESTART_MAX_NODES:-3}" RECOVERY_NODE_RUNTIME_RESTART_WAIT_SECONDS="${RECOVERY_NODE_RUNTIME_RESTART_WAIT_SECONDS:-300}" HARBOR_TARGET_NODE="${HARBOR_TARGET_NODE:-}" HARBOR_CANARY_NODE="${HARBOR_CANARY_NODE:-}" HARBOR_HOST_LABEL_KEY="${HARBOR_HOST_LABEL_KEY:-ananke.bstein.dev/harbor-bootstrap}" HARBOR_CANARY_IMAGE="${HARBOR_CANARY_IMAGE:-registry.bstein.dev/bstein/kubectl:1.35.0}" NODE_HELPER_IMAGE="${NODE_HELPER_IMAGE:-registry.bstein.dev/bstein/ananke-node-helper:0.1.0}" NODE_HELPER_NAMESPACE="${NODE_HELPER_NAMESPACE:-maintenance}" NODE_HELPER_SERVICE_ACCOUNT="${NODE_HELPER_SERVICE_ACCOUNT:-default}" NODE_HELPER_PREWARM_DS="${NODE_HELPER_PREWARM_DS:-ananke-node-helper-prewarm}" REGISTRY_PULL_SECRET="${REGISTRY_PULL_SECRET:-harbor-regcred}" REFRESH_BOOTSTRAP_IMAGE_ALIASES="${REFRESH_BOOTSTRAP_IMAGE_ALIASES:-0}" SKIP_LONGHORN_UNLOCK_BUNDLE_SEED="${SKIP_LONGHORN_UNLOCK_BUNDLE_SEED:-0}" LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE="${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE:-${STATE_ROOT}/longhorn_unlock_optional_replicas.tsv}" KEEP_PREWARM_DAEMONSET=0 BOOTSTRAP_IMAGES_SEEDED=0 RECOVERY_PENDING=0 STARTUP_ATTEMPTED_DURING_OUTAGE=0 LAST_CHECKPOINT="none" BUNDLE_SERVER_PID="" UPS_HOST_IN_USE="" while [[ $# -gt 0 ]]; do case "$1" in --execute) EXECUTE=1 shift ;; --shutdown-mode) SHUTDOWN_MODE="${2:?missing shutdown mode}" shift 2 ;; --expected-flux-branch) EXPECTED_FLUX_BRANCH="${2:?missing branch}" shift 2 ;; --expected-flux-url) EXPECTED_FLUX_URL="${2:?missing flux url}" shift 2 ;; --allow-flux-source-mutation) ALLOW_FLUX_SOURCE_MUTATION=1 shift ;; --force-flux-url) FORCE_FLUX_URL="${2:?missing flux url}" shift 2 ;; --force-flux-branch) 
FORCE_FLUX_BRANCH="${2:?missing branch}" shift 2 ;; --skip-etcd-snapshot) SKIP_ETCD_SNAPSHOT=1 shift ;; --skip-drain) SKIP_DRAIN=1 shift ;; --skip-local-bootstrap) SKIP_LOCAL_BOOTSTRAP=1 shift ;; --skip-harbor-bootstrap) SKIP_HARBOR_BOOTSTRAP=1 shift ;; --skip-harbor-seed) SKIP_HARBOR_SEED=1 shift ;; --skip-helper-prewarm) SKIP_HELPER_PREWARM=1 shift ;; --refresh-bootstrap-image-aliases) REFRESH_BOOTSTRAP_IMAGE_ALIASES=1 shift ;; --ups-host) UPS_HOST="${2:?missing ups host}" shift 2 ;; --ups-battery-key) UPS_BATTERY_KEY="${2:?missing ups key}" shift 2 ;; --min-startup-battery) MIN_STARTUP_BATTERY="${2:?missing battery threshold}" shift 2 ;; --require-ups-battery) REQUIRE_UPS_BATTERY=1 shift ;; --recovery-state-file) RECOVERY_STATE_FILE="${2:?missing state file path}" shift 2 ;; --replica-snapshot-file) REPLICA_SNAPSHOT_FILE="${2:?missing replica snapshot file path}" shift 2 ;; --harbor-bundle-file) HARBOR_BUNDLE_FILE="${2:?missing bundle file path}" shift 2 ;; --longhorn-unlock-bundle-file) LONGHORN_UNLOCK_BUNDLE_FILE="${2:?missing Longhorn unlock bundle file path}" shift 2 ;; --bootstrap-images-file) BOOTSTRAP_IMAGES_FILE="${2:?missing bootstrap image list path}" shift 2 ;; --longhorn-unlock-images-file) LONGHORN_UNLOCK_IMAGES_FILE="${2:?missing Longhorn unlock image list path}" shift 2 ;; --longhorn-manager-cache-bundle-file) LONGHORN_MANAGER_CACHE_BUNDLE_FILE="${2:?missing Longhorn manager cache bundle file path}" shift 2 ;; --skip-longhorn-unlock-bundle-seed) SKIP_LONGHORN_UNLOCK_BUNDLE_SEED=1 shift ;; --bootstrap-bundle-arch) BOOTSTRAP_BUNDLE_ARCH="${2:?missing bootstrap bundle architecture}" shift 2 ;; --harbor-target-node) HARBOR_TARGET_NODE="${2:?missing harbor target node}" shift 2 ;; --harbor-canary-node) HARBOR_CANARY_NODE="${2:?missing harbor canary node}" shift 2 ;; --harbor-host-label-key) HARBOR_HOST_LABEL_KEY="${2:?missing harbor host label key}" shift 2 ;; --harbor-canary-image) HARBOR_CANARY_IMAGE="${2:?missing canary image}" shift 2 ;; 
--node-helper-image) NODE_HELPER_IMAGE="${2:?missing node helper image}" shift 2 ;; --bundle-http-port) BUNDLE_HTTP_PORT="${2:?missing bundle http port}" shift 2 ;; --api-wait-timeout) API_WAIT_TIMEOUT_SECONDS="${2:?missing api wait timeout}" shift 2 ;; --flux-ready-timeout) FLUX_READY_TIMEOUT_SECONDS="${2:?missing flux ready timeout}" shift 2 ;; --startup-checklist-timeout) STARTUP_CHECKLIST_TIMEOUT_SECONDS="${2:?missing startup checklist timeout}" shift 2 ;; --startup-workload-timeout) STARTUP_WORKLOAD_TIMEOUT_SECONDS="${2:?missing startup workload timeout}" shift 2 ;; --startup-stability-window) STARTUP_STABILITY_WINDOW_SECONDS="${2:?missing startup stability window}" shift 2 ;; --startup-stability-timeout) STARTUP_STABILITY_TIMEOUT_SECONDS="${2:?missing startup stability timeout}" shift 2 ;; --drain-timeout) DRAIN_TIMEOUT_SECONDS="${2:?missing drain timeout}" shift 2 ;; --emergency-drain-timeout) EMERGENCY_DRAIN_TIMEOUT_SECONDS="${2:?missing emergency drain timeout}" shift 2 ;; -h|--help) usage exit 0 ;; *) echo "Unknown option: $1" >&2 usage exit 1 ;; esac done case "${SHUTDOWN_MODE}" in host-poweroff|cluster-only) ;; *) echo "Invalid --shutdown-mode '${SHUTDOWN_MODE}'. Expected host-poweroff or cluster-only." >&2 exit 1 ;; esac if [[ -n "${FORCE_FLUX_URL}" && "${ALLOW_FLUX_SOURCE_MUTATION}" -ne 1 ]]; then echo "--force-flux-url requires --allow-flux-source-mutation (breakglass)." >&2 exit 1 fi require_cmd() { local cmd="$1" if ! 
command -v "${cmd}" >/dev/null 2>&1; then echo "Missing required command: ${cmd}" >&2 exit 1 fi } require_cmd kubectl require_cmd bash require_cmd base64 require_cmd curl log() { echo "[cluster-power] $*"; } warn() { echo "[cluster-power][warn] $*" >&2; } die() { echo "[cluster-power][error] $*" >&2; exit 1; } run() { if [[ "${EXECUTE}" -eq 1 ]]; then log "EXEC: $*" "$@" else log "DRY-RUN: $*" fi } run_shell() { if [[ "${EXECUTE}" -eq 1 ]]; then log "EXEC: $*" bash -lc "$*" else log "DRY-RUN: $*" fi } apply_kustomization() { local path="$1" local full_path="${REPO_DIR}/${path}" if [[ "${EXECUTE}" -eq 1 ]]; then log "EXEC: kubectl kustomize ${full_path} --load-restrictor=LoadRestrictionsNone | kubectl apply -f -" kubectl kustomize "${full_path}" --load-restrictor=LoadRestrictionsNone | kubectl apply -f - else log "DRY-RUN: kubectl kustomize ${full_path} --load-restrictor=LoadRestrictionsNone | kubectl apply -f -" fi } sanitize_name() { printf '%s' "$1" | tr '[:upper:]' '[:lower:]' | tr -cs 'a-z0-9-' '-' } state_dir() { dirname "${RECOVERY_STATE_FILE}" } load_recovery_state() { RECOVERY_PENDING=0 STARTUP_ATTEMPTED_DURING_OUTAGE=0 LAST_CHECKPOINT="none" [[ -f "${RECOVERY_STATE_FILE}" ]] || return 0 while IFS='=' read -r key value; do case "${key}" in recovery_pending) RECOVERY_PENDING="${value}" ;; startup_attempted) STARTUP_ATTEMPTED_DURING_OUTAGE="${value}" ;; last_checkpoint) LAST_CHECKPOINT="${value}" ;; esac done < "${RECOVERY_STATE_FILE}" } save_recovery_state() { [[ "${EXECUTE}" -eq 1 ]] || return 0 mkdir -p "$(state_dir)" cat > "${RECOVERY_STATE_FILE}" </dev/null || true LAST_CHECKPOINT="none" } sanitize_battery_percent() { local raw="$1" raw="${raw##*:}" raw="${raw//[[:space:]]/}" raw="${raw%%.*}" [[ "${raw}" =~ ^[0-9]+$ ]] || return 1 printf '%s' "${raw}" } candidate_ups_hosts() { local candidate name local -A seen=() if [[ -n "${UPS_HOST}" ]]; then seen["${UPS_HOST}"]=1 echo "${UPS_HOST}" fi while IFS= read -r name; do [[ -n "${name}" ]] || continue for 
candidate in "${name}@localhost" "${name}"; do [[ -n "${seen[${candidate}]+x}" ]] && continue seen["${candidate}"]=1 echo "${candidate}" done done < <(upsc -l 2>/dev/null || true) } read_ups_battery() { if ! command -v upsc >/dev/null 2>&1; then return 1 fi local host raw parsed while IFS= read -r host; do raw="$(upsc "${host}" "${UPS_BATTERY_KEY}" 2>/dev/null || true)" [[ -n "${raw}" ]] || continue parsed="$(sanitize_battery_percent "${raw}" || true)" [[ -n "${parsed}" ]] || continue UPS_HOST_IN_USE="${host}" printf '%s' "${parsed}" return 0 done < <(candidate_ups_hosts) return 1 } ensure_minimum_battery_for_bootstrap() { local battery battery="$(read_ups_battery || true)" if [[ -z "${battery}" ]]; then if [[ "${REQUIRE_UPS_BATTERY}" -eq 1 ]]; then warn "Unable to read UPS battery status and --require-ups-battery is set." return 1 fi warn "Unable to read UPS battery status; continuing without hard battery gating." return 0 fi log "ups-battery=${battery}% host=${UPS_HOST_IN_USE:-${UPS_HOST}}" if (( battery < MIN_STARTUP_BATTERY )); then warn "UPS battery ${battery}% below minimum startup threshold ${MIN_STARTUP_BATTERY}%." 
return 1 fi return 0 } report_flux_source_state() { local flux_url flux_branch flux_url="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.url}' 2>/dev/null || true)" flux_branch="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.ref.branch}' 2>/dev/null || true)" [[ -n "${flux_url}" ]] && log "flux-source-url=${flux_url}" if [[ -n "${flux_branch}" ]]; then log "flux-source-branch=${flux_branch}" fi } csv_has_value() { local csv="$1" local value="$2" local needle=",${value}," local haystack=",${csv}," [[ "${haystack}" == *"${needle}"* ]] } assert_flux_source_expected() { if [[ "${EXECUTE}" -eq 0 ]]; then log "DRY-RUN: skipping strict Flux source drift guard" return 0 fi local flux_url flux_branch flux_url="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.url}' 2>/dev/null || true)" flux_branch="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.ref.branch}' 2>/dev/null || true)" [[ -n "${flux_url}" ]] || die "Unable to read Flux source URL from flux-system/gitrepository." [[ -n "${flux_branch}" ]] || die "Unable to read Flux source branch from flux-system/gitrepository." if [[ -n "${EXPECTED_FLUX_URL}" && "${flux_url}" != "${EXPECTED_FLUX_URL}" ]]; then die "Flux source URL drift detected: got '${flux_url}', expected '${EXPECTED_FLUX_URL}'. Refusing startup." fi if [[ -z "${FORCE_FLUX_BRANCH}" && "${flux_branch}" != "${EXPECTED_FLUX_BRANCH}" ]]; then die "Flux source branch drift detected: got '${flux_branch}', expected '${EXPECTED_FLUX_BRANCH}'. Use --force-flux-branch to correct." 
fi } kustomization_is_optional() { local name="$1" [[ -n "${STARTUP_OPTIONAL_KUSTOMIZATIONS}" ]] || return 1 csv_has_value "${STARTUP_OPTIONAL_KUSTOMIZATIONS}" "${name}" } list_not_ready_kustomizations() { local rows line name ready message rows="$(kubectl -n flux-system get kustomizations.kustomize.toolkit.fluxcd.io \ -o 'custom-columns=NAME:.metadata.name,READY:.status.conditions[?(@.type=="Ready")].status,MESSAGE:.status.conditions[?(@.type=="Ready")].message' \ --no-headers 2>/dev/null || true)" [[ -n "${rows}" ]] || return 0 while IFS= read -r line; do [[ -n "${line}" ]] || continue name="$(awk '{print $1}' <<< "${line}")" ready="$(awk '{print $2}' <<< "${line}")" message="${line#${name} }" message="${message#${ready} }" if kustomization_is_optional "${name}"; then continue fi if [[ "${ready}" != "True" ]]; then printf '%s|%s\n' "${name}" "${message}" fi done <<< "${rows}" } trigger_flux_reconcile_all() { local now now="$(date --iso-8601=seconds)" run kubectl -n flux-system annotate kustomizations.kustomize.toolkit.fluxcd.io --all reconcile.fluxcd.io/requestedAt="${now}" --overwrite if command -v flux >/dev/null 2>&1; then run flux reconcile source git flux-system -n flux-system --timeout=3m fi } heal_failed_flux_jobs() { local rows line ns name failed flux_owner helm_owner healed healed=0 rows="$(kubectl get jobs.batch -A \ -o custom-columns=NS:.metadata.namespace,NAME:.metadata.name,FAILED:.status.failed,FLUX_OWNER:.metadata.labels.kustomize\\.toolkit\\.fluxcd\\.io/name,HELM_OWNER:.metadata.labels.helm\\.toolkit\\.fluxcd\\.io/name \ --no-headers 2>/dev/null || true)" [[ -n "${rows}" ]] || return 1 while IFS= read -r line; do [[ -n "${line}" ]] || continue ns="$(awk '{print $1}' <<< "${line}")" name="$(awk '{print $2}' <<< "${line}")" failed="$(awk '{print $3}' <<< "${line}")" flux_owner="$(awk '{print $4}' <<< "${line}")" helm_owner="$(awk '{print $5}' <<< "${line}")" [[ "${failed}" != "" ]] || continue [[ "${failed}" =~ ^[0-9]+$ ]] || continue (( failed > 0 
)) || continue if [[ "${flux_owner}" == "" && "${helm_owner}" == "" ]]; then continue fi warn "Deleting failed Flux-managed Job ${ns}/${name} to heal immutable-template drift." run kubectl -n "${ns}" delete job "${name}" --ignore-not-found healed=1 done <<< "${rows}" (( healed == 1 )) } wait_for_flux_kustomizations_ready() { if [[ "${EXECUTE}" -eq 0 ]]; then log "DRY-RUN: skipping wait for all Flux kustomizations Ready" return 0 fi local start now not_ready immutable_hits start="$(date +%s)" immutable_hits=0 while true; do not_ready="$(list_not_ready_kustomizations || true)" if [[ -z "${not_ready}" ]]; then log "flux-kustomizations=all-ready" return 0 fi log "flux-kustomizations-not-ready:" while IFS= read -r line; do [[ -n "${line}" ]] || continue log " ${line}" done <<< "${not_ready}" if grep -Eqi 'immutable|field is immutable|cannot patch.*Job|Job.*invalid' <<< "${not_ready}"; then if (( immutable_hits < 3 )); then immutable_hits=$(( immutable_hits + 1 )) warn "Detected immutable Job failure signal in Flux status. Attempting automated Job cleanup (${immutable_hits}/3)." if heal_failed_flux_jobs; then trigger_flux_reconcile_all fi fi fi now="$(date +%s)" if (( now - start >= FLUX_READY_TIMEOUT_SECONDS )); then die "Timed out waiting for Flux kustomizations Ready after ${FLUX_READY_TIMEOUT_SECONDS}s." 
fi sleep "${FLUX_READY_POLL_SECONDS}" done } default_startup_service_checklist() { cat <<'CHECKS' gitea|https://scm.bstein.dev/api/healthz|200|"status":"pass"|| grafana|https://metrics.bstein.dev/api/health|200|"database":"ok"|| harbor|https://registry.bstein.dev/v2/|401|unauthorized|/dev/null \ | sed '/^[[:space:]]*$/d' \ | sort -u } generated_ingress_service_checks() { local host while IFS= read -r host; do [[ -n "${host}" ]] || continue if [[ -n "${STARTUP_IGNORE_INGRESS_HOSTS_REGEX}" ]] && [[ "${host}" =~ ${STARTUP_IGNORE_INGRESS_HOSTS_REGEX} ]]; then continue fi printf 'ingress-%s|https://%s/|%s|||0|%s\n' "${host}" "${host}" "${STARTUP_INGRESS_ALLOWED_STATUSES}" "${STARTUP_INGRESS_CHECK_TIMEOUT_SECONDS}" done < <(list_ingress_hosts) } startup_service_checklist_rows() { local base if [[ -n "${STARTUP_SERVICE_CHECKLIST}" ]]; then base="$(printf '%s' "${STARTUP_SERVICE_CHECKLIST}" | tr ';' '\n')" else base="$(default_startup_service_checklist)" fi printf '%s\n' "${base}" | sed '/^[[:space:]]*$/d' if [[ "${STARTUP_INCLUDE_INGRESS_CHECKS}" == "1" || "${STARTUP_INCLUDE_INGRESS_CHECKS}" == "true" ]]; then generated_ingress_service_checks fi } service_status_allowed() { local expected_csv="$1" local got="$2" local token IFS=',' read -r -a _statuses <<< "${expected_csv}" for token in "${_statuses[@]}"; do if [[ "${token}" == "${got}" ]]; then return 0 fi done return 1 } check_mail_safeguards_once() { local quiet="${1:-0}" local failures=0 namespace service host port ips local -a services=() ports=() if [[ "${STARTUP_REQUIRE_MAIL_SAFEGUARDS}" != "1" && "${STARTUP_REQUIRE_MAIL_SAFEGUARDS}" != "true" ]]; then return 0 fi namespace="${MAIL_STARTUP_NAMESPACE}" as_array_from_csv "${MAIL_STARTUP_ENDPOINT_SERVICES}" services for service in "${services[@]}"; do service="${service//[[:space:]]/}" [[ -n "${service}" ]] || continue ips="$(kubectl -n "${namespace}" get endpoints "${service}" -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null || true)" if [[ -z 
"${ips//[[:space:]]/}" ]]; then if [[ "${quiet}" != "1" ]]; then warn "startup-check mail-endpoints ${namespace}/${service}: no ready endpoints." fi failures=1 fi done host="${MAIL_STARTUP_HOST}" if [[ -n "${host}" ]]; then as_array_from_csv "${MAIL_STARTUP_TCP_PORTS}" ports for port in "${ports[@]}"; do port="${port//[[:space:]]/}" [[ "${port}" =~ ^[0-9]+$ ]] || continue if ! timeout "${MAIL_STARTUP_TCP_TIMEOUT_SECONDS}" bash -lc "/dev/null 2>&1; then if [[ "${quiet}" != "1" ]]; then warn "startup-check mail-tcp ${host}:${port}: connect failed." fi failures=1 fi done fi (( failures == 0 )) } check_startup_service_checklist_once() { local rows row name url expected body_must body_must_not insecure timeout code rc local body_file failures failures=0 rows="$(startup_service_checklist_rows)" while IFS= read -r row; do [[ -n "${row}" ]] || continue IFS='|' read -r name url expected body_must body_must_not insecure timeout <<< "${row}" [[ -n "${name}" && -n "${url}" && -n "${expected}" ]] || continue [[ -n "${insecure}" ]] || insecure=0 [[ -n "${timeout}" ]] || timeout="${STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS}" body_file="$(mktemp)" rc=0 if [[ "${insecure}" == "1" || "${insecure}" == "true" ]]; then code="$(curl -ksS --max-time "${timeout}" -o "${body_file}" -w '%{http_code}' "${url}" || rc=$?)" else code="$(curl -sS --max-time "${timeout}" -o "${body_file}" -w '%{http_code}' "${url}" || rc=$?)" fi if (( rc != 0 )); then warn "startup-check ${name}: request failed (rc=${rc}) url=${url}" failures=1 rm -f "${body_file}" continue fi if ! service_status_allowed "${expected}" "${code}"; then warn "startup-check ${name}: expected status ${expected}, got ${code} url=${url}" failures=1 rm -f "${body_file}" continue fi if [[ -n "${body_must}" ]] && ! 
grep -Fq -- "${body_must}" "${body_file}"; then warn "startup-check ${name}: missing required body fragment '${body_must}'" failures=1 rm -f "${body_file}" continue fi if [[ -n "${body_must_not}" ]] && grep -Fq -- "${body_must_not}" "${body_file}"; then warn "startup-check ${name}: forbidden body fragment '${body_must_not}' present" failures=1 rm -f "${body_file}" continue fi rm -f "${body_file}" done <<< "${rows}" if ! check_mail_safeguards_once; then failures=1 fi (( failures == 0 )) } wait_for_startup_service_checklist() { if [[ "${EXECUTE}" -eq 0 ]]; then log "DRY-RUN: skipping startup external service checklist wait" return 0 fi local start now checklist_ok workloads_ok start="$(date +%s)" while true; do checklist_ok=0 workloads_ok=0 if check_startup_service_checklist_once; then checklist_ok=1 fi if list_unhealthy_workloads | sed '/^[[:space:]]*$/d' | grep -q .; then workloads_ok=0 else workloads_ok=1 fi if (( checklist_ok == 1 && workloads_ok == 1 )); then log "startup-checklist=all-passed" return 0 fi if (( workloads_ok == 0 )); then warn "startup-checklist: workloads are not fully ready yet." fi now="$(date +%s)" if (( now - start >= STARTUP_CHECKLIST_TIMEOUT_SECONDS )); then die "Timed out waiting for startup external checklist after ${STARTUP_CHECKLIST_TIMEOUT_SECONDS}s." 
fi
    sleep "${STARTUP_CHECKLIST_POLL_SECONDS}"
  done
}

# Print "namespace/pod|STATUS" rows for pods whose STATUS column matches a
# known failure reason (crash loop, image pull, container config/run errors).
# Rows matching STARTUP_IGNORE_PODS_REGEX are dropped when that regex is set.
# kubectl/awk/grep failures are swallowed; the output may be empty.
collect_unstable_pods() {
  local rows
  rows="$(kubectl get pods -A --no-headers 2>/dev/null \
    | awk '$4 ~ /(CrashLoopBackOff|ImagePullBackOff|ErrImagePull|CreateContainerConfigError|RunContainerError|InvalidImageName)/ {print $1 "/" $2 "|" $4}' || true)"
  if [[ -n "${STARTUP_IGNORE_PODS_REGEX}" ]]; then
    rows="$(printf '%s\n' "${rows}" | grep -Ev "${STARTUP_IGNORE_PODS_REGEX}" || true)"
  fi
  printf '%s' "${rows}"
}

# Poll until Flux kustomizations, pods, the external service checklist and
# workloads have all been healthy for STARTUP_STABILITY_WINDOW_SECONDS in a row.
# Any unhealthy poll resets the window. Dies once
# STARTUP_STABILITY_TIMEOUT_SECONDS has elapsed. No-op in dry-run mode.
wait_for_startup_stability_window() {
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: skipping startup stability window"
    return 0
  fi
  # `line` is declared local so the report loops below do not leak it.
  local hard_deadline stable_since now unstable pods not_ready unhealthy_workloads line
  stable_since="$(date +%s)"
  hard_deadline=$(( stable_since + STARTUP_STABILITY_TIMEOUT_SECONDS ))
  while true; do
    unstable=0
    not_ready="$(list_not_ready_kustomizations || true)"
    if [[ -n "${not_ready}" ]]; then
      unstable=1
      warn "stability-window: Flux kustomizations not ready."
    fi
    pods="$(collect_unstable_pods || true)"
    if [[ -n "${pods}" ]]; then
      unstable=1
      warn "stability-window: unstable pods detected."
      while IFS= read -r line; do
        [[ -n "${line}" ]] || continue
        warn " ${line}"
      done <<< "${pods}"
    fi
    if ! check_startup_service_checklist_once; then
      unstable=1
      warn "stability-window: external service checklist failed."
    fi
    unhealthy_workloads="$(list_unhealthy_workloads || true)"
    if [[ -n "${unhealthy_workloads}" ]]; then
      unstable=1
      warn "stability-window: workloads not fully ready."
      while IFS= read -r line; do
        [[ -n "${line}" ]] || continue
        warn " ${line}"
      done <<< "${unhealthy_workloads}"
    fi
    now="$(date +%s)"
    if (( unstable == 0 )); then
      if (( now - stable_since >= STARTUP_STABILITY_WINDOW_SECONDS )); then
        log "startup-stability-window=passed (${STARTUP_STABILITY_WINDOW_SECONDS}s)"
        return 0
      fi
    else
      # Restart the healthy window from this poll.
      stable_since="${now}"
    fi
    if (( now >= hard_deadline )); then
      die "Timed out waiting for startup stability window (${STARTUP_STABILITY_WINDOW_SECONDS}s healthy) within ${STARTUP_STABILITY_TIMEOUT_SECONDS}s."
    fi
    sleep "${STARTUP_STABILITY_POLL_SECONDS}"
  done
}

# Wait for the Kubernetes API to answer `kubectl version`, retrying every 5s
# for roughly API_WAIT_TIMEOUT_SECONDS (at least one attempt).
# Returns 0 when the API answers (or in dry-run), 1 when attempts run out.
wait_for_api() {
  local attempts=$(( API_WAIT_TIMEOUT_SECONDS / 5 ))
  if (( attempts < 1 )); then
    attempts=1
  fi
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: skipping live Kubernetes API wait"
    return 0
  fi
  local i
  for i in $(seq 1 "${attempts}"); do
    if kubectl version --request-timeout=5s >/dev/null 2>&1; then
      return 0
    fi
    sleep 5
  done
  return 1
}

# Set spec.suspend=$1 on every Kustomization in flux-system and on every
# HelmRelease in all namespaces. Listing failures are swallowed, which leaves
# the corresponding list empty.
patch_flux_suspend_all() {
  local value="$1"
  local patch
  patch=$(printf '{"spec":{"suspend":%s}}' "${value}")
  local ks_list hr_list
  # Loop variables are local so they do not leak into the caller's scope.
  local k hr ns name
  ks_list="$(kubectl -n flux-system get kustomizations.kustomize.toolkit.fluxcd.io -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' || true)"
  hr_list="$(kubectl get helmreleases.helm.toolkit.fluxcd.io -A -o jsonpath='{range .items[*]}{.metadata.namespace}{"/"}{.metadata.name}{"\n"}{end}' || true)"
  while IFS= read -r k; do
    [[ -z "${k}" ]] && continue
    patch_kustomization_suspend "${k}" "${value}"
  done <<< "${ks_list}"
  while IFS= read -r hr; do
    [[ -z "${hr}" ]] && continue
    ns="${hr%%/*}"
    name="${hr##*/}"
    run kubectl -n "${ns}" patch helmrelease "${name}" --field-manager="${RECOVERY_KUBECTL_FIELD_MANAGER}" --type=merge -p "${patch}"
  done <<< "${hr_list}"
}

# Server-side apply a minimal Kustomization manifest that carries only
# spec.suspend=$2 for flux-system/$1, forcing field conflicts in favour of
# RECOVERY_KUBECTL_FIELD_MANAGER. Only logs the command in dry-run mode.
apply_kustomization_suspend_field() {
  local name="$1"
  local value="$2"
  if [[ "${EXECUTE}" -eq 1 ]]; then
    log "EXEC: kubectl apply --server-side --force-conflicts --field-manager=${RECOVERY_KUBECTL_FIELD_MANAGER} kustomization/${name} suspend=${value}"
    printf 'apiVersion: kustomize.toolkit.fluxcd.io/v1\nkind: Kustomization\nmetadata:\n name: %s\n namespace: flux-system\nspec:\n suspend: %s\n' "${name}" "${value}" \
      | kubectl apply --server-side --force-conflicts --field-manager="${RECOVERY_KUBECTL_FIELD_MANAGER}" -f -
  else
    log "DRY-RUN: kubectl apply --server-side --force-conflicts --field-manager=${RECOVERY_KUBECTL_FIELD_MANAGER} kustomization/${name} suspend=${value}"
  fi
}

# Set spec.suspend=$2 on Kustomization flux-system/$1 if it exists.
# When executing with value "true", re-read the field up to
# RECOVERY_FLUX_PATCH_VERIFY_ATTEMPTS times and warn if it never reads "true".
# A missing Kustomization only produces a warning.
patch_kustomization_suspend() {
  local name="$1"
  local value="$2"
  if kubectl -n flux-system get kustomization "${name}" >/dev/null 2>&1; then
    apply_kustomization_suspend_field "${name}" "${value}"
    if [[ "${EXECUTE}" -eq 1 && "${value}" == "true" ]]; then
      local attempt observed
      for attempt in $(seq 1 "${RECOVERY_FLUX_PATCH_VERIFY_ATTEMPTS}"); do
        observed="$(kubectl -n flux-system get kustomization "${name}" -o jsonpath='{.spec.suspend}' 2>/dev/null || true)"
        [[ "${observed}" == "true" ]] && return 0
        sleep "${RECOVERY_FLUX_PATCH_VERIFY_SLEEP_SECONDS}"
      done
      warn "Flux Kustomization ${name} suspend=true did not verify after patch; observed=${observed:-missing}."
    fi
  else
    warn "Flux Kustomization ${name} not found; skipping suspend=${value}."
  fi
}

# Split the comma-separated string $1 and print each non-empty item on its own
# line, with all whitespace removed from every item.
# The scratch array is local, and an empty list returns early so that
# expanding an empty array cannot trip `set -u` on bash releases before 4.4.
csv_each() {
  local csv="$1"
  local item
  local -a items=()
  IFS=',' read -r -a items <<< "${csv}"
  (( ${#items[@]} > 0 )) || return 0
  for item in "${items[@]}"; do
    item="${item//[[:space:]]/}"
    [[ -n "${item}" ]] || continue
    printf '%s\n' "${item}"
  done
}

# Write "<name>\t<suspend>" for every existing Kustomization listed in
# RECOVERY_FLUX_OPTIONAL_KUSTOMIZATIONS to RECOVERY_FLUX_OPTIONAL_SNAPSHOT_FILE,
# truncating the file first. An empty suspend field is recorded as "false".
# No-op unless EXECUTE=1.
save_recovery_optional_flux_snapshot() {
  [[ "${EXECUTE}" -eq 1 ]] || return 0
  mkdir -p "$(dirname "${RECOVERY_FLUX_OPTIONAL_SNAPSHOT_FILE}")"
  : > "${RECOVERY_FLUX_OPTIONAL_SNAPSHOT_FILE}"
  local name suspend
  while IFS= read -r name; do
    if ! kubectl -n flux-system get kustomization "${name}" >/dev/null 2>&1; then
      continue
    fi
    suspend="$(kubectl -n flux-system get kustomization "${name}" -o jsonpath='{.spec.suspend}' 2>/dev/null || true)"
    [[ -n "${suspend}" ]] || suspend="false"
    printf '%s\t%s\n' "${name}" "${suspend}" >> "${RECOVERY_FLUX_OPTIONAL_SNAPSHOT_FILE}"
  done < <(csv_each "${RECOVERY_FLUX_OPTIONAL_KUSTOMIZATIONS}")
  log "recovery-flux-optional-snapshot=${RECOVERY_FLUX_OPTIONAL_SNAPSHOT_FILE}"
}

# Set suspend=$1 on every Kustomization in
# RECOVERY_FLUX_OPTIONAL_KUSTOMIZATIONS. When suspending ("true"), the current
# suspend state is snapshotted first.
patch_recovery_optional_flux_suspend() {
  local value="$1"
  local name
  if [[ "${value}" == "true" ]]; then
    save_recovery_optional_flux_snapshot
  fi
  while IFS= read -r name; do
    patch_kustomization_suspend "${name}" "${value}"
  done < <(csv_each "${RECOVERY_FLUX_OPTIONAL_KUSTOMIZATIONS}")
}

# Same as patch_recovery_optional_flux_suspend, but never rewrites the
# snapshot file.
patch_recovery_optional_flux_suspend_without_snapshot() {
  local value="$1"
  local name
  while IFS= read -r name; do
    patch_kustomization_suspend "${name}" "${value}"
  done < <(csv_each "${RECOVERY_FLUX_OPTIONAL_KUSTOMIZATIONS}")
}

# Set suspend=$1 on every Kustomization in
# RECOVERY_FLUX_CRITICAL_KUSTOMIZATIONS.
patch_recovery_critical_flux_suspend() {
  local value="$1"
  local name
  while IFS= read -r name; do
    patch_kustomization_suspend "${name}" "${value}"
  done < <(csv_each "${RECOVERY_FLUX_CRITICAL_KUSTOMIZATIONS}")
}

# Set suspend=$1 on every "namespace/name" HelmRelease reference in
# RECOVERY_FLUX_CRITICAL_HELMRELEASES. Entries without a "/" (namespace and
# name parse to the same string) or with an empty part are skipped.
patch_recovery_critical_helmrelease_suspend() {
  local value="$1"
  local ref namespace name
  while IFS= read -r ref; do
    namespace="${ref%%/*}"
    name="${ref##*/}"
    [[ -n "${namespace}" && -n "${name}" && "${namespace}" != "${name}" ]] || continue
    patch_helmrelease_suspend "${namespace}" "${name}" "${value}"
  done < <(csv_each "${RECOVERY_FLUX_CRITICAL_HELMRELEASES}")
}

# Print the names of recovery-held Kustomizations (the optional list, plus
# flux-system when RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION is 1/true)
# that exist but whose spec.suspend is not "true".
recovery_flux_unsuspended_list() {
  local names=()
  local name suspend
  if [[ "${RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION}" == "1" || "${RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION}" == "true" ]]; then
    names+=("flux-system")
  fi
  while IFS= read -r name; do
    names+=("${name}")
  done < <(csv_each "${RECOVERY_FLUX_OPTIONAL_KUSTOMIZATIONS}")
  # Nothing to check; also avoids expanding an empty array under `set -u`
  # on bash releases before 4.4.
  (( ${#names[@]} > 0 )) || return 0
  for name in "${names[@]}"; do
    if ! kubectl -n flux-system get kustomization "${name}" >/dev/null 2>&1; then
      continue
    fi
    suspend="$(kubectl -n flux-system get kustomization "${name}" -o jsonpath='{.spec.suspend}' 2>/dev/null || true)"
    if [[ "${suspend}" != "true" ]]; then
      printf '%s\n' "${name}"
    fi
  done
}

# Re-apply suspend=true to flux-system (when configured) and to all optional
# Kustomizations, without touching the snapshot file.
reassert_recovery_flux_suspend_hold() {
  if [[ "${RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION}" == "1" || "${RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION}" == "true" ]]; then
    patch_kustomization_suspend flux-system true
  fi
  patch_recovery_optional_flux_suspend_without_snapshot true
}

# Require the suspend hold to stay intact for
# RECOVERY_FLUX_FINAL_STABILITY_SECONDS, reasserting it whenever it is found
# overwritten. Returns 1 after RECOVERY_FLUX_FINAL_STABILITY_TIMEOUT_SECONDS.
# No-op unless EXECUTE=1.
verify_recovery_flux_suspend_stable_window() {
  [[ "${EXECUTE}" -eq 1 ]] || return 0
  local deadline stable_since stable_for unsuspended
  deadline=$((SECONDS + RECOVERY_FLUX_FINAL_STABILITY_TIMEOUT_SECONDS))
  # 0 means "no stable window in progress".
  stable_since=0
  reassert_recovery_flux_suspend_hold
  while (( SECONDS < deadline )); do
    unsuspended="$(recovery_flux_unsuspended_list | paste -sd, -)"
    if [[ -n "${unsuspended}" ]]; then
      warn "Flux suspend hold was overwritten; reasserting recovery hold: ${unsuspended}"
      reassert_recovery_flux_suspend_hold
      stable_since=0
      sleep "${RECOVERY_FLUX_FINAL_STABILITY_POLL_SECONDS}"
      continue
    fi
    if (( stable_since == 0 )); then
      stable_since="${SECONDS}"
    fi
    stable_for=$((SECONDS - stable_since))
    if (( stable_for >= RECOVERY_FLUX_FINAL_STABILITY_SECONDS )); then
      log "recovery-flux-suspend=stable seconds=${stable_for}"
      return 0
    fi
    sleep "${RECOVERY_FLUX_FINAL_STABILITY_POLL_SECONDS}"
  done
  unsuspended="$(recovery_flux_unsuspended_list | paste -sd, -)"
  warn "Timed out waiting for stable Flux suspend hold after ${RECOVERY_FLUX_FINAL_STABILITY_TIMEOUT_SECONDS}s: ${unsuspended:-none-unsuspended-now}"
  return 1
}

# Scale every existing deployment named in RECOVERY_FLUX_AUX_CONTROLLERS to 0
# replicas. Only runs when EXECUTE=1 and
# RECOVERY_FLUX_FINAL_STOP_AUX_CONTROLLERS is 1/true.
stop_recovery_flux_aux_controllers() {
  [[ "${EXECUTE}" -eq 1 ]] || return 0
  [[ "${RECOVERY_FLUX_FINAL_STOP_AUX_CONTROLLERS}" == "1" || "${RECOVERY_FLUX_FINAL_STOP_AUX_CONTROLLERS}" == "true" ]] || return 0
  local controller
  while IFS= read -r controller; do
    if kubectl -n flux-system get deployment "${controller}" >/dev/null 2>&1; then
      warn "Stopping Flux auxiliary controller ${controller} for recovery hold."
      run kubectl -n flux-system scale deployment "${controller}" --replicas=0
    fi
  done < <(csv_each "${RECOVERY_FLUX_AUX_CONTROLLERS}")
}

# Wait up to 90s for all pods labelled app=kustomize-controller in flux-system
# to disappear. Returns 1 on timeout. No-op unless EXECUTE=1.
wait_for_kustomize_controller_scaled_down() {
  [[ "${EXECUTE}" -eq 1 ]] || return 0
  local deadline pods
  deadline=$((SECONDS + 90))
  while (( SECONDS < deadline )); do
    pods="$(kubectl -n flux-system get pods -l app=kustomize-controller -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null || true)"
    if [[ -z "${pods//[[:space:]]/}" ]]; then
      return 0
    fi
    sleep 2
  done
  warn "Timed out waiting for kustomize-controller pods to terminate before final Flux suspend reassertion."
  return 1
}

# Last-resort finalization: stop kustomize-controller (and auxiliary
# controllers), reassert the suspend hold, optionally restart the controller
# when RECOVERY_FLUX_FINAL_RESTART_KUSTOMIZE_CONTROLLER is 1/true, then require
# a stable suspend window. Returns 1 if the deployment is missing or the
# window is not achieved. No-op unless EXECUTE=1.
force_recovery_flux_suspend_with_controller_stop() {
  [[ "${EXECUTE}" -eq 1 ]] || return 0
  if ! kubectl -n flux-system get deployment kustomize-controller >/dev/null 2>&1; then
    warn "kustomize-controller deployment not found; cannot use controller-stop Flux suspend finalization."
    return 1
  fi
  warn "Stopping kustomize-controller for final Flux suspend reassertion."
  run kubectl -n flux-system scale deployment kustomize-controller --replicas=0
  stop_recovery_flux_aux_controllers
  wait_for_kustomize_controller_scaled_down || true
  reassert_recovery_flux_suspend_hold
  if [[ "${RECOVERY_FLUX_FINAL_RESTART_KUSTOMIZE_CONTROLLER}" == "1" || "${RECOVERY_FLUX_FINAL_RESTART_KUSTOMIZE_CONTROLLER}" == "true" ]]; then
    run kubectl -n flux-system scale deployment kustomize-controller --replicas=1
    kubectl -n flux-system rollout status deployment/kustomize-controller --timeout=2m || warn "kustomize-controller did not become Ready after final Flux suspend reassertion."
    sleep "${RECOVERY_FLUX_SUSPEND_VERIFY_SLEEP_SECONDS}"
  else
    warn "Leaving kustomize-controller stopped to preserve the recovery Flux hold."
  fi
  verify_recovery_flux_suspend_stable_window || return 1
  log "recovery-flux-suspend=verified-controller-stop"
}

# Reassert the suspend hold up to RECOVERY_FLUX_SUSPEND_VERIFY_ATTEMPTS times,
# returning 0 as soon as an attempt leaves nothing unsuspended and the stable
# window verifies. If Kustomizations are still unsuspended afterwards, fall
# back to the controller-stop finalization. No-op unless EXECUTE=1.
stabilize_recovery_flux_suspend() {
  [[ "${EXECUTE}" -eq 1 ]] || return 0
  local attempt unsuspended
  for attempt in $(seq 1 "${RECOVERY_FLUX_SUSPEND_VERIFY_ATTEMPTS}"); do
    reassert_recovery_flux_suspend_hold
    sleep "${RECOVERY_FLUX_SUSPEND_VERIFY_SLEEP_SECONDS}"
    unsuspended="$(recovery_flux_unsuspended_list | paste -sd, -)"
    if [[ -z "${unsuspended}" ]]; then
      verify_recovery_flux_suspend_stable_window && {
        log "recovery-flux-suspend=verified attempts=${attempt}"
        return 0
      }
    fi
    warn "Flux suspend state was overwritten during recovery thaw; reasserting attempt ${attempt}/${RECOVERY_FLUX_SUSPEND_VERIFY_ATTEMPTS}: ${unsuspended}"
  done
  unsuspended="$(recovery_flux_unsuspended_list | paste -sd, -)"
  if [[ -n "${unsuspended}" ]]; then
    warn "Flux suspend state is still not stable after verification attempts: ${unsuspended}"
    force_recovery_flux_suspend_with_controller_stop
  fi
}

# Re-apply the suspend values recorded in RECOVERY_FLUX_OPTIONAL_SNAPSHOT_FILE.
# Values other than "true"/"false" are treated as "false". No-op when the
# snapshot file does not exist.
restore_recovery_optional_flux_suspend() {
  [[ -f "${RECOVERY_FLUX_OPTIONAL_SNAPSHOT_FILE}" ]] || return 0
  local name suspend
  while IFS=$'\t' read -r name suspend; do
    [[ -n "${name}" ]] || continue
    [[ "${suspend}" == "true" || "${suspend}" == "false" ]] || suspend="false"
    patch_kustomization_suspend "${name}" "${suspend}"
  done < "${RECOVERY_FLUX_OPTIONAL_SNAPSHOT_FILE}"
}

# Stamp reconcile.fluxcd.io/requestedAt=<now> on every existing Kustomization
# named in the comma-separated list $1.
annotate_flux_kustomizations() {
  local now name
  now="$(date --iso-8601=seconds)"
  while IFS= read -r name; do
    if kubectl -n flux-system get kustomization "${name}" >/dev/null 2>&1; then
      run kubectl -n flux-system annotate kustomization "${name}" reconcile.fluxcd.io/requestedAt="${now}" --overwrite
    fi
  done < <(csv_each "$1")
}

# Rollout-restart kustomize-controller when
# RECOVERY_FLUX_RESTART_KUSTOMIZE_CONTROLLER is 1/true and the deployment
# exists.
restart_kustomize_controller_for_critical_thaw() {
  if [[ "${RECOVERY_FLUX_RESTART_KUSTOMIZE_CONTROLLER}" != "1" && "${RECOVERY_FLUX_RESTART_KUSTOMIZE_CONTROLLER}" != "true" ]]; then
    return 0
  fi
  if kubectl -n flux-system get deployment kustomize-controller >/dev/null 2>&1; then
    warn "Restarting kustomize-controller after optional Flux suspension to clear any single-worker health-check backlog."
    run kubectl -n flux-system rollout restart deployment kustomize-controller
  fi
}

# With kustomize-controller stopped: suspend everything, refresh the
# flux-system git source (when the flux CLI is available), unsuspend only the
# critical Kustomizations/HelmReleases, reassert the hold on flux-system and
# the optional set, then start the controller again. No-op unless EXECUTE=1.
prepare_recovery_flux_critical_thaw() {
  [[ "${EXECUTE}" -eq 1 ]] || return 0
  if kubectl -n flux-system get deployment kustomize-controller >/dev/null 2>&1; then
    warn "Stopping kustomize-controller to create a quiet Flux critical-thaw window."
    run kubectl -n flux-system scale deployment kustomize-controller --replicas=0
    wait_for_kustomize_controller_scaled_down || true
  fi
  patch_recovery_optional_flux_suspend true
  patch_flux_suspend_all true
  if command -v flux >/dev/null 2>&1; then
    run flux reconcile source git flux-system -n flux-system --timeout=3m || true
  fi
  patch_recovery_critical_flux_suspend false
  patch_recovery_critical_helmrelease_suspend false
  if [[ "${RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION}" == "1" || "${RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION}" == "true" ]]; then
    patch_kustomization_suspend flux-system true
  fi
  patch_recovery_optional_flux_suspend_without_snapshot true
  if kubectl -n flux-system get deployment kustomize-controller >/dev/null 2>&1; then
    run kubectl -n flux-system scale deployment kustomize-controller --replicas=1
    kubectl -n flux-system rollout status deployment/kustomize-controller --timeout=2m || warn "kustomize-controller did not become Ready for Flux critical thaw."
  fi
}

# Merge-patch spec.suspend=$3 on HelmRelease $1/$2 if it exists; warn when it
# does not.
patch_helmrelease_suspend() {
  local namespace="$1"
  local name="$2"
  local value="$3"
  local patch
  patch=$(printf '{"spec":{"suspend":%s}}' "${value}")
  if kubectl -n "${namespace}" get helmrelease "${name}" >/dev/null 2>&1; then
    run kubectl -n "${namespace}" patch helmrelease "${name}" --field-manager="${RECOVERY_KUBECTL_FIELD_MANAGER}" --type=merge -p "${patch}"
  else
    warn "HelmRelease ${namespace}/${name} not found; skipping suspend=${value}."
  fi
}

# Wait up to 90s each for kustomize-controller and helm-controller pods to
# disappear from flux-system. Timeouts only warn. Logs and returns in dry-run.
wait_for_flux_reconciler_pods_stopped() {
  local app start now pods
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: wait for Flux reconcilers to stop"
    return 0
  fi
  for app in kustomize-controller helm-controller; do
    start="$(date +%s)"
    while true; do
      pods="$(kubectl -n flux-system get pods -l "app=${app}" --no-headers 2>/dev/null || true)"
      if [[ -z "${pods}" ]]; then
        log "flux-reconciler-stopped=${app}"
        break
      fi
      now="$(date +%s)"
      if (( now - start >= 90 )); then
        warn "Timed out waiting for ${app} pods to stop."
        break
      fi
      sleep 2
    done
  done
}

# Stop the kustomize and helm controllers, then suspend the flux-system, helm
# and longhorn Kustomizations and the longhorn HelmRelease. Records the
# longhorn_unlock_automation_frozen checkpoint.
freeze_longhorn_deadlock_automation() {
  warn "Freezing only the automation that can fight Longhorn emergency recovery."
  if kubectl -n flux-system get deployment kustomize-controller >/dev/null 2>&1; then
    run kubectl -n flux-system scale deployment kustomize-controller --replicas=0
  fi
  if kubectl -n flux-system get deployment helm-controller >/dev/null 2>&1; then
    run kubectl -n flux-system scale deployment helm-controller --replicas=0
  fi
  wait_for_flux_reconciler_pods_stopped
  patch_kustomization_suspend flux-system true
  patch_kustomization_suspend helm true
  patch_kustomization_suspend longhorn true
  patch_helmrelease_suspend longhorn-system longhorn true
  mark_checkpoint longhorn_unlock_automation_frozen
}

# Patch the Longhorn HelmRelease values and the longhorn-manager DaemonSet
# container so image pull policy is IfNotPresent. Each patch is skipped when
# its target object does not exist.
ensure_longhorn_cache_first_policy() {
  local values_patch ds_patch
  values_patch='{"spec":{"values":{"image":{"pullPolicy":"IfNotPresent"},"defaultSettings":{"systemManagedPodsImagePullPolicy":"if-not-present"}}}}'
  ds_patch='{"spec":{"template":{"spec":{"containers":[{"name":"longhorn-manager","imagePullPolicy":"IfNotPresent"}]}}}}'
  if kubectl -n longhorn-system get helmrelease longhorn >/dev/null 2>&1; then
    run kubectl -n longhorn-system patch helmrelease longhorn --type=merge -p "${values_patch}"
  fi
  if kubectl -n longhorn-system get daemonset longhorn-manager >/dev/null 2>&1; then
    run kubectl -n longhorn-system patch daemonset longhorn-manager --type=strategic -p "${ds_patch}"
  fi
}

# Remove every container named pre-pull-share-manager-image from the
# longhorn-manager DaemonSet with JSON-patch "remove" operations.
remove_longhorn_manager_prepull_sidecar() {
  local indexes index
  # Zero-based container indexes, highest first, so that removing one entry
  # does not shift the positions of the entries still to be removed.
  indexes="$(kubectl -n longhorn-system get daemonset longhorn-manager \
    -o jsonpath='{range .spec.template.spec.containers[*]}{.name}{"\n"}{end}' 2>/dev/null \
    | nl -v 0 -w 1 -s ' ' \
    | awk '$2=="pre-pull-share-manager-image" {print $1}' \
    | sort -rn || true)"
  if [[ -z "${indexes}" ]]; then
    log "longhorn-manager-prepull-sidecar=absent"
    return 0
  fi
  while IFS= read -r index; do
    [[ -z "${index}" ]] && continue
    run kubectl -n longhorn-system patch daemonset longhorn-manager --type=json \
      -p "[{\"op\":\"remove\",\"path\":\"/spec/template/spec/containers/${index}\"}]"
  done <<< "${indexes}"
}

# Succeed when any longhorn-manager pod has a pre-pull-share-manager-image
# container waiting with an image-pull or container-start failure reason.
longhorn_manager_prepull_sidecar_has_pull_failures() {
  kubectl -n longhorn-system get pods -l app=longhorn-manager -o json \
    | jq -e '
      [
        .items[].status.containerStatuses[]?
        | select(.name == "pre-pull-share-manager-image")
        | select(((.state.waiting.reason // "") | test("ImagePullBackOff|ErrImagePull|CreateContainerError|RunContainerError|InvalidImageName")))
      ] | length > 0' >/dev/null 2>&1
}

# Remove the pre-pull sidecar when Harbor is unhealthy or the sidecar itself
# is failing; otherwise log that it was retained.
remove_longhorn_manager_prepull_sidecar_if_needed() {
  if ! harbor_endpoint_is_ready 1; then
    warn "Removing Longhorn manager pre-pull sidecar because Harbor registry API is unhealthy."
    remove_longhorn_manager_prepull_sidecar
    return 0
  fi
  if longhorn_manager_prepull_sidecar_has_pull_failures; then
    warn "Removing Longhorn manager pre-pull sidecar because it is in image/runtime failure."
    remove_longhorn_manager_prepull_sidecar
    return 0
  fi
  log "longhorn-manager-prepull-sidecar=retained harbor=healthy pull_failures=false"
}

# Create an empty LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE unless a non-empty one
# already exists, in which case it is preserved. Only logs in dry-run mode.
save_longhorn_unlock_optional_replica_snapshot() {
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: save optional workload snapshot to ${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}"
    return 0
  fi
  if [[ -s "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}" ]]; then
    log "optional-workload-snapshot=preserved path=${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}"
    return 0
  fi
  mkdir -p "$(dirname "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}")"
  : > "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}"
}

# Scale workload $1/$2/$3 (namespace/kind/name) to 0 replicas. When executing,
# its current replica count is appended to the snapshot file unless an entry
# for it is already there. Missing workloads are silently ignored; an empty
# replica field is treated as 1.
scale_optional_workload_for_longhorn_unlock() {
  local namespace="$1"
  local kind="$2"
  local name="$3"
  local replicas
  if ! kubectl -n "${namespace}" get "${kind}" "${name}" >/dev/null 2>&1; then
    return 0
  fi
  replicas="$(kubectl -n "${namespace}" get "${kind}" "${name}" -o jsonpath='{.spec.replicas}' 2>/dev/null || true)"
  [[ -n "${replicas}" ]] || replicas=1
  if [[ "${EXECUTE}" -eq 1 ]] && ! awk -F '\t' -v ns="${namespace}" -v kind="${kind}" -v name="${name}" '$1==ns && $2==kind && $3==name {found=1} END {exit found ? 0 : 1}' "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}" 2>/dev/null; then
    printf '%s\t%s\t%s\t%s\n' "${namespace}" "${kind}" "${name}" "${replicas}" >> "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}"
  fi
  if [[ "${replicas}" == "0" ]]; then
    log "optional-workload-already-scaled-down=${namespace}/${kind}/${name}"
    return 0
  fi
  warn "Temporarily scaling optional workload ${namespace}/${kind}/${name} from ${replicas} to 0 for Longhorn recovery headroom."
  run kubectl -n "${namespace}" scale "${kind}" "${name}" --replicas=0
}

# Scale down the fixed list of optional workloads below and record the
# longhorn_unlock_optional_workloads_scaled checkpoint.
free_longhorn_instance_manager_headroom() {
  # Loop variables are local so they do not leak into the caller's scope.
  local namespace kind name
  save_longhorn_unlock_optional_replica_snapshot
  while read -r namespace kind name; do
    # Skip blank lines and '#' comment lines in the list.
    [[ -z "${namespace}" || "${namespace}" == \#* ]] && continue
    scale_optional_workload_for_longhorn_unlock "${namespace}" "${kind}" "${name}"
  done <<'WORKLOADS'
game-stream deployment oauth2-proxy-wolf
logging deployment oauth2-proxy-logs
longhorn-system deployment oauth2-proxy-longhorn
maintenance deployment oauth2-proxy-metis
maintenance deployment oauth2-proxy-soteria
openclaw deployment oauth2-proxy-agent
quality deployment oauth2-proxy-sonarqube
quality deployment sonarqube-exporter
sso deployment oauth2-proxy
bstein-dev-home deployment bstein-dev-home-frontend
WORKLOADS
  mark_checkpoint longhorn_unlock_optional_workloads_scaled
}

# Scale each snapshotted workload back to its recorded replica count when that
# count is a positive integer and differs from the current value. Records the
# longhorn_unlock_optional_workloads_restored checkpoint.
restore_longhorn_unlock_optional_workloads() {
  local namespace kind name desired current
  if [[ ! -f "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}" ]]; then
    log "optional-workload-restore=not-needed snapshot=absent"
    return 0
  fi
  while IFS=$'\t' read -r namespace kind name desired; do
    [[ -n "${namespace}" && -n "${kind}" && -n "${name}" && -n "${desired}" ]] || continue
    [[ "${desired}" =~ ^[0-9]+$ ]] || continue
    (( desired > 0 )) || continue
    current="$(kubectl -n "${namespace}" get "${kind}" "${name}" -o jsonpath='{.spec.replicas}' 2>/dev/null || true)"
    [[ "${current}" =~ ^[0-9]+$ ]] || continue
    if (( current == desired )); then
      continue
    fi
    warn "Restoring optional workload ${namespace}/${kind}/${name} to replicas=${desired} after Longhorn unlock."
    run kubectl -n "${namespace}" scale "${kind}" "${name}" --replicas="${desired}"
  done < "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}"
  mark_checkpoint longhorn_unlock_optional_workloads_restored
}

# Uncordon nodes that are cordoned, Ready, labelled
# node-role.kubernetes.io/worker=true, not in RECOVERY_UNCORDON_DENYLIST and
# free of the node.kubernetes.io/unreachable taint. Records the
# longhorn_unlock_worker_scheduling_restored checkpoint.
restore_recovered_worker_scheduling_after_deadlock() {
  # `unschedulable` is declared local so the read loop does not leak it.
  local rows node unschedulable ready worker taints
  rows="$(kubectl get nodes -o json \
    | jq -r '.items[] | [.metadata.name, (.spec.unschedulable // false), ([.status.conditions[]? | select(.type=="Ready") | .status][0] // "Unknown"), (.metadata.labels["node-role.kubernetes.io/worker"] // ""), ((.spec.taints // []) | map(.key + ":" + .effect) | join(","))] | @tsv' || true)"
  # NOTE: tab is an IFS whitespace character, so `read` collapses empty TSV
  # fields. An empty worker label shifts the taints text into `worker`, which
  # still fails the == "true" check below.
  while IFS=$'\t' read -r node unschedulable ready worker taints; do
    [[ -n "${node}" ]] || continue
    [[ "${unschedulable}" == "true" ]] || continue
    [[ "${ready}" == "True" ]] || continue
    [[ "${worker}" == "true" ]] || continue
    if csv_has_value "${RECOVERY_UNCORDON_DENYLIST}" "${node}"; then
      warn "Leaving recovered worker ${node} cordoned because it is in RECOVERY_UNCORDON_DENYLIST."
      continue
    fi
    if [[ "${taints}" == *"node.kubernetes.io/unreachable:"* ]]; then
      warn "Leaving worker ${node} cordoned because it still has an unreachable taint."
      continue
    fi
    warn "Restoring scheduling on recovered Ready worker ${node}."
    run kubectl uncordon "${node}"
  done <<< "${rows}"
  mark_checkpoint longhorn_unlock_worker_scheduling_restored
}

# Delete pods in phase Failed in every namespace except longhorn-system,
# postgres, vault, gitea and harbor, without waiting for deletion to finish.
delete_failed_nonstorage_pods_for_headroom() {
  local rows namespace name
  rows="$(kubectl get pods -A --field-selector=status.phase=Failed \
    -o jsonpath='{range .items[*]}{.metadata.namespace}{" "}{.metadata.name}{"\n"}{end}' 2>/dev/null || true)"
  while read -r namespace name; do
    [[ -z "${namespace}" || -z "${name}" ]] && continue
    case "${namespace}" in
      longhorn-system|postgres|vault|gitea|harbor)
        continue
        ;;
    esac
    run kubectl -n "${namespace}" delete pod "${name}" --ignore-not-found --wait=false
  done <<< "${rows}"
}

# In the postgres, vault, gitea and harbor namespaces:
#   1. delete Failed/Unknown pods that have an owner reference (ownerless pods
#      are only reported);
#   2. force-delete Failed/Unknown pods that are already terminating, have no
#      finalizers, have an owner, and whose containers are all terminated.
restart_stale_critical_pods_after_longhorn_unlock() {
  require_cmd jq
  local pods namespace name phase owners
  pods="$(kubectl get pods -A -o json \
    | jq -r '.items[]
      | select(.metadata.namespace | test("^(postgres|vault|gitea|harbor)$"))
      | select(.status.phase == "Failed" or .status.phase == "Unknown")
      | [.metadata.namespace, .metadata.name, .status.phase, ((.metadata.ownerReferences // []) | length)]
      | @tsv' || true)"
  while IFS=$'\t' read -r namespace name phase owners; do
    [[ -z "${namespace}" || -z "${name}" ]] && continue
    if [[ "${owners}" == "0" ]]; then
      warn "Skipping stale critical pod without controller owner: ${namespace}/${name} phase=${phase}"
      continue
    fi
    warn "Deleting stale controller-owned critical pod ${namespace}/${name} phase=${phase} so its controller can recreate it."
    run kubectl -n "${namespace}" delete pod "${name}" --ignore-not-found --wait=false
  done <<< "${pods}"
  pods="$(kubectl get pods -A -o json \
    | jq -r '.items[]
      | select(.metadata.namespace | test("^(postgres|vault|gitea|harbor)$"))
      | select(.metadata.deletionTimestamp != null)
      | select(.status.phase == "Failed" or .status.phase == "Unknown")
      | select(((.metadata.finalizers // []) | length) == 0)
      | select(((.metadata.ownerReferences // []) | length) > 0)
      | select(([(.status.containerStatuses[]? | select(.state.terminated != null))] | length) == ((.status.containerStatuses // []) | length))
      | [.metadata.namespace, .metadata.name, .status.phase]
      | @tsv' || true)"
  while IFS=$'\t' read -r namespace name phase; do
    [[ -z "${namespace}" || -z "${name}" ]] && continue
    warn "Force-deleting stale terminating critical pod object ${namespace}/${name} phase=${phase}; containers are already terminated and no finalizers are set."
    run kubectl -n "${namespace}" delete pod "${name}" --ignore-not-found --wait=false --force --grace-period=0
  done <<< "${pods}"
}

# Wait until postgres-service has endpoint addresses and pg_isready succeeds
# inside postgres-0. $1 is the timeout in seconds (default 240).
# Returns 1 on timeout. Logs and returns in dry-run.
wait_for_postgres_dependency_ready() {
  local timeout_seconds="${1:-240}"
  local start now endpoints
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: wait for postgres/postgres-service endpoints and pg_isready"
    return 0
  fi
  start="$(date +%s)"
  while true; do
    endpoints="$(kubectl -n postgres get endpoints postgres-service -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null || true)"
    if [[ -n "${endpoints//[[:space:]]/}" ]] \
      && kubectl -n postgres exec postgres-0 -c postgres -- sh -ceu 'pg_isready -h 127.0.0.1 -p 5432 >/dev/null' >/dev/null 2>&1; then
      log "postgres-dependency=ready endpoints=${endpoints}"
      return 0
    fi
    now="$(date +%s)"
    if (( now - start >= timeout_seconds )); then
      warn "Timed out waiting for Postgres to become ready for Harbor."
      return 1
    fi
    sleep 5
  done
}

# When Harbor is unhealthy: wait for Postgres, delete owned harbor-core and
# harbor-jobservice pods whose core/jobservice container is not ready, is in
# image-pull/crash-loop backoff, or last terminated with Error, then wait for
# both deployments to roll out. Returns 1 if Postgres does not become ready, no
# pod qualifies, or Harbor is still unhealthy afterwards.
restart_harbor_after_postgres_recovery() {
  require_cmd jq
  local pods name
  if harbor_endpoint_is_ready 1; then
    log "harbor-postgres-recovery=not-needed"
    return 0
  fi
  wait_for_postgres_dependency_ready 240 || return 1
  pods="$(kubectl -n harbor get pods -o json \
    | jq -r '.items[]
      | select(.metadata.name | test("^harbor-(core|jobservice)-"))
      | select(((.metadata.ownerReferences // []) | length) > 0)
      | select(([ .status.containerStatuses[]?
          | select(.name == "core" or .name == "jobservice")
          | select((.ready != true) or (((.state.waiting.reason // "") | test("CrashLoopBackOff|ImagePullBackOff|ErrImagePull"))) or ((.lastState.terminated.reason // "") == "Error")) ] | length) > 0)
      | .metadata.name' \
    | sort -u || true)"
  if [[ -z "${pods}" ]]; then
    warn "Harbor registry API is unhealthy, but no controller-owned core/jobservice pod needs restart."
    return 1
  fi
  while IFS= read -r name; do
    [[ -z "${name}" ]] && continue
    warn "Restarting controller-owned Harbor pod ${name} after Postgres recovery."
    run kubectl -n harbor delete pod "${name}" --ignore-not-found --wait=false
  done <<< "${pods}"
  if [[ "${EXECUTE}" -eq 1 ]]; then
    kubectl -n harbor rollout status deployment/harbor-core --timeout=6m || warn "harbor-core did not become Ready after Postgres recovery restart."
    kubectl -n harbor rollout status deployment/harbor-jobservice --timeout=6m || warn "harbor-jobservice did not become Ready after Postgres recovery restart."
    harbor_endpoint_is_ready 0 || return 1
  fi
  mark_checkpoint longhorn_unlock_harbor_postgres_recovered
}

# Force-delete ReplicaSet-owned pods outside longhorn-system that have been
# terminating for at least STALE_TERMINATING_POD_SECONDS, carry no finalizers,
# and have no running or ready containers. Records the
# longhorn_unlock_stale_replicaset_pods_cleared checkpoint.
delete_safe_stale_terminating_replicaset_pods_after_deadlock() {
  require_cmd jq
  local rows namespace name deleted_at deleted_epoch now age
  now="$(date +%s)"
  rows="$(kubectl get pods -A -o json \
    | jq -r '.items[]
      | select(.metadata.namespace != "longhorn-system")
      | select(.metadata.deletionTimestamp != null)
      | select(((.metadata.finalizers // []) | length) == 0)
      | select(((.metadata.ownerReferences // []) | map(select(.kind=="ReplicaSet")) | length) > 0)
      | ([(.status.initContainerStatuses[]?, .status.containerStatuses[]?) | select(.state.running != null)] | length) as $running
      | ([(.status.initContainerStatuses[]?, .status.containerStatuses[]?) | select(.ready == true)] | length) as $ready
      | select($running == 0 and $ready == 0)
      | [.metadata.namespace, .metadata.name, .metadata.deletionTimestamp]
      | @tsv' || true)"
  while IFS=$'\t' read -r namespace name deleted_at; do
    [[ -n "${namespace}" && -n "${name}" && -n "${deleted_at}" ]] || continue
    deleted_epoch="$(date -d "${deleted_at}" +%s 2>/dev/null || true)"
    [[ "${deleted_epoch}" =~ ^[0-9]+$ ]] || continue
    age=$(( now - deleted_epoch ))
    if (( age < STALE_TERMINATING_POD_SECONDS )); then
      continue
    fi
    warn "Force-deleting stale terminating ReplicaSet pod ${namespace}/${name}; no containers are running and no finalizers are set."
    run kubectl -n "${namespace}" delete pod "${name}" --ignore-not-found --wait=false --force --grace-period=0
  done <<< "${rows}"
  mark_checkpoint longhorn_unlock_stale_replicaset_pods_cleared
}

# Once Harbor is healthy, delete ReplicaSet-owned pods outside longhorn-system
# that have a container waiting on an image-pull or container-start failure.
# Returns 1 when Harbor is still unhealthy. The
# longhorn_unlock_image_pull_backoff_restarted checkpoint is recorded only
# when at least one pod matched.
restart_image_pull_backoff_pods_after_harbor_recovery() {
  require_cmd jq
  local pods namespace name
  if ! harbor_endpoint_is_ready 1; then
    warn "Skipping image-pull recovery sweep because Harbor registry API is still unhealthy."
    return 1
  fi
  pods="$(kubectl get pods -A -o json \
    | jq -r '.items[]
      | select(.metadata.namespace != "longhorn-system")
      | select(((.metadata.ownerReferences // []) | map(select(.kind=="ReplicaSet")) | length) > 0)
      | select(([ (.status.containerStatuses[]?, .status.initContainerStatuses[]?) | select(((.state.waiting.reason // "") | test("ImagePullBackOff|ErrImagePull|CreateContainerError|RunContainerError|InvalidImageName"))) ] | length) > 0)
      | [.metadata.namespace, .metadata.name]
      | @tsv' \
    | sort -u || true)"
  if [[ -z "${pods}" ]]; then
    log "image-pull-recovery=not-needed"
    return 0
  fi
  while IFS=$'\t' read -r namespace name; do
    [[ -z "${namespace}" || -z "${name}" ]] && continue
    warn "Restarting controller-owned pod ${namespace}/${name} after Harbor recovery to clear image-pull backoff."
    run kubectl -n "${namespace}" delete pod "${name}" --ignore-not-found --wait=false
  done <<< "${pods}"
  mark_checkpoint longhorn_unlock_image_pull_backoff_restarted
}

# Resume Flux once Harbor is healthy and Gitea has endpoints: run the critical
# thaw, restart helm-controller, optionally apply the flux-system root
# Kustomization, unsuspend critical Kustomizations, reassert the hold on the
# optional set, request reconciliation and stabilize the hold.
# Returns 1 (leaving the reconcilers stopped) when Harbor or Gitea is not ready.
resume_deadlock_automation_after_core_recovery() {
  local gitea_endpoints
  if ! harbor_endpoint_is_ready 1; then
    warn "Keeping Flux reconcilers stopped because Harbor registry API is not healthy."
    return 1
  fi
  gitea_endpoints="$(kubectl -n gitea get endpoints gitea -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null || true)"
  if [[ -z "${gitea_endpoints//[[:space:]]/}" ]]; then
    warn "Keeping Flux reconcilers stopped because Gitea has no ready endpoints."
    return 1
  fi
  prepare_recovery_flux_critical_thaw
  if kubectl -n flux-system get deployment helm-controller >/dev/null 2>&1; then
    run kubectl -n flux-system scale deployment helm-controller --replicas=1
  fi
  if command -v flux >/dev/null 2>&1; then
    if [[ "${RECOVERY_FLUX_APPLY_BOOTSTRAP_KUSTOMIZATION}" == "1" || "${RECOVERY_FLUX_APPLY_BOOTSTRAP_KUSTOMIZATION}" == "true" ]]; then
      patch_kustomization_suspend flux-system false
      run flux reconcile kustomization flux-system -n flux-system --timeout="${RECOVERY_FLUX_ROOT_APPLY_TIMEOUT}" || warn "flux-system Kustomization did not apply the recovery source revision before final suspension."
    fi
  fi
  patch_recovery_critical_flux_suspend false
  if [[ "${RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION}" == "1" || "${RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION}" == "true" ]]; then
    patch_kustomization_suspend flux-system true
  fi
  patch_recovery_optional_flux_suspend true
  annotate_flux_kustomizations "${RECOVERY_FLUX_CRITICAL_KUSTOMIZATIONS}" || true
  stabilize_recovery_flux_suspend || true
  mark_checkpoint longhorn_unlock_automation_resumed
}

# Delete longhorn-manager, longhorn-driver-deployer and longhorn-ui pods that
# have a container waiting in ImagePullBackOff or ErrImagePull.
restart_longhorn_image_pull_backoff_pods() {
  require_cmd jq
  local pods namespace name
  pods="$(kubectl -n longhorn-system get pods -o json \
    | jq -r '.items[]
      | select(([.status.containerStatuses[]?.state.waiting.reason] | map(select(. == "ImagePullBackOff" or . == "ErrImagePull")) | length) > 0)
      | select(.metadata.name | test("^(longhorn-manager-|longhorn-driver-deployer-|longhorn-ui-)"))
      | [.metadata.namespace, .metadata.name]
      | @tsv' || true)"
  while IFS=$'\t' read -r namespace name; do
    [[ -z "${namespace}" || -z "${name}" ]] && continue
    run kubectl -n "${namespace}" delete pod "${name}" --ignore-not-found --wait=false
  done <<< "${pods}"
}

# Print "namespace<TAB>name<TAB>running-container-count" for pods on node $1
# that have been terminating for at least STALE_TERMINATING_POD_SECONDS, have
# no finalizers and still report running containers. Errors are swallowed.
terminating_running_pods_for_node() {
  local node="$1"
  local now
  now="$(date +%s)"
  kubectl get pods -A -o json \
    | jq -r --arg node "${node}" --argjson now "${now}" --argjson min_age "${STALE_TERMINATING_POD_SECONDS}" '
      .items[]
      | select(.spec.nodeName == $node)
      | select(.metadata.deletionTimestamp != null)
      | select(((.metadata.finalizers // []) | length) == 0)
      | (.metadata.deletionTimestamp | fromdateiso8601) as $deleted
      | select(($now - $deleted) >= $min_age)
      | ([(.status.initContainerStatuses[]?, .status.containerStatuses[]?) | select(.state.running != null)] | length) as $running
      | select($running > 0)
      | [.metadata.namespace, .metadata.name, ($running|tostring)]
      | @tsv' 2>/dev/null || true
}

# Print the unique node names that host at least one pod matching the same
# "stale terminating but still running" criteria as
# terminating_running_pods_for_node.
stuck_terminating_runtime_cleanup_nodes() {
  local now
  now="$(date +%s)"
  kubectl get pods -A -o json \
    | jq -r --argjson now "${now}" --argjson min_age "${STALE_TERMINATING_POD_SECONDS}" '
      .items[]
      | select(.spec.nodeName != null)
      | select(.metadata.deletionTimestamp != null)
      | select(((.metadata.finalizers // []) | length) == 0)
      | (.metadata.deletionTimestamp | fromdateiso8601) as $deleted
      | select(($now - $deleted) >= $min_age)
      | select(([(.status.initContainerStatuses[]?, .status.containerStatuses[]?) | select(.state.running != null)] | length) > 0)
      | .spec.nodeName' 2>/dev/null \
    | sort -u
}

# Poll every 5s until node $1 reports Ready=True or $2 seconds have elapsed.
# Returns 1 on timeout. Logs and returns in dry-run.
wait_for_node_ready() {
  local node="$1"
  local timeout_seconds="$2"
  local start now ready
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: wait for node ${node} Ready"
    return 0
  fi
  start="$(date +%s)"
  while true; do
    ready="$(kubectl get node "${node}" -o jsonpath='{range .status.conditions[?(@.type=="Ready")]}{.status}{end}' 2>/dev/null || true)"
    if [[ "${ready}" == "True" ]]; then
      log "node-ready=${node}"
      return 0
    fi
    now="$(date +%s)"
    if (( now - start >= timeout_seconds )); then
      warn "Timed out waiting for node ${node} to return Ready after runtime restart."
      return 1
    fi
    sleep 5
  done
}

# Poll every 5s until node $1 has no stale terminating pods with running
# containers, or $2 seconds have elapsed. On timeout, list the remaining pods
# and return 1. Logs and returns in dry-run.
wait_for_terminating_running_pods_to_clear() {
  local node="$1"
  local timeout_seconds="$2"
  # `line` is declared local so the report loop below does not leak it.
  local start now pods line
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: wait for stuck terminating running pods to clear on ${node}"
    return 0
  fi
  start="$(date +%s)"
  while true; do
    pods="$(terminating_running_pods_for_node "${node}")"
    if [[ -z "${pods}" ]]; then
      log "stuck-terminating-runtime-pods-cleared=${node}"
      return 0
    fi
    now="$(date +%s)"
    if (( now - start >= timeout_seconds )); then
      warn "Stuck terminating pods with running containers remain on ${node}:"
      while IFS= read -r line; do
        [[ -n "${line}" ]] || continue
        warn " ${line}"
      done <<< "${pods}"
      return 1
    fi
    sleep 5
  done
}

# Run host command $2 on node $1 by exec-ing into a Running pod labelled
# app=k3s-agent-restart on that node and entering PID 1's namespaces with
# nsenter. The command is base64-encoded to survive the nested shell quoting.
# Returns 1 when no such pod is found.
run_host_command_via_agent_restart_pod() {
  local node="$1"
  local host_command="$2"
  local pod encoded_command
  pod="$(kubectl -n "${NODE_HELPER_NAMESPACE}" get pods -l app=k3s-agent-restart --field-selector "spec.nodeName=${node},status.phase=Running" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)"
  if [[ -z "${pod}" ]]; then
    return 1
  fi
  encoded_command="$(printf '%s' "${host_command}" | base64 -w0)"
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: k3s-agent-restart exec via ${pod} on ${node}"
    return 0
  fi
  run kubectl -n "${NODE_HELPER_NAMESPACE}" exec "${pod}" -- /bin/sh -ceu "HOST_COMMAND=\$(printf '%s' '${encoded_command}' | base64 -d); nsenter --target 1 --mount --uts --ipc --net --pid /bin/sh -ceu \"\${HOST_COMMAND}\""
}

# Schedule `systemctl restart $2` on node $1 after $3 seconds through a
# transient systemd-run unit. Delivery is tried via the agent-restart pod,
# then the prewarm pod, then the generic helper.
schedule_host_service_restart_via_helper() {
  local node="$1"
  local service_name="$2"
  local delay_seconds="$3"
  local unit_name host_command
  unit_name="ananke-restart-${service_name}-$(date +%s)"
  host_command="/usr/bin/systemd-run --unit ${unit_name} --on-active=${delay_seconds}s /bin/sh -lc '/usr/bin/systemctl restart ${service_name} || /bin/systemctl restart ${service_name}'"
  if run_host_command_via_agent_restart_pod "${node}" "${host_command}"; then
    return 0
  fi
  if run_host_command_via_prewarm_pod "${node}" "${host_command}"; then
    return 0
  fi
  run_host_command_via_helper "${node}" "restart-${node}-${service_name}" 120 "${host_command}"
}

# For up to RECOVERY_NODE_RUNTIME_RESTART_MAX_NODES eligible nodes (Ready,
# worker-labelled, not control-plane, not denylisted) that host stale
# terminating pods with running containers: cordon the node and schedule a
# k3s-agent restart, then wait for the node to return Ready and for the pods
# to clear. Disabled unless RECOVERY_NODE_RUNTIME_RESTART_ENABLED is 1/true.
recover_stuck_terminating_node_runtime_pods_after_deadlock() {
  require_cmd jq
  if [[ "${RECOVERY_NODE_RUNTIME_RESTART_ENABLED}" != "1" && "${RECOVERY_NODE_RUNTIME_RESTART_ENABLED}" != "true" ]]; then
    warn "Skipping node runtime cleanup because RECOVERY_NODE_RUNTIME_RESTART_ENABLED=${RECOVERY_NODE_RUNTIME_RESTART_ENABLED}."
    return 0
  fi
  local nodes node ready worker control_plane restarted max_nodes restarted_nodes
  nodes="$(stuck_terminating_runtime_cleanup_nodes || true)"
  if [[ -z "${nodes}" ]]; then
    log "node-runtime-cleanup=not-needed"
    return 0
  fi
  max_nodes="${RECOVERY_NODE_RUNTIME_RESTART_MAX_NODES}"
  # Fall back to a single node when the limit is not a plain integer.
  [[ "${max_nodes}" =~ ^[0-9]+$ ]] || max_nodes=1
  restarted=0
  restarted_nodes=""
  while IFS= read -r node; do
    [[ -n "${node}" ]] || continue
    if (( restarted >= max_nodes )); then
      warn "Node runtime cleanup limit reached (${max_nodes}); leaving remaining stuck nodes for a later Ananke pass."
      break
    fi
    if csv_has_value "${RECOVERY_NODE_RUNTIME_RESTART_DENYLIST}" "${node}"; then
      warn "Skipping node runtime cleanup on denylisted node ${node}."
      continue
    fi
    ready="$(kubectl get node "${node}" -o jsonpath='{range .status.conditions[?(@.type=="Ready")]}{.status}{end}' 2>/dev/null || true)"
    worker="$(kubectl get node "${node}" -o jsonpath='{.metadata.labels.node-role\.kubernetes\.io/worker}' 2>/dev/null || true)"
    control_plane="$(kubectl get node "${node}" -o jsonpath='{.metadata.labels.node-role\.kubernetes\.io/control-plane}' 2>/dev/null || true)"
    if [[ "${ready}" != "True" || "${worker}" != "true" || -n "${control_plane}" ]]; then
      warn "Skipping node runtime cleanup on ${node}; ready=${ready:-unknown} worker=${worker:-false} control_plane=${control_plane:-false}."
      continue
    fi
    warn "Cordoning ${node} and restarting only k3s-agent to clear stale terminating pods. Longhorn data-plane objects are not modified."
    run kubectl cordon "${node}"
    schedule_host_service_restart_via_helper "${node}" k3s-agent 5 || warn "Failed to schedule k3s-agent restart on ${node}."
    # The node counts against the limit even if scheduling the restart failed.
    restarted=$((restarted + 1))
    restarted_nodes="${restarted_nodes}${node}"$'\n'
  done <<< "${nodes}"
  if (( restarted == 0 )); then
    log "node-runtime-cleanup=no-eligible-nodes"
    return 0
  fi
  sleep 15
  while IFS= read -r node; do
    [[ -n "${node}" ]] || continue
    wait_for_node_ready "${node}" "${RECOVERY_NODE_RUNTIME_RESTART_WAIT_SECONDS}" || true
    wait_for_terminating_running_pods_to_clear "${node}" "${RECOVERY_NODE_RUNTIME_RESTART_WAIT_SECONDS}" || true
  done <<< "${restarted_nodes}"
  mark_checkpoint longhorn_unlock_node_runtime_cleanup
}

wait_for_longhorn_endpoint() {
  local endpoint="$1"
  local timeout_seconds="$2"
  local start now addresses
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: wait for Longhorn endpoint ${endpoint}"
    return 0
  fi
  start="$(date +%s)"
  while true; do
    addresses="$(kubectl -n longhorn-system get endpoints "${endpoint}" -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null || true)"
    if [[ -n "${addresses}" ]]; then
      log "longhorn-endpoint-${endpoint}=ready"
      return 0
    fi
    now="$(date +%s)"
    if (( now - start >= timeout_seconds )); then
      warn "Timed
out waiting for Longhorn endpoint ${endpoint}." return 1 fi sleep 5 done } wait_for_longhorn_control_endpoints() { local rc=0 wait_for_longhorn_endpoint longhorn-admission-webhook 180 || rc=1 wait_for_longhorn_endpoint longhorn-conversion-webhook 180 || rc=1 wait_for_longhorn_endpoint longhorn-backend 180 || rc=1 wait_for_longhorn_endpoint longhorn-recovery-backend 180 || rc=1 return "${rc}" } report_longhorn_unlock_status() { log "Longhorn manager DaemonSet:" kubectl -n longhorn-system get daemonset longhorn-manager \ -o custom-columns=NAME:.metadata.name,DESIRED:.status.desiredNumberScheduled,CURRENT:.status.currentNumberScheduled,READY:.status.numberReady,UPDATED:.status.updatedNumberScheduled,AVAILABLE:.status.numberAvailable || true log "Longhorn manager pods:" kubectl -n longhorn-system get pods -l app=longhorn-manager \ -o custom-columns=NAME:.metadata.name,READY:.status.containerStatuses[*].ready,STATUS:.status.phase,WAIT:.status.containerStatuses[*].state.waiting.reason,NODE:.spec.nodeName --sort-by=.spec.nodeName || true log "Longhorn instance managers:" kubectl -n longhorn-system get instancemanagers.longhorn.io \ -o custom-columns=NAME:.metadata.name,STATE:.status.currentState,NODE:.spec.nodeID,IMAGE:.spec.image,TYPE:.spec.type --sort-by=.spec.nodeID || true log "Longhorn volume summary:" kubectl -n longhorn-system get volumes.longhorn.io -o json \ | jq -r '.items | group_by(.status.state + "/" + (.status.robustness // "none"))[] | [(.[0].status.state + "/" + (.[0].status.robustness // "none")), length] | @tsv' 2>/dev/null \ | sort || true } shutdown_namespace_excluded() { local ns="$1" [[ "${ns}" =~ ${SHUTDOWN_NAMESPACE_EXCLUDES_REGEX} ]] } startup_workload_namespace_excluded() { local ns="$1" [[ "${ns}" =~ ${STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX} ]] } best_effort_scale_down_apps() { local ns_list ns ns_list="$(kubectl get ns -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')" while IFS= read -r ns; do [[ -z "${ns}" ]] && continue if 
shutdown_namespace_excluded "${ns}"; then continue fi run_shell "kubectl -n ${ns} scale deployment --all --replicas=0 || true" run_shell "kubectl -n ${ns} scale statefulset --all --replicas=0 || true" done <<< "${ns_list}" } save_workload_replica_snapshot() { local rows line ns kind name replicas if [[ "${EXECUTE}" -eq 0 ]]; then log "DRY-RUN: save workload replica snapshot to ${REPLICA_SNAPSHOT_FILE}" return 0 fi rows="$( { kubectl get deployment -A -o jsonpath='{range .items[*]}{.metadata.namespace}{"\tdeployment\t"}{.metadata.name}{"\t"}{.spec.replicas}{"\n"}{end}' 2>/dev/null || true kubectl get statefulset -A -o jsonpath='{range .items[*]}{.metadata.namespace}{"\tstatefulset\t"}{.metadata.name}{"\t"}{.spec.replicas}{"\n"}{end}' 2>/dev/null || true } | sed '/^[[:space:]]*$/d' )" mkdir -p "$(dirname "${REPLICA_SNAPSHOT_FILE}")" : > "${REPLICA_SNAPSHOT_FILE}" while IFS=$'\t' read -r ns kind name replicas; do [[ -n "${ns}" && -n "${kind}" && -n "${name}" && -n "${replicas}" ]] || continue shutdown_namespace_excluded "${ns}" && continue [[ "${replicas}" =~ ^[0-9]+$ ]] || continue (( replicas > 0 )) || continue printf '%s\t%s\t%s\t%s\n' "${ns}" "${kind}" "${name}" "${replicas}" >> "${REPLICA_SNAPSHOT_FILE}" done <<< "${rows}" log "replica-snapshot-file=${REPLICA_SNAPSHOT_FILE}" log "replica-snapshot-count=$(replica_snapshot_count)" } replica_snapshot_count() { if [[ -f "${REPLICA_SNAPSHOT_FILE}" ]]; then wc -l < "${REPLICA_SNAPSHOT_FILE}" | tr -d ' ' else printf '0' fi } restore_workload_replica_snapshot() { local ns kind name desired current if [[ "${RECOVERY_PENDING}" -ne 1 ]]; then log "Skipping replica restore because recovery_pending=0." return 0 fi if [[ ! -f "${REPLICA_SNAPSHOT_FILE}" ]]; then warn "Replica snapshot file not found at ${REPLICA_SNAPSHOT_FILE}; skipping replica restore." 
return 0 fi while IFS=$'\t' read -r ns kind name desired; do [[ -n "${ns}" && -n "${kind}" && -n "${name}" && -n "${desired}" ]] || continue [[ "${desired}" =~ ^[0-9]+$ ]] || continue (( desired > 0 )) || continue current="$(kubectl -n "${ns}" get "${kind}" "${name}" -o jsonpath='{.spec.replicas}' 2>/dev/null || true)" [[ -n "${current}" ]] || continue [[ "${current}" =~ ^[0-9]+$ ]] || current=0 if (( current == desired )); then continue fi run kubectl -n "${ns}" scale "${kind}" "${name}" --replicas="${desired}" done < "${REPLICA_SNAPSHOT_FILE}" mark_checkpoint startup_replicas_restored } restore_zero_scaled_helm_workloads() { local rows ns kind name local restored=0 rows="$( { kubectl get deployment -A -o custom-columns=NS:.metadata.namespace,NAME:.metadata.name,REPLICAS:.spec.replicas,HELM:.metadata.annotations.meta\\.helm\\.sh/release-name --no-headers 2>/dev/null \ | awk '$3 ~ /^[0-9]+$/ && $3 == 0 && $4 != "" {printf "%s\tdeployment\t%s\n", $1, $2}' kubectl get statefulset -A -o custom-columns=NS:.metadata.namespace,NAME:.metadata.name,REPLICAS:.spec.replicas,HELM:.metadata.annotations.meta\\.helm\\.sh/release-name --no-headers 2>/dev/null \ | awk '$3 ~ /^[0-9]+$/ && $3 == 0 && $4 != "" {printf "%s\tstatefulset\t%s\n", $1, $2}' } | sed '/^[[:space:]]*$/d' )" while IFS=$'\t' read -r ns kind name; do [[ -n "${ns}" && -n "${kind}" && -n "${name}" ]] || continue startup_workload_namespace_excluded "${ns}" && continue if [[ -n "${STARTUP_IGNORE_WORKLOADS_REGEX}" ]] && [[ "${ns}/${name}" =~ ${STARTUP_IGNORE_WORKLOADS_REGEX} ]]; then continue fi warn "Auto-heal: restoring zero-scaled Helm workload ${ns}/${kind}/${name} to replicas=1." run kubectl -n "${ns}" scale "${kind}" "${name}" --replicas=1 restored=$((restored + 1)) done <<< "${rows}" if (( restored > 0 )); then log "Auto-heal: restored ${restored} zero-scaled Helm workloads." mark_checkpoint startup_zero_scaled_helm_restored else log "Auto-heal: no zero-scaled Helm workloads detected." 
fi } list_unhealthy_workloads() { local rows line ns name desired ready available rows="$(kubectl get deployment -A -o custom-columns=NS:.metadata.namespace,NAME:.metadata.name,DESIRED:.spec.replicas,READY:.status.readyReplicas,AVAILABLE:.status.availableReplicas --no-headers 2>/dev/null || true)" while IFS= read -r line; do [[ -n "${line}" ]] || continue ns="$(awk '{print $1}' <<< "${line}")" name="$(awk '{print $2}' <<< "${line}")" desired="$(awk '{print $3}' <<< "${line}")" ready="$(awk '{print $4}' <<< "${line}")" available="$(awk '{print $5}' <<< "${line}")" startup_workload_namespace_excluded "${ns}" && continue [[ -n "${STARTUP_IGNORE_WORKLOADS_REGEX}" && "${ns}/${name}" =~ ${STARTUP_IGNORE_WORKLOADS_REGEX} ]] && continue [[ "${desired}" =~ ^[0-9]+$ ]] || desired=0 [[ "${ready}" =~ ^[0-9]+$ ]] || ready=0 [[ "${available}" =~ ^[0-9]+$ ]] || available=0 (( desired > 0 )) || continue if (( ready < desired || available < desired )); then printf '%s/deployment/%s|ready=%s available=%s desired=%s\n' "${ns}" "${name}" "${ready}" "${available}" "${desired}" fi done <<< "${rows}" rows="$(kubectl get statefulset -A -o custom-columns=NS:.metadata.namespace,NAME:.metadata.name,DESIRED:.spec.replicas,READY:.status.readyReplicas --no-headers 2>/dev/null || true)" while IFS= read -r line; do [[ -n "${line}" ]] || continue ns="$(awk '{print $1}' <<< "${line}")" name="$(awk '{print $2}' <<< "${line}")" desired="$(awk '{print $3}' <<< "${line}")" ready="$(awk '{print $4}' <<< "${line}")" startup_workload_namespace_excluded "${ns}" && continue [[ -n "${STARTUP_IGNORE_WORKLOADS_REGEX}" && "${ns}/${name}" =~ ${STARTUP_IGNORE_WORKLOADS_REGEX} ]] && continue [[ "${desired}" =~ ^[0-9]+$ ]] || desired=0 [[ "${ready}" =~ ^[0-9]+$ ]] || ready=0 (( desired > 0 )) || continue if (( ready < desired )); then printf '%s/statefulset/%s|ready=%s desired=%s\n' "${ns}" "${name}" "${ready}" "${desired}" fi done <<< "${rows}" } wait_for_startup_workloads_ready() { if [[ "${EXECUTE}" -eq 0 ]]; 
then log "DRY-RUN: skipping startup workload readiness checks" return 0 fi local start now unhealthy start="$(date +%s)" while true; do unhealthy="$(list_unhealthy_workloads || true)" if [[ -z "${unhealthy}" ]]; then log "startup-workloads=all-ready" return 0 fi warn "startup-workloads-not-ready:" while IFS= read -r line; do [[ -n "${line}" ]] || continue warn " ${line}" done <<< "${unhealthy}" now="$(date +%s)" if (( now - start >= STARTUP_WORKLOAD_TIMEOUT_SECONDS )); then die "Timed out waiting for startup workloads Ready after ${STARTUP_WORKLOAD_TIMEOUT_SECONDS}s." fi sleep "${STARTUP_WORKLOAD_POLL_SECONDS}" done } discover_workers_csv() { kubectl get nodes \ -o 'custom-columns=NAME:.metadata.name,CP:.metadata.labels.node-role\.kubernetes\.io/control-plane,MASTER:.metadata.labels.node-role\.kubernetes\.io/master,READY:.status.conditions[?(@.type=="Ready")].status' \ --no-headers \ | awk '$2=="" && $3=="" && $4=="True" {print $1}' \ | paste -sd, - } node_is_ready() { local node="$1" [[ -n "${node}" ]] || return 1 local ready ready="$(kubectl get node "${node}" -o jsonpath='{range .status.conditions[?(@.type=="Ready")]}{.status}{end}' 2>/dev/null || true)" [[ "${ready}" == "True" ]] } select_ready_arm64_worker() { local rows node rows="$(kubectl get nodes -o 'custom-columns=NAME:.metadata.name,ARCH:.metadata.labels.kubernetes\.io/arch,WORKER:.metadata.labels.node-role\.kubernetes\.io/worker,HARDWARE:.metadata.labels.hardware,READY:.status.conditions[?(@.type=="Ready")].status' --no-headers 2>/dev/null || true)" [[ -n "${rows}" ]] || return 1 node="$(printf '%s\n' "${rows}" | awk '$2=="arm64" && $3=="true" && $4=="rpi5" && $5=="True" {print $1; exit}')" if [[ -n "${node}" ]]; then printf '%s' "${node}" return 0 fi node="$(printf '%s\n' "${rows}" | awk '$2=="arm64" && $3=="true" && $4=="rpi4" && $5=="True" {print $1; exit}')" if [[ -n "${node}" ]]; then printf '%s' "${node}" return 0 fi node="$(printf '%s\n' "${rows}" | awk '$2=="arm64" && $3=="true" && $5=="True" 
{print $1; exit}')" if [[ -n "${node}" ]]; then printf '%s' "${node}" return 0 fi return 1 } discover_harbor_pinned_node() { kubectl -n harbor get helmrelease harbor \ -o jsonpath='{range .spec.values..nodeSelector}{.kubernetes\.io/hostname}{"\n"}{end}' 2>/dev/null \ | sed '/^[[:space:]]*$/d' \ | sort -u \ | head -n 1 } ensure_harbor_target_node() { if node_is_ready "${HARBOR_TARGET_NODE}"; then return 0 fi local fallback pinned pinned="$(discover_harbor_pinned_node || true)" if node_is_ready "${pinned}"; then if [[ -n "${HARBOR_TARGET_NODE}" ]]; then warn "Configured harbor target node '${HARBOR_TARGET_NODE}' is not Ready; using live Harbor pin '${pinned}' instead." else log "harbor-target-node discovered from live HelmRelease: ${pinned}" fi HARBOR_TARGET_NODE="${pinned}" return 0 fi fallback="$(select_ready_arm64_worker || true)" [[ -n "${fallback}" ]] || die "No Ready arm64 worker available for Harbor bootstrap target." if [[ -n "${HARBOR_TARGET_NODE}" ]]; then warn "Configured harbor target node '${HARBOR_TARGET_NODE}' is not Ready; using '${fallback}' instead." else log "harbor-target-node auto-selected: ${fallback}" fi HARBOR_TARGET_NODE="${fallback}" } ensure_harbor_host_label() { [[ -n "${HARBOR_TARGET_NODE}" ]] || die "Harbor target node is not set." 
local labeled node labeled="$(kubectl get nodes -l "${HARBOR_HOST_LABEL_KEY}=true" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null || true)" while IFS= read -r node; do [[ -z "${node}" ]] && continue [[ "${node}" == "${HARBOR_TARGET_NODE}" ]] && continue run kubectl label node "${node}" "${HARBOR_HOST_LABEL_KEY}-" done <<< "${labeled}" run kubectl label node "${HARBOR_TARGET_NODE}" "${HARBOR_HOST_LABEL_KEY}=true" --overwrite } as_array_from_csv() { local csv="$1" local out_var="$2" local old_ifs="${IFS}" IFS=',' read -r -a _tmp <<< "${csv}" IFS="${old_ifs}" eval "${out_var}"'=( "${_tmp[@]}" )' } best_effort_drain_workers() { local timeout_seconds="$1" shift || true local workers=("$@") local node for node in "${workers[@]}"; do [[ -z "${node}" ]] && continue run kubectl cordon "${node}" if run_shell "kubectl drain ${node} --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=${timeout_seconds}s"; then continue fi warn "Gentle drain timed out for ${node}; retrying with --force." if run_shell "kubectl drain ${node} --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=${timeout_seconds}s --force"; then continue fi warn "Force drain timed out for ${node}; final attempt with --disable-eviction." 
run_shell "kubectl drain ${node} --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=${timeout_seconds}s --force --disable-eviction || true" done } wait_for_rollout() { local namespace="$1" local kind="$2" local name="$3" local timeout="$4" if [[ "${EXECUTE}" -eq 0 ]]; then log "DRY-RUN: kubectl -n ${namespace} rollout status ${kind}/${name} --timeout=${timeout}" return 0 fi kubectl -n "${namespace}" rollout status "${kind}/${name}" --timeout="${timeout}" } check_ingress_stack() { kubectl get ingressclass traefik >/dev/null wait_for_rollout traefik deployment traefik 5m } check_longhorn_stack() { wait_for_rollout longhorn-system daemonset longhorn-manager 10m wait_for_rollout longhorn-system deployment longhorn-ui 10m } check_vault_stack() { wait_for_rollout vault statefulset vault 10m if [[ "${EXECUTE}" -eq 1 ]]; then kubectl -n vault exec vault-0 -- sh -ceu 'VAULT_ADDR=http://127.0.0.1:8200 vault status >/dev/null' fi } check_postgres_stack() { wait_for_rollout postgres statefulset postgres 10m if [[ "${EXECUTE}" -eq 1 ]]; then kubectl -n postgres exec postgres-0 -c postgres -- sh -ceu 'pg_isready -h 127.0.0.1 -p 5432 >/dev/null' fi } check_gitea_stack() { wait_for_rollout gitea deployment gitea 10m } check_harbor_stack() { wait_for_rollout harbor statefulset harbor-redis 10m wait_for_rollout harbor deployment harbor-core 10m wait_for_rollout harbor deployment harbor-jobservice 10m wait_for_rollout harbor deployment harbor-portal 10m wait_for_rollout harbor deployment harbor-registry 10m } harbor_registry_response_valid() { local code="$1" local headers_file="$2" local body_file="$3" local content_type case "${code}" in 200|401) ;; *) return 1 ;; esac content_type="$(awk 'BEGIN{IGNORECASE=1} /^content-type:/ {print tolower($0); exit}' "${headers_file}" 2>/dev/null || true)" if [[ "${content_type}" == *"text/html"* ]]; then return 1 fi if grep -Eiq '^docker-distribution-api-version:' "${headers_file}" 2>/dev/null; then return 0 fi if [[ 
"${code}" == "401" ]] && grep -Eiq 'unauthorized|authentication required' "${body_file}" 2>/dev/null; then return 0 fi return 1 } harbor_endpoint_is_ready() { local quiet="${1:-0}" if [[ "${EXECUTE}" -eq 0 ]]; then log "DRY-RUN: curl -ksS -o /dev/null -w '%{http_code}' https://registry.bstein.dev/v2/" return 0 fi local headers_file body_file code rc content_type headers_file="$(mktemp)" body_file="$(mktemp)" rc=0 code="$(curl -ksS --max-time "${STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS}" -D "${headers_file}" -o "${body_file}" -w '%{http_code}' https://registry.bstein.dev/v2/ || rc=$?)" content_type="$(awk 'BEGIN{IGNORECASE=1} /^content-type:/ {print tolower($0); exit}' "${headers_file}" 2>/dev/null || true)" if (( rc == 0 )) && harbor_registry_response_valid "${code}" "${headers_file}" "${body_file}"; then [[ "${quiet}" == "1" ]] || log "harbor-endpoint=http-${code} registry-api=true" rm -f "${headers_file}" "${body_file}" return 0 fi [[ "${quiet}" == "1" ]] || warn "Harbor registry API check failed: http=${code:-unknown} content-type=${content_type:-unknown} rc=${rc}" rm -f "${headers_file}" "${body_file}" return 1 } check_harbor_endpoint() { if ! harbor_endpoint_is_ready 0; then die "Harbor endpoint is not serving the registry API." 
fi } wait_for_pod_phase() { local namespace="$1" local pod="$2" local expected_phase="$3" local timeout_seconds="$4" local start now phase start="$(date +%s)" while true; do phase="$(kubectl -n "${namespace}" get pod "${pod}" -o jsonpath='{.status.phase}' 2>/dev/null || true)" if [[ "${phase}" == "${expected_phase}" ]]; then return 0 fi if [[ "${phase}" == "Failed" ]]; then return 1 fi now="$(date +%s)" if (( now - start >= timeout_seconds )); then return 1 fi sleep 2 done } harbor_is_ready() { kubectl -n harbor get deploy harbor-core harbor-jobservice harbor-portal harbor-registry >/dev/null 2>&1 || return 1 harbor_endpoint_is_ready 1 } run_harbor_pull_canary() { local pod="ananke-harbor-canary" local canary_node="${HARBOR_CANARY_NODE}" if ! node_is_ready "${canary_node}"; then ensure_harbor_target_node canary_node="${HARBOR_TARGET_NODE}" if [[ -n "${HARBOR_CANARY_NODE}" ]]; then warn "Configured harbor canary node '${HARBOR_CANARY_NODE}' is not Ready; using '${canary_node}'." fi HARBOR_CANARY_NODE="${canary_node}" fi if [[ "${EXECUTE}" -eq 0 ]]; then log "DRY-RUN: create Harbor pull canary pod with ${HARBOR_CANARY_IMAGE} on ${canary_node}" return 0 fi timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" delete pod "${pod}" --ignore-not-found --wait=false >/dev/null 2>&1 || true cat <&2 || true timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" logs "${pod}" >&2 || true timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" delete pod "${pod}" --ignore-not-found --wait=false >/dev/null 2>&1 || true return 1 fi timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" logs "${pod}" || true timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" delete pod "${pod}" --ignore-not-found --wait=false >/dev/null 2>&1 || true } run_helper_pod() { local node="$1" local purpose="$2" local timeout_seconds="$3" local script_content="$4" local pod="ananke-$(sanitize_name "${purpose}")-$(date +%H%M%S)" local encoded_script encoded_script="$(printf '%s' "${script_content}" | base64 -w0)" if [[ "${EXECUTE}" 
-eq 0 ]]; then log "DRY-RUN: helper pod ${pod} on ${node} for ${purpose}" return 0 fi cat </tmp/ananke-step.sh chmod +x /tmp/ananke-step.sh /tmp/ananke-step.sh POD if ! wait_for_pod_phase "${NODE_HELPER_NAMESPACE}" "${pod}" Succeeded "${timeout_seconds}"; then kubectl -n "${NODE_HELPER_NAMESPACE}" describe pod "${pod}" >&2 || true timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" logs "${pod}" >&2 || true timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" delete pod "${pod}" --ignore-not-found --wait=false >/dev/null 2>&1 || true return 1 fi timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" logs "${pod}" || true timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" delete pod "${pod}" --ignore-not-found --wait=false >/dev/null 2>&1 || true } hostroot_pod_for_node() { local node="$1" kubectl -n "${NODE_HELPER_NAMESPACE}" get pods \ -l app=node-image-sweeper \ --field-selector "spec.nodeName=${node},status.phase=Running" \ -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true } run_hostroot_pod_script() { local node="$1" local purpose="$2" local timeout_seconds="$3" local script_content="$4" local pod encoded_script pod="$(hostroot_pod_for_node "${node}")" [[ -n "${pod}" ]] || return 1 encoded_script="$(printf '%s' "${script_content}" | base64 -w0)" if [[ "${EXECUTE}" -eq 0 ]]; then log "DRY-RUN: hostroot pod ${pod} on ${node} for ${purpose}" return 0 fi timeout "${timeout_seconds}" kubectl -n "${NODE_HELPER_NAMESPACE}" exec "${pod}" -- /bin/sh -ceu "printf '%s' '${encoded_script}' | base64 -d | chroot /host /bin/sh -seu" } run_hostroot_pod_bundle_import() { local node="$1" local timeout_seconds="$2" local images_text="$3" local pod refresh_script verify_script encoded_script pod="$(hostroot_pod_for_node "${node}")" [[ -n "${pod}" ]] || return 1 if [[ "${EXECUTE}" -eq 0 ]]; then log "DRY-RUN: stream ${HARBOR_BUNDLE_FILE} through hostroot pod ${pod} on ${node}" return 0 fi if [[ "${REFRESH_BOOTSTRAP_IMAGE_ALIASES}" == "1" ]]; then refresh_script=$(cat <