diff --git a/infrastructure/longhorn/core/helmrelease.yaml b/infrastructure/longhorn/core/helmrelease.yaml
index 406b76d0..7c1eb2ef 100644
--- a/infrastructure/longhorn/core/helmrelease.yaml
+++ b/infrastructure/longhorn/core/helmrelease.yaml
@@ -37,7 +37,7 @@ spec:
       createSecret: false
       registrySecret: longhorn-registry
     image:
-      pullPolicy: Always
+      pullPolicy: IfNotPresent
       longhorn:
         engine:
           repository: registry.bstein.dev/infra/longhorn-engine
@@ -80,7 +80,7 @@ spec:
           repository: registry.bstein.dev/infra/longhorn-livenessprobe
           tag: v2.16.0
     defaultSettings:
-      systemManagedPodsImagePullPolicy: Always
+      systemManagedPodsImagePullPolicy: if-not-present
       taintToleration: veles.bstein.dev/simulation=true:NoSchedule
     longhornManager:
       tolerations:
diff --git a/scripts/bootstrap/harbor-bootstrap-images.txt b/scripts/bootstrap/harbor-bootstrap-images.txt
index fae7a6f6..414eacb8 100644
--- a/scripts/bootstrap/harbor-bootstrap-images.txt
+++ b/scripts/bootstrap/harbor-bootstrap-images.txt
@@ -1,4 +1,4 @@
-# Harbor cold-start bootstrap images.
+# Harbor and Longhorn cold-start bootstrap images.
 registry.bstein.dev/infra/harbor-core:v2.14.1-arm64
 registry.bstein.dev/infra/harbor-jobservice:v2.14.1-arm64
 registry.bstein.dev/infra/harbor-portal:v2.14.1-arm64
@@ -7,3 +7,18 @@ registry.bstein.dev/infra/harbor-registryctl:v2.14.1-arm64
 registry.bstein.dev/infra/harbor-redis:v2.14.1-arm64
 registry.bstein.dev/infra/harbor-nginx:v2.14.1-arm64
 registry.bstein.dev/infra/harbor-prepare:v2.14.1-arm64
+
+# Longhorn must be able to start before Harbor is fully healthy.
+registry.bstein.dev/infra/longhorn-engine:v1.8.2
+registry.bstein.dev/infra/longhorn-manager:v1.8.2
+registry.bstein.dev/infra/longhorn-ui:v1.8.2
+registry.bstein.dev/infra/longhorn-instance-manager:v1.8.2
+registry.bstein.dev/infra/longhorn-share-manager:v1.8.2
+registry.bstein.dev/infra/longhorn-backing-image-manager:v1.8.2
+registry.bstein.dev/infra/longhorn-support-bundle-kit:v0.0.56
+registry.bstein.dev/infra/longhorn-csi-attacher:v4.9.0
+registry.bstein.dev/infra/longhorn-csi-provisioner:v5.3.0
+registry.bstein.dev/infra/longhorn-csi-node-driver-registrar:v2.14.0
+registry.bstein.dev/infra/longhorn-csi-resizer:v1.13.2
+registry.bstein.dev/infra/longhorn-csi-snapshotter:v8.2.0
+registry.bstein.dev/infra/longhorn-livenessprobe:v2.16.0
diff --git a/scripts/bootstrap/longhorn-unlock-images.txt b/scripts/bootstrap/longhorn-unlock-images.txt
new file mode 100644
index 00000000..7cfe3794
--- /dev/null
+++ b/scripts/bootstrap/longhorn-unlock-images.txt
@@ -0,0 +1,14 @@
+# Longhorn images needed when Harbor is unhealthy during storage recovery.
+registry.bstein.dev/infra/longhorn-engine:v1.8.2
+registry.bstein.dev/infra/longhorn-manager:v1.8.2
+registry.bstein.dev/infra/longhorn-ui:v1.8.2
+registry.bstein.dev/infra/longhorn-instance-manager:v1.8.2
+registry.bstein.dev/infra/longhorn-share-manager:v1.8.2
+registry.bstein.dev/infra/longhorn-backing-image-manager:v1.8.2
+registry.bstein.dev/infra/longhorn-support-bundle-kit:v0.0.56
+registry.bstein.dev/infra/longhorn-csi-attacher:v4.9.0
+registry.bstein.dev/infra/longhorn-csi-provisioner:v5.3.0
+registry.bstein.dev/infra/longhorn-csi-node-driver-registrar:v2.14.0
+registry.bstein.dev/infra/longhorn-csi-resizer:v1.13.2
+registry.bstein.dev/infra/longhorn-csi-snapshotter:v8.2.0
+registry.bstein.dev/infra/longhorn-livenessprobe:v2.16.0
diff --git a/scripts/bootstrap/recovery-config.env b/scripts/bootstrap/recovery-config.env
index a8bae234..ee1dcab7 100644
--- a/scripts/bootstrap/recovery-config.env
+++ b/scripts/bootstrap/recovery-config.env
@@ -4,7 +4,9 @@ EXPECTED_FLUX_URL="ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git"
 SHUTDOWN_MODE="host-poweroff"
 STATE_SUBDIR=".local/share/ananke"
 HARBOR_BUNDLE_BASENAME="harbor-bootstrap-v2.14.1-arm64.tar.zst"
-HARBOR_TARGET_NODE=""
+BOOTSTRAP_BUNDLE_ARCH="arm64"
+RECOVERY_UNCORDON_DENYLIST="titan-18,titan-22,titan-24"
+HARBOR_TARGET_NODE="titan-11"
 HARBOR_CANARY_NODE=""
 HARBOR_HOST_LABEL_KEY="ananke.bstein.dev/harbor-bootstrap"
 HARBOR_CANARY_IMAGE="registry.bstein.dev/bstein/kubectl:1.35.0"
@@ -33,4 +35,4 @@ STARTUP_INCLUDE_INGRESS_CHECKS="1"
 STARTUP_INGRESS_ALLOWED_STATUSES="200,301,302,307,308,401,403,404"
 STARTUP_IGNORE_INGRESS_HOSTS_REGEX=""
 STARTUP_INGRESS_CHECK_TIMEOUT_SECONDS="10"
-STARTUP_SERVICE_CHECKLIST='gitea|https://scm.bstein.dev/api/healthz|200|"status":"pass"||;grafana|https://metrics.bstein.dev/api/health|200|"database":"ok"||;harbor|https://registry.bstein.dev/v2/|200,401|||'
+STARTUP_SERVICE_CHECKLIST='gitea|https://scm.bstein.dev/api/healthz|200|"status":"pass"||;grafana|https://metrics.bstein.dev/api/health|200|"database":"ok"||;harbor|https://registry.bstein.dev/v2/|401|unauthorized|<html|'
diff --git a/scripts/build_harbor_bootstrap_bundle.sh b/scripts/build_harbor_bootstrap_bundle.sh
index ff64e55b..5b590cf2 100755
--- a/scripts/build_harbor_bootstrap_bundle.sh
+++ b/scripts/build_harbor_bootstrap_bundle.sh
@@ -5,6 +5,7 @@ IMAGES_FILE="scripts/bootstrap/harbor-bootstrap-images.txt"
 BUNDLE_FILE="artifacts/harbor-bootstrap-v2.14.1-arm64.tar.zst"
 DOCKER_CONFIG_PATH=""
 PLATFORM="linux/arm64"
+ZSTD_LEVEL="${ZSTD_LEVEL:-19}"
 
 while [[ $# -gt 0 ]]; do
   case "$1" in
@@ -24,9 +25,13 @@ while [[ $# -gt 0 ]]; do
       PLATFORM="${2:?missing platform}"
       shift 2
       ;;
+    --zstd-level)
+      ZSTD_LEVEL="${2:?missing zstd compression level}"
+      shift 2
+      ;;
     -h|--help)
       cat <<USAGE
-Usage: scripts/build_harbor_bootstrap_bundle.sh [--images-file <path>] [--bundle-file <path>] [--docker-config <path>] [--platform <linux/arm64>]
+Usage: scripts/build_harbor_bootstrap_bundle.sh [--images-file <path>] [--bundle-file <path>] [--docker-config <path>] [--platform <linux/arm64>] [--zstd-level <level>]
 USAGE
       exit 0
       ;;
@@ -47,12 +52,54 @@ if [[ ${#IMAGES[@]} -eq 0 ]]; then
   exit 1
 fi
 
+source_image_for_alias() {  # Print the upstream image that a registry.bstein.dev/infra Longhorn alias maps to; any other image is printed unchanged.
+  local image="$1"
+  local tag="${image##*:}"  # Text after the last ':' is reused verbatim as the upstream tag.
+  case "${image}" in
+    registry.bstein.dev/infra/longhorn-engine:*) echo "docker.io/longhornio/longhorn-engine:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-manager:*) echo "docker.io/longhornio/longhorn-manager:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-ui:*) echo "docker.io/longhornio/longhorn-ui:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-instance-manager:*) echo "docker.io/longhornio/longhorn-instance-manager:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-share-manager:*) echo "docker.io/longhornio/longhorn-share-manager:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-backing-image-manager:*) echo "docker.io/longhornio/backing-image-manager:${tag}" ;;  # Target repository name has no "longhorn-" prefix.
+    registry.bstein.dev/infra/longhorn-support-bundle-kit:*) echo "docker.io/longhornio/support-bundle-kit:${tag}" ;;  # Target repository name has no "longhorn-" prefix.
+    registry.bstein.dev/infra/longhorn-csi-attacher:*) echo "registry.k8s.io/sig-storage/csi-attacher:${tag}" ;;  # From here on the aliases map to registry.k8s.io/sig-storage.
+    registry.bstein.dev/infra/longhorn-csi-provisioner:*) echo "registry.k8s.io/sig-storage/csi-provisioner:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-csi-node-driver-registrar:*) echo "registry.k8s.io/sig-storage/csi-node-driver-registrar:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-csi-resizer:*) echo "registry.k8s.io/sig-storage/csi-resizer:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-csi-snapshotter:*) echo "registry.k8s.io/sig-storage/csi-snapshotter:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-livenessprobe:*) echo "registry.k8s.io/sig-storage/livenessprobe:${tag}" ;;
+    *) echo "${image}" ;;  # No alias known: output equals input, which pull_or_tag_image treats as "no fallback".
+  esac
+}
+
+pull_or_tag_image() {  # Make image $1 available locally for PLATFORM: reuse a matching cached copy, else pull it, else pull its upstream source and tag it. Returns non-zero when nothing works.
+  local image="$1"
+  local source_image
+  if [[ "$(docker image inspect --format '{{.Os}}/{{.Architecture}}' "${image}" 2>/dev/null || true)" == "${PLATFORM}" ]]; then  # Trust the cache only when its os/arch equals PLATFORM; a foreign-arch copy would end up in the bundle. Any mismatch (including variant suffixes) falls through to a pull.
+    echo "Using cached ${image}" >&2
+    return 0
+  fi
+  echo "Pulling ${image}" >&2
+  if docker pull --platform "${PLATFORM}" "${image}" >/dev/null; then
+    return 0
+  fi
+  source_image="$(source_image_for_alias "${image}")"  # Registry pull failed: look for an upstream source to alias.
+  if [[ "${source_image}" == "${image}" ]]; then  # Unchanged name means no alias mapping exists.
+    return 1
+  fi
+  echo "Pulling ${source_image} for ${image}" >&2
+  docker pull --platform "${PLATFORM}" "${source_image}" >/dev/null
+  docker tag "${source_image}" "${image}"  # Re-tag so docker save exports the image under the registry alias.
+}
+
 mkdir -p "$(dirname "${BUNDLE_FILE}")"
 for image in "${IMAGES[@]}"; do
-  echo "Pulling ${image}" >&2
-  docker pull --platform "${PLATFORM}" "${image}" >/dev/null
-
+  pull_or_tag_image "${image}"
 done
 
-docker save "${IMAGES[@]}" | zstd -T0 -19 -o "${BUNDLE_FILE}"
+tmp_bundle="${BUNDLE_FILE}.tmp"  # Compress into a sibling temp file so BUNDLE_FILE is only replaced by a finished archive.
+rm -f "${tmp_bundle}"; trap 'rm -f "${tmp_bundle}"' EXIT  # Drop stale temp output now, and again on any exit so a failed save/compress leaves no partial file (no-op after the mv).
+docker save "${IMAGES[@]}" | zstd -T0 -"${ZSTD_LEVEL}" -o "${tmp_bundle}"
+mv "${tmp_bundle}" "${BUNDLE_FILE}"
 echo "Wrote ${BUNDLE_FILE}" >&2
diff --git a/scripts/cluster_power_recovery.sh b/scripts/cluster_power_recovery.sh
index 6a7a9002..ea6319fb 100755
--- a/scripts/cluster_power_recovery.sh
+++ b/scripts/cluster_power_recovery.sh
@@ -16,7 +16,7 @@ fi
 usage() {
   cat <<USAGE
 Usage:
-  scripts/cluster_power_recovery.sh <prepare|status|harbor-seed|shutdown|startup> [options]
+  scripts/cluster_power_recovery.sh <prepare|status|bootstrap-seed|harbor-seed|longhorn-seed|longhorn-unlock|shutdown|startup> [options]
 
 Options:
   --execute                     Actually run commands (default is dry-run)
@@ -30,15 +30,29 @@ Options:
   --skip-drain                  Shutdown: skip worker drain during shutdown
   --skip-local-bootstrap        Startup: skip local bootstrap fallback applies
   --skip-harbor-bootstrap       Startup: skip Harbor recovery bootstrap stage
-  --skip-harbor-seed            Startup: skip Harbor image seed/import stage
+  --skip-harbor-seed            Startup: skip bootstrap image seed/import stage
   --skip-helper-prewarm         Prepare/Shutdown/Startup: skip node-helper prewarm
+  --refresh-bootstrap-image-aliases
+                               Remove bootstrap image aliases before import, to clear poisoned registry pulls
   --min-startup-battery <pct>   Minimum UPS percent required before bootstrap (default: 35)
   --ups-host <name>             UPS identifier for upsc (default: ups@localhost)
   --ups-battery-key <key>       UPS battery key for upsc (default: battery.charge)
   --recovery-state-file <path>  Recovery state file for outage-aware restart logic
   --replica-snapshot-file <path>
                                File used to persist workload replica snapshot across shutdown/startup
-  --harbor-bundle-file <path>   Harbor bootstrap bundle on the control host
+  --bootstrap-images-file <path>
+                               Image list expected inside the bootstrap bundle
+  --harbor-bundle-file <path>   Bootstrap bundle on the control host
+  --longhorn-unlock-bundle-file <path>
+                               Longhorn-only bundle for Harbor-deadlock recovery
+  --longhorn-unlock-images-file <path>
+                               Longhorn-only image list for Harbor-deadlock recovery
+  --longhorn-manager-cache-bundle-file <path>
+                               Single-image Longhorn manager cache repair archive
+  --skip-longhorn-unlock-bundle-seed
+                               Longhorn unlock: skip full Longhorn bundle seed and run surgical repairs only
+  --bootstrap-bundle-arch <arch>
+                               Node architecture expected by the bootstrap bundle (default: ${BOOTSTRAP_BUNDLE_ARCH:-arm64})
   --harbor-target-node <name>   Node that should host Harbor during bootstrap (default: auto)
   --harbor-canary-node <name>   Node used for Harbor pull canary (default: auto)
   --harbor-host-label-key <key> Node label key used to pin Harbor bootstrap workloads (default: ${HARBOR_HOST_LABEL_KEY:-ananke.bstein.dev/harbor-bootstrap})
@@ -64,7 +78,9 @@ Options:
 
 Examples:
   scripts/cluster_power_recovery.sh prepare --execute
+  scripts/cluster_power_recovery.sh bootstrap-seed --execute
   scripts/cluster_power_recovery.sh harbor-seed --execute
+  scripts/cluster_power_recovery.sh longhorn-unlock --execute
   scripts/cluster_power_recovery.sh status
   scripts/cluster_power_recovery.sh shutdown --execute
   scripts/cluster_power_recovery.sh startup --execute --force-flux-branch main
@@ -79,7 +95,7 @@ fi
 shift || true
 
 case "${MODE}" in
-  prepare|status|harbor-seed|shutdown|startup) ;;
+  prepare|status|bootstrap-seed|harbor-seed|longhorn-seed|longhorn-unlock|shutdown|startup) ;;
   *)
     echo "Unknown mode: ${MODE}" >&2
     usage
@@ -139,6 +155,19 @@ STATE_ROOT="${HOME}/${STATE_SUBDIR:-.local/share/ananke}"
 RECOVERY_STATE_FILE="${STATE_ROOT}/cluster_power_recovery.state"
 REPLICA_SNAPSHOT_FILE="${STATE_ROOT}/desired_workload_replicas.tsv"
 HARBOR_BUNDLE_FILE="${STATE_ROOT}/bundles/${HARBOR_BUNDLE_BASENAME:-harbor-bootstrap-v2.14.1-arm64.tar.zst}"
+BOOTSTRAP_IMAGES_FILE="${BOOTSTRAP_IMAGES_FILE:-${BOOTSTRAP_DIR}/harbor-bootstrap-images.txt}"  # Each default below yields to a value already set in the environment.
+LONGHORN_UNLOCK_IMAGES_FILE="${LONGHORN_UNLOCK_IMAGES_FILE:-${BOOTSTRAP_DIR}/longhorn-unlock-images.txt}"
+LONGHORN_UNLOCK_BUNDLE_FILE="${LONGHORN_UNLOCK_BUNDLE_FILE:-${STATE_ROOT}/bundles/longhorn-unlock-v1.8.2-${BOOTSTRAP_BUNDLE_ARCH:-arm64}.tar.zst}"  # NOTE(review): arch is baked in here, before option parsing; --bootstrap-bundle-arch will not change this path unless later code recomputes it — confirm.
+LONGHORN_MANAGER_IMAGE="${LONGHORN_MANAGER_IMAGE:-registry.bstein.dev/infra/longhorn-manager:v1.8.2}"
+LONGHORN_MANAGER_CACHE_BUNDLE_FILE="${LONGHORN_MANAGER_CACHE_BUNDLE_FILE:-${STATE_ROOT}/bundles/longhorn-manager-v1.8.2-${BOOTSTRAP_BUNDLE_ARCH:-arm64}.tar}"  # NOTE(review): same pre-parse arch caveat as LONGHORN_UNLOCK_BUNDLE_FILE.
+LONGHORN_UNLOCK_SSH_KNOWN_HOSTS="${LONGHORN_UNLOCK_SSH_KNOWN_HOSTS:-/tmp/ananke_longhorn_unlock_known_hosts}"  # NOTE(review): fixed, predictable path under /tmp; consider a file under STATE_ROOT if the control host is shared.
+BOOTSTRAP_BUNDLE_ARCH="${BOOTSTRAP_BUNDLE_ARCH:-arm64}"
+RECOVERY_UNCORDON_DENYLIST="${RECOVERY_UNCORDON_DENYLIST:-titan-18,titan-22,titan-24}"  # Comma-separated node names.
+STALE_TERMINATING_POD_SECONDS="${STALE_TERMINATING_POD_SECONDS:-300}"
+RECOVERY_NODE_RUNTIME_RESTART_ENABLED="${RECOVERY_NODE_RUNTIME_RESTART_ENABLED:-1}"
+RECOVERY_NODE_RUNTIME_RESTART_DENYLIST="${RECOVERY_NODE_RUNTIME_RESTART_DENYLIST:-${RECOVERY_UNCORDON_DENYLIST}}"  # Defaults to the uncordon denylist resolved two lines above.
+RECOVERY_NODE_RUNTIME_RESTART_MAX_NODES="${RECOVERY_NODE_RUNTIME_RESTART_MAX_NODES:-3}"
+RECOVERY_NODE_RUNTIME_RESTART_WAIT_SECONDS="${RECOVERY_NODE_RUNTIME_RESTART_WAIT_SECONDS:-300}"
 HARBOR_TARGET_NODE="${HARBOR_TARGET_NODE:-}"
 HARBOR_CANARY_NODE="${HARBOR_CANARY_NODE:-}"
 HARBOR_HOST_LABEL_KEY="${HARBOR_HOST_LABEL_KEY:-ananke.bstein.dev/harbor-bootstrap}"
@@ -148,7 +177,11 @@ NODE_HELPER_NAMESPACE="${NODE_HELPER_NAMESPACE:-maintenance}"
 NODE_HELPER_SERVICE_ACCOUNT="${NODE_HELPER_SERVICE_ACCOUNT:-default}"
 NODE_HELPER_PREWARM_DS="${NODE_HELPER_PREWARM_DS:-ananke-node-helper-prewarm}"
 REGISTRY_PULL_SECRET="${REGISTRY_PULL_SECRET:-harbor-regcred}"
+REFRESH_BOOTSTRAP_IMAGE_ALIASES="${REFRESH_BOOTSTRAP_IMAGE_ALIASES:-0}"
+SKIP_LONGHORN_UNLOCK_BUNDLE_SEED="${SKIP_LONGHORN_UNLOCK_BUNDLE_SEED:-0}"
+LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE="${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE:-${STATE_ROOT}/longhorn_unlock_optional_replicas.tsv}"
 KEEP_PREWARM_DAEMONSET=0
+BOOTSTRAP_IMAGES_SEEDED=0
 
 RECOVERY_PENDING=0
 STARTUP_ATTEMPTED_DURING_OUTAGE=0
@@ -210,6 +243,10 @@ while [[ $# -gt 0 ]]; do
       SKIP_HELPER_PREWARM=1
       shift
       ;;
+    --refresh-bootstrap-image-aliases)
+      REFRESH_BOOTSTRAP_IMAGE_ALIASES=1
+      shift
+      ;;
     --ups-host)
       UPS_HOST="${2:?missing ups host}"
       shift 2
@@ -238,6 +275,30 @@ while [[ $# -gt 0 ]]; do
       HARBOR_BUNDLE_FILE="${2:?missing bundle file path}"
       shift 2
       ;;
+    --longhorn-unlock-bundle-file)
+      LONGHORN_UNLOCK_BUNDLE_FILE="${2:?missing Longhorn unlock bundle file path}"
+      shift 2
+      ;;
+    --bootstrap-images-file)
+      BOOTSTRAP_IMAGES_FILE="${2:?missing bootstrap image list path}"
+      shift 2
+      ;;
+    --longhorn-unlock-images-file)
+      LONGHORN_UNLOCK_IMAGES_FILE="${2:?missing Longhorn unlock image list path}"
+      shift 2
+      ;;
+    --longhorn-manager-cache-bundle-file)
+      LONGHORN_MANAGER_CACHE_BUNDLE_FILE="${2:?missing Longhorn manager cache bundle file path}"
+      shift 2
+      ;;
+    --skip-longhorn-unlock-bundle-seed)
+      SKIP_LONGHORN_UNLOCK_BUNDLE_SEED=1
+      shift
+      ;;
+    --bootstrap-bundle-arch)
+      BOOTSTRAP_BUNDLE_ARCH="${2:?missing bootstrap bundle architecture}"
+      shift 2
+      ;;
     --harbor-target-node)
       HARBOR_TARGET_NODE="${2:?missing harbor target node}"
       shift 2
@@ -615,7 +676,7 @@ default_startup_service_checklist() {
   cat <<'CHECKS'
 gitea|https://scm.bstein.dev/api/healthz|200|"status":"pass"||
 grafana|https://metrics.bstein.dev/api/health|200|"database":"ok"||
-harbor|https://registry.bstein.dev/v2/|200,401|||
+harbor|https://registry.bstein.dev/v2/|401|unauthorized|<html|
 CHECKS
 }
 
@@ -892,6 +953,628 @@ patch_flux_suspend_all() {
   done <<< "${hr_list}"
 }
 
+patch_kustomization_suspend() {  # Merge-patch spec.suspend=$2 onto Flux Kustomization $1 in flux-system; warn and skip when it does not exist.
+  local name="$1"
+  local value="$2"
+  local payload
+  printf -v payload '{"spec":{"suspend":%s}}' "${value}"
+  if ! kubectl -n flux-system get kustomization "${name}" >/dev/null 2>&1; then
+    warn "Flux Kustomization ${name} not found; skipping suspend=${value}."
+  else
+    run kubectl -n flux-system patch kustomization "${name}" --type=merge -p "${payload}"
+  fi
+}
+
+patch_helmrelease_suspend() {  # Merge-patch spec.suspend=$3 onto HelmRelease $2 in namespace $1; warn and skip when it does not exist.
+  local namespace="$1"
+  local name="$2"
+  local value="$3"
+  local payload
+  printf -v payload '{"spec":{"suspend":%s}}' "${value}"
+  if ! kubectl -n "${namespace}" get helmrelease "${name}" >/dev/null 2>&1; then
+    warn "HelmRelease ${namespace}/${name} not found; skipping suspend=${value}."
+  else
+    run kubectl -n "${namespace}" patch helmrelease "${name}" --type=merge -p "${payload}"
+  fi
+}
+
+wait_for_flux_reconciler_pods_stopped() {  # Poll flux-system until no kustomize-controller / helm-controller pods are listed; each app gets up to 90s, then a warning. Always falls through (never fails the caller).
+  local app start now pods
+  if [[ "${EXECUTE}" -eq 0 ]]; then  # Dry-run: only log the intent.
+    log "DRY-RUN: wait for Flux reconcilers to stop"
+    return 0
+  fi
+  for app in kustomize-controller helm-controller; do
+    start="$(date +%s)"
+    while true; do
+      pods="$(kubectl -n flux-system get pods -l "app=${app}" --no-headers 2>/dev/null || true)"  # A kubectl error is treated the same as an empty pod list.
+      if [[ -z "${pods}" ]]; then
+        log "flux-reconciler-stopped=${app}"
+        break
+      fi
+      now="$(date +%s)"
+      if (( now - start >= 90 )); then  # Per-app timeout; warn and move on to the next app.
+        warn "Timed out waiting for ${app} pods to stop."
+        break
+      fi
+      sleep 2
+    done
+  done
+}
+
+freeze_longhorn_deadlock_automation() {  # Scale the two Flux reconciler deployments to 0, wait for their pods to go away, then suspend the listed Kustomizations and the Longhorn HelmRelease.
+  warn "Freezing only the automation that can fight Longhorn emergency recovery."
+  if kubectl -n flux-system get deployment kustomize-controller >/dev/null 2>&1; then  # Only scale deployments that exist.
+    run kubectl -n flux-system scale deployment kustomize-controller --replicas=0
+  fi
+  if kubectl -n flux-system get deployment helm-controller >/dev/null 2>&1; then
+    run kubectl -n flux-system scale deployment helm-controller --replicas=0
+  fi
+  wait_for_flux_reconciler_pods_stopped
+  patch_kustomization_suspend flux-system true  # Suspend flags are patched after the reconcilers have been scaled down.
+  patch_kustomization_suspend helm true
+  patch_kustomization_suspend longhorn true
+  patch_helmrelease_suspend longhorn-system longhorn true
+  mark_checkpoint longhorn_unlock_automation_frozen
+}
+
+ensure_longhorn_cache_first_policy() {  # Patch the live Longhorn HelmRelease values and the longhorn-manager DaemonSet so images are pulled only when absent from the node.
+  local values_patch ds_patch
+  values_patch='{"spec":{"values":{"image":{"pullPolicy":"IfNotPresent"},"defaultSettings":{"systemManagedPodsImagePullPolicy":"if-not-present"}}}}'
+  ds_patch='{"spec":{"template":{"spec":{"containers":[{"name":"longhorn-manager","imagePullPolicy":"IfNotPresent"}]}}}}'
+  if kubectl -n longhorn-system get helmrelease longhorn >/dev/null 2>&1; then  # Skip silently when the HelmRelease does not exist.
+    run kubectl -n longhorn-system patch helmrelease longhorn --type=merge -p "${values_patch}"
+  fi
+  if kubectl -n longhorn-system get daemonset longhorn-manager >/dev/null 2>&1; then
+    run kubectl -n longhorn-system patch daemonset longhorn-manager --type=strategic -p "${ds_patch}"  # Strategic patch: the containers entry is matched by its "name" key.
+  fi
+}
+
+remove_longhorn_manager_prepull_sidecar() {  # Remove every container named pre-pull-share-manager-image from the longhorn-manager DaemonSet pod template, by index, via JSON patch.
+  local indexes index
+  indexes="$(kubectl -n longhorn-system get daemonset longhorn-manager \
+    -o jsonpath='{range .spec.template.spec.containers[*]}{.name}{"\n"}{end}' 2>/dev/null \
+    | nl -v 0 -w 1 -s ' ' \
+    | awk '$2=="pre-pull-share-manager-image" {print $1}' \
+    | sort -rn || true)"  # nl numbers the names from 0 (array index); highest index first so a removal never shifts an index still to be removed.
+  if [[ -z "${indexes}" ]]; then  # Also reached when the DaemonSet lookup itself fails.
+    log "longhorn-manager-prepull-sidecar=absent"
+    return 0
+  fi
+  while IFS= read -r index; do
+    [[ -z "${index}" ]] && continue
+    run kubectl -n longhorn-system patch daemonset longhorn-manager --type=json \
+      -p "[{\"op\":\"remove\",\"path\":\"/spec/template/spec/containers/${index}\"}]"
+  done <<< "${indexes}"
+}
+
+save_longhorn_unlock_optional_replica_snapshot() {  # Ensure the optional-workload replica snapshot file exists; a non-empty existing snapshot is kept as-is, otherwise the file is created/truncated.
+  if [[ "${EXECUTE}" -eq 0 ]]; then  # Dry-run: do not touch the filesystem.
+    log "DRY-RUN: save optional workload snapshot to ${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}"
+    return 0
+  fi
+  if [[ -s "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}" ]]; then  # -s: exists and is non-empty; earlier recorded replica counts are not overwritten.
+    log "optional-workload-snapshot=preserved path=${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}"
+    return 0
+  fi
+  mkdir -p "$(dirname "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}")"
+  : > "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}"  # Create (or truncate) an empty snapshot for rows to be appended to.
+}
+
+scale_optional_workload_for_longhorn_unlock() {  # Args: namespace, kind, name. Record the workload's current replicas in the snapshot (once) and scale it to 0; missing workloads are ignored.
+  local namespace="$1"
+  local kind="$2"
+  local name="$3"
+  local replicas
+  if ! kubectl -n "${namespace}" get "${kind}" "${name}" >/dev/null 2>&1; then
+    return 0
+  fi
+  replicas="$(kubectl -n "${namespace}" get "${kind}" "${name}" -o jsonpath='{.spec.replicas}' 2>/dev/null || true)"
+  [[ -n "${replicas}" ]] || replicas=1  # An empty read-back is treated as 1 replica.
+  if [[ "${EXECUTE}" -eq 1 ]] && ! awk -F '\t' -v ns="${namespace}" -v kind="${kind}" -v name="${name}" '$1==ns && $2==kind && $3==name {found=1} END {exit found ? 0 : 1}' "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}" 2>/dev/null; then  # Append only when no row for this workload exists yet, so the first recorded count wins.
+    printf '%s\t%s\t%s\t%s\n' "${namespace}" "${kind}" "${name}" "${replicas}" >> "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}"
+  fi
+  if [[ "${replicas}" == "0" ]]; then
+    log "optional-workload-already-scaled-down=${namespace}/${kind}/${name}"
+    return 0
+  fi
+  warn "Temporarily scaling optional workload ${namespace}/${kind}/${name} from ${replicas} to 0 for Longhorn recovery headroom."
+  run kubectl -n "${namespace}" scale "${kind}" "${name}" --replicas=0
+}
+
+free_longhorn_instance_manager_headroom() {  # Prepare the replica snapshot, then scale each workload in the embedded list to 0 via scale_optional_workload_for_longhorn_unlock.
+  save_longhorn_unlock_optional_replica_snapshot
+  while read -r namespace kind name; do  # NOTE(review): namespace/kind/name are not declared local here, so they are set in the caller's/global scope.
+    [[ -z "${namespace}" || "${namespace}" == \#* ]] && continue  # Skip blank lines and '#' comment lines in the list.
+    scale_optional_workload_for_longhorn_unlock "${namespace}" "${kind}" "${name}"
+  done <<'WORKLOADS'
+game-stream deployment oauth2-proxy-wolf
+logging deployment oauth2-proxy-logs
+longhorn-system deployment oauth2-proxy-longhorn
+maintenance deployment oauth2-proxy-metis
+maintenance deployment oauth2-proxy-soteria
+openclaw deployment oauth2-proxy-agent
+quality deployment oauth2-proxy-sonarqube
+quality deployment sonarqube-exporter
+sso deployment oauth2-proxy
+bstein-dev-home deployment bstein-dev-home-frontend
+WORKLOADS
+  mark_checkpoint longhorn_unlock_optional_workloads_scaled
+}
+
+restore_longhorn_unlock_optional_workloads() {  # Scale each workload recorded in the snapshot back to its saved replica count when the live count differs. The snapshot file itself is left in place.
+  local namespace kind name desired current
+  if [[ ! -f "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}" ]]; then
+    log "optional-workload-restore=not-needed snapshot=absent"
+    return 0
+  fi
+
+  while IFS=$'\t' read -r namespace kind name desired; do  # Rows are the tab-separated namespace/kind/name/replicas records written at scale-down time.
+    [[ -n "${namespace}" && -n "${kind}" && -n "${name}" && -n "${desired}" ]] || continue
+    [[ "${desired}" =~ ^[0-9]+$ ]] || continue  # Ignore rows whose replica count is not a plain integer.
+    (( desired > 0 )) || continue  # Workloads that were already at 0 are not scaled up.
+    current="$(kubectl -n "${namespace}" get "${kind}" "${name}" -o jsonpath='{.spec.replicas}' 2>/dev/null || true)"
+    [[ "${current}" =~ ^[0-9]+$ ]] || continue  # Skip when the live replica count cannot be read (e.g. workload gone).
+    if (( current == desired )); then
+      continue
+    fi
+    warn "Restoring optional workload ${namespace}/${kind}/${name} to replicas=${desired} after Longhorn unlock."
+    run kubectl -n "${namespace}" scale "${kind}" "${name}" --replicas="${desired}"
+  done < "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}"
+  mark_checkpoint longhorn_unlock_optional_workloads_restored
+}
+
+restore_recovered_worker_scheduling_after_deadlock() {  # Uncordon cordoned, Ready worker nodes unless they are denylisted or still carry an unreachable taint.
+  require_cmd jq
+  local rows node unschedulable ready worker taints
+  rows="$(kubectl get nodes -o json \
+    | jq -r '.items[]
+      | [.metadata.name,
+         (.spec.unschedulable // false),
+         ([.status.conditions[]? | select(.type=="Ready") | .status][0] // "Unknown"),
+         (.metadata.labels["node-role.kubernetes.io/worker"] // ""),
+         ((.spec.taints // []) | map(.key + ":" + .effect) | join(","))]
+      | @tsv' || true)"
+
+  while IFS=$'\t' read -r node unschedulable ready worker taints; do  # Tab is IFS whitespace, so empty columns collapse; only worker/taints can be empty and a shifted taints string never equals "true".
+    [[ -n "${node}" && "${unschedulable}" == "true" ]] || continue  # Only cordoned nodes are candidates.
+    [[ "${ready}" == "True" ]] || continue
+    [[ "${worker}" == "true" ]] || continue  # Requires the worker role label to have the literal value "true".
+    if csv_has_value "${RECOVERY_UNCORDON_DENYLIST}" "${node}"; then
+      warn "Leaving recovered worker ${node} cordoned because it is in RECOVERY_UNCORDON_DENYLIST."
+      continue
+    fi
+    if [[ "${taints}" == *"node.kubernetes.io/unreachable:"* ]]; then
+      warn "Leaving worker ${node} cordoned because it still has an unreachable taint."
+      continue
+    fi
+    warn "Restoring scheduling on recovered Ready worker ${node}."
+    run kubectl uncordon "${node}"
+  done <<< "${rows}"
+  mark_checkpoint longhorn_unlock_worker_scheduling_restored
+}
+
+delete_failed_nonstorage_pods_for_headroom() {  # Delete pods in phase Failed across all namespaces, except in the storage/critical namespaces listed below.
+  local rows namespace name
+  rows="$(kubectl get pods -A --field-selector=status.phase=Failed \
+    -o jsonpath='{range .items[*]}{.metadata.namespace}{" "}{.metadata.name}{"\n"}{end}' 2>/dev/null || true)"
+  while read -r namespace name; do
+    [[ -z "${namespace}" || -z "${name}" ]] && continue
+    case "${namespace}" in
+      longhorn-system|postgres|vault|gitea|harbor)  # These namespaces are never touched by this sweep.
+        continue
+        ;;
+    esac
+    run kubectl -n "${namespace}" delete pod "${name}" --ignore-not-found --wait=false
+  done <<< "${rows}"
+}
+
+restart_stale_critical_pods_after_longhorn_unlock() {  # Two passes over postgres/vault/gitea/harbor pods in phase Failed or Unknown: a normal delete of owned pods, then a force delete of owned pods stuck terminating.
+  require_cmd jq
+  local pods namespace name phase owners
+  pods="$(kubectl get pods -A -o json \
+    | jq -r '.items[]
+      | select(.metadata.namespace | test("^(postgres|vault|gitea|harbor)$"))
+      | select(.status.phase == "Failed" or .status.phase == "Unknown")
+      | [.metadata.namespace, .metadata.name, .status.phase, ((.metadata.ownerReferences // []) | length)] | @tsv' || true)"
+  while IFS=$'\t' read -r namespace name phase owners; do
+    [[ -z "${namespace}" || -z "${name}" ]] && continue
+    if [[ "${owners}" == "0" ]]; then  # No ownerReferences: nothing would recreate the pod, so leave it alone.
+      warn "Skipping stale critical pod without controller owner: ${namespace}/${name} phase=${phase}"
+      continue
+    fi
+    warn "Deleting stale controller-owned critical pod ${namespace}/${name} phase=${phase} so its controller can recreate it."
+    run kubectl -n "${namespace}" delete pod "${name}" --ignore-not-found --wait=false
+  done <<< "${pods}"
+
+  pods="$(kubectl get pods -A -o json \
+    | jq -r '.items[]
+      | select(.metadata.namespace | test("^(postgres|vault|gitea|harbor)$"))
+      | select(.metadata.deletionTimestamp != null)
+      | select(.status.phase == "Failed" or .status.phase == "Unknown")
+      | select(((.metadata.finalizers // []) | length) == 0)
+      | select(((.metadata.ownerReferences // []) | length) > 0)
+      | select(([(.status.containerStatuses[]? | select(.state.terminated != null))] | length) == ((.status.containerStatuses // []) | length))
+      | [.metadata.namespace, .metadata.name, .status.phase] | @tsv' || true)"
+  while IFS=$'\t' read -r namespace name phase; do  # Second pass: pods already marked for deletion, no finalizers, every reported container terminated (vacuously true when no statuses are reported).
+    [[ -z "${namespace}" || -z "${name}" ]] && continue
+    warn "Force-deleting stale terminating critical pod object ${namespace}/${name} phase=${phase}; containers are already terminated and no finalizers are set."
+    run kubectl -n "${namespace}" delete pod "${name}" --ignore-not-found --wait=false --force --grace-period=0
+  done <<< "${pods}"
+}
+
+wait_for_postgres_dependency_ready() {  # Arg 1: timeout in seconds (default 240). Returns 0 once postgres-service has endpoint IPs and pg_isready succeeds inside postgres-0; returns 1 on timeout.
+  local timeout_seconds="${1:-240}"
+  local start now endpoints
+  if [[ "${EXECUTE}" -eq 0 ]]; then  # Dry-run: report success without probing.
+    log "DRY-RUN: wait for postgres/postgres-service endpoints and pg_isready"
+    return 0
+  fi
+
+  start="$(date +%s)"
+  while true; do
+    endpoints="$(kubectl -n postgres get endpoints postgres-service -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null || true)"
+    if [[ -n "${endpoints//[[:space:]]/}" ]] \
+      && kubectl -n postgres exec postgres-0 -c postgres -- sh -ceu 'pg_isready -h 127.0.0.1 -p 5432 >/dev/null' >/dev/null 2>&1; then
+      log "postgres-dependency=ready endpoints=${endpoints}"
+      return 0
+    fi
+
+    now="$(date +%s)"
+    if (( now - start >= timeout_seconds )); then
+      warn "Timed out waiting for Postgres to become ready for Harbor."
+      return 1
+    fi
+    sleep 5  # Poll interval between readiness probes.
+  done
+}
+
+restart_harbor_after_postgres_recovery() {  # When the Harbor endpoint check fails: wait for Postgres, delete unhealthy owned harbor-core/jobservice pods, then (execute mode) wait for rollouts and re-check. Returns 1 if Postgres times out, no pod qualifies, or Harbor stays unhealthy.
+  require_cmd jq
+  local pods name
+
+  if harbor_endpoint_is_ready 1; then  # NOTE(review): harbor_endpoint_is_ready is defined outside this view; the meaning of its 0/1 argument is not visible here.
+    log "harbor-postgres-recovery=not-needed"
+    return 0
+  fi
+
+  wait_for_postgres_dependency_ready 240 || return 1
+
+  pods="$(kubectl -n harbor get pods -o json \
+    | jq -r '.items[]
+      | select(.metadata.name | test("^harbor-(core|jobservice)-"))
+      | select(((.metadata.ownerReferences // []) | length) > 0)
+      | select(([
+          .status.containerStatuses[]?
+          | select(.name == "core" or .name == "jobservice")
+          | select((.ready != true)
+            or (((.state.waiting.reason // "") | test("CrashLoopBackOff|ImagePullBackOff|ErrImagePull")))
+            or ((.lastState.terminated.reason // "") == "Error"))
+        ] | length) > 0)
+      | .metadata.name' \
+    | sort -u || true)"
+
+  if [[ -z "${pods}" ]]; then  # Harbor is unhealthy but no core/jobservice pod matched the filter above.
+    warn "Harbor registry API is unhealthy, but no controller-owned core/jobservice pod needs restart."
+    return 1
+  fi
+
+  while IFS= read -r name; do
+    [[ -z "${name}" ]] && continue
+    warn "Restarting controller-owned Harbor pod ${name} after Postgres recovery."
+    run kubectl -n harbor delete pod "${name}" --ignore-not-found --wait=false
+  done <<< "${pods}"
+
+  if [[ "${EXECUTE}" -eq 1 ]]; then  # Rollout waits and the final health check only happen in execute mode.
+    kubectl -n harbor rollout status deployment/harbor-core --timeout=6m || warn "harbor-core did not become Ready after Postgres recovery restart."
+    kubectl -n harbor rollout status deployment/harbor-jobservice --timeout=6m || warn "harbor-jobservice did not become Ready after Postgres recovery restart."
+    harbor_endpoint_is_ready 0 || return 1
+  fi
+  mark_checkpoint longhorn_unlock_harbor_postgres_recovered
+}
+
+delete_safe_stale_terminating_replicaset_pods_after_deadlock() {  # Force-delete ReplicaSet-owned pods outside longhorn-system that have been terminating for at least STALE_TERMINATING_POD_SECONDS with no finalizers and no running or ready containers.
+  require_cmd jq
+  local rows namespace name deleted_at deleted_epoch now age
+  now="$(date +%s)"  # Single reference time for every age computation below.
+  rows="$(kubectl get pods -A -o json \
+    | jq -r '.items[]
+      | select(.metadata.namespace != "longhorn-system")
+      | select(.metadata.deletionTimestamp != null)
+      | select(((.metadata.finalizers // []) | length) == 0)
+      | select(((.metadata.ownerReferences // []) | map(select(.kind=="ReplicaSet")) | length) > 0)
+      | ([(.status.initContainerStatuses[]?, .status.containerStatuses[]?) | select(.state.running != null)] | length) as $running
+      | ([(.status.initContainerStatuses[]?, .status.containerStatuses[]?) | select(.ready == true)] | length) as $ready
+      | select($running == 0 and $ready == 0)
+      | [.metadata.namespace, .metadata.name, .metadata.deletionTimestamp] | @tsv' || true)"
+
+  while IFS=$'\t' read -r namespace name deleted_at; do
+    [[ -n "${namespace}" && -n "${name}" && -n "${deleted_at}" ]] || continue
+    deleted_epoch="$(date -d "${deleted_at}" +%s 2>/dev/null || true)"  # Relies on date -d parsing the timestamp; unparsable values are skipped by the next check.
+    [[ "${deleted_epoch}" =~ ^[0-9]+$ ]] || continue
+    age=$(( now - deleted_epoch ))
+    if (( age < STALE_TERMINATING_POD_SECONDS )); then  # Too recent: leave the pod to terminate on its own.
+      continue
+    fi
+    warn "Force-deleting stale terminating ReplicaSet pod ${namespace}/${name}; no containers are running and no finalizers are set."
+    run kubectl -n "${namespace}" delete pod "${name}" --ignore-not-found --wait=false --force --grace-period=0
+  done <<< "${rows}"
+  mark_checkpoint longhorn_unlock_stale_replicaset_pods_cleared
+}
+
+# Delete ReplicaSet-owned pods (outside longhorn-system) that are waiting on an
+# image-pull or container-start error.
+# Returns 1 without touching anything while the Harbor registry API check
+# fails; returns 0 when no matching pod exists. The checkpoint is only recorded
+# after the delete sweep has run.
+restart_image_pull_backoff_pods_after_harbor_recovery() {
+  require_cmd jq
+  local pods namespace name
+  if ! harbor_endpoint_is_ready 1; then
+    warn "Skipping image-pull recovery sweep because Harbor registry API is still unhealthy."
+    return 1
+  fi
+
+  # Both regular and init containers are inspected; output is de-duplicated
+  # "namespace<TAB>name" rows. Query failures are swallowed and yield no rows.
+  pods="$(kubectl get pods -A -o json \
+    | jq -r '.items[]
+      | select(.metadata.namespace != "longhorn-system")
+      | select(((.metadata.ownerReferences // []) | map(select(.kind=="ReplicaSet")) | length) > 0)
+      | select(([
+          (.status.containerStatuses[]?, .status.initContainerStatuses[]?)
+          | select(((.state.waiting.reason // "") | test("ImagePullBackOff|ErrImagePull|CreateContainerError|RunContainerError|InvalidImageName")))
+        ] | length) > 0)
+      | [.metadata.namespace, .metadata.name] | @tsv' \
+    | sort -u || true)"
+
+  if [[ -z "${pods}" ]]; then
+    log "image-pull-recovery=not-needed"
+    return 0
+  fi
+
+  while IFS=$'\t' read -r namespace name; do
+    [[ -z "${namespace}" || -z "${name}" ]] && continue
+    warn "Restarting controller-owned pod ${namespace}/${name} after Harbor recovery to clear image-pull backoff."
+    # Plain (non-forced) delete that does not wait for the pod to go away.
+    run kubectl -n "${namespace}" delete pod "${name}" --ignore-not-found --wait=false
+  done <<< "${pods}"
+  mark_checkpoint longhorn_unlock_image_pull_backoff_restarted
+}
+
+# Re-enable Flux automation once the core services answer again.
+# Preconditions (each failure warns and returns 1):
+#   - the Harbor registry API check passes
+#   - the gitea/gitea Endpoints object lists at least one address
+# Afterwards: un-suspend Flux objects, scale the kustomize and helm controllers
+# back to one replica when their deployments exist, trigger a reconcile
+# (best effort) and record the checkpoint.
+resume_deadlock_automation_after_core_recovery() {
+  local gitea_ips flux_controller
+  harbor_endpoint_is_ready 1 || {
+    warn "Keeping Flux reconcilers stopped because Harbor registry API is not healthy."
+    return 1
+  }
+  gitea_ips="$(kubectl -n gitea get endpoints gitea -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null || true)"
+  # Whitespace-only output counts as "no endpoints".
+  gitea_ips="${gitea_ips//[[:space:]]/}"
+  if [[ -z "${gitea_ips}" ]]; then
+    warn "Keeping Flux reconcilers stopped because Gitea has no ready endpoints."
+    return 1
+  fi
+
+  patch_flux_suspend_all false
+  for flux_controller in kustomize-controller helm-controller; do
+    # Only scale controllers that are actually deployed.
+    kubectl -n flux-system get deployment "${flux_controller}" >/dev/null 2>&1 || continue
+    run kubectl -n flux-system scale deployment "${flux_controller}" --replicas=1
+  done
+  trigger_flux_reconcile_all || true
+  mark_checkpoint longhorn_unlock_automation_resumed
+}
+
+# Delete Longhorn control pods (names starting with longhorn-manager-,
+# longhorn-driver-deployer- or longhorn-ui-) whose regular containers are
+# waiting in ImagePullBackOff or ErrImagePull.
+# Only .status.containerStatuses is inspected here; init containers are not.
+restart_longhorn_image_pull_backoff_pods() {
+  require_cmd jq
+  local pods namespace name
+  # A kubectl/jq failure is swallowed and results in an empty pod list.
+  pods="$(kubectl -n longhorn-system get pods -o json \
+    | jq -r '.items[]
+      | select(([.status.containerStatuses[]?.state.waiting.reason] | map(select(. == "ImagePullBackOff" or . == "ErrImagePull")) | length) > 0)
+      | select(.metadata.name | test("^(longhorn-manager-|longhorn-driver-deployer-|longhorn-ui-)"))
+      | [.metadata.namespace, .metadata.name] | @tsv' || true)"
+  while IFS=$'\t' read -r namespace name; do
+    [[ -z "${namespace}" || -z "${name}" ]] && continue
+    run kubectl -n "${namespace}" delete pod "${name}" --ignore-not-found --wait=false
+  done <<< "${pods}"
+}
+
+# Print "namespace<TAB>pod<TAB>running-container-count" for every pod on node $1
+# that has a deletionTimestamp at least STALE_TERMINATING_POD_SECONDS old, has
+# no finalizers, and still reports one or more running containers.
+# jq stderr is discarded and failures are swallowed, so an empty result can
+# mean either "nothing stuck" or "the query failed".
+terminating_running_pods_for_node() {
+  local node="$1"
+  local now
+  now="$(date +%s)"
+  kubectl get pods -A -o json \
+    | jq -r --arg node "${node}" --argjson now "${now}" --argjson min_age "${STALE_TERMINATING_POD_SECONDS}" '
+      .items[]
+      | select(.spec.nodeName == $node)
+      | select(.metadata.deletionTimestamp != null)
+      | select(((.metadata.finalizers // []) | length) == 0)
+      | (.metadata.deletionTimestamp | fromdateiso8601) as $deleted
+      | select(($now - $deleted) >= $min_age)
+      | ([(.status.initContainerStatuses[]?, .status.containerStatuses[]?) | select(.state.running != null)] | length) as $running
+      | select($running > 0)
+      | [.metadata.namespace, .metadata.name, ($running|tostring)] | @tsv' 2>/dev/null || true
+}
+
+# Print the unique names of nodes hosting at least one pod that has a
+# deletionTimestamp at least STALE_TERMINATING_POD_SECONDS old, has no
+# finalizers, and still reports a running init or regular container.
+# jq stderr is discarded; unlike terminating_running_pods_for_node there is no
+# trailing `|| true`, so the pipeline's exit status reaches the caller.
+stuck_terminating_runtime_cleanup_nodes() {
+  local now
+  now="$(date +%s)"
+  kubectl get pods -A -o json \
+    | jq -r --argjson now "${now}" --argjson min_age "${STALE_TERMINATING_POD_SECONDS}" '
+      .items[]
+      | select(.spec.nodeName != null)
+      | select(.metadata.deletionTimestamp != null)
+      | select(((.metadata.finalizers // []) | length) == 0)
+      | (.metadata.deletionTimestamp | fromdateiso8601) as $deleted
+      | select(($now - $deleted) >= $min_age)
+      | select(([(.status.initContainerStatuses[]?, .status.containerStatuses[]?) | select(.state.running != null)] | length) > 0)
+      | .spec.nodeName' 2>/dev/null \
+    | sort -u
+}
+
+# Poll every 5 seconds until node $1 reports Ready=True.
+# Arguments:
+#   $1  node name
+#   $2  timeout in seconds
+# Returns 0 once the node is Ready (or immediately in dry-run mode), 1 after
+# the timeout has elapsed.
+wait_for_node_ready() {
+  local node="$1"
+  local timeout_seconds="$2"
+  local started_at node_status
+  if [[ "${EXECUTE}" -eq 0 ]]; then
+    log "DRY-RUN: wait for node ${node} Ready"
+    return 0
+  fi
+  started_at="$(date +%s)"
+  until
+    # Lookup errors are treated the same as "not Ready yet".
+    node_status="$(kubectl get node "${node}" -o jsonpath='{range .status.conditions[?(@.type=="Ready")]}{.status}{end}' 2>/dev/null || true)"
+    [[ "${node_status}" == "True" ]]
+  do
+    if (( $(date +%s) - started_at >= timeout_seconds )); then
+      warn "Timed out waiting for node ${node} to return Ready after runtime restart."
+      return 1
+    fi
+    sleep 5
+  done
+  log "node-ready=${node}"
+  return 0
+}
+
+# Poll every 5 seconds until terminating_running_pods_for_node reports nothing
+# for node $1.
+# Arguments:
+#   $1  node name
+#   $2  timeout in seconds
+# Returns 0 once the list is empty (or immediately in dry-run mode). On timeout
+# the remaining rows are emitted as warnings and 1 is returned.
+wait_for_terminating_running_pods_to_clear() {
+  local node="$1"
+  local timeout_seconds="$2"
+  # `line` is declared local so the reporting loop below cannot clobber a
+  # variable of the same name in a caller's scope.
+  local start now pods line
+  if [[ "${EXECUTE}" -eq 0 ]]; then
+    log "DRY-RUN: wait for stuck terminating running pods to clear on ${node}"
+    return 0
+  fi
+  start="$(date +%s)"
+  while true; do
+    pods="$(terminating_running_pods_for_node "${node}")"
+    if [[ -z "${pods}" ]]; then
+      log "stuck-terminating-runtime-pods-cleared=${node}"
+      return 0
+    fi
+    now="$(date +%s)"
+    if (( now - start >= timeout_seconds )); then
+      warn "Stuck terminating pods with running containers remain on ${node}:"
+      while IFS= read -r line; do
+        [[ -n "${line}" ]] || continue
+        warn "  ${line}"
+      done <<< "${pods}"
+      return 1
+    fi
+    sleep 5
+  done
+}
+
+# Schedule a delayed `systemctl restart` of a host service through a transient
+# systemd-run unit on the given node.
+# Arguments:
+#   $1  node name
+#   $2  systemd service to restart
+#   $3  delay in seconds before the restart fires
+# The command is first sent through the prewarm pod; if that fails, the
+# dedicated helper (120 second budget) is used and its status is returned.
+schedule_host_service_restart_via_helper() {
+  local node="$1"
+  local service_name="$2"
+  local delay_seconds="$3"
+  local transient_unit restart_cmd
+  # The epoch suffix keeps unit names distinct across invocations.
+  transient_unit="ananke-restart-${service_name}-$(date +%s)"
+  restart_cmd="/usr/bin/systemd-run --unit ${transient_unit} --on-active=${delay_seconds}s /bin/sh -lc '/usr/bin/systemctl restart ${service_name} || /bin/systemctl restart ${service_name}'"
+  run_host_command_via_prewarm_pod "${node}" "${restart_cmd}" && return 0
+  run_host_command_via_helper "${node}" "restart-${node}-${service_name}" 120 "${restart_cmd}"
+}
+
+# Restart k3s-agent on worker nodes that still run containers for pods stuck
+# in termination.
+# Does nothing unless RECOVERY_NODE_RUNTIME_RESTART_ENABLED is "1" or "true".
+# At most RECOVERY_NODE_RUNTIME_RESTART_MAX_NODES nodes are handled per call
+# (falls back to 1 when that value is not a non-negative integer). Nodes on
+# the denylist, nodes that are not Ready, nodes without the worker role label
+# set to "true", and nodes carrying a control-plane role label are skipped.
+# Returns 0 on every visible path; the checkpoint is only recorded when at
+# least one node was processed.
+recover_stuck_terminating_node_runtime_pods_after_deadlock() {
+  require_cmd jq
+  if [[ "${RECOVERY_NODE_RUNTIME_RESTART_ENABLED}" != "1" && "${RECOVERY_NODE_RUNTIME_RESTART_ENABLED}" != "true" ]]; then
+    warn "Skipping node runtime cleanup because RECOVERY_NODE_RUNTIME_RESTART_ENABLED=${RECOVERY_NODE_RUNTIME_RESTART_ENABLED}."
+    return 0
+  fi
+
+  local nodes node ready worker control_plane restarted max_nodes restarted_nodes
+  nodes="$(stuck_terminating_runtime_cleanup_nodes || true)"
+  if [[ -z "${nodes}" ]]; then
+    log "node-runtime-cleanup=not-needed"
+    return 0
+  fi
+
+  max_nodes="${RECOVERY_NODE_RUNTIME_RESTART_MAX_NODES}"
+  [[ "${max_nodes}" =~ ^[0-9]+$ ]] || max_nodes=1
+  restarted=0
+  restarted_nodes=""
+  while IFS= read -r node; do
+    [[ -n "${node}" ]] || continue
+    if (( restarted >= max_nodes )); then
+      warn "Node runtime cleanup limit reached (${max_nodes}); leaving remaining stuck nodes for a later Ananke pass."
+      break
+    fi
+    if csv_has_value "${RECOVERY_NODE_RUNTIME_RESTART_DENYLIST}" "${node}"; then
+      warn "Skipping node runtime cleanup on denylisted node ${node}."
+      continue
+    fi
+    ready="$(kubectl get node "${node}" -o jsonpath='{range .status.conditions[?(@.type=="Ready")]}{.status}{end}' 2>/dev/null || true)"
+    worker="$(kubectl get node "${node}" -o jsonpath='{.metadata.labels.node-role\.kubernetes\.io/worker}' 2>/dev/null || true)"
+    control_plane="$(kubectl get node "${node}" -o jsonpath='{.metadata.labels.node-role\.kubernetes\.io/control-plane}' 2>/dev/null || true)"
+    # Any non-empty control-plane label value disqualifies the node.
+    if [[ "${ready}" != "True" || "${worker}" != "true" || -n "${control_plane}" ]]; then
+      warn "Skipping node runtime cleanup on ${node}; ready=${ready:-unknown} worker=${worker:-false} control_plane=${control_plane:-false}."
+      continue
+    fi
+
+    warn "Cordoning ${node} and restarting only k3s-agent to clear stale terminating pods. Longhorn data-plane objects are not modified."
+    # NOTE(review): no matching `kubectl uncordon` is visible in this function;
+    # confirm the node is uncordoned elsewhere.
+    run kubectl cordon "${node}"
+    # NOTE(review): a failed schedule only warns; the node still counts toward
+    # max_nodes and is still waited on below. Confirm this is intended.
+    schedule_host_service_restart_via_helper "${node}" k3s-agent 5 || warn "Failed to schedule k3s-agent restart on ${node}."
+    restarted=$((restarted + 1))
+    restarted_nodes="${restarted_nodes}${node}"$'\n'
+  done <<< "${nodes}"
+
+  if (( restarted == 0 )); then
+    log "node-runtime-cleanup=no-eligible-nodes"
+    return 0
+  fi
+
+  # Fixed pause before polling; the restart itself was scheduled with a 5s delay.
+  sleep 15
+  while IFS= read -r node; do
+    [[ -n "${node}" ]] || continue
+    # Both waits are best effort: a timeout does not fail this function.
+    wait_for_node_ready "${node}" "${RECOVERY_NODE_RUNTIME_RESTART_WAIT_SECONDS}" || true
+    wait_for_terminating_running_pods_to_clear "${node}" "${RECOVERY_NODE_RUNTIME_RESTART_WAIT_SECONDS}" || true
+  done <<< "${restarted_nodes}"
+  mark_checkpoint longhorn_unlock_node_runtime_cleanup
+}
+
+# Poll every 5 seconds until the named Endpoints object in longhorn-system
+# lists at least one address.
+# Arguments:
+#   $1  Endpoints object name
+#   $2  timeout in seconds
+# Returns 0 when an address is present (or immediately in dry-run mode), 1 on
+# timeout.
+wait_for_longhorn_endpoint() {
+  local endpoint="$1"
+  local timeout_seconds="$2"
+  local started_at endpoint_ips
+  if [[ "${EXECUTE}" -eq 0 ]]; then
+    log "DRY-RUN: wait for Longhorn endpoint ${endpoint}"
+    return 0
+  fi
+  started_at="$(date +%s)"
+  until
+    # A failed lookup is indistinguishable from "no addresses yet".
+    endpoint_ips="$(kubectl -n longhorn-system get endpoints "${endpoint}" -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null || true)"
+    [[ -n "${endpoint_ips}" ]]
+  do
+    if (( $(date +%s) - started_at >= timeout_seconds )); then
+      warn "Timed out waiting for Longhorn endpoint ${endpoint}."
+      return 1
+    fi
+    sleep 5
+  done
+  log "longhorn-endpoint-${endpoint}=ready"
+  return 0
+}
+
+# Wait (up to 180 seconds each) for the four Longhorn control-plane Endpoints.
+# Every endpoint is checked even after an earlier one has timed out; returns 1
+# if any of them failed, otherwise 0.
+wait_for_longhorn_control_endpoints() {
+  local failed=0 svc
+  for svc in \
+    longhorn-admission-webhook \
+    longhorn-conversion-webhook \
+    longhorn-backend \
+    longhorn-recovery-backend; do
+    wait_for_longhorn_endpoint "${svc}" 180 || failed=1
+  done
+  return "${failed}"
+}
+
+# Print a read-only status snapshot of Longhorn: the manager DaemonSet counts,
+# the manager pods per node, the instance managers, and a count of volumes
+# grouped by "state/robustness".
+# Every query is best effort (`|| true`), so a failing section does not abort
+# the report.
+report_longhorn_unlock_status() {
+  log "Longhorn manager DaemonSet:"
+  kubectl -n longhorn-system get daemonset longhorn-manager \
+    -o custom-columns=NAME:.metadata.name,DESIRED:.status.desiredNumberScheduled,CURRENT:.status.currentNumberScheduled,READY:.status.numberReady,UPDATED:.status.updatedNumberScheduled,AVAILABLE:.status.numberAvailable || true
+  log "Longhorn manager pods:"
+  kubectl -n longhorn-system get pods -l app=longhorn-manager \
+    -o custom-columns=NAME:.metadata.name,READY:.status.containerStatuses[*].ready,STATUS:.status.phase,WAIT:.status.containerStatuses[*].state.waiting.reason,NODE:.spec.nodeName --sort-by=.spec.nodeName || true
+  log "Longhorn instance managers:"
+  kubectl -n longhorn-system get instancemanagers.longhorn.io \
+    -o custom-columns=NAME:.metadata.name,STATE:.status.currentState,NODE:.spec.nodeID,IMAGE:.spec.image,TYPE:.spec.type --sort-by=.spec.nodeID || true
+  log "Longhorn volume summary:"
+  # Volumes without a robustness value are grouped under "none".
+  kubectl -n longhorn-system get volumes.longhorn.io -o json \
+    | jq -r '.items | group_by(.status.state + "/" + (.status.robustness // "none"))[] | [(.[0].status.state + "/" + (.[0].status.robustness // "none")), length] | @tsv' 2>/dev/null \
+    | sort || true
+}
+
 shutdown_namespace_excluded() {
   local ns="$1"
   [[ "${ns}" =~ ${SHUTDOWN_NAMESPACE_EXCLUDES_REGEX} ]]
@@ -1105,11 +1788,29 @@ select_ready_arm64_worker() {
   return 1
 }
 
+# Print the first (in sort order) kubernetes.io/hostname value found in any
+# nodeSelector under .spec.values of the live harbor HelmRelease.
+# Prints nothing when the HelmRelease is unreadable or pins no hostname.
+discover_harbor_pinned_node() {
+  kubectl -n harbor get helmrelease harbor \
+    -o jsonpath='{range .spec.values..nodeSelector}{.kubernetes\.io/hostname}{"\n"}{end}' 2>/dev/null \
+    | sed '/^[[:space:]]*$/d' \
+    | sort -u \
+    | head -n 1
+}
+
 ensure_harbor_target_node() {
   if node_is_ready "${HARBOR_TARGET_NODE}"; then
     return 0
   fi
-  local fallback
+  local fallback pinned
+  pinned="$(discover_harbor_pinned_node || true)"
+  if node_is_ready "${pinned}"; then
+    if [[ -n "${HARBOR_TARGET_NODE}" ]]; then
+      warn "Configured harbor target node '${HARBOR_TARGET_NODE}' is not Ready; using live Harbor pin '${pinned}' instead."
+    else
+      log "harbor-target-node discovered from live HelmRelease: ${pinned}"
+    fi
+    HARBOR_TARGET_NODE="${pinned}"
+    return 0
+  fi
   fallback="$(select_ready_arm64_worker || true)"
   [[ -n "${fallback}" ]] || die "No Ready arm64 worker available for Harbor bootstrap target."
   if [[ -n "${HARBOR_TARGET_NODE}" ]]; then
@@ -1209,21 +1910,54 @@ check_harbor_stack() {
   wait_for_rollout harbor deployment harbor-registry 10m
 }
 
-check_harbor_endpoint() {
+# Decide whether a captured HTTP response looks like a real registry API
+# answer rather than, for example, an HTML error page from a proxy.
+# Arguments:
+#   $1  HTTP status code
+#   $2  path to a file holding the response headers
+#   $3  path to a file holding the response body
+# Returns 0 when the status is 200 or 401, the content type is not text/html,
+# and either a Docker-Distribution-Api-Version header is present or (for 401)
+# the body mentions "unauthorized" / "authentication required"; 1 otherwise.
+harbor_registry_response_valid() {
+  local code="$1"
+  local headers_file="$2"
+  local body_file="$3"
+  local content_type
+  case "${code}" in
+    200|401) ;;
+    *) return 1 ;;
+  esac
+  # Match on the lower-cased line instead of relying on IGNORECASE, which only
+  # gawk honours; with mawk/BusyBox awk a "Content-Type:" header would never
+  # match and the text/html rejection below would be skipped.
+  content_type="$(awk 'tolower($0) ~ /^content-type:/ {print tolower($0); exit}' "${headers_file}" 2>/dev/null || true)"
+  if [[ "${content_type}" == *"text/html"* ]]; then
+    return 1
+  fi
+  if grep -Eiq '^docker-distribution-api-version:' "${headers_file}" 2>/dev/null; then
+    return 0
+  fi
+  if [[ "${code}" == "401" ]] && grep -Eiq 'unauthorized|authentication required' "${body_file}" 2>/dev/null; then
+    return 0
+  fi
+  return 1
+}
+
+# Probe https://registry.bstein.dev/v2/ and report whether it answers like a
+# registry API.
+# Arguments:
+#   $1  "1" to suppress the success/failure log line (default "0")
+# Returns 0 when curl succeeds and harbor_registry_response_valid accepts the
+# response, 1 otherwise. In dry-run mode (EXECUTE=0) only logs the command and
+# returns 0. Temporary header/body files are removed on both exit paths.
+harbor_endpoint_is_ready() {
+  local quiet="${1:-0}"
   if [[ "${EXECUTE}" -eq 0 ]]; then
     log "DRY-RUN: curl -ksS -o /dev/null -w '%{http_code}' https://registry.bstein.dev/v2/"
     return 0
   fi
-  local code
-  code="$(curl -ksS -o /dev/null -w '%{http_code}' https://registry.bstein.dev/v2/ || true)"
-  case "${code}" in
-    200|401)
-      log "harbor-endpoint=http-${code}"
-      ;;
-    *)
-      die "Harbor endpoint check failed with HTTP ${code:-unknown}"
-      ;;
-  esac
+  local headers_file body_file code rc content_type
+  headers_file="$(mktemp)"
+  body_file="$(mktemp)"
+  rc=0
+  # Record curl's exit status outside the command substitution: an assignment
+  # made inside $(...) happens in a subshell and would leave rc at 0 forever.
+  code="$(curl -ksS --max-time "${STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS}" -D "${headers_file}" -o "${body_file}" -w '%{http_code}' https://registry.bstein.dev/v2/)" || rc=$?
+  # Lower-case before matching; IGNORECASE is a gawk-only feature.
+  content_type="$(awk 'tolower($0) ~ /^content-type:/ {print tolower($0); exit}' "${headers_file}" 2>/dev/null || true)"
+  if (( rc == 0 )) && harbor_registry_response_valid "${code}" "${headers_file}" "${body_file}"; then
+    [[ "${quiet}" == "1" ]] || log "harbor-endpoint=http-${code} registry-api=true"
+    rm -f "${headers_file}" "${body_file}"
+    return 0
+  fi
+  [[ "${quiet}" == "1" ]] || warn "Harbor registry API check failed: http=${code:-unknown} content-type=${content_type:-unknown} rc=${rc}"
+  rm -f "${headers_file}" "${body_file}"
+  return 1
+}
+
+# Abort via die unless the Harbor registry API check passes (logging enabled).
+check_harbor_endpoint() {
+  harbor_endpoint_is_ready 0 || die "Harbor endpoint is not serving the registry API."
 }
 
 wait_for_pod_phase() {
@@ -1251,9 +1985,7 @@ wait_for_pod_phase() {
 
+# Succeeds only when the four Harbor deployments (core, jobservice, portal,
+# registry) exist and the registry API probe passes. The probe runs in quiet
+# mode, so this check itself emits no log line.
 harbor_is_ready() {
   kubectl -n harbor get deploy harbor-core harbor-jobservice harbor-portal harbor-registry >/dev/null 2>&1 || return 1
-  local code
-  code="$(curl -ksS -o /dev/null -w '%{http_code}' https://registry.bstein.dev/v2/ || true)"
-  [[ "${code}" == "200" || "${code}" == "401" ]]
+  harbor_endpoint_is_ready 1
 }
 
 run_harbor_pull_canary() {
@@ -1355,6 +2087,71 @@ POD
   timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" delete pod "${pod}" --ignore-not-found --wait=false >/dev/null 2>&1 || true
 }
 
+# Print the name of the first Running pod labelled app=node-image-sweeper on
+# node $1 in NODE_HELPER_NAMESPACE. Prints nothing (and still succeeds) when no
+# such pod exists or the lookup fails.
+hostroot_pod_for_node() {
+  local node="$1"
+  kubectl -n "${NODE_HELPER_NAMESPACE}" get pods \
+    -l app=node-image-sweeper \
+    --field-selector "spec.nodeName=${node},status.phase=Running" \
+    -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true
+}
+
+# Run a shell script on a node's host filesystem by exec-ing into that node's
+# hostroot pod and piping the script into `chroot /host /bin/sh`.
+# Arguments:
+#   $1  node name
+#   $2  purpose label (only used in the dry-run log line)
+#   $3  timeout in seconds for the kubectl exec
+#   $4  script text
+# Returns 1 when the node has no hostroot pod, 0 in dry-run mode, otherwise the
+# status of the timed kubectl exec.
+run_hostroot_pod_script() {
+  local node="$1"
+  local purpose="$2"
+  local timeout_seconds="$3"
+  local script_content="$4"
+  local helper_pod payload_b64 remote_cmd
+  helper_pod="$(hostroot_pod_for_node "${node}")"
+  if [[ -z "${helper_pod}" ]]; then
+    return 1
+  fi
+  # The script travels base64-encoded so its own quoting survives the exec.
+  payload_b64="$(printf '%s' "${script_content}" | base64 -w0)"
+  if [[ "${EXECUTE}" -eq 0 ]]; then
+    log "DRY-RUN: hostroot pod ${helper_pod} on ${node} for ${purpose}"
+    return 0
+  fi
+  remote_cmd="printf '%s' '${payload_b64}' | base64 -d | chroot /host /bin/sh -seu"
+  timeout "${timeout_seconds}" kubectl -n "${NODE_HELPER_NAMESPACE}" exec "${helper_pod}" -- /bin/sh -ceu "${remote_cmd}"
+}
+
+# Import the zstd-compressed bootstrap bundle (HARBOR_BUNDLE_FILE) into the
+# node's k3s containerd by streaming it through that node's hostroot pod.
+# Arguments:
+#   $1  node name
+#   $2  timeout in seconds for the streaming import
+#   $3  newline-separated list of image references expected in the bundle
+# Returns 1 when the node has no hostroot pod and 0 in dry-run mode. Otherwise
+# the return value is the status of the final verification exec.
+run_hostroot_pod_bundle_import() {
+  local node="$1"
+  local timeout_seconds="$2"
+  local images_text="$3"
+  local pod refresh_script verify_script encoded_script
+  pod="$(hostroot_pod_for_node "${node}")"
+  [[ -n "${pod}" ]] || return 1
+  if [[ "${EXECUTE}" -eq 0 ]]; then
+    log "DRY-RUN: stream ${HARBOR_BUNDLE_FILE} through hostroot pod ${pod} on ${node}"
+    return 0
+  fi
+  # Optional pre-step: drop the listed image references first so the import
+  # below re-creates them. Removal errors are ignored by the generated script.
+  if [[ "${REFRESH_BOOTSTRAP_IMAGE_ALIASES}" == "1" ]]; then
+    refresh_script=$(cat <<SCRIPT
+set -eu
+while IFS= read -r image; do
+  [ -z "\${image}" ] && continue
+  /usr/local/bin/k3s ctr images rm "\${image}" >/dev/null 2>&1 || true
+done <<'IMAGES'
+${images_text}
+IMAGES
+SCRIPT
+)
+    encoded_script="$(printf '%s' "${refresh_script}" | base64 -w0)"
+    timeout 120 kubectl -n "${NODE_HELPER_NAMESPACE}" exec "${pod}" -- /bin/sh -ceu "printf '%s' '${encoded_script}' | base64 -d | chroot /host /bin/sh -seu"
+  fi
+  # The bundle is fed on stdin from the control host; decompression and the
+  # ctr import both run inside the host chroot.
+  timeout "${timeout_seconds}" kubectl -n "${NODE_HELPER_NAMESPACE}" exec -i "${pod}" -- \
+    chroot /host /bin/sh -ceu '/usr/bin/zstd -dc | /usr/local/bin/k3s ctr images import -' < "${HARBOR_BUNDLE_FILE}"
+  # Verification: the generated script fails on the first listed image that
+  # `ctr images ls` does not report with an exact name match.
+  verify_script=$(cat <<SCRIPT
+set -eu
+while IFS= read -r image; do
+  [ -z "\${image}" ] && continue
+  /usr/local/bin/k3s ctr images ls | awk '{print \$1}' | grep -Fx "\${image}" >/dev/null
+done <<'IMAGES'
+${images_text}
+IMAGES
+SCRIPT
+)
+  encoded_script="$(printf '%s' "${verify_script}" | base64 -w0)"
+  timeout 120 kubectl -n "${NODE_HELPER_NAMESPACE}" exec "${pod}" -- /bin/sh -ceu "printf '%s' '${encoded_script}' | base64 -d | chroot /host /bin/sh -seu"
+}
+
 run_host_command_via_helper() {
   local node="$1"
   local purpose="$2"
@@ -1454,7 +2251,7 @@ ${node_affinity_block}
       containers:
         - name: helper
           image: ${NODE_HELPER_IMAGE}
-          imagePullPolicy: Always
+          imagePullPolicy: IfNotPresent
           command: ["/bin/sh", "-ceu", "sleep 300"]
 DS
   local i desired ready
@@ -1489,7 +2286,7 @@ cleanup_prewarm_daemonset() {
 }
 
 start_bundle_server() {
-  [[ -f "${HARBOR_BUNDLE_FILE}" ]] || die "Harbor bundle not found at ${HARBOR_BUNDLE_FILE}"
+  [[ -f "${HARBOR_BUNDLE_FILE}" ]] || die "Bootstrap bundle not found at ${HARBOR_BUNDLE_FILE}"
   require_cmd python3
   local bundle_dir bundle_name
   bundle_dir="$(dirname "${HARBOR_BUNDLE_FILE}")"
@@ -1522,21 +2319,224 @@ stop_bundle_server() {
 trap stop_bundle_server EXIT
 
 control_host_ip() {
-  hostname -I | awk '{print $1}'
+  local ip_addr
+  if command -v hostname >/dev/null 2>&1; then
+    ip_addr="$(hostname -I 2>/dev/null | awk '{print $1}')"
+    if [[ -n "${ip_addr}" ]]; then
+      printf '%s\n' "${ip_addr}"
+      return 0
+    fi
+  fi
+  if command -v ip >/dev/null 2>&1; then
+    ip_addr="$(ip -4 route get 1.1.1.1 2>/dev/null | awk '{for (i=1; i<=NF; i++) if ($i=="src") {print $(i+1); exit}}')"
+    if [[ -n "${ip_addr}" ]]; then
+      printf '%s\n' "${ip_addr}"
+      return 0
+    fi
+  fi
+  die "Unable to determine control host IP; install hostname or iproute2."
 }
 
-seed_harbor_images() {
-  local images_text control_ip bundle_name script_content seed_rc=0
-  [[ -f "${HARBOR_BUNDLE_FILE}" ]] || die "Harbor bundle not found at ${HARBOR_BUNDLE_FILE}"
+# Print the bootstrap image list (BOOTSTRAP_IMAGES_FILE) with comment lines and
+# blank lines removed. Calls die when the file does not exist.
+bootstrap_images_text() {
+  if [[ ! -f "${BOOTSTRAP_IMAGES_FILE}" ]]; then
+    die "Bootstrap image list not found at ${BOOTSTRAP_IMAGES_FILE}"
+  fi
+  sed -e '/^[[:space:]]*#/d' -e '/^[[:space:]]*$/d' "${BOOTSTRAP_IMAGES_FILE}"
+}
+
+# Print the Longhorn unlock image list (LONGHORN_UNLOCK_IMAGES_FILE) with
+# comment lines and blank lines removed. Calls die when the file is missing.
+longhorn_unlock_images_text() {
+  if [[ ! -f "${LONGHORN_UNLOCK_IMAGES_FILE}" ]]; then
+    die "Longhorn unlock image list not found at ${LONGHORN_UNLOCK_IMAGES_FILE}"
+  fi
+  sed -e '/^[[:space:]]*#/d' -e '/^[[:space:]]*$/d' "${LONGHORN_UNLOCK_IMAGES_FILE}"
+}
+
+# Map a Kubernetes node name to the host name used for SSH/scp.
+# Every node maps to itself except titan-23, which is reached as "oceanus".
+ssh_host_for_node() {
+  local node="$1"
+  local ssh_host="${node}"
+  if [[ "${node}" == "titan-23" ]]; then
+    ssh_host="oceanus"
+  fi
+  printf '%s\n' "${ssh_host}"
+}
+
+# Emit the ssh/scp option words used by the recovery paths, one word per line,
+# so callers can load them into an array with `mapfile -t`.
+# The known-hosts file comes from LONGHORN_UNLOCK_SSH_KNOWN_HOSTS.
+ssh_recovery_opts() {
+  local -a opts=(
+    -o BatchMode=yes
+    -o ConnectTimeout=10
+    -o StrictHostKeyChecking=accept-new
+    -o "UserKnownHostsFile=${LONGHORN_UNLOCK_SSH_KNOWN_HOSTS}"
+  )
+  printf '%s\n' "${opts[@]}"
+}
+
+# Stage a zstd-compressed image bundle on a node over scp, then import it into
+# the node's k3s containerd (namespace k8s.io) via the node's hostroot pod.
+# Arguments:
+#   $1  node name
+#   $2  local path of the bundle file
+#   $3  newline-separated list of image references expected in the bundle
+# Calls die when the bundle file is missing; returns 0 in dry-run mode and 1
+# when the hostroot import fails.
+# NOTE(review): the staged copy under /tmp on the host is not removed by any
+# code visible here - confirm whether cleanup happens elsewhere.
+run_ssh_longhorn_bundle_import() {
+  local node="$1"
+  local bundle_file="$2"
+  local images_text="$3"
+  local host remote_bundle host_script
+  local -a ssh_opts
+  [[ -f "${bundle_file}" ]] || die "Longhorn unlock bundle not found at ${bundle_file}"
+  host="$(ssh_host_for_node "${node}")"
+  remote_bundle="/tmp/$(basename "${bundle_file}")"
+  if [[ "${EXECUTE}" -eq 0 ]]; then
+    log "DRY-RUN: scp ${bundle_file} to ${host}:${remote_bundle} and import into k3s containerd"
+    return 0
+  fi
+  mapfile -t ssh_opts < <(ssh_recovery_opts)
+  log "ssh-image-seed-node=${node} host=${host} bundle=$(basename "${bundle_file}")"
+  scp "${ssh_opts[@]}" "${bundle_file}" "${host}:${remote_bundle}"
+  # Generated host script, in order: check the staged bundle is non-empty,
+  # remove the listed references (errors ignored), import the bundle for
+  # linux/BOOTSTRAP_BUNDLE_ARCH, re-tag each reference from a digest entry of
+  # the same repository when one exists, and require each reference to be listed.
+  host_script=$(cat <<SCRIPT
+set -eu
+bundle='${remote_bundle}'
+if [ ! -s "\${bundle}" ]; then
+  echo "bundle missing or empty: \${bundle}" >&2
+  exit 1
+fi
+while IFS= read -r image; do
+  [ -z "\${image}" ] && continue
+  /usr/bin/timeout 60 /usr/local/bin/k3s crictl rmi "\${image}" >/dev/null 2>&1 || true
+  /usr/bin/timeout 60 /usr/local/bin/k3s ctr -n k8s.io images rm "\${image}" >/dev/null 2>&1 || true
+done <<'IMAGES'
+${images_text}
+IMAGES
+/usr/bin/zstd -dc "\${bundle}" | /usr/bin/timeout 1800 /usr/local/bin/k3s ctr -n k8s.io images import --platform linux/${BOOTSTRAP_BUNDLE_ARCH} -
+while IFS= read -r image; do
+  [ -z "\${image}" ] && continue
+  repo="\${image%:*}"
+  digest_ref="\$(/usr/bin/timeout 60 /usr/local/bin/k3s ctr -n k8s.io images ls -q | grep -E "^\${repo}@sha256:" | head -n 1 || true)"
+  if [ -n "\${digest_ref}" ]; then
+    /usr/bin/timeout 60 /usr/local/bin/k3s ctr -n k8s.io images tag --force "\${digest_ref}" "\${image}" >/dev/null 2>&1 || true
+  fi
+  /usr/bin/timeout 60 /usr/local/bin/k3s ctr -n k8s.io images ls -q | grep -Fx "\${image}" >/dev/null
+done <<'IMAGES'
+${images_text}
+IMAGES
+SCRIPT
+)
+  # SSH is only used for staging; the import itself runs through the hostroot pod.
+  if ! run_hostroot_pod_script "${node}" "longhorn-unlock-import-${node}" 1800 "${host_script}"; then
+    warn "Hostroot import failed on ${node}; SSH staging succeeded but no sudo-capable remote import was attempted."
+    return 1
+  fi
+}
+
+# Print the unique node names where a longhorn-manager pod (label
+# app=longhorn-manager) uses exactly LONGHORN_MANAGER_IMAGE and has a regular
+# container waiting in ImagePullBackOff or ErrImagePull.
+# jq stderr is discarded; the pipeline status is left for the caller to handle.
+longhorn_manager_image_pull_nodes() {
+  kubectl -n longhorn-system get pods -l app=longhorn-manager -o json \
+    | jq -r --arg image "${LONGHORN_MANAGER_IMAGE}" '.items[]
+      | select(.spec.nodeName != null)
+      | select([.status.containerStatuses[]?.state.waiting.reason]
+          | map(select(. == "ImagePullBackOff" or . == "ErrImagePull")) | length > 0)
+      | select([.spec.containers[]?.image] | index($image))
+      | .spec.nodeName' 2>/dev/null \
+    | sort -u
+}
+
+# Re-seed LONGHORN_MANAGER_IMAGE in one node's k3s containerd from
+# LONGHORN_MANAGER_CACHE_BUNDLE_FILE.
+# Arguments:
+#   $1  node name
+# The bundle is copied to /tmp on the host with scp and then imported through
+# the node's hostroot pod (900 second budget). Calls die when the bundle file
+# is missing; returns 0 in dry-run mode, otherwise the hostroot script status.
+# Unlike the unlock bundle import, the bundle is passed to `ctr images import`
+# directly, without a zstd decompression step.
+repair_longhorn_manager_cache_node() {
+  local node="$1"
+  local host remote_bundle host_script
+  local -a ssh_opts
+  [[ -f "${LONGHORN_MANAGER_CACHE_BUNDLE_FILE}" ]] || die "Longhorn manager cache bundle missing at ${LONGHORN_MANAGER_CACHE_BUNDLE_FILE}."
+  host="$(ssh_host_for_node "${node}")"
+  remote_bundle="/tmp/$(basename "${LONGHORN_MANAGER_CACHE_BUNDLE_FILE}")"
+  if [[ "${EXECUTE}" -eq 0 ]]; then
+    log "DRY-RUN: repair ${LONGHORN_MANAGER_IMAGE} cache on ${node} using ${LONGHORN_MANAGER_CACHE_BUNDLE_FILE}"
+    return 0
+  fi
+  mapfile -t ssh_opts < <(ssh_recovery_opts)
+  log "longhorn-manager-cache-repair-node=${node} host=${host}"
+  scp "${ssh_opts[@]}" "${LONGHORN_MANAGER_CACHE_BUNDLE_FILE}" "${host}:${remote_bundle}"
+  # Generated host script: drop the existing image reference (errors ignored),
+  # import the bundle, re-tag from a digest entry of the same repository when
+  # one exists, warn (without failing) if CRI cannot inspect the image, and
+  # finally require the reference to appear in `ctr images ls`.
+  host_script=$(cat <<SCRIPT
+set -eu
+image='${LONGHORN_MANAGER_IMAGE}'
+bundle='${remote_bundle}'
+if [ ! -s "\${bundle}" ]; then
+  echo "manager cache bundle missing or empty: \${bundle}" >&2
+  exit 1
+fi
+/usr/bin/timeout 60 /usr/local/bin/k3s crictl rmi "\${image}" >/dev/null 2>&1 || true
+/usr/bin/timeout 60 /usr/local/bin/k3s ctr -n k8s.io images rm "\${image}" >/dev/null 2>&1 || true
+/usr/bin/timeout 600 /usr/local/bin/k3s ctr -n k8s.io images import --platform linux/${BOOTSTRAP_BUNDLE_ARCH} "\${bundle}"
+repo="\${image%:*}"
+digest_ref="\$(/usr/bin/timeout 60 /usr/local/bin/k3s ctr -n k8s.io images ls -q | grep -E "^\${repo}@sha256:" | head -n 1 || true)"
+if [ -n "\${digest_ref}" ]; then
+  /usr/bin/timeout 60 /usr/local/bin/k3s ctr -n k8s.io images tag --force "\${digest_ref}" "\${image}" >/dev/null 2>&1 || true
+fi
+if ! /usr/bin/timeout 60 /usr/local/bin/k3s crictl inspecti "\${image}" >/dev/null 2>&1; then
+  echo "warning: CRI inspect did not see \${image}; kubelet will be verified by pod state" >&2
+fi
+/usr/bin/timeout 60 /usr/local/bin/k3s ctr -n k8s.io images ls -q | grep -Fx "\${image}" >/dev/null
+SCRIPT
+)
+  run_hostroot_pod_script "${node}" "longhorn-manager-cache-repair-${node}" 900 "${host_script}"
+}
+
+# Repair the longhorn-manager image cache on every node where the manager pod
+# is stuck pulling its image.
+# Returns 0 when no node is affected, 1 when the cache bundle file is missing,
+# otherwise the status of the last failing per-node repair (0 if all passed).
+# A failure on one node does not stop the remaining nodes from being tried.
+repair_longhorn_manager_cache_deadlock() {
+  local affected_nodes target status=0
+  affected_nodes="$(longhorn_manager_image_pull_nodes || true)"
+  [[ -n "${affected_nodes}" ]] || {
+    log "longhorn-manager-cache-repair=not-needed"
+    return 0
+  }
+  [[ -f "${LONGHORN_MANAGER_CACHE_BUNDLE_FILE}" ]] || {
+    warn "Longhorn manager cache bundle not found at ${LONGHORN_MANAGER_CACHE_BUNDLE_FILE}; skipping surgical manager cache repair."
+    return 1
+  }
+  while IFS= read -r target; do
+    if [[ -n "${target}" ]]; then
+      repair_longhorn_manager_cache_node "${target}" || status=$?
+    fi
+  done <<< "${affected_nodes}"
+  return "${status}"
+}
+
+# Import the Longhorn unlock bundle on every Ready Longhorn node of the bundle
+# architecture, one node at a time.
+# Calls die when the bundle file is missing, the image list is empty, or no
+# node qualifies. Stops at the first node whose import fails and returns that
+# failure status; returns 0 when every node succeeded.
+seed_longhorn_unlock_images_ssh() {
+  local images_text seed_nodes target status=0
+  if [[ ! -f "${LONGHORN_UNLOCK_BUNDLE_FILE}" ]]; then
+    die "Longhorn unlock bundle missing at ${LONGHORN_UNLOCK_BUNDLE_FILE}."
+  fi
+  images_text="$(longhorn_unlock_images_text)"
+  if [[ -z "${images_text}" ]]; then
+    die "No Longhorn unlock images listed in ${LONGHORN_UNLOCK_IMAGES_FILE}"
+  fi
+  seed_nodes="$(list_ready_longhorn_seed_nodes)"
+  if [[ -z "${seed_nodes}" ]]; then
+    die "No Ready Longhorn nodes match architecture ${BOOTSTRAP_BUNDLE_ARCH}."
+  fi
+  while IFS= read -r target; do
+    [[ -n "${target}" ]] || continue
+    run_ssh_longhorn_bundle_import "${target}" "${LONGHORN_UNLOCK_BUNDLE_FILE}" "${images_text}" || {
+      status=$?
+      warn "SSH image import failed on ${target}."
+      break
+    }
+  done <<< "${seed_nodes}"
+  return "${status}"
+}
+
+# Print the names of nodes labelled longhorn-host=true that report Ready=True
+# and whose kubernetes.io/arch label equals BOOTSTRAP_BUNDLE_ARCH.
+# kubectl stderr is discarded, so a failed query prints nothing.
+list_ready_longhorn_seed_nodes() {
+  kubectl get nodes -l longhorn-host=true \
+    -o 'custom-columns=NAME:.metadata.name,READY:.status.conditions[?(@.type=="Ready")].status,ARCH:.metadata.labels.kubernetes\.io/arch' \
+    --no-headers 2>/dev/null \
+    | awk -v arch="${BOOTSTRAP_BUNDLE_ARCH}" '$2=="True" && $3==arch {print $1}'
+}
+
+# Print the sorted, de-duplicated set of nodes that should receive the
+# bootstrap images: every Ready Longhorn seed node plus HARBOR_TARGET_NODE when
+# it is set and Ready.
+list_bootstrap_seed_nodes() {
+  local seed_nodes
+  seed_nodes="$(list_ready_longhorn_seed_nodes || true)"
+  if [[ -n "${HARBOR_TARGET_NODE}" ]] && node_is_ready "${HARBOR_TARGET_NODE}"; then
+    seed_nodes="${seed_nodes}"$'\n'"${HARBOR_TARGET_NODE}"
+  fi
+  # Blank entries (e.g. an empty Longhorn list) are dropped before sorting.
+  sed '/^[[:space:]]*$/d' <<< "${seed_nodes}" | sort -u
+}
+
+seed_bootstrap_images() {
+  local images_text control_ip bundle_name helper_script_content seed_rc=0 node nodes
+  [[ -f "${HARBOR_BUNDLE_FILE}" ]] || die "Bootstrap bundle not found at ${HARBOR_BUNDLE_FILE}"
   ensure_harbor_target_node
   ensure_harbor_host_label
-  images_text="$(sed '/^[[:space:]]*#/d;/^[[:space:]]*$/d' "${BOOTSTRAP_DIR}/harbor-bootstrap-images.txt")"
-  [[ -n "${images_text}" ]] || die "No Harbor images listed in ${BOOTSTRAP_DIR}/harbor-bootstrap-images.txt"
+  images_text="$(bootstrap_images_text)"
+  [[ -n "${images_text}" ]] || die "No bootstrap images listed in ${BOOTSTRAP_IMAGES_FILE}"
+  nodes="$(list_bootstrap_seed_nodes)"
+  [[ -n "${nodes}" ]] || die "No Ready Longhorn or Harbor bootstrap nodes available for image seed."
   bundle_name="$(basename "${HARBOR_BUNDLE_FILE}")"
   start_bundle_server
   control_ip="$(control_host_ip)"
-  script_content=$(cat <<SCRIPT
+  helper_script_content=$(cat <<SCRIPT
 set -euo pipefail
+if [[ "${REFRESH_BOOTSTRAP_IMAGE_ALIASES}" == "1" ]]; then
+  while IFS= read -r image; do
+    [[ -z "\${image}" ]] && continue
+    nsenter --target 1 --mount --uts --ipc --net --pid /usr/local/bin/k3s ctr images rm "\${image}" >/dev/null 2>&1 || true
+  done <<'IMAGES'
+${images_text}
+IMAGES
+fi
 curl -fsSL "http://${control_ip}:${BUNDLE_HTTP_PORT}/${bundle_name}" \
   | zstd -dc \
   | nsenter --target 1 --mount --uts --ipc --net --pid /usr/local/bin/k3s ctr images import -
@@ -1548,10 +2548,45 @@ ${images_text}
 IMAGES
 SCRIPT
 )
-  run_helper_pod "${HARBOR_TARGET_NODE}" "harbor-seed" 900 "${script_content}" || seed_rc=$?
+  while IFS= read -r node; do
+    [[ -n "${node}" ]] || continue
+    log "bootstrap-image-seed-node=${node}"
+    if run_hostroot_pod_bundle_import "${node}" 1800 "${images_text}"; then
+      continue
+    fi
+    warn "Hostroot seed pod unavailable or failed on ${node}; falling back to dedicated helper pod."
+    run_helper_pod "${node}" "bootstrap-seed-${node}" 1800 "${helper_script_content}" || seed_rc=$?
+    if [[ "${seed_rc}" -ne 0 ]]; then
+      break
+    fi
+  done <<< "${nodes}"
   stop_bundle_server
   [[ "${seed_rc}" -eq 0 ]] || return "${seed_rc}"
-  mark_checkpoint startup_harbor_seeded
+  BOOTSTRAP_IMAGES_SEEDED=1
+  mark_checkpoint startup_bootstrap_images_seeded
+}
+
+# Seed the bootstrap images unless it is unnecessary or disabled.
+# Skips (returning 0) when, in this order: images were already seeded during
+# this run, Harbor is healthy, or SKIP_HARBOR_SEED is non-zero. Otherwise
+# prewarms the node helper image (unless SKIP_HELPER_PREWARM is non-zero) and
+# runs seed_bootstrap_images, whose status is returned.
+seed_bootstrap_images_if_needed() {
+  if [[ "${BOOTSTRAP_IMAGES_SEEDED}" -eq 1 ]]; then
+    log "Bootstrap images already seeded during this run."
+    return 0
+  elif harbor_is_ready; then
+    log "Harbor registry API is healthy; skipping bootstrap image seed."
+    return 0
+  elif [[ "${SKIP_HARBOR_SEED}" -ne 0 ]]; then
+    warn "Skipping bootstrap image seed/import by request."
+    return 0
+  fi
+  [[ "${SKIP_HELPER_PREWARM}" -ne 0 ]] || prewarm_node_helper_image
+  seed_bootstrap_images
+}
+
+# Thin alias for seed_bootstrap_images under the former function name.
+# NOTE(review): presumably kept for callers outside this hunk - confirm.
+seed_harbor_images() {
+  seed_bootstrap_images
 }
 
 bootstrap_local_minimal() {
@@ -1665,6 +2700,9 @@ status_report() {
   echo "shutdown_mode=${SHUTDOWN_MODE}"
   echo "bundle_file=${HARBOR_BUNDLE_FILE}"
   echo "bundle_present=$([[ -f "${HARBOR_BUNDLE_FILE}" ]] && echo true || echo false)"
+  echo "bootstrap_images_file=${BOOTSTRAP_IMAGES_FILE}"
+  echo "bootstrap_images_file_present=$([[ -f "${BOOTSTRAP_IMAGES_FILE}" ]] && echo true || echo false)"
+  echo "bootstrap_bundle_arch=${BOOTSTRAP_BUNDLE_ARCH}"
   echo "replica_snapshot_file=${REPLICA_SNAPSHOT_FILE}"
   echo "replica_snapshot_present=$([[ -f "${REPLICA_SNAPSHOT_FILE}" ]] && echo true || echo false)"
   echo "node_helper_image=${NODE_HELPER_IMAGE}"
@@ -1839,6 +2877,7 @@ startup_flow() {
   fi
 
   assert_flux_source_expected
+  seed_bootstrap_images_if_needed
 
   if [[ "${SKIP_LOCAL_BOOTSTRAP}" -eq 0 ]]; then
     if ! kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null | grep -q True; then
@@ -1855,14 +2894,7 @@ startup_flow() {
         if harbor_is_ready; then
           log "Harbor already healthy; skipping Harbor seed/bootstrap."
         else
-          if [[ "${SKIP_HARBOR_SEED}" -eq 0 ]]; then
-            if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then
-              prewarm_node_helper_image
-            fi
-            seed_harbor_images
-          else
-            warn "Skipping Harbor seed/import by request."
-          fi
+          seed_bootstrap_images_if_needed
           bootstrap_local_harbor
           mark_checkpoint startup_local_harbor_applied
           check_harbor_stack
@@ -1892,7 +2924,8 @@ startup_flow() {
 }
 
 prepare_flow() {
-  [[ -f "${HARBOR_BUNDLE_FILE}" ]] || die "Harbor bundle missing at ${HARBOR_BUNDLE_FILE}. Build and copy it to the canonical control host first."
+  [[ -f "${HARBOR_BUNDLE_FILE}" ]] || die "Bootstrap bundle missing at ${HARBOR_BUNDLE_FILE}. Build and copy it to the canonical control host first."
+  [[ -f "${BOOTSTRAP_IMAGES_FILE}" ]] || die "Bootstrap image list missing at ${BOOTSTRAP_IMAGES_FILE}."
   ensure_harbor_target_node
   ensure_harbor_host_label
   mark_checkpoint prepare_harbor_host_labeled
@@ -1904,15 +2937,64 @@ prepare_flow() {
 }
 
 harbor_seed_flow() {
-  [[ -f "${HARBOR_BUNDLE_FILE}" ]] || die "Harbor bundle missing at ${HARBOR_BUNDLE_FILE}. Build and copy it to the canonical control host first."
+  [[ -f "${HARBOR_BUNDLE_FILE}" ]] || die "Bootstrap bundle missing at ${HARBOR_BUNDLE_FILE}. Build and copy it to the canonical control host first."  # Flow shared by the bootstrap-seed, harbor-seed and longhorn-seed modes; the bundle file must exist.
+  [[ -f "${BOOTSTRAP_IMAGES_FILE}" ]] || die "Bootstrap image list missing at ${BOOTSTRAP_IMAGES_FILE}."  # The image list file must exist as well.
   if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then
     prewarm_node_helper_image
     mark_checkpoint harbor_seed_helper_prewarmed
   fi
-  seed_harbor_images
+  seed_bootstrap_images  # Seed step; followed by the Harbor endpoint check and the pull canary.
   check_harbor_endpoint
   run_harbor_pull_canary
-  log "Harbor seed flow complete."
+  log "Bootstrap seed flow complete."  # Logged after the endpoint check and canary calls above.
+}
+
+longhorn_unlock_flow() {  # Entry point for the longhorn-unlock mode of the MODE dispatch.
+  require_cmd jq  # jq is not invoked directly in this function; NOTE(review): presumably needed by the helpers called below - confirm.
+  [[ -f "${HARBOR_BUNDLE_FILE}" ]] || die "Bootstrap bundle missing at ${HARBOR_BUNDLE_FILE}. Build and copy it to the canonical control host first."  # Same file preconditions as harbor_seed_flow.
+  [[ -f "${BOOTSTRAP_IMAGES_FILE}" ]] || die "Bootstrap image list missing at ${BOOTSTRAP_IMAGES_FILE}."
+  if ! wait_for_api; then  # Nothing below runs unless wait_for_api succeeds.
+    die "Kubernetes API did not become reachable in time."
+  fi
+
+  warn "Longhorn unlock mode will not mutate Longhorn volumes, replicas, engines, disks, PVs, or PVCs."
+  if ! harbor_endpoint_is_ready 1; then  # Non-fatal: a failed readiness check only produces the warning below.
+    warn "Harbor registry API is unhealthy; using local bootstrap image cache path."
+  fi
+
+  REFRESH_BOOTSTRAP_IMAGE_ALIASES=1  # Set without 'local' and not read in this function; presumably consumed by later helpers - TODO confirm.
+  freeze_longhorn_deadlock_automation
+  ensure_longhorn_cache_first_policy
+  remove_longhorn_manager_prepull_sidecar
+  free_longhorn_instance_manager_headroom
+  delete_failed_nonstorage_pods_for_headroom
+  repair_longhorn_manager_cache_deadlock || warn "Surgical Longhorn manager cache repair did not complete on every affected node."  # A failed repair is downgraded to a warning; the flow continues.
+  if [[ "${SKIP_LONGHORN_UNLOCK_BUNDLE_SEED}" -eq 0 ]]; then  # The SSH seed step runs only when this flag is 0.
+    seed_longhorn_unlock_images_ssh
+  else
+    warn "Skipping full Longhorn unlock bundle seed by operator request."
+  fi
+  restart_longhorn_image_pull_backoff_pods
+  recover_stuck_terminating_node_runtime_pods_after_deadlock
+
+  if [[ "${EXECUTE}" -eq 1 ]]; then  # The rollout wait and the pause are skipped unless EXECUTE is 1.
+    kubectl -n longhorn-system rollout status daemonset/longhorn-manager --timeout=5m || warn "longhorn-manager DaemonSet did not fully roll out yet."
+    sleep 30  # Fixed 30-second pause after the rollout check; NOTE(review): presumably settle time - rationale not stated here.
+  fi
+  wait_for_longhorn_control_endpoints || true  # A failure here is deliberately ignored.
+  restart_stale_critical_pods_after_longhorn_unlock
+  restart_harbor_after_postgres_recovery || warn "Harbor did not fully recover after Postgres became ready."
+  if harbor_endpoint_is_ready 1; then  # The steps in this branch run only when this second readiness check succeeds.
+    run_harbor_pull_canary || warn "Harbor pull canary failed after registry recovery."
+    restore_recovered_worker_scheduling_after_deadlock
+    restore_longhorn_unlock_optional_workloads
+    delete_safe_stale_terminating_replicaset_pods_after_deadlock
+    restart_image_pull_backoff_pods_after_harbor_recovery || true
+    resume_deadlock_automation_after_core_recovery || true
+  fi
+  report_longhorn_unlock_status
+  mark_checkpoint longhorn_unlock_complete  # NOTE(review): recorded even when the Harbor-ready branch above was skipped - confirm this is intended.
+  log "Longhorn unlock flow complete."
 }
 
 load_recovery_state
@@ -1920,6 +3002,8 @@ log "mode=${MODE} execute=${EXECUTE}"
 log "shutdown-mode=${SHUTDOWN_MODE}"
 log "recovery-state-file=${RECOVERY_STATE_FILE}"
 log "bundle-file=${HARBOR_BUNDLE_FILE}"
+log "bootstrap-images-file=${BOOTSTRAP_IMAGES_FILE}"
+log "bootstrap-bundle-arch=${BOOTSTRAP_BUNDLE_ARCH}"
 log "node-helper-image=${NODE_HELPER_IMAGE}"
 log "harbor-target-node-config=${HARBOR_TARGET_NODE:-auto}"
 log "harbor-canary-node-config=${HARBOR_CANARY_NODE:-auto}"
@@ -1936,9 +3020,12 @@ case "${MODE}" in
   prepare)
     prepare_flow
     ;;
-  harbor-seed)
+  bootstrap-seed|harbor-seed|longhorn-seed)
     harbor_seed_flow
     ;;
+  longhorn-unlock)
+    longhorn_unlock_flow
+    ;;
   shutdown)
     planned_shutdown
     ;;