diff --git a/infrastructure/longhorn/core/helmrelease.yaml b/infrastructure/longhorn/core/helmrelease.yaml index 406b76d0..7c1eb2ef 100644 --- a/infrastructure/longhorn/core/helmrelease.yaml +++ b/infrastructure/longhorn/core/helmrelease.yaml @@ -37,7 +37,7 @@ spec: createSecret: false registrySecret: longhorn-registry image: - pullPolicy: Always + pullPolicy: IfNotPresent longhorn: engine: repository: registry.bstein.dev/infra/longhorn-engine @@ -80,7 +80,7 @@ spec: repository: registry.bstein.dev/infra/longhorn-livenessprobe tag: v2.16.0 defaultSettings: - systemManagedPodsImagePullPolicy: Always + systemManagedPodsImagePullPolicy: if-not-present taintToleration: veles.bstein.dev/simulation=true:NoSchedule longhornManager: tolerations: diff --git a/scripts/bootstrap/harbor-bootstrap-images.txt b/scripts/bootstrap/harbor-bootstrap-images.txt index fae7a6f6..414eacb8 100644 --- a/scripts/bootstrap/harbor-bootstrap-images.txt +++ b/scripts/bootstrap/harbor-bootstrap-images.txt @@ -1,4 +1,4 @@ -# Harbor cold-start bootstrap images. +# Harbor and Longhorn cold-start bootstrap images. registry.bstein.dev/infra/harbor-core:v2.14.1-arm64 registry.bstein.dev/infra/harbor-jobservice:v2.14.1-arm64 registry.bstein.dev/infra/harbor-portal:v2.14.1-arm64 @@ -7,3 +7,18 @@ registry.bstein.dev/infra/harbor-registryctl:v2.14.1-arm64 registry.bstein.dev/infra/harbor-redis:v2.14.1-arm64 registry.bstein.dev/infra/harbor-nginx:v2.14.1-arm64 registry.bstein.dev/infra/harbor-prepare:v2.14.1-arm64 + +# Longhorn must be able to start before Harbor is fully healthy. 
+registry.bstein.dev/infra/longhorn-engine:v1.8.2 +registry.bstein.dev/infra/longhorn-manager:v1.8.2 +registry.bstein.dev/infra/longhorn-ui:v1.8.2 +registry.bstein.dev/infra/longhorn-instance-manager:v1.8.2 +registry.bstein.dev/infra/longhorn-share-manager:v1.8.2 +registry.bstein.dev/infra/longhorn-backing-image-manager:v1.8.2 +registry.bstein.dev/infra/longhorn-support-bundle-kit:v0.0.56 +registry.bstein.dev/infra/longhorn-csi-attacher:v4.9.0 +registry.bstein.dev/infra/longhorn-csi-provisioner:v5.3.0 +registry.bstein.dev/infra/longhorn-csi-node-driver-registrar:v2.14.0 +registry.bstein.dev/infra/longhorn-csi-resizer:v1.13.2 +registry.bstein.dev/infra/longhorn-csi-snapshotter:v8.2.0 +registry.bstein.dev/infra/longhorn-livenessprobe:v2.16.0 diff --git a/scripts/bootstrap/longhorn-unlock-images.txt b/scripts/bootstrap/longhorn-unlock-images.txt new file mode 100644 index 00000000..7cfe3794 --- /dev/null +++ b/scripts/bootstrap/longhorn-unlock-images.txt @@ -0,0 +1,14 @@ +# Longhorn images needed when Harbor is unhealthy during storage recovery. 
+registry.bstein.dev/infra/longhorn-engine:v1.8.2 +registry.bstein.dev/infra/longhorn-manager:v1.8.2 +registry.bstein.dev/infra/longhorn-ui:v1.8.2 +registry.bstein.dev/infra/longhorn-instance-manager:v1.8.2 +registry.bstein.dev/infra/longhorn-share-manager:v1.8.2 +registry.bstein.dev/infra/longhorn-backing-image-manager:v1.8.2 +registry.bstein.dev/infra/longhorn-support-bundle-kit:v0.0.56 +registry.bstein.dev/infra/longhorn-csi-attacher:v4.9.0 +registry.bstein.dev/infra/longhorn-csi-provisioner:v5.3.0 +registry.bstein.dev/infra/longhorn-csi-node-driver-registrar:v2.14.0 +registry.bstein.dev/infra/longhorn-csi-resizer:v1.13.2 +registry.bstein.dev/infra/longhorn-csi-snapshotter:v8.2.0 +registry.bstein.dev/infra/longhorn-livenessprobe:v2.16.0 diff --git a/scripts/bootstrap/recovery-config.env b/scripts/bootstrap/recovery-config.env index a8bae234..ee1dcab7 100644 --- a/scripts/bootstrap/recovery-config.env +++ b/scripts/bootstrap/recovery-config.env @@ -4,7 +4,9 @@ EXPECTED_FLUX_URL="ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git" SHUTDOWN_MODE="host-poweroff" STATE_SUBDIR=".local/share/ananke" HARBOR_BUNDLE_BASENAME="harbor-bootstrap-v2.14.1-arm64.tar.zst" -HARBOR_TARGET_NODE="" +BOOTSTRAP_BUNDLE_ARCH="arm64" +RECOVERY_UNCORDON_DENYLIST="titan-18,titan-22,titan-24" +HARBOR_TARGET_NODE="titan-11" HARBOR_CANARY_NODE="" HARBOR_HOST_LABEL_KEY="ananke.bstein.dev/harbor-bootstrap" HARBOR_CANARY_IMAGE="registry.bstein.dev/bstein/kubectl:1.35.0" @@ -33,4 +35,4 @@ STARTUP_INCLUDE_INGRESS_CHECKS="1" STARTUP_INGRESS_ALLOWED_STATUSES="200,301,302,307,308,401,403,404" STARTUP_IGNORE_INGRESS_HOSTS_REGEX="" STARTUP_INGRESS_CHECK_TIMEOUT_SECONDS="10" -STARTUP_SERVICE_CHECKLIST='gitea|https://scm.bstein.dev/api/healthz|200|"status":"pass"||;grafana|https://metrics.bstein.dev/api/health|200|"database":"ok"||;harbor|https://registry.bstein.dev/v2/|200,401|||' 
+STARTUP_SERVICE_CHECKLIST='gitea|https://scm.bstein.dev/api/healthz|200|"status":"pass"||;grafana|https://metrics.bstein.dev/api/health|200|"database":"ok"||;harbor|https://registry.bstein.dev/v2/|401|unauthorized|] [--bundle-file ] [--docker-config ] [--platform ] +Usage: scripts/build_harbor_bootstrap_bundle.sh [--images-file ] [--bundle-file ] [--docker-config ] [--platform ] [--zstd-level ] USAGE exit 0 ;; @@ -47,12 +52,54 @@ if [[ ${#IMAGES[@]} -eq 0 ]]; then exit 1 fi
+# Print the upstream image reference for a registry.bstein.dev/infra/longhorn-*
+# alias, keeping the alias tag. References that match no known alias are
+# printed unchanged, which callers use to detect "no upstream mapping".
+#   $1  image reference (name:tag)
+source_image_for_alias() {
+  local image="$1"
+  # Everything after the last ':' is treated as the tag.
+  local tag="${image##*:}"
+  case "${image}" in
+    registry.bstein.dev/infra/longhorn-engine:*) echo "docker.io/longhornio/longhorn-engine:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-manager:*) echo "docker.io/longhornio/longhorn-manager:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-ui:*) echo "docker.io/longhornio/longhorn-ui:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-instance-manager:*) echo "docker.io/longhornio/longhorn-instance-manager:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-share-manager:*) echo "docker.io/longhornio/longhorn-share-manager:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-backing-image-manager:*) echo "docker.io/longhornio/backing-image-manager:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-support-bundle-kit:*) echo "docker.io/longhornio/support-bundle-kit:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-csi-attacher:*) echo "registry.k8s.io/sig-storage/csi-attacher:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-csi-provisioner:*) echo "registry.k8s.io/sig-storage/csi-provisioner:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-csi-node-driver-registrar:*) echo "registry.k8s.io/sig-storage/csi-node-driver-registrar:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-csi-resizer:*) echo "registry.k8s.io/sig-storage/csi-resizer:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-csi-snapshotter:*) echo "registry.k8s.io/sig-storage/csi-snapshotter:${tag}" ;;
+    registry.bstein.dev/infra/longhorn-livenessprobe:*) echo "registry.k8s.io/sig-storage/livenessprobe:${tag}" ;;
+    *) echo "${image}" ;;
+  esac
+}
+
+# Make sure ${image} is present in the local Docker image store.
+#   $1  image reference from the images file
+# Attempts, in order: reuse a cached copy, pull the reference as written, then
+# pull the upstream source from source_image_for_alias and tag it as ${image}.
+# Returns non-zero when the direct pull fails and no upstream mapping exists,
+# or when the upstream pull or the tag step fails.
+pull_or_tag_image() {
+  local image="$1"
+  local source_image cached_platform
+  # A bare `docker image inspect` succeeds for an image of any architecture, so
+  # the cache is only trusted when the cached copy matches the requested
+  # platform; otherwise a copy cached for another architecture could end up in
+  # the bundle.
+  cached_platform="$(docker image inspect --format '{{.Os}}/{{.Architecture}}' "${image}" 2>/dev/null || true)"
+  if [[ -n "${cached_platform}" ]]; then
+    # Accept an exact match, a variant-qualified request (linux/arm64/v8), or
+    # an architecture-only request (arm64). An empty PLATFORM keeps the cache.
+    if [[ -z "${PLATFORM:-}" \
+      || "${PLATFORM}" == "${cached_platform}" \
+      || "${PLATFORM}" == "${cached_platform}/"* \
+      || "${PLATFORM}" == "${cached_platform#*/}" ]]; then
+      echo "Using cached ${image}" >&2
+      return 0
+    fi
+    echo "Cached ${image} is ${cached_platform}, not ${PLATFORM}; pulling again" >&2
+  fi
+  echo "Pulling ${image}" >&2
+  if docker pull --platform "${PLATFORM}" "${image}" >/dev/null; then
+    return 0
+  fi
+  source_image="$(source_image_for_alias "${image}")"
+  # An unchanged reference means there is no upstream to fall back to.
+  if [[ "${source_image}" == "${image}" ]]; then
+    return 1
+  fi
+  echo "Pulling ${source_image} for ${image}" >&2
+  docker pull --platform "${PLATFORM}" "${source_image}" >/dev/null
+  docker tag "${source_image}" "${image}"
+}
+
 mkdir -p "$(dirname "${BUNDLE_FILE}")" for image in "${IMAGES[@]}"; do - echo "Pulling ${image}" >&2 - docker pull --platform "${PLATFORM}" "${image}" >/dev/null - + pull_or_tag_image "${image}" done -docker save "${IMAGES[@]}" | zstd -T0 -19 -o "${BUNDLE_FILE}" +tmp_bundle="${BUNDLE_FILE}.tmp" +rm -f "${tmp_bundle}" +docker save "${IMAGES[@]}" | zstd -T0 -"${ZSTD_LEVEL}" -o "${tmp_bundle}" +mv "${tmp_bundle}" "${BUNDLE_FILE}" echo "Wrote ${BUNDLE_FILE}" >&2 diff --git a/scripts/cluster_power_recovery.sh b/scripts/cluster_power_recovery.sh index 6a7a9002..ea6319fb 100755 --- a/scripts/cluster_power_recovery.sh +++ b/scripts/cluster_power_recovery.sh @@ -16,7 +16,7 @@ fi usage() { cat < [options] + scripts/cluster_power_recovery.sh [options] Options: --execute Actually run commands (default is dry-run) @@ -30,15 +30,29 @@ Options: --skip-drain Shutdown: skip worker drain during shutdown --skip-local-bootstrap Startup: skip local bootstrap fallback applies --skip-harbor-bootstrap Startup: skip Harbor recovery bootstrap stage - --skip-harbor-seed Startup: skip Harbor image seed/import stage + --skip-harbor-seed Startup: skip bootstrap image seed/import stage --skip-helper-prewarm Prepare/Shutdown/Startup: skip node-helper prewarm + --refresh-bootstrap-image-aliases + Remove bootstrap image aliases before import, to
clear poisoned registry pulls --min-startup-battery Minimum UPS percent required before bootstrap (default: 35) --ups-host UPS identifier for upsc (default: ups@localhost) --ups-battery-key UPS battery key for upsc (default: battery.charge) --recovery-state-file Recovery state file for outage-aware restart logic --replica-snapshot-file File used to persist workload replica snapshot across shutdown/startup - --harbor-bundle-file Harbor bootstrap bundle on the control host + --bootstrap-images-file + Image list expected inside the bootstrap bundle + --harbor-bundle-file Bootstrap bundle on the control host + --longhorn-unlock-bundle-file + Longhorn-only bundle for Harbor-deadlock recovery + --longhorn-unlock-images-file + Longhorn-only image list for Harbor-deadlock recovery + --longhorn-manager-cache-bundle-file + Single-image Longhorn manager cache repair archive + --skip-longhorn-unlock-bundle-seed + Longhorn unlock: skip full Longhorn bundle seed and run surgical repairs only + --bootstrap-bundle-arch + Node architecture expected by the bootstrap bundle (default: ${BOOTSTRAP_BUNDLE_ARCH:-arm64}) --harbor-target-node Node that should host Harbor during bootstrap (default: auto) --harbor-canary-node Node used for Harbor pull canary (default: auto) --harbor-host-label-key Node label key used to pin Harbor bootstrap workloads (default: ${HARBOR_HOST_LABEL_KEY:-ananke.bstein.dev/harbor-bootstrap}) @@ -64,7 +78,9 @@ Options: Examples: scripts/cluster_power_recovery.sh prepare --execute + scripts/cluster_power_recovery.sh bootstrap-seed --execute scripts/cluster_power_recovery.sh harbor-seed --execute + scripts/cluster_power_recovery.sh longhorn-unlock --execute scripts/cluster_power_recovery.sh status scripts/cluster_power_recovery.sh shutdown --execute scripts/cluster_power_recovery.sh startup --execute --force-flux-branch main @@ -79,7 +95,7 @@ fi shift || true case "${MODE}" in - prepare|status|harbor-seed|shutdown|startup) ;; + 
prepare|status|bootstrap-seed|harbor-seed|longhorn-seed|longhorn-unlock|shutdown|startup) ;; *) echo "Unknown mode: ${MODE}" >&2 usage @@ -139,6 +155,19 @@ STATE_ROOT="${HOME}/${STATE_SUBDIR:-.local/share/ananke}" RECOVERY_STATE_FILE="${STATE_ROOT}/cluster_power_recovery.state" REPLICA_SNAPSHOT_FILE="${STATE_ROOT}/desired_workload_replicas.tsv" HARBOR_BUNDLE_FILE="${STATE_ROOT}/bundles/${HARBOR_BUNDLE_BASENAME:-harbor-bootstrap-v2.14.1-arm64.tar.zst}" +BOOTSTRAP_IMAGES_FILE="${BOOTSTRAP_IMAGES_FILE:-${BOOTSTRAP_DIR}/harbor-bootstrap-images.txt}" +LONGHORN_UNLOCK_IMAGES_FILE="${LONGHORN_UNLOCK_IMAGES_FILE:-${BOOTSTRAP_DIR}/longhorn-unlock-images.txt}" +LONGHORN_UNLOCK_BUNDLE_FILE="${LONGHORN_UNLOCK_BUNDLE_FILE:-${STATE_ROOT}/bundles/longhorn-unlock-v1.8.2-${BOOTSTRAP_BUNDLE_ARCH:-arm64}.tar.zst}" +LONGHORN_MANAGER_IMAGE="${LONGHORN_MANAGER_IMAGE:-registry.bstein.dev/infra/longhorn-manager:v1.8.2}" +LONGHORN_MANAGER_CACHE_BUNDLE_FILE="${LONGHORN_MANAGER_CACHE_BUNDLE_FILE:-${STATE_ROOT}/bundles/longhorn-manager-v1.8.2-${BOOTSTRAP_BUNDLE_ARCH:-arm64}.tar}" +LONGHORN_UNLOCK_SSH_KNOWN_HOSTS="${LONGHORN_UNLOCK_SSH_KNOWN_HOSTS:-/tmp/ananke_longhorn_unlock_known_hosts}" +BOOTSTRAP_BUNDLE_ARCH="${BOOTSTRAP_BUNDLE_ARCH:-arm64}" +RECOVERY_UNCORDON_DENYLIST="${RECOVERY_UNCORDON_DENYLIST:-titan-18,titan-22,titan-24}" +STALE_TERMINATING_POD_SECONDS="${STALE_TERMINATING_POD_SECONDS:-300}" +RECOVERY_NODE_RUNTIME_RESTART_ENABLED="${RECOVERY_NODE_RUNTIME_RESTART_ENABLED:-1}" +RECOVERY_NODE_RUNTIME_RESTART_DENYLIST="${RECOVERY_NODE_RUNTIME_RESTART_DENYLIST:-${RECOVERY_UNCORDON_DENYLIST}}" +RECOVERY_NODE_RUNTIME_RESTART_MAX_NODES="${RECOVERY_NODE_RUNTIME_RESTART_MAX_NODES:-3}" +RECOVERY_NODE_RUNTIME_RESTART_WAIT_SECONDS="${RECOVERY_NODE_RUNTIME_RESTART_WAIT_SECONDS:-300}" HARBOR_TARGET_NODE="${HARBOR_TARGET_NODE:-}" HARBOR_CANARY_NODE="${HARBOR_CANARY_NODE:-}" HARBOR_HOST_LABEL_KEY="${HARBOR_HOST_LABEL_KEY:-ananke.bstein.dev/harbor-bootstrap}" @@ -148,7 +177,11 @@ 
NODE_HELPER_NAMESPACE="${NODE_HELPER_NAMESPACE:-maintenance}" NODE_HELPER_SERVICE_ACCOUNT="${NODE_HELPER_SERVICE_ACCOUNT:-default}" NODE_HELPER_PREWARM_DS="${NODE_HELPER_PREWARM_DS:-ananke-node-helper-prewarm}" REGISTRY_PULL_SECRET="${REGISTRY_PULL_SECRET:-harbor-regcred}" +REFRESH_BOOTSTRAP_IMAGE_ALIASES="${REFRESH_BOOTSTRAP_IMAGE_ALIASES:-0}" +SKIP_LONGHORN_UNLOCK_BUNDLE_SEED="${SKIP_LONGHORN_UNLOCK_BUNDLE_SEED:-0}" +LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE="${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE:-${STATE_ROOT}/longhorn_unlock_optional_replicas.tsv}" KEEP_PREWARM_DAEMONSET=0 +BOOTSTRAP_IMAGES_SEEDED=0 RECOVERY_PENDING=0 STARTUP_ATTEMPTED_DURING_OUTAGE=0 @@ -210,6 +243,10 @@ while [[ $# -gt 0 ]]; do SKIP_HELPER_PREWARM=1 shift ;; + --refresh-bootstrap-image-aliases) + REFRESH_BOOTSTRAP_IMAGE_ALIASES=1 + shift + ;; --ups-host) UPS_HOST="${2:?missing ups host}" shift 2 @@ -238,6 +275,30 @@ while [[ $# -gt 0 ]]; do HARBOR_BUNDLE_FILE="${2:?missing bundle file path}" shift 2 ;; + --longhorn-unlock-bundle-file) + LONGHORN_UNLOCK_BUNDLE_FILE="${2:?missing Longhorn unlock bundle file path}" + shift 2 + ;; + --bootstrap-images-file) + BOOTSTRAP_IMAGES_FILE="${2:?missing bootstrap image list path}" + shift 2 + ;; + --longhorn-unlock-images-file) + LONGHORN_UNLOCK_IMAGES_FILE="${2:?missing Longhorn unlock image list path}" + shift 2 + ;; + --longhorn-manager-cache-bundle-file) + LONGHORN_MANAGER_CACHE_BUNDLE_FILE="${2:?missing Longhorn manager cache bundle file path}" + shift 2 + ;; + --skip-longhorn-unlock-bundle-seed) + SKIP_LONGHORN_UNLOCK_BUNDLE_SEED=1 + shift + ;; + --bootstrap-bundle-arch) + BOOTSTRAP_BUNDLE_ARCH="${2:?missing bootstrap bundle architecture}" + shift 2 + ;; --harbor-target-node) HARBOR_TARGET_NODE="${2:?missing harbor target node}" shift 2 @@ -615,7 +676,7 @@ default_startup_service_checklist() { cat <<'CHECKS' gitea|https://scm.bstein.dev/api/healthz|200|"status":"pass"|| grafana|https://metrics.bstein.dev/api/health|200|"database":"ok"|| 
-harbor|https://registry.bstein.dev/v2/|200,401||| +harbor|https://registry.bstein.dev/v2/|401|unauthorized|/dev/null 2>&1; then + run kubectl -n flux-system patch kustomization "${name}" --type=merge -p "${patch}" + else + warn "Flux Kustomization ${name} not found; skipping suspend=${value}." + fi +} +
+# Set spec.suspend on a HelmRelease via a JSON merge patch.
+#   $1  namespace of the HelmRelease
+#   $2  HelmRelease name
+#   $3  value inserted unquoted into the patch (callers pass true or false)
+# A missing HelmRelease is reported with warn and skipped, not treated as an
+# error. The patch itself goes through the `run` wrapper defined elsewhere in
+# this script.
+patch_helmrelease_suspend() {
+  local namespace="$1"
+  local name="$2"
+  local value="$3"
+  local patch
+  patch=$(printf '{"spec":{"suspend":%s}}' "${value}")
+  if kubectl -n "${namespace}" get helmrelease "${name}" >/dev/null 2>&1; then
+    run kubectl -n "${namespace}" patch helmrelease "${name}" --type=merge -p "${patch}"
+  else
+    warn "HelmRelease ${namespace}/${name} not found; skipping suspend=${value}."
+  fi
+}
+
+# Poll until no pods labelled app=kustomize-controller and then
+# app=helm-controller remain in flux-system.
+#   $1  per-controller timeout in seconds (optional, default 90)
+# Always returns 0: a timeout is reported with warn and the next controller is
+# checked. In dry-run mode (EXECUTE=0) it only logs the intended wait.
+wait_for_flux_reconciler_pods_stopped() {
+  local timeout_seconds="${1:-90}"
+  local app start now pods
+  if [[ "${EXECUTE}" -eq 0 ]]; then
+    log "DRY-RUN: wait for Flux reconcilers to stop"
+    return 0
+  fi
+  for app in kustomize-controller helm-controller; do
+    start="$(date +%s)"
+    while true; do
+      # Only an empty listing from a successful kubectl call shows the pods are
+      # gone; a failed call (for example an unreachable API server) also prints
+      # nothing and must keep polling instead of being read as "stopped".
+      if pods="$(kubectl -n flux-system get pods -l "app=${app}" --no-headers 2>/dev/null)" \
+        && [[ -z "${pods}" ]]; then
+        log "flux-reconciler-stopped=${app}"
+        break
+      fi
+      now="$(date +%s)"
+      if (( now - start >= timeout_seconds )); then
+        warn "Timed out waiting for ${app} pods to stop."
+        break
+      fi
+      sleep 2
+    done
+  done
+}
+
+freeze_longhorn_deadlock_automation() { + warn "Freezing only the automation that can fight Longhorn emergency recovery."
+ if kubectl -n flux-system get deployment kustomize-controller >/dev/null 2>&1; then + run kubectl -n flux-system scale deployment kustomize-controller --replicas=0 + fi + if kubectl -n flux-system get deployment helm-controller >/dev/null 2>&1; then + run kubectl -n flux-system scale deployment helm-controller --replicas=0 + fi + wait_for_flux_reconciler_pods_stopped + patch_kustomization_suspend flux-system true + patch_kustomization_suspend helm true + patch_kustomization_suspend longhorn true + patch_helmrelease_suspend longhorn-system longhorn true + mark_checkpoint longhorn_unlock_automation_frozen +} +
+# Patch the live Longhorn HelmRelease values and the longhorn-manager DaemonSet
+# so images are pulled only when not already present on the node. Each object
+# is patched only if it exists; nothing is reported when it is absent.
+ensure_longhorn_cache_first_policy() {
+  local values_patch ds_patch
+  values_patch='{"spec":{"values":{"image":{"pullPolicy":"IfNotPresent"},"defaultSettings":{"systemManagedPodsImagePullPolicy":"if-not-present"}}}}'
+  ds_patch='{"spec":{"template":{"spec":{"containers":[{"name":"longhorn-manager","imagePullPolicy":"IfNotPresent"}]}}}}'
+  if kubectl -n longhorn-system get helmrelease longhorn >/dev/null 2>&1; then
+    run kubectl -n longhorn-system patch helmrelease longhorn --type=merge -p "${values_patch}"
+  fi
+  if kubectl -n longhorn-system get daemonset longhorn-manager >/dev/null 2>&1; then
+    run kubectl -n longhorn-system patch daemonset longhorn-manager --type=strategic -p "${ds_patch}"
+  fi
+}
+
+# Remove every container named pre-pull-share-manager-image from the
+# longhorn-manager DaemonSet pod template using JSON-patch "remove" operations.
+remove_longhorn_manager_prepull_sidecar() {
+  local indexes index
+  # Number the container names from 0 to get their array positions, keep the
+  # positions of the sidecar, and sort them descending so that removing one
+  # entry does not shift the index of the next one to remove.
+  indexes="$(kubectl -n longhorn-system get daemonset longhorn-manager \
+    -o jsonpath='{range .spec.template.spec.containers[*]}{.name}{"\n"}{end}' 2>/dev/null \
+    | nl -v 0 -w 1 -s ' ' \
+    | awk '$2=="pre-pull-share-manager-image" {print $1}' \
+    | sort -rn || true)"
+  if [[ -z "${indexes}" ]]; then
+    log "longhorn-manager-prepull-sidecar=absent"
+    return 0
+  fi
+  while IFS= read -r index; do
+    [[ -z "${index}" ]] && continue
+    run kubectl -n longhorn-system patch daemonset longhorn-manager --type=json \
+      -p "[{\"op\":\"remove\",\"path\":\"/spec/template/spec/containers/${index}\"}]"
+  done <<< "${indexes}"
+}
+
+# Prepare the snapshot file that records replica counts of optional workloads
+# before they are scaled down. A non-empty existing file is preserved rather
+# than truncated; otherwise an empty file (and its directory) is created.
+# In dry-run mode (EXECUTE=0) nothing is written.
+save_longhorn_unlock_optional_replica_snapshot() {
+  if [[ "${EXECUTE}" -eq 0 ]]; then
+    log "DRY-RUN: save optional workload snapshot to ${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}"
+    return 0
+  fi
+  if [[ -s "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}" ]]; then
+    log "optional-workload-snapshot=preserved path=${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}"
+    return 0
+  fi
+  mkdir -p "$(dirname "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}")"
+  : > "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}"
+}
+
+# Record the current replica count of one workload in the snapshot file and
+# scale it to zero.
+#   $1  namespace
+#   $2  resource kind (the visible caller passes "deployment")
+#   $3  resource name
+# Workloads that do not exist are skipped silently.
+scale_optional_workload_for_longhorn_unlock() {
+  local namespace="$1"
+  local kind="$2"
+  local name="$3"
+  local replicas
+  if ! kubectl -n "${namespace}" get "${kind}" "${name}" >/dev/null 2>&1; then
+    return 0
+  fi
+  replicas="$(kubectl -n "${namespace}" get "${kind}" "${name}" -o jsonpath='{.spec.replicas}' 2>/dev/null || true)"
+  # An empty lookup result is recorded as 1 replica.
+  [[ -n "${replicas}" ]] || replicas=1
+  # Append a snapshot row only when this workload has none yet, so an earlier
+  # recorded count is never overwritten by a later run.
+  if [[ "${EXECUTE}" -eq 1 ]] && ! awk -F '\t' -v ns="${namespace}" -v kind="${kind}" -v name="${name}" '$1==ns && $2==kind && $3==name {found=1} END {exit found ? 0 : 1}' "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}" 2>/dev/null; then
+    printf '%s\t%s\t%s\t%s\n' "${namespace}" "${kind}" "${name}" "${replicas}" >> "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}"
+  fi
+  if [[ "${replicas}" == "0" ]]; then
+    log "optional-workload-already-scaled-down=${namespace}/${kind}/${name}"
+    return 0
+  fi
+  warn "Temporarily scaling optional workload ${namespace}/${kind}/${name} from ${replicas} to 0 for Longhorn recovery headroom."
+  run kubectl -n "${namespace}" scale "${kind}" "${name}" --replicas=0
+}
+
+# Scale a fixed list of optional workloads to zero, after preparing the
+# snapshot file that records their replica counts, then mark the
+# longhorn_unlock_optional_workloads_scaled checkpoint.
+free_longhorn_instance_manager_headroom() {
+  # Loop variables are local so they do not leak into, or clobber, globals of
+  # the same name elsewhere in the script.
+  local namespace kind name
+  save_longhorn_unlock_optional_replica_snapshot
+  # One "namespace kind name" triple per heredoc line; blank lines and lines
+  # starting with '#' are ignored.
+  while read -r namespace kind name; do
+    [[ -z "${namespace}" || "${namespace}" == \#* ]] && continue
+    scale_optional_workload_for_longhorn_unlock "${namespace}" "${kind}" "${name}"
+  done <<'WORKLOADS'
+game-stream deployment oauth2-proxy-wolf
+logging deployment oauth2-proxy-logs
+longhorn-system deployment oauth2-proxy-longhorn
+maintenance deployment oauth2-proxy-metis
+maintenance deployment oauth2-proxy-soteria
+openclaw deployment oauth2-proxy-agent
+quality deployment oauth2-proxy-sonarqube
+quality deployment sonarqube-exporter
+sso deployment oauth2-proxy
+bstein-dev-home deployment bstein-dev-home-frontend
+WORKLOADS
+  mark_checkpoint longhorn_unlock_optional_workloads_scaled
+}
+
+restore_longhorn_unlock_optional_workloads() { + local namespace kind name desired current + if [[ ! -f "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}" ]]; then + log "optional-workload-restore=not-needed snapshot=absent" + return 0 + fi + + while IFS=$'\t' read -r namespace kind name desired; do + [[ -n "${namespace}" && -n "${kind}" && -n "${name}" && -n "${desired}" ]] || continue + [[ "${desired}" =~ ^[0-9]+$ ]] || continue + (( desired > 0 )) || continue + current="$(kubectl -n "${namespace}" get "${kind}" "${name}" -o jsonpath='{.spec.replicas}' 2>/dev/null || true)" + [[ "${current}" =~ ^[0-9]+$ ]] || continue + if (( current == desired )); then + continue + fi + warn "Restoring optional workload ${namespace}/${kind}/${name} to replicas=${desired} after Longhorn unlock."
+ run kubectl -n "${namespace}" scale "${kind}" "${name}" --replicas="${desired}" + done < "${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}" + mark_checkpoint longhorn_unlock_optional_workloads_restored +} +
+# Uncordon worker nodes that are cordoned but report Ready again, then mark the
+# longhorn_unlock_worker_scheduling_restored checkpoint.
+# A node stays cordoned when it is listed in RECOVERY_UNCORDON_DENYLIST or
+# still carries a node.kubernetes.io/unreachable taint.
+restore_recovered_worker_scheduling_after_deadlock() {
+  # The node list is built with jq, so check for it the same way the sibling
+  # recovery functions do instead of silently producing an empty list.
+  require_cmd jq
+  local rows node unschedulable ready worker taints
+  rows="$(kubectl get nodes -o json \
+    | jq -r '.items[]
+      | [.metadata.name,
+         (.spec.unschedulable // false),
+         ([.status.conditions[]? | select(.type=="Ready") | .status][0] // "Unknown"),
+         (.metadata.labels["node-role.kubernetes.io/worker"] // ""),
+         ((.spec.taints // []) | map(.key + ":" + .effect) | join(","))]
+      | @tsv' || true)"
+
+  # Tab is IFS whitespace, so `read` collapses consecutive tabs: an empty
+  # worker label makes the taints column land in ${worker}. Such a row still
+  # fails the == "true" test below, so the node is skipped either way.
+  while IFS=$'\t' read -r node unschedulable ready worker taints; do
+    [[ -n "${node}" ]] || continue
+    [[ "${unschedulable}" == "true" ]] || continue
+    [[ "${ready}" == "True" ]] || continue
+    [[ "${worker}" == "true" ]] || continue
+    if csv_has_value "${RECOVERY_UNCORDON_DENYLIST}" "${node}"; then
+      warn "Leaving recovered worker ${node} cordoned because it is in RECOVERY_UNCORDON_DENYLIST."
+      continue
+    fi
+    if [[ "${taints}" == *"node.kubernetes.io/unreachable:"* ]]; then
+      warn "Leaving worker ${node} cordoned because it still has an unreachable taint."
+      continue
+    fi
+    warn "Restoring scheduling on recovered Ready worker ${node}."
+    run kubectl uncordon "${node}"
+  done <<< "${rows}"
+  mark_checkpoint longhorn_unlock_worker_scheduling_restored
+}
+
+# Delete pods in phase Failed in every namespace except longhorn-system,
+# postgres, vault, gitea and harbor. Deletions do not wait for completion.
+delete_failed_nonstorage_pods_for_headroom() {
+  local rows namespace name
+  rows="$(kubectl get pods -A --field-selector=status.phase=Failed \
+    -o jsonpath='{range .items[*]}{.metadata.namespace}{" "}{.metadata.name}{"\n"}{end}' 2>/dev/null || true)"
+  while read -r namespace name; do
+    [[ -z "${namespace}" || -z "${name}" ]] && continue
+    case "${namespace}" in
+      longhorn-system|postgres|vault|gitea|harbor)
+        continue
+        ;;
+    esac
+    run kubectl -n "${namespace}" delete pod "${name}" --ignore-not-found --wait=false
+  done <<< "${rows}"
+}
+
+# Clear stale pods in the postgres, vault, gitea and harbor namespaces in two
+# passes:
+#   1. delete pods in phase Failed or Unknown that have at least one owner
+#      reference (pods without an owner are only reported);
+#   2. force-delete pods that are already being deleted, are Failed or Unknown,
+#      have no finalizers, have an owner, and whose reported containers are all
+#      terminated.
+restart_stale_critical_pods_after_longhorn_unlock() {
+  require_cmd jq
+  local pods namespace name phase owners
+  pods="$(kubectl get pods -A -o json \
+    | jq -r '.items[]
+      | select(.metadata.namespace | test("^(postgres|vault|gitea|harbor)$"))
+      | select(.status.phase == "Failed" or .status.phase == "Unknown")
+      | [.metadata.namespace, .metadata.name, .status.phase, ((.metadata.ownerReferences // []) | length)] | @tsv' || true)"
+  while IFS=$'\t' read -r namespace name phase owners; do
+    [[ -z "${namespace}" || -z "${name}" ]] && continue
+    if [[ "${owners}" == "0" ]]; then
+      warn "Skipping stale critical pod without controller owner: ${namespace}/${name} phase=${phase}"
+      continue
+    fi
+    warn "Deleting stale controller-owned critical pod ${namespace}/${name} phase=${phase} so its controller can recreate it."
+    run kubectl -n "${namespace}" delete pod "${name}" --ignore-not-found --wait=false
+  done <<< "${pods}"
+
+  pods="$(kubectl get pods -A -o json \
+    | jq -r '.items[]
+      | select(.metadata.namespace | test("^(postgres|vault|gitea|harbor)$"))
+      | select(.metadata.deletionTimestamp != null)
+      | select(.status.phase == "Failed" or .status.phase == "Unknown")
+      | select(((.metadata.finalizers // []) | length) == 0)
+      | select(((.metadata.ownerReferences // []) | length) > 0)
+      | select(([(.status.containerStatuses[]? | select(.state.terminated != null))] | length) == ((.status.containerStatuses // []) | length))
+      | [.metadata.namespace, .metadata.name, .status.phase] | @tsv' || true)"
+  while IFS=$'\t' read -r namespace name phase; do
+    [[ -z "${namespace}" || -z "${name}" ]] && continue
+    warn "Force-deleting stale terminating critical pod object ${namespace}/${name} phase=${phase}; containers are already terminated and no finalizers are set."
+    run kubectl -n "${namespace}" delete pod "${name}" --ignore-not-found --wait=false --force --grace-period=0
+  done <<< "${pods}"
+}
+
+wait_for_postgres_dependency_ready() { + local timeout_seconds="${1:-240}" + local start now endpoints + if [[ "${EXECUTE}" -eq 0 ]]; then + log "DRY-RUN: wait for postgres/postgres-service endpoints and pg_isready" + return 0 + fi + + start="$(date +%s)" + while true; do + endpoints="$(kubectl -n postgres get endpoints postgres-service -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null || true)" + if [[ -n "${endpoints//[[:space:]]/}" ]] \ + && kubectl -n postgres exec postgres-0 -c postgres -- sh -ceu 'pg_isready -h 127.0.0.1 -p 5432 >/dev/null' >/dev/null 2>&1; then + log "postgres-dependency=ready endpoints=${endpoints}" + return 0 + fi + + now="$(date +%s)" + if (( now - start >= timeout_seconds )); then + warn "Timed out waiting for Postgres to become ready for Harbor."
+ return 1 + fi + sleep 5 + done +} +
+# Restart Harbor core/jobservice pods that are still unhealthy once Postgres
+# is reachable again.
+# Returns 0 immediately when harbor_endpoint_is_ready already succeeds.
+# Returns 1 when Postgres does not become ready, when no pod qualifies for a
+# restart, or (EXECUTE=1 only) when the endpoint is still not ready afterwards.
+restart_harbor_after_postgres_recovery() {
+  require_cmd jq
+  local pods name
+
+  if harbor_endpoint_is_ready 1; then
+    log "harbor-postgres-recovery=not-needed"
+    return 0
+  fi
+
+  wait_for_postgres_dependency_ready 240 || return 1
+
+  # Select owned harbor-core-*/harbor-jobservice-* pods whose core or
+  # jobservice container is not ready, is waiting in a crash/image-pull
+  # backoff, or last terminated with reason Error.
+  pods="$(kubectl -n harbor get pods -o json \
+    | jq -r '.items[]
+      | select(.metadata.name | test("^harbor-(core|jobservice)-"))
+      | select(((.metadata.ownerReferences // []) | length) > 0)
+      | select(([
+          .status.containerStatuses[]?
+          | select(.name == "core" or .name == "jobservice")
+          | select((.ready != true)
+              or (((.state.waiting.reason // "") | test("CrashLoopBackOff|ImagePullBackOff|ErrImagePull")))
+              or ((.lastState.terminated.reason // "") == "Error"))
+        ] | length) > 0)
+      | .metadata.name' \
+    | sort -u || true)"
+
+  if [[ -z "${pods}" ]]; then
+    warn "Harbor registry API is unhealthy, but no controller-owned core/jobservice pod needs restart."
+    return 1
+  fi
+
+  while IFS= read -r name; do
+    [[ -z "${name}" ]] && continue
+    warn "Restarting controller-owned Harbor pod ${name} after Postgres recovery."
+    run kubectl -n harbor delete pod "${name}" --ignore-not-found --wait=false
+  done <<< "${pods}"
+
+  if [[ "${EXECUTE}" -eq 1 ]]; then
+    # A rollout that does not finish is only warned about; the endpoint check
+    # below decides the return status.
+    kubectl -n harbor rollout status deployment/harbor-core --timeout=6m || warn "harbor-core did not become Ready after Postgres recovery restart."
+    kubectl -n harbor rollout status deployment/harbor-jobservice --timeout=6m || warn "harbor-jobservice did not become Ready after Postgres recovery restart."
+    harbor_endpoint_is_ready 0 || return 1
+  fi
+  mark_checkpoint longhorn_unlock_harbor_postgres_recovered
+}
+
+# Force-delete ReplicaSet-owned pods outside longhorn-system that have been
+# terminating for at least STALE_TERMINATING_POD_SECONDS, have no finalizers,
+# and report no running and no ready containers. Marks the
+# longhorn_unlock_stale_replicaset_pods_cleared checkpoint afterwards.
+delete_safe_stale_terminating_replicaset_pods_after_deadlock() {
+  require_cmd jq
+  local rows namespace name now
+  now="$(date +%s)"
+  # The age filter runs inside jq with fromdateiso8601, as
+  # terminating_running_pods_for_node does, instead of one GNU-only `date -d`
+  # call per pod. A timestamp jq cannot parse drops only that pod.
+  rows="$(kubectl get pods -A -o json \
+    | jq -r --argjson now "${now}" --argjson min_age "${STALE_TERMINATING_POD_SECONDS}" '.items[]
+      | select(.metadata.namespace != "longhorn-system")
+      | select(.metadata.deletionTimestamp != null)
+      | select(((.metadata.finalizers // []) | length) == 0)
+      | select(((.metadata.ownerReferences // []) | map(select(.kind=="ReplicaSet")) | length) > 0)
+      | (.metadata.deletionTimestamp | try fromdateiso8601 catch null) as $deleted
+      | select($deleted != null)
+      | select(($now - $deleted) >= $min_age)
+      | ([(.status.initContainerStatuses[]?, .status.containerStatuses[]?) | select(.state.running != null)] | length) as $running
+      | ([(.status.initContainerStatuses[]?, .status.containerStatuses[]?) | select(.ready == true)] | length) as $ready
+      | select($running == 0 and $ready == 0)
+      | [.metadata.namespace, .metadata.name] | @tsv' || true)"
+
+  while IFS=$'\t' read -r namespace name; do
+    [[ -n "${namespace}" && -n "${name}" ]] || continue
+    warn "Force-deleting stale terminating ReplicaSet pod ${namespace}/${name}; no containers are running and no finalizers are set."
+    run kubectl -n "${namespace}" delete pod "${name}" --ignore-not-found --wait=false --force --grace-period=0
+  done <<< "${rows}"
+  mark_checkpoint longhorn_unlock_stale_replicaset_pods_cleared
+}
+
+# Delete ReplicaSet-owned pods outside longhorn-system that have a container
+# or init container waiting on an image-pull or container-create/run error.
+# Returns 1 without touching anything when harbor_endpoint_is_ready fails, and
+# 0 when no pod matches.
+restart_image_pull_backoff_pods_after_harbor_recovery() {
+  require_cmd jq
+  local pods namespace name
+  if ! harbor_endpoint_is_ready 1; then
+    warn "Skipping image-pull recovery sweep because Harbor registry API is still unhealthy."
+    return 1
+  fi
+
+  pods="$(kubectl get pods -A -o json \
+    | jq -r '.items[]
+      | select(.metadata.namespace != "longhorn-system")
+      | select(((.metadata.ownerReferences // []) | map(select(.kind=="ReplicaSet")) | length) > 0)
+      | select(([
+          (.status.containerStatuses[]?, .status.initContainerStatuses[]?)
+          | select(((.state.waiting.reason // "") | test("ImagePullBackOff|ErrImagePull|CreateContainerError|RunContainerError|InvalidImageName")))
+        ] | length) > 0)
+      | [.metadata.namespace, .metadata.name] | @tsv' \
+    | sort -u || true)"
+
+  if [[ -z "${pods}" ]]; then
+    log "image-pull-recovery=not-needed"
+    return 0
+  fi
+
+  while IFS=$'\t' read -r namespace name; do
+    [[ -z "${namespace}" || -z "${name}" ]] && continue
+    warn "Restarting controller-owned pod ${namespace}/${name} after Harbor recovery to clear image-pull backoff."
+    run kubectl -n "${namespace}" delete pod "${name}" --ignore-not-found --wait=false
+  done <<< "${pods}"
+  mark_checkpoint longhorn_unlock_image_pull_backoff_restarted
+}
+
+resume_deadlock_automation_after_core_recovery() { + local gitea_endpoints + if ! harbor_endpoint_is_ready 1; then + warn "Keeping Flux reconcilers stopped because Harbor registry API is not healthy." + return 1 + fi + gitea_endpoints="$(kubectl -n gitea get endpoints gitea -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null || true)" + if [[ -z "${gitea_endpoints//[[:space:]]/}" ]]; then + warn "Keeping Flux reconcilers stopped because Gitea has no ready endpoints."
+ return 1 + fi + + patch_flux_suspend_all false + if kubectl -n flux-system get deployment kustomize-controller >/dev/null 2>&1; then + run kubectl -n flux-system scale deployment kustomize-controller --replicas=1 + fi + if kubectl -n flux-system get deployment helm-controller >/dev/null 2>&1; then + run kubectl -n flux-system scale deployment helm-controller --replicas=1 + fi + trigger_flux_reconcile_all || true + mark_checkpoint longhorn_unlock_automation_resumed +} + +restart_longhorn_image_pull_backoff_pods() { + require_cmd jq + local pods namespace name + pods="$(kubectl -n longhorn-system get pods -o json \ + | jq -r '.items[] + | select(([.status.containerStatuses[]?.state.waiting.reason] | map(select(. == "ImagePullBackOff" or . == "ErrImagePull")) | length) > 0) + | select(.metadata.name | test("^(longhorn-manager-|longhorn-driver-deployer-|longhorn-ui-)")) + | [.metadata.namespace, .metadata.name] | @tsv' || true)" + while IFS=$'\t' read -r namespace name; do + [[ -z "${namespace}" || -z "${name}" ]] && continue + run kubectl -n "${namespace}" delete pod "${name}" --ignore-not-found --wait=false + done <<< "${pods}" +} + +terminating_running_pods_for_node() { + local node="$1" + local now + now="$(date +%s)" + kubectl get pods -A -o json \ + | jq -r --arg node "${node}" --argjson now "${now}" --argjson min_age "${STALE_TERMINATING_POD_SECONDS}" ' + .items[] + | select(.spec.nodeName == $node) + | select(.metadata.deletionTimestamp != null) + | select(((.metadata.finalizers // []) | length) == 0) + | (.metadata.deletionTimestamp | fromdateiso8601) as $deleted + | select(($now - $deleted) >= $min_age) + | ([(.status.initContainerStatuses[]?, .status.containerStatuses[]?) 
| select(.state.running != null)] | length) as $running
+      | select($running > 0)
+      | [.metadata.namespace, .metadata.name, ($running|tostring)] | @tsv' 2>/dev/null || true
+}
+
+stuck_terminating_runtime_cleanup_nodes() {  # Print the unique node names hosting stale terminating pods that still have running containers.
+  local now
+  now="$(date +%s)"  # epoch seconds, compared against deletionTimestamp inside jq
+  kubectl get pods -A -o json \
+    | jq -r --argjson now "${now}" --argjson min_age "${STALE_TERMINATING_POD_SECONDS}" '
+      .items[]
+      | select(.spec.nodeName != null)
+      | select(.metadata.deletionTimestamp != null)
+      | select(((.metadata.finalizers // []) | length) == 0)
+      | (.metadata.deletionTimestamp | fromdateiso8601) as $deleted
+      | select(($now - $deleted) >= $min_age)
+      | select(([(.status.initContainerStatuses[]?, .status.containerStatuses[]?) | select(.state.running != null)] | length) > 0)
+      | .spec.nodeName' 2>/dev/null \
+    | sort -u
+}
+
+wait_for_node_ready() {  # Poll every 5s until node $1 reports Ready=True or $2 seconds elapse (returns 1 on timeout).
+  local node="$1"
+  local timeout_seconds="$2"
+  local start now ready
+  if [[ "${EXECUTE}" -eq 0 ]]; then  # dry-run: log the intent and report success without polling
+    log "DRY-RUN: wait for node ${node} Ready"
+    return 0
+  fi
+  start="$(date +%s)"
+  while true; do
+    ready="$(kubectl get node "${node}" -o jsonpath='{range .status.conditions[?(@.type=="Ready")]}{.status}{end}' 2>/dev/null || true)"  # empty when kubectl fails
+    if [[ "${ready}" == "True" ]]; then
+      log "node-ready=${node}"
+      return 0
+    fi
+    now="$(date +%s)"
+    if (( now - start >= timeout_seconds )); then
+      warn "Timed out waiting for node ${node} to return Ready after runtime restart."
+ return 1 + fi + sleep 5 + done +} + +wait_for_terminating_running_pods_to_clear() { + local node="$1" + local timeout_seconds="$2" + local start now pods + if [[ "${EXECUTE}" -eq 0 ]]; then + log "DRY-RUN: wait for stuck terminating running pods to clear on ${node}" + return 0 + fi + start="$(date +%s)" + while true; do + pods="$(terminating_running_pods_for_node "${node}")" + if [[ -z "${pods}" ]]; then + log "stuck-terminating-runtime-pods-cleared=${node}" + return 0 + fi + now="$(date +%s)" + if (( now - start >= timeout_seconds )); then + warn "Stuck terminating pods with running containers remain on ${node}:" + while IFS= read -r line; do + [[ -n "${line}" ]] || continue + warn " ${line}" + done <<< "${pods}" + return 1 + fi + sleep 5 + done +} + +schedule_host_service_restart_via_helper() { + local node="$1" + local service_name="$2" + local delay_seconds="$3" + local unit_name host_command + unit_name="ananke-restart-${service_name}-$(date +%s)" + host_command="/usr/bin/systemd-run --unit ${unit_name} --on-active=${delay_seconds}s /bin/sh -lc '/usr/bin/systemctl restart ${service_name} || /bin/systemctl restart ${service_name}'" + if run_host_command_via_prewarm_pod "${node}" "${host_command}"; then + return 0 + fi + run_host_command_via_helper "${node}" "restart-${node}-${service_name}" 120 "${host_command}" +} + +recover_stuck_terminating_node_runtime_pods_after_deadlock() { + require_cmd jq + if [[ "${RECOVERY_NODE_RUNTIME_RESTART_ENABLED}" != "1" && "${RECOVERY_NODE_RUNTIME_RESTART_ENABLED}" != "true" ]]; then + warn "Skipping node runtime cleanup because RECOVERY_NODE_RUNTIME_RESTART_ENABLED=${RECOVERY_NODE_RUNTIME_RESTART_ENABLED}." 
+    return 0
+  fi
+
+  local nodes node ready worker control_plane restarted max_nodes restarted_nodes
+  nodes="$(stuck_terminating_runtime_cleanup_nodes || true)"  # newline-separated candidate node names
+  if [[ -z "${nodes}" ]]; then
+    log "node-runtime-cleanup=not-needed"
+    return 0
+  fi
+
+  max_nodes="${RECOVERY_NODE_RUNTIME_RESTART_MAX_NODES}"
+  [[ "${max_nodes}" =~ ^[0-9]+$ ]] || max_nodes=1  # fall back to one node when the setting is not all digits
+  restarted=0
+  restarted_nodes=""
+  while IFS= read -r node; do
+    [[ -n "${node}" ]] || continue
+    if (( restarted >= max_nodes )); then  # per-pass cap on how many nodes get restarted
+      warn "Node runtime cleanup limit reached (${max_nodes}); leaving remaining stuck nodes for a later Ananke pass."
+      break
+    fi
+    if csv_has_value "${RECOVERY_NODE_RUNTIME_RESTART_DENYLIST}" "${node}"; then
+      warn "Skipping node runtime cleanup on denylisted node ${node}."
+      continue
+    fi
+    ready="$(kubectl get node "${node}" -o jsonpath='{range .status.conditions[?(@.type=="Ready")]}{.status}{end}' 2>/dev/null || true)"
+    worker="$(kubectl get node "${node}" -o jsonpath='{.metadata.labels.node-role\.kubernetes\.io/worker}' 2>/dev/null || true)"
+    control_plane="$(kubectl get node "${node}" -o jsonpath='{.metadata.labels.node-role\.kubernetes\.io/control-plane}' 2>/dev/null || true)"
+    if [[ "${ready}" != "True" || "${worker}" != "true" || -n "${control_plane}" ]]; then  # eligible only if Ready, labelled worker=true, and no control-plane label value
+      warn "Skipping node runtime cleanup on ${node}; ready=${ready:-unknown} worker=${worker:-false} control_plane=${control_plane:-false}."
+      continue
+    fi
+
+    warn "Cordoning ${node} and restarting only k3s-agent to clear stale terminating pods. Longhorn data-plane objects are not modified."
+    run kubectl cordon "${node}"  # NOTE(review): no matching uncordon is visible in this function - confirm it happens elsewhere
+    schedule_host_service_restart_via_helper "${node}" k3s-agent 5 || warn "Failed to schedule k3s-agent restart on ${node}."
+    restarted=$((restarted + 1))  # incremented unconditionally once a node passed the eligibility checks
+    restarted_nodes="${restarted_nodes}${node}"$'\n'
+  done <<< "${nodes}"
+
+  if (( restarted == 0 )); then
+    log "node-runtime-cleanup=no-eligible-nodes"
+    return 0
+  fi
+
+  sleep 15  # fixed pause before polling; presumably lets the delayed restart fire - TODO confirm
+  while IFS= read -r node; do
+    [[ -n "${node}" ]] || continue
+    wait_for_node_ready "${node}" "${RECOVERY_NODE_RUNTIME_RESTART_WAIT_SECONDS}" || true  # timeouts are warned about by the helper but do not abort
+    wait_for_terminating_running_pods_to_clear "${node}" "${RECOVERY_NODE_RUNTIME_RESTART_WAIT_SECONDS}" || true
+  done <<< "${restarted_nodes}"
+  mark_checkpoint longhorn_unlock_node_runtime_cleanup
+}
+
+wait_for_longhorn_endpoint() {  # Poll every 5s until Endpoints object $1 in longhorn-system lists an address, or $2 seconds elapse (returns 1).
+  local endpoint="$1"
+  local timeout_seconds="$2"
+  local start now addresses
+  if [[ "${EXECUTE}" -eq 0 ]]; then  # dry-run: log the intent and report success without polling
+    log "DRY-RUN: wait for Longhorn endpoint ${endpoint}"
+    return 0
+  fi
+  start="$(date +%s)"
+  while true; do
+    addresses="$(kubectl -n longhorn-system get endpoints "${endpoint}" -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null || true)"  # empty when kubectl fails
+    if [[ -n "${addresses}" ]]; then
+      log "longhorn-endpoint-${endpoint}=ready"
+      return 0
+    fi
+    now="$(date +%s)"
+    if (( now - start >= timeout_seconds )); then
+      warn "Timed out waiting for Longhorn endpoint ${endpoint}."
+      return 1
+    fi
+    sleep 5
+  done
+}
+
+wait_for_longhorn_control_endpoints() {  # Wait for four Longhorn endpoints in turn (180s each); returns 1 if any of them timed out.
+  local rc=0
+  wait_for_longhorn_endpoint longhorn-admission-webhook 180 || rc=1  # keep going after a failure so every endpoint is checked
+  wait_for_longhorn_endpoint longhorn-conversion-webhook 180 || rc=1
+  wait_for_longhorn_endpoint longhorn-backend 180 || rc=1
+  wait_for_longhorn_endpoint longhorn-recovery-backend 180 || rc=1
+  return "${rc}"
+}
+
+report_longhorn_unlock_status() {  # Print four read-only Longhorn status tables; every kubectl failure is ignored.
+  log "Longhorn manager DaemonSet:"
+  kubectl -n longhorn-system get daemonset longhorn-manager \
+    -o custom-columns=NAME:.metadata.name,DESIRED:.status.desiredNumberScheduled,CURRENT:.status.currentNumberScheduled,READY:.status.numberReady,UPDATED:.status.updatedNumberScheduled,AVAILABLE:.status.numberAvailable || true
+  log "Longhorn manager pods:"
+  kubectl -n longhorn-system get pods -l app=longhorn-manager \
+    -o custom-columns=NAME:.metadata.name,READY:.status.containerStatuses[*].ready,STATUS:.status.phase,WAIT:.status.containerStatuses[*].state.waiting.reason,NODE:.spec.nodeName --sort-by=.spec.nodeName || true
+  log "Longhorn instance managers:"
+  kubectl -n longhorn-system get instancemanagers.longhorn.io \
+    -o custom-columns=NAME:.metadata.name,STATE:.status.currentState,NODE:.spec.nodeID,IMAGE:.spec.image,TYPE:.spec.type --sort-by=.spec.nodeID || true
+  log "Longhorn volume summary:"
+  kubectl -n longhorn-system get volumes.longhorn.io -o json \
+    | jq -r '.items | group_by(.status.state + "/" + (.status.robustness // "none"))[] | [(.[0].status.state + "/" + (.[0].status.robustness // "none")), length] | @tsv' 2>/dev/null \
+    | sort || true
+}
+
 shutdown_namespace_excluded() {
   local ns="$1"
   [[ "${ns}" =~ ${SHUTDOWN_NAMESPACE_EXCLUDES_REGEX} ]]
@@ -1105,11 +1788,29 @@ select_ready_arm64_worker() {
   return 1
 }
 
+discover_harbor_pinned_node() {  # Print the first (sorted, unique) kubernetes.io/hostname nodeSelector value in the harbor HelmRelease values.
+  kubectl -n harbor get helmrelease harbor \
+    -o jsonpath='{range .spec.values..nodeSelector}{.kubernetes\.io/hostname}{"\n"}{end}' 2>/dev/null \
+    | sed '/^[[:space:]]*$/d' \
+    | sort -u \
+    | head -n 1
+}
+
ensure_harbor_target_node() { if node_is_ready "${HARBOR_TARGET_NODE}"; then return 0 fi - local fallback + local fallback pinned + pinned="$(discover_harbor_pinned_node || true)" + if node_is_ready "${pinned}"; then + if [[ -n "${HARBOR_TARGET_NODE}" ]]; then + warn "Configured harbor target node '${HARBOR_TARGET_NODE}' is not Ready; using live Harbor pin '${pinned}' instead." + else + log "harbor-target-node discovered from live HelmRelease: ${pinned}" + fi + HARBOR_TARGET_NODE="${pinned}" + return 0 + fi fallback="$(select_ready_arm64_worker || true)" [[ -n "${fallback}" ]] || die "No Ready arm64 worker available for Harbor bootstrap target." if [[ -n "${HARBOR_TARGET_NODE}" ]]; then @@ -1209,21 +1910,54 @@ check_harbor_stack() { wait_for_rollout harbor deployment harbor-registry 10m } -check_harbor_endpoint() { +harbor_registry_response_valid() { + local code="$1" + local headers_file="$2" + local body_file="$3" + local content_type + case "${code}" in + 200|401) ;; + *) return 1 ;; + esac + content_type="$(awk 'BEGIN{IGNORECASE=1} /^content-type:/ {print tolower($0); exit}' "${headers_file}" 2>/dev/null || true)" + if [[ "${content_type}" == *"text/html"* ]]; then + return 1 + fi + if grep -Eiq '^docker-distribution-api-version:' "${headers_file}" 2>/dev/null; then + return 0 + fi + if [[ "${code}" == "401" ]] && grep -Eiq 'unauthorized|authentication required' "${body_file}" 2>/dev/null; then + return 0 + fi + return 1 +} + +harbor_endpoint_is_ready() { + local quiet="${1:-0}" if [[ "${EXECUTE}" -eq 0 ]]; then log "DRY-RUN: curl -ksS -o /dev/null -w '%{http_code}' https://registry.bstein.dev/v2/" return 0 fi - local code - code="$(curl -ksS -o /dev/null -w '%{http_code}' https://registry.bstein.dev/v2/ || true)" - case "${code}" in - 200|401) - log "harbor-endpoint=http-${code}" - ;; - *) - die "Harbor endpoint check failed with HTTP ${code:-unknown}" - ;; - esac + local headers_file body_file code rc content_type + headers_file="$(mktemp)" + 
body_file="$(mktemp)" + rc=0 + code="$(curl -ksS --max-time "${STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS}" -D "${headers_file}" -o "${body_file}" -w '%{http_code}' https://registry.bstein.dev/v2/ || rc=$?)" + content_type="$(awk 'BEGIN{IGNORECASE=1} /^content-type:/ {print tolower($0); exit}' "${headers_file}" 2>/dev/null || true)" + if (( rc == 0 )) && harbor_registry_response_valid "${code}" "${headers_file}" "${body_file}"; then + [[ "${quiet}" == "1" ]] || log "harbor-endpoint=http-${code} registry-api=true" + rm -f "${headers_file}" "${body_file}" + return 0 + fi + [[ "${quiet}" == "1" ]] || warn "Harbor registry API check failed: http=${code:-unknown} content-type=${content_type:-unknown} rc=${rc}" + rm -f "${headers_file}" "${body_file}" + return 1 +} + +check_harbor_endpoint() { + if ! harbor_endpoint_is_ready 0; then + die "Harbor endpoint is not serving the registry API." + fi } wait_for_pod_phase() { @@ -1251,9 +1985,7 @@ wait_for_pod_phase() { harbor_is_ready() { kubectl -n harbor get deploy harbor-core harbor-jobservice harbor-portal harbor-registry >/dev/null 2>&1 || return 1 - local code - code="$(curl -ksS -o /dev/null -w '%{http_code}' https://registry.bstein.dev/v2/ || true)" - [[ "${code}" == "200" || "${code}" == "401" ]] + harbor_endpoint_is_ready 1 } run_harbor_pull_canary() { @@ -1355,6 +2087,71 @@ POD timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" delete pod "${pod}" --ignore-not-found --wait=false >/dev/null 2>&1 || true } +hostroot_pod_for_node() { + local node="$1" + kubectl -n "${NODE_HELPER_NAMESPACE}" get pods \ + -l app=node-image-sweeper \ + --field-selector "spec.nodeName=${node},status.phase=Running" \ + -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true +} + +run_hostroot_pod_script() { + local node="$1" + local purpose="$2" + local timeout_seconds="$3" + local script_content="$4" + local pod encoded_script + pod="$(hostroot_pod_for_node "${node}")" + [[ -n "${pod}" ]] || return 1 + encoded_script="$(printf '%s' 
"${script_content}" | base64 -w0)" + if [[ "${EXECUTE}" -eq 0 ]]; then + log "DRY-RUN: hostroot pod ${pod} on ${node} for ${purpose}" + return 0 + fi + timeout "${timeout_seconds}" kubectl -n "${NODE_HELPER_NAMESPACE}" exec "${pod}" -- /bin/sh -ceu "printf '%s' '${encoded_script}' | base64 -d | chroot /host /bin/sh -seu" +} + +run_hostroot_pod_bundle_import() { + local node="$1" + local timeout_seconds="$2" + local images_text="$3" + local pod refresh_script verify_script encoded_script + pod="$(hostroot_pod_for_node "${node}")" + [[ -n "${pod}" ]] || return 1 + if [[ "${EXECUTE}" -eq 0 ]]; then + log "DRY-RUN: stream ${HARBOR_BUNDLE_FILE} through hostroot pod ${pod} on ${node}" + return 0 + fi + if [[ "${REFRESH_BOOTSTRAP_IMAGE_ALIASES}" == "1" ]]; then + refresh_script=$(cat <