diff --git a/scripts/cluster_power_recovery.sh b/scripts/cluster_power_recovery.sh index de4925ac..87381d0f 100755 --- a/scripts/cluster_power_recovery.sh +++ b/scripts/cluster_power_recovery.sh @@ -146,6 +146,7 @@ RECOVERY_FLUX_APPLY_BOOTSTRAP_KUSTOMIZATION="${RECOVERY_FLUX_APPLY_BOOTSTRAP_KUS RECOVERY_FLUX_ROOT_APPLY_TIMEOUT="${RECOVERY_FLUX_ROOT_APPLY_TIMEOUT:-15m}" RECOVERY_FLUX_SUSPEND_VERIFY_ATTEMPTS="${RECOVERY_FLUX_SUSPEND_VERIFY_ATTEMPTS:-6}" RECOVERY_FLUX_SUSPEND_VERIFY_SLEEP_SECONDS="${RECOVERY_FLUX_SUSPEND_VERIFY_SLEEP_SECONDS:-10}" +RECOVERY_FLUX_FINAL_RESTART_KUSTOMIZE_CONTROLLER="${RECOVERY_FLUX_FINAL_RESTART_KUSTOMIZE_CONTROLLER:-0}" STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS="${STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS:-10}" STARTUP_SERVICE_CHECKLIST="${STARTUP_SERVICE_CHECKLIST:-}" STARTUP_INCLUDE_INGRESS_CHECKS="${STARTUP_INCLUDE_INGRESS_CHECKS:-1}" @@ -1095,9 +1096,13 @@ force_recovery_flux_suspend_with_controller_stop() { fi patch_recovery_optional_flux_suspend_without_snapshot true - run kubectl -n flux-system scale deployment kustomize-controller --replicas=1 - kubectl -n flux-system rollout status deployment/kustomize-controller --timeout=2m || warn "kustomize-controller did not become Ready after final Flux suspend reassertion." - sleep "${RECOVERY_FLUX_SUSPEND_VERIFY_SLEEP_SECONDS}" + if [[ "${RECOVERY_FLUX_FINAL_RESTART_KUSTOMIZE_CONTROLLER}" == "1" || "${RECOVERY_FLUX_FINAL_RESTART_KUSTOMIZE_CONTROLLER}" == "true" ]]; then + run kubectl -n flux-system scale deployment kustomize-controller --replicas=1 + kubectl -n flux-system rollout status deployment/kustomize-controller --timeout=2m || warn "kustomize-controller did not become Ready after final Flux suspend reassertion." + sleep "${RECOVERY_FLUX_SUSPEND_VERIFY_SLEEP_SECONDS}" + else + warn "Leaving kustomize-controller stopped to preserve the recovery Flux hold." + fi local unsuspended unsuspended="$(recovery_flux_unsuspended_list | paste -sd, -)"