From bb07f1598fc179d4620aae59cc905672c9252446 Mon Sep 17 00:00:00 2001 From: jenkins Date: Thu, 18 Jun 2026 18:42:37 -0300 Subject: [PATCH] recovery(ananke): keep optional flux blocked during thaw --- scripts/cluster_power_recovery.sh | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/scripts/cluster_power_recovery.sh b/scripts/cluster_power_recovery.sh index 0b9bbd6c..30078f84 100755 --- a/scripts/cluster_power_recovery.sh +++ b/scripts/cluster_power_recovery.sh @@ -137,9 +137,10 @@ STARTUP_IGNORE_WORKLOADS_REGEX="${STARTUP_IGNORE_WORKLOADS_REGEX:-}" STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX="${STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX:-^(kube-system|kube-public|kube-node-lease|flux-system)$}" STARTUP_OPTIONAL_KUSTOMIZATIONS="${STARTUP_OPTIONAL_KUSTOMIZATIONS:-}" RECOVERY_FLUX_OPTIONAL_KUSTOMIZATIONS="${RECOVERY_FLUX_OPTIONAL_KUSTOMIZATIONS:-ai-llm,descheduler,finance,game-stream,gitops-ui,health,jellyfin,jenkins,longhorn-ui,mailu,nextcloud,nextcloud-mail-sync,outline,planka,quality,resource-guardrails,typhon,vaultwarden,veles,wallet-monero-temp,xmr-miner}" -RECOVERY_FLUX_CRITICAL_KUSTOMIZATIONS="${RECOVERY_FLUX_CRITICAL_KUSTOMIZATIONS:-flux-system,core,helm,cert-manager,longhorn-adopt,longhorn,metallb,traefik,vault-csi,vault-injector,vault,postgres,harbor,gitea,keycloak,oauth2-proxy,openldap,openclaw,monitoring,bstein-dev-home,bstein-dev-home-migrations,comms,crypto,logging,maintenance,monerod,sui-metrics}" +RECOVERY_FLUX_CRITICAL_KUSTOMIZATIONS="${RECOVERY_FLUX_CRITICAL_KUSTOMIZATIONS:-core,helm,cert-manager,longhorn-adopt,longhorn,metallb,traefik,vault-csi,vault-injector,vault,postgres,harbor,gitea,keycloak,oauth2-proxy,openldap,openclaw,monitoring,bstein-dev-home,bstein-dev-home-migrations,comms,crypto,logging,maintenance,monerod,sui-metrics}" RECOVERY_FLUX_OPTIONAL_SNAPSHOT_FILE="${RECOVERY_FLUX_OPTIONAL_SNAPSHOT_FILE:-${HOME}/${STATE_SUBDIR:-.local/share/ananke}/longhorn_unlock_optional_flux.tsv}" RECOVERY_FLUX_RESTART_KUSTOMIZE_CONTROLLER="${RECOVERY_FLUX_RESTART_KUSTOMIZE_CONTROLLER:-1}" +RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION="${RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION:-1}" STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS="${STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS:-10}" STARTUP_SERVICE_CHECKLIST="${STARTUP_SERVICE_CHECKLIST:-}" STARTUP_INCLUDE_INGRESS_CHECKS="${STARTUP_INCLUDE_INGRESS_CHECKS:-1}" @@ -1444,6 +1445,9 @@ resume_deadlock_automation_after_core_recovery() { fi patch_flux_suspend_all false + if [[ "${RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION}" == "1" || "${RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION}" == "true" ]]; then + patch_kustomization_suspend flux-system true + fi patch_recovery_optional_flux_suspend true if kubectl -n flux-system get deployment kustomize-controller >/dev/null 2>&1; then run kubectl -n flux-system scale deployment kustomize-controller --replicas=1 @@ -1451,10 +1455,14 @@ resume_deadlock_automation_after_core_recovery() { if kubectl -n flux-system get deployment helm-controller >/dev/null 2>&1; then run kubectl -n flux-system scale deployment helm-controller --replicas=1 fi - restart_kustomize_controller_for_critical_thaw if command -v flux >/dev/null 2>&1; then run flux reconcile source git flux-system -n flux-system --timeout=3m || true fi + if [[ "${RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION}" == "1" || "${RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION}" == "true" ]]; then + patch_kustomization_suspend flux-system true + fi + patch_recovery_optional_flux_suspend true + restart_kustomize_controller_for_critical_thaw annotate_flux_kustomizations "${RECOVERY_FLUX_CRITICAL_KUSTOMIZATIONS}" || true mark_checkpoint longhorn_unlock_automation_resumed }