recovery(ananke): thaw critical flux without root apply
This commit is contained in:
parent
6d3c59ad8d
commit
ea333f6648
@ -138,10 +138,11 @@ STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX="${STARTUP_WORKLOAD_NAMESPACE_EXCLUDES
|
|||||||
STARTUP_OPTIONAL_KUSTOMIZATIONS="${STARTUP_OPTIONAL_KUSTOMIZATIONS:-}"
|
STARTUP_OPTIONAL_KUSTOMIZATIONS="${STARTUP_OPTIONAL_KUSTOMIZATIONS:-}"
|
||||||
RECOVERY_FLUX_OPTIONAL_KUSTOMIZATIONS="${RECOVERY_FLUX_OPTIONAL_KUSTOMIZATIONS:-ai-llm,bstein-dev-home-migrations,descheduler,finance,game-stream,gitops-ui,health,jellyfin,jenkins,longhorn-ui,mailu,nextcloud,nextcloud-mail-sync,outline,planka,quality,resource-guardrails,typhon,vaultwarden,veles,wallet-monero-temp,xmr-miner}"
|
RECOVERY_FLUX_OPTIONAL_KUSTOMIZATIONS="${RECOVERY_FLUX_OPTIONAL_KUSTOMIZATIONS:-ai-llm,bstein-dev-home-migrations,descheduler,finance,game-stream,gitops-ui,health,jellyfin,jenkins,longhorn-ui,mailu,nextcloud,nextcloud-mail-sync,outline,planka,quality,resource-guardrails,typhon,vaultwarden,veles,wallet-monero-temp,xmr-miner}"
|
||||||
RECOVERY_FLUX_CRITICAL_KUSTOMIZATIONS="${RECOVERY_FLUX_CRITICAL_KUSTOMIZATIONS:-core,helm,cert-manager,longhorn-adopt,longhorn,metallb,traefik,vault-csi,vault-injector,vault,postgres,harbor,gitea,keycloak,oauth2-proxy,openldap,openclaw,monitoring,bstein-dev-home,comms,crypto,logging,maintenance,monerod,sui-metrics}"
|
RECOVERY_FLUX_CRITICAL_KUSTOMIZATIONS="${RECOVERY_FLUX_CRITICAL_KUSTOMIZATIONS:-core,helm,cert-manager,longhorn-adopt,longhorn,metallb,traefik,vault-csi,vault-injector,vault,postgres,harbor,gitea,keycloak,oauth2-proxy,openldap,openclaw,monitoring,bstein-dev-home,comms,crypto,logging,maintenance,monerod,sui-metrics}"
|
||||||
|
RECOVERY_FLUX_CRITICAL_HELMRELEASES="${RECOVERY_FLUX_CRITICAL_HELMRELEASES:-cert-manager/cert-manager,comms/othrys-element,comms/othrys-synapse,harbor/harbor,kube-system/secrets-store-csi-driver,logging/data-prepper,logging/fluent-bit,logging/opensearch,logging/opensearch-dashboards,logging/otel-collector,longhorn-system/longhorn,metallb-system/metallb,monitoring/alertmanager,monitoring/grafana,monitoring/kube-state-metrics,monitoring/node-exporter,monitoring/victoria-metrics-single,vault/vault-injector}"
|
||||||
RECOVERY_FLUX_OPTIONAL_SNAPSHOT_FILE="${RECOVERY_FLUX_OPTIONAL_SNAPSHOT_FILE:-${HOME}/${STATE_SUBDIR:-.local/share/ananke}/longhorn_unlock_optional_flux.tsv}"
|
RECOVERY_FLUX_OPTIONAL_SNAPSHOT_FILE="${RECOVERY_FLUX_OPTIONAL_SNAPSHOT_FILE:-${HOME}/${STATE_SUBDIR:-.local/share/ananke}/longhorn_unlock_optional_flux.tsv}"
|
||||||
RECOVERY_FLUX_RESTART_KUSTOMIZE_CONTROLLER="${RECOVERY_FLUX_RESTART_KUSTOMIZE_CONTROLLER:-1}"
|
RECOVERY_FLUX_RESTART_KUSTOMIZE_CONTROLLER="${RECOVERY_FLUX_RESTART_KUSTOMIZE_CONTROLLER:-1}"
|
||||||
RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION="${RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION:-1}"
|
RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION="${RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION:-1}"
|
||||||
RECOVERY_FLUX_APPLY_BOOTSTRAP_KUSTOMIZATION="${RECOVERY_FLUX_APPLY_BOOTSTRAP_KUSTOMIZATION:-1}"
|
RECOVERY_FLUX_APPLY_BOOTSTRAP_KUSTOMIZATION="${RECOVERY_FLUX_APPLY_BOOTSTRAP_KUSTOMIZATION:-0}"
|
||||||
RECOVERY_FLUX_ROOT_APPLY_TIMEOUT="${RECOVERY_FLUX_ROOT_APPLY_TIMEOUT:-15m}"
|
RECOVERY_FLUX_ROOT_APPLY_TIMEOUT="${RECOVERY_FLUX_ROOT_APPLY_TIMEOUT:-15m}"
|
||||||
RECOVERY_FLUX_SUSPEND_VERIFY_ATTEMPTS="${RECOVERY_FLUX_SUSPEND_VERIFY_ATTEMPTS:-6}"
|
RECOVERY_FLUX_SUSPEND_VERIFY_ATTEMPTS="${RECOVERY_FLUX_SUSPEND_VERIFY_ATTEMPTS:-6}"
|
||||||
RECOVERY_FLUX_SUSPEND_VERIFY_SLEEP_SECONDS="${RECOVERY_FLUX_SUSPEND_VERIFY_SLEEP_SECONDS:-10}"
|
RECOVERY_FLUX_SUSPEND_VERIFY_SLEEP_SECONDS="${RECOVERY_FLUX_SUSPEND_VERIFY_SLEEP_SECONDS:-10}"
|
||||||
@ -1028,6 +1029,17 @@ patch_recovery_critical_flux_suspend() {
|
|||||||
done < <(csv_each "${RECOVERY_FLUX_CRITICAL_KUSTOMIZATIONS}")
|
done < <(csv_each "${RECOVERY_FLUX_CRITICAL_KUSTOMIZATIONS}")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Apply a suspend value to every HelmRelease listed in
# RECOVERY_FLUX_CRITICAL_HELMRELEASES.
#
# Globals:
#   RECOVERY_FLUX_CRITICAL_HELMRELEASES (read) - comma-separated list of
#     "namespace/name" references, split by csv_each and consumed here one
#     entry per line.
# Arguments:
#   $1 - suspend value forwarded unchanged to patch_helmrelease_suspend
#        (the critical-thaw caller passes "false").
#
# Entries without a "/" separator, or with an empty namespace or name, are
# skipped silently.
patch_recovery_critical_helmrelease_suspend() {
  local value="$1"
  local ref namespace name
  while IFS= read -r ref; do
    # Detect a missing separator explicitly. Comparing namespace against name
    # would also discard valid references whose namespace equals the release
    # name (e.g. cert-manager/cert-manager, harbor/harbor).
    [[ "${ref}" == */* ]] || continue
    namespace="${ref%%/*}"
    name="${ref##*/}"
    [[ -n "${namespace}" && -n "${name}" ]] || continue
    patch_helmrelease_suspend "${namespace}" "${name}" "${value}"
  done < <(csv_each "${RECOVERY_FLUX_CRITICAL_HELMRELEASES}")
}
|
||||||
|
|
||||||
recovery_flux_unsuspended_list() {
|
recovery_flux_unsuspended_list() {
|
||||||
local names=()
|
local names=()
|
||||||
local name suspend
|
local name suspend
|
||||||
@ -1153,26 +1165,32 @@ restart_kustomize_controller_for_critical_thaw() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
prepare_recovery_flux_root_apply_window() {
|
prepare_recovery_flux_critical_thaw() {
|
||||||
[[ "${EXECUTE}" -eq 1 ]] || return 0
|
[[ "${EXECUTE}" -eq 1 ]] || return 0
|
||||||
|
|
||||||
if kubectl -n flux-system get deployment kustomize-controller >/dev/null 2>&1; then
|
if kubectl -n flux-system get deployment kustomize-controller >/dev/null 2>&1; then
|
||||||
warn "Stopping kustomize-controller to create a quiet Flux root-apply window."
|
warn "Stopping kustomize-controller to create a quiet Flux critical-thaw window."
|
||||||
run kubectl -n flux-system scale deployment kustomize-controller --replicas=0
|
run kubectl -n flux-system scale deployment kustomize-controller --replicas=0
|
||||||
wait_for_kustomize_controller_scaled_down || true
|
wait_for_kustomize_controller_scaled_down || true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
patch_recovery_optional_flux_suspend true
|
patch_recovery_optional_flux_suspend true
|
||||||
patch_flux_suspend_all true
|
patch_flux_suspend_all true
|
||||||
patch_kustomization_suspend flux-system false
|
|
||||||
|
|
||||||
if command -v flux >/dev/null 2>&1; then
|
if command -v flux >/dev/null 2>&1; then
|
||||||
run flux reconcile source git flux-system -n flux-system --timeout=3m || true
|
run flux reconcile source git flux-system -n flux-system --timeout=3m || true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
patch_recovery_critical_flux_suspend false
|
||||||
|
patch_recovery_critical_helmrelease_suspend false
|
||||||
|
if [[ "${RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION}" == "1" || "${RECOVERY_FLUX_SUSPEND_BOOTSTRAP_KUSTOMIZATION}" == "true" ]]; then
|
||||||
|
patch_kustomization_suspend flux-system true
|
||||||
|
fi
|
||||||
|
patch_recovery_optional_flux_suspend_without_snapshot true
|
||||||
|
|
||||||
if kubectl -n flux-system get deployment kustomize-controller >/dev/null 2>&1; then
|
if kubectl -n flux-system get deployment kustomize-controller >/dev/null 2>&1; then
|
||||||
run kubectl -n flux-system scale deployment kustomize-controller --replicas=1
|
run kubectl -n flux-system scale deployment kustomize-controller --replicas=1
|
||||||
kubectl -n flux-system rollout status deployment/kustomize-controller --timeout=2m || warn "kustomize-controller did not become Ready for Flux root apply."
|
kubectl -n flux-system rollout status deployment/kustomize-controller --timeout=2m || warn "kustomize-controller did not become Ready for Flux critical thaw."
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1582,12 +1600,13 @@ resume_deadlock_automation_after_core_recovery() {
|
|||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
prepare_recovery_flux_root_apply_window
|
prepare_recovery_flux_critical_thaw
|
||||||
if kubectl -n flux-system get deployment helm-controller >/dev/null 2>&1; then
|
if kubectl -n flux-system get deployment helm-controller >/dev/null 2>&1; then
|
||||||
run kubectl -n flux-system scale deployment helm-controller --replicas=1
|
run kubectl -n flux-system scale deployment helm-controller --replicas=1
|
||||||
fi
|
fi
|
||||||
if command -v flux >/dev/null 2>&1; then
|
if command -v flux >/dev/null 2>&1; then
|
||||||
if [[ "${RECOVERY_FLUX_APPLY_BOOTSTRAP_KUSTOMIZATION}" == "1" || "${RECOVERY_FLUX_APPLY_BOOTSTRAP_KUSTOMIZATION}" == "true" ]]; then
|
if [[ "${RECOVERY_FLUX_APPLY_BOOTSTRAP_KUSTOMIZATION}" == "1" || "${RECOVERY_FLUX_APPLY_BOOTSTRAP_KUSTOMIZATION}" == "true" ]]; then
|
||||||
|
patch_kustomization_suspend flux-system false
|
||||||
run flux reconcile kustomization flux-system -n flux-system --timeout="${RECOVERY_FLUX_ROOT_APPLY_TIMEOUT}" || warn "flux-system Kustomization did not apply the recovery source revision before final suspension."
|
run flux reconcile kustomization flux-system -n flux-system --timeout="${RECOVERY_FLUX_ROOT_APPLY_TIMEOUT}" || warn "flux-system Kustomization did not apply the recovery source revision before final suspension."
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user