ananke: harden recovery checks and finalize naming migration

Brad Stein 2026-04-07 12:30:28 -03:00
parent 9a07aa9be9
commit fa160f5f9b
7 changed files with 846 additions and 73 deletions

README.md

@ -1,3 +1,80 @@
# titan-iac
Flux-managed Kubernetes cluster config for bstein.dev.
Canonical repo URL:
- `ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git`
## Why `ananke`
`Ananke` is inevitability and constraint. That is exactly what this tooling is for:
- power events happen
- recovery windows are finite
- bootstrap has to be deterministic
The point is not clever automation. The point is boring, repeatable recovery.
## Power Domains
Two UPS domains matter during shutdown/startup drills:
- `Statera`: `titan-23`, `titan-24`, `titan-jh`
- `Pyrphoros`: all other nodes
Default UPS checks in Ananke read from `Pyrphoros` (`pyrphoros@localhost`) unless overridden.
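A quick manual spot-check of that telemetry (standard NUT `upsc` syntax; the `statera@localhost` identifier below is illustrative, only `pyrphoros@localhost` is confirmed here):
```bash
# Read the battery charge Ananke consults by default
upsc pyrphoros@localhost battery.charge
# Point a status run at another UPS domain if that is the one under test
~/ananke-cluster-power status --ups-host statera@localhost
```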
## Breakglass
If primary operator access is lost, breakglass is on the remote Magic Mirror.
## Ananke Commands
Ananke is the recovery orchestrator. Flux desired-state source remains `titan-iac.git`.
Use `titan-db` as the canonical control host. `tethys` (`titan-24`) is the backup operator host.
From `titan-db`:
```bash
~/ananke-cluster-power status
~/ananke-cluster-power prepare --execute
~/ananke-cluster-power shutdown --execute --require-ups-battery
~/ananke-cluster-power startup --execute --force-flux-branch main --require-ups-battery
```
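Every mode defaults to dry-run; `--execute` is what arms it. A preview first is cheap:
```bash
# Dry-run: prints the planned actions without touching the cluster
~/ananke-cluster-power shutdown --require-ups-battery
```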
From `tethys` / `titan-24` (delegating to `titan-db`):
```bash
~/ananke-tools/cluster_power_console.sh --delegate-host titan-db status
~/ananke-tools/cluster_power_console.sh --delegate-host titan-db prepare --execute
~/ananke-tools/cluster_power_console.sh --delegate-host titan-db shutdown --execute --require-ups-battery
~/ananke-tools/cluster_power_console.sh --delegate-host titan-db startup --execute --force-flux-branch main --require-ups-battery
```
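The console also forwards `ANANKE_REMOTE_REPO_DIR` (exported as `ANANKE_REPO_DIR` on the delegate) to select the repo checkout used for the fallback path; the path below is illustrative:
```bash
ANANKE_REMOTE_REPO_DIR=/opt/titan-iac \
  ~/ananke-tools/cluster_power_console.sh --delegate-host titan-db status
```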
## Shutdown Modes
`cluster_power_recovery.sh` supports two shutdown behaviors:
- `--shutdown-mode host-poweroff` (default): graceful cluster shutdown plus scheduled host poweroff.
- `--shutdown-mode cluster-only`: graceful cluster shutdown without host poweroff (stops `k3s` / `k3s-agent` only).
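For a drill where hosts must stay powered, combine the mode flag with the usual shutdown invocation:
```bash
# Stops k3s / k3s-agent everywhere but leaves the hosts up
~/ananke-cluster-power shutdown --execute --shutdown-mode cluster-only --require-ups-battery
```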
## Startup Completion Rules
Ananke startup is not “done” just because Flux says green once.
Startup now completes only after:
- Flux source drift checks pass (expected URL and branch)
- all non-optional Flux kustomizations report `Ready=True`
- external service checklist passes (default includes Gitea, Grafana, Harbor)
- generated ingress reachability checks pass (default accepted statuses: `200,301,302,307,308,401,403,404`)
- a stability soak window passes with no `CrashLoopBackOff` / image-pull failures and checklist still healthy
If you intentionally need to correct Flux source during recovery, use:
- `--force-flux-url ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git`
- `--force-flux-branch main`
`--force-flux-url` is breakglass-only and requires `--allow-flux-source-mutation`.
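A breakglass source correction therefore needs both flags together; startup refuses `--force-flux-url` without the guard:
```bash
~/ananke-cluster-power startup --execute \
  --allow-flux-source-mutation \
  --force-flux-url ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git \
  --force-flux-branch main
```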
The defaults live in:
- `scripts/bootstrap/recovery-config.env`
Detailed runbook:
- `knowledge/runbooks/cluster-power-recovery.md`

knowledge/runbooks/cluster-power-recovery.md

@ -45,33 +45,37 @@ Execute examples
Manual remote console examples
- Canonical operator hosts:
  - `titan-db`
  - `tethys` (`titan-24`)
- Both hosts now have:
  - `~/ananke-tools/cluster_power_recovery.sh`
  - `~/ananke-tools/cluster_power_console.sh`
  - `~/ananke-tools/bootstrap/recovery-config.env`
  - `~/ananke-tools/bootstrap/harbor-bootstrap-images.txt`
  - `~/ananke-tools/kubeconfig`
  - `~/ananke-cluster-power`
  - `~/bin/ananke-cluster-power`
  - `~/ananke-repo/{infrastructure,services,scripts}`
- Both hosts also keep the Harbor bootstrap bundle at:
  - `~/.local/share/ananke/bundles/harbor-bootstrap-v2.14.1-arm64.tar.zst`
- Remote usage:
  - `ssh titan-db`
    - `~/ananke-cluster-power status`
    - `~/ananke-cluster-power prepare --execute`
    - `~/ananke-cluster-power shutdown --execute`
    - `~/ananke-cluster-power startup --execute --force-flux-branch main`
  - `ssh tethys`
    - `~/ananke-cluster-power status`
    - `~/ananke-cluster-power prepare --execute`
    - `~/ananke-cluster-power shutdown --execute`
    - `~/ananke-cluster-power startup --execute --force-flux-branch main`
Useful options
- `--shutdown-mode host-poweroff|cluster-only`
- `--expected-flux-branch main`
- `--expected-flux-url ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git`
- `--force-flux-url ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git`
- `--force-flux-branch main`
- `--allow-flux-source-mutation` (required with `--force-flux-url`; breakglass only)
- `--skip-local-bootstrap` (not recommended for cold-start recovery)
- `--skip-harbor-bootstrap` (skip the Harbor recovery stage if you know Harbor should stay deferred)
- `--skip-harbor-seed` (skip bundle import if Harbor images are already cached on the target node)
@ -81,8 +85,12 @@ Useful options
- `--require-ups-battery`
- `--drain-timeout 180`
- `--emergency-drain-timeout 45`
- `--flux-ready-timeout 1200`
- `--startup-checklist-timeout 900`
- `--startup-stability-window 180`
- `--startup-stability-timeout 900`
- `--recovery-state-file ~/.local/share/ananke/cluster_power_recovery.state`
- `--harbor-bundle-file ~/.local/share/ananke/bundles/harbor-bootstrap-v2.14.1-arm64.tar.zst`
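Example invocation spelling out the new startup gates (values shown are the shipped defaults):
```bash
~/ananke-cluster-power startup --execute \
  --flux-ready-timeout 1200 \
  --startup-checklist-timeout 900 \
  --startup-stability-window 180 \
  --startup-stability-timeout 900
```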
Controlled drill checklist (recommended)
- Operator host: use `titan-db` as canonical control host for the drill.
@ -91,37 +99,48 @@ Controlled drill checklist (recommended)
- Confirm they will manually power cluster nodes back on after shutdown completes.
- Confirm who will announce "all nodes powered on" to resume startup.
- Preflight on `titan-db`:
  - `mkdir -p ~/ananke-logs`
  - `~/ananke-cluster-power status` and verify:
    - `ups_host=pyrphoros@localhost`
    - `ups_battery` is numeric
    - `flux_source_ready=True`
- Warm helper image just before shutdown:
  - `~/ananke-cluster-power prepare --execute`
- Run in a persistent shell and capture logs:
  - `tmux new -s ananke-drill`
  - `script -q -a ~/ananke-logs/ananke-drill-$(date +%Y%m%d-%H%M%S).log`
- Execute controlled shutdown with telemetry enforcement:
  - `~/ananke-cluster-power shutdown --execute --require-ups-battery`
- After on-site power-on confirmation, execute startup:
  - `~/ananke-cluster-power startup --execute --force-flux-branch main --require-ups-battery`
- Post-check:
  - `~/ananke-cluster-power status`
  - Verify critical services (`longhorn`, `vault`, `postgres`, `gitea`, `harbor`, `pegasus`) and no widespread pull/crash failures.
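One way to eyeball the "no widespread pull/crash failures" condition, using the same status strings the recovery script itself greps for:
```bash
kubectl get pods -A --no-headers \
  | awk '$4 ~ /(CrashLoopBackOff|ImagePullBackOff|ErrImagePull)/ {print $1 "/" $2 " " $4}'
```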
Operational notes
- The flow suspends Flux Kustomizations/HelmReleases during shutdown to prevent churn.
- Shutdown behavior is explicit:
  - `host-poweroff` schedules host poweroff after service stop.
  - `cluster-only` stops `k3s`/`k3s-agent` without powering hosts off.
- Worker drain is no longer best-effort only. The script now escalates from normal drain, to `--force`, to `--disable-eviction` once the configured timeout is exhausted (see the sketch after this list).
- Startup fails fast if Flux source URL/branch drift from expected values (unless branch override is explicitly requested with `--force-flux-branch`).
- Flux desired-state source remains `titan-iac.git`. Ananke orchestrates runtime recovery and should not be used as the normal Flux source repo.
- During startup, if Flux source is not `Ready`, local bootstrap fallback is applied first using the repo snapshot under `~/ananke-repo`.
- Longhorn is reconciled before Vault/Postgres/Gitea so storage-backed services are not racing the volume layer.
- Harbor is reconciled after the first critical stateful services.
- Harbor bootstrap is now designed around a control-host bundle:
  - Build the Harbor bundle locally with `scripts/build_harbor_bootstrap_bundle.sh`.
  - Stage it on the operator host at `~/.local/share/ananke/bundles/harbor-bootstrap-v2.14.1-arm64.tar.zst`.
  - Use `harbor-seed --execute` or a full `startup --execute` to stream/import that bundle onto `titan-05`.
- The Harbor bundle remains arm64-only because Harbor is pinned to arm64 nodes. The node-helper image is multi-arch because Ananke uses it across both arm64 and amd64 nodes during prepare/shutdown operations.
- Ananke uses a temporary privileged helper pod for host-side operations. The helper image is prewarmed with `prepare --execute` so later shutdown/startup steps do not stall on image pulls.
- The script persists outage state in `~/.local/share/ananke/cluster_power_recovery.state` by default. If startup is attempted during an outage window and power becomes unstable again, rerunning startup with insufficient UPS charge will flip into the emergency shutdown path instead of continuing to bootstrap.
- Startup completion is strict now:
  - all non-optional Flux kustomizations must be `Ready=True`
  - external service checklist must pass (defaults include Gitea, Grafana, Harbor)
  - generated ingress reachability checks must pass (default accepted codes: `200,301,302,307,308,401,403,404`)
  - stability soak must pass with no crashloop/pull-failure churn
- If Flux hits immutable one-off Job drift during reconcile, Ananke now attempts self-heal by pruning failed Flux-managed Jobs and retrying reconcile.
- In dry-run mode, the script now skips the live API wait step so preview runs do not stall on an offline cluster.
- Dry-run mode no longer mutates outage recovery state.
- `harbor-seed --execute` was validated by:
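A minimal sketch of the drain escalation referenced above (illustrative shape, not the script's literal code):
```bash
# Escalate: normal drain -> --force -> --force --disable-eviction
drain_with_escalation() {
  local node="$1" timeout="$2"
  kubectl drain "${node}" --ignore-daemonsets --delete-emptydir-data --timeout="${timeout}s" \
    || kubectl drain "${node}" --ignore-daemonsets --delete-emptydir-data --force --timeout="${timeout}s" \
    || kubectl drain "${node}" --ignore-daemonsets --delete-emptydir-data --force --disable-eviction --timeout="${timeout}s"
}
```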

scripts/bootstrap/recovery-config.env

@ -1,5 +1,7 @@
CANONICAL_CONTROL_HOST="titan-db"
DEFAULT_FLUX_BRANCH="main"
EXPECTED_FLUX_URL="ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git"
SHUTDOWN_MODE="host-poweroff"
STATE_SUBDIR=".local/share/ananke"
HARBOR_BUNDLE_BASENAME="harbor-bootstrap-v2.14.1-arm64.tar.zst"
HARBOR_TARGET_NODE=""
@ -13,3 +15,22 @@ REGISTRY_PULL_SECRET="harbor-regcred"
BUNDLE_HTTP_PORT="8877"
UPS_HOST="pyrphoros@localhost"
UPS_BATTERY_KEY="battery.charge"
FLUX_READY_TIMEOUT_SECONDS="1200"
FLUX_READY_POLL_SECONDS="10"
STARTUP_CHECKLIST_TIMEOUT_SECONDS="900"
STARTUP_CHECKLIST_POLL_SECONDS="10"
STARTUP_WORKLOAD_TIMEOUT_SECONDS="900"
STARTUP_WORKLOAD_POLL_SECONDS="10"
STARTUP_STABILITY_WINDOW_SECONDS="180"
STARTUP_STABILITY_TIMEOUT_SECONDS="900"
STARTUP_STABILITY_POLL_SECONDS="10"
STARTUP_OPTIONAL_KUSTOMIZATIONS=""
STARTUP_IGNORE_PODS_REGEX=""
STARTUP_IGNORE_WORKLOADS_REGEX=""
STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX="^(kube-system|kube-public|kube-node-lease|flux-system)$"
STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS="10"
STARTUP_INCLUDE_INGRESS_CHECKS="1"
STARTUP_INGRESS_ALLOWED_STATUSES="200,301,302,307,308,401,403,404"
STARTUP_IGNORE_INGRESS_HOSTS_REGEX=""
STARTUP_INGRESS_CHECK_TIMEOUT_SECONDS="10"
STARTUP_SERVICE_CHECKLIST='gitea|https://scm.bstein.dev/api/healthz|200|"status":"pass"||;grafana|https://metrics.bstein.dev/api/health|200|"database":"ok"||;harbor|https://registry.bstein.dev/v2/|200,401|||'
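For reference, each `STARTUP_SERVICE_CHECKLIST` entry is `name|url|allowed_statuses|body_must|body_must_not|insecure|timeout`, with entries joined by `;` (matching how `cluster_power_recovery.sh` parses the rows). A hypothetical extra check would be appended the same way:
```bash
# Hypothetical fourth entry; the vault.bstein.dev URL and status list are illustrative
STARTUP_SERVICE_CHECKLIST="${STARTUP_SERVICE_CHECKLIST};vault|https://vault.bstein.dev/v1/sys/health|200,429,473|||0|10"
```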

scripts/build_ananke_node_helper.sh

@ -1,10 +1,10 @@
#!/usr/bin/env bash
set -euo pipefail
IMAGE="registry.bstein.dev/bstein/ananke-node-helper:0.1.0"
DOCKER_CONFIG_PATH=""
PLATFORMS="linux/amd64,linux/arm64"
BUILDER_NAME="ananke-node-helper-builder"
while [[ $# -gt 0 ]]; do
case "$1" in
@ -26,7 +26,7 @@ while [[ $# -gt 0 ]]; do
;;
-h|--help)
cat <<USAGE
Usage: scripts/build_ananke_node_helper.sh [--image <image>] [--docker-config <path>] [--platforms <csv>] [--builder <name>]
USAGE
exit 0
;;
@ -50,7 +50,7 @@ fi
docker buildx inspect --bootstrap >/dev/null
docker buildx build \
--platform "${PLATFORMS}" \
-f dockerfiles/Dockerfile.ananke-node-helper \
-t "${IMAGE}" \
--push \
.
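Invocation with the defaults spelled out explicitly:
```bash
scripts/build_ananke_node_helper.sh \
  --image registry.bstein.dev/bstein/ananke-node-helper:0.1.0 \
  --platforms linux/amd64,linux/arm64 \
  --builder ananke-node-helper-builder
```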

scripts/cluster_power_console.sh

@ -1,19 +1,20 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
usage() {
cat <<'USAGE'
Usage:
scripts/cluster_power_console.sh [--repo-dir <path>] [--delegate-host <host>] <shutdown|startup> [recovery-script-options...]
Purpose:
Friendly manual entrypoint for running Ananke from a remote console.
Canonical control host is titan-db by default so bundle/state handling stays in one place.
Defaults:
--repo-dir $HOME/Development/ananke (fallback: $HOME/Development/titan-iac)
--delegate-host titan-db
Examples:
scripts/cluster_power_console.sh shutdown --execute
@ -22,9 +23,13 @@ Examples:
USAGE
}
if [[ -d "${HOME}/Development/ananke" ]]; then
REPO_DIR="${HOME}/Development/ananke"
else
REPO_DIR="${HOME}/Development/titan-iac"
fi
DELEGATE_HOST="titan-db"
REMOTE_REPO_DIR="${ANANKE_REMOTE_REPO_DIR:-}"
while [[ $# -gt 0 ]]; do
case "$1" in
@ -71,17 +76,12 @@ if [[ -z "${DELEGATE_HOST}" ]]; then
fi
quoted_args="$(printf '%q ' "$@")"
quoted_repo_dir="$(printf '%q' "${REPO_DIR}")"
remote_cmd=""
if [[ -n "${REMOTE_REPO_DIR}" ]]; then
remote_cmd+="ANANKE_REPO_DIR=$(printf '%q' "${REMOTE_REPO_DIR}") "
fi
remote_cmd+="if [ -x ~/ananke-tools/cluster_power_recovery.sh ]; then ~/ananke-tools/cluster_power_recovery.sh ${quoted_args}; elif [ -x ${quoted_repo_dir}/scripts/cluster_power_recovery.sh ]; then ${quoted_repo_dir}/scripts/cluster_power_recovery.sh ${quoted_args}; else echo 'cluster-power-console: remote recovery script not found' >&2; exit 1; fi"
exec ssh -o BatchMode=yes -o ConnectTimeout=8 "${DELEGATE_HOST}" "${remote_cmd}"

scripts/cluster_power_recovery.sh

@ -20,7 +20,11 @@ Usage:
Options:
--execute Actually run commands (default is dry-run)
--shutdown-mode <mode> Shutdown behavior: host-poweroff or cluster-only (default: ${SHUTDOWN_MODE:-host-poweroff})
--expected-flux-branch <name> Expected Flux source branch during startup checks (default: ${DEFAULT_FLUX_BRANCH:-main})
--expected-flux-url <url> Expected Flux source URL during startup checks
--allow-flux-source-mutation Required to allow --force-flux-url during startup
--force-flux-url <url> Startup: patch flux-system GitRepository URL to this value
--force-flux-branch <name> Startup: patch flux-system GitRepository branch to this value
--skip-etcd-snapshot Shutdown: skip etcd snapshot before shutdown
--skip-drain Shutdown: skip worker drain during shutdown
@ -32,6 +36,8 @@ Options:
--ups-host <name> UPS identifier for upsc (default: ups@localhost)
--ups-battery-key <key> UPS battery key for upsc (default: battery.charge)
--recovery-state-file <path> Recovery state file for outage-aware restart logic
--replica-snapshot-file <path>
File used to persist workload replica snapshot across shutdown/startup
--harbor-bundle-file <path> Harbor bootstrap bundle on the control host
--harbor-target-node <name> Node that should host Harbor during bootstrap (default: auto)
--harbor-canary-node <name> Node used for Harbor pull canary (default: auto)
@ -43,6 +49,16 @@ Options:
--drain-timeout <seconds> Worker drain timeout for normal shutdown (default: 180)
--emergency-drain-timeout <seconds>
Worker drain timeout for emergency fallback (default: 45)
--flux-ready-timeout <seconds>
Startup: max time to wait for Flux kustomizations Ready (default: 1200)
--startup-checklist-timeout <seconds>
Startup: max time to wait for external service checklist (default: 900)
--startup-workload-timeout <seconds>
Startup: max time to wait for workload readiness checks (default: 900)
--startup-stability-window <seconds>
Startup: continuous healthy window required before success (default: 180)
--startup-stability-timeout <seconds>
Startup: max time allowed to achieve the healthy window (default: 900)
--require-ups-battery Hard-fail startup if UPS battery cannot be read
-h, --help Show help
@ -72,7 +88,11 @@ case "${MODE}" in
esac
EXECUTE=0
SHUTDOWN_MODE="${SHUTDOWN_MODE:-host-poweroff}"
EXPECTED_FLUX_BRANCH="${DEFAULT_FLUX_BRANCH:-main}"
EXPECTED_FLUX_URL="${EXPECTED_FLUX_URL:-ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git}"
ALLOW_FLUX_SOURCE_MUTATION=0
FORCE_FLUX_URL=""
FORCE_FLUX_BRANCH=""
SKIP_ETCD_SNAPSHOT=0
SKIP_DRAIN=0
@ -87,9 +107,30 @@ REQUIRE_UPS_BATTERY="${REQUIRE_UPS_BATTERY:-0}"
DRAIN_TIMEOUT_SECONDS=180
EMERGENCY_DRAIN_TIMEOUT_SECONDS=45
API_WAIT_TIMEOUT_SECONDS=600
FLUX_READY_TIMEOUT_SECONDS="${FLUX_READY_TIMEOUT_SECONDS:-1200}"
FLUX_READY_POLL_SECONDS="${FLUX_READY_POLL_SECONDS:-10}"
STARTUP_CHECKLIST_TIMEOUT_SECONDS="${STARTUP_CHECKLIST_TIMEOUT_SECONDS:-900}"
STARTUP_CHECKLIST_POLL_SECONDS="${STARTUP_CHECKLIST_POLL_SECONDS:-10}"
STARTUP_WORKLOAD_TIMEOUT_SECONDS="${STARTUP_WORKLOAD_TIMEOUT_SECONDS:-900}"
STARTUP_WORKLOAD_POLL_SECONDS="${STARTUP_WORKLOAD_POLL_SECONDS:-10}"
STARTUP_STABILITY_WINDOW_SECONDS="${STARTUP_STABILITY_WINDOW_SECONDS:-180}"
STARTUP_STABILITY_TIMEOUT_SECONDS="${STARTUP_STABILITY_TIMEOUT_SECONDS:-900}"
STARTUP_STABILITY_POLL_SECONDS="${STARTUP_STABILITY_POLL_SECONDS:-10}"
STARTUP_IGNORE_PODS_REGEX="${STARTUP_IGNORE_PODS_REGEX:-}"
STARTUP_IGNORE_WORKLOADS_REGEX="${STARTUP_IGNORE_WORKLOADS_REGEX:-}"
STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX="${STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX:-^(kube-system|kube-public|kube-node-lease|flux-system)$}"
STARTUP_OPTIONAL_KUSTOMIZATIONS="${STARTUP_OPTIONAL_KUSTOMIZATIONS:-}"
STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS="${STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS:-10}"
STARTUP_SERVICE_CHECKLIST="${STARTUP_SERVICE_CHECKLIST:-}"
STARTUP_INCLUDE_INGRESS_CHECKS="${STARTUP_INCLUDE_INGRESS_CHECKS:-1}"
STARTUP_INGRESS_ALLOWED_STATUSES="${STARTUP_INGRESS_ALLOWED_STATUSES:-200,301,302,307,308,401,403,404}"
STARTUP_IGNORE_INGRESS_HOSTS_REGEX="${STARTUP_IGNORE_INGRESS_HOSTS_REGEX:-}"
STARTUP_INGRESS_CHECK_TIMEOUT_SECONDS="${STARTUP_INGRESS_CHECK_TIMEOUT_SECONDS:-10}"
SHUTDOWN_NAMESPACE_EXCLUDES_REGEX="${SHUTDOWN_NAMESPACE_EXCLUDES_REGEX:-^(kube-system|kube-public|kube-node-lease|flux-system|traefik|metallb-system|cert-manager|longhorn-system|vault|postgres|maintenance)$}"
BUNDLE_HTTP_PORT="${BUNDLE_HTTP_PORT:-8877}"
STATE_ROOT="${HOME}/${STATE_SUBDIR:-.local/share/ananke}"
RECOVERY_STATE_FILE="${STATE_ROOT}/cluster_power_recovery.state"
REPLICA_SNAPSHOT_FILE="${STATE_ROOT}/desired_workload_replicas.tsv"
HARBOR_BUNDLE_FILE="${STATE_ROOT}/bundles/${HARBOR_BUNDLE_BASENAME:-harbor-bootstrap-v2.14.1-arm64.tar.zst}"
HARBOR_TARGET_NODE="${HARBOR_TARGET_NODE:-}"
HARBOR_CANARY_NODE="${HARBOR_CANARY_NODE:-}"
@ -114,10 +155,26 @@ while [[ $# -gt 0 ]]; do
EXECUTE=1
shift
;;
--shutdown-mode)
SHUTDOWN_MODE="${2:?missing shutdown mode}"
shift 2
;;
--expected-flux-branch)
EXPECTED_FLUX_BRANCH="${2:?missing branch}"
shift 2
;;
--expected-flux-url)
EXPECTED_FLUX_URL="${2:?missing flux url}"
shift 2
;;
--allow-flux-source-mutation)
ALLOW_FLUX_SOURCE_MUTATION=1
shift
;;
--force-flux-url)
FORCE_FLUX_URL="${2:?missing flux url}"
shift 2
;;
--force-flux-branch)
FORCE_FLUX_BRANCH="${2:?missing branch}"
shift 2
@ -166,6 +223,10 @@ while [[ $# -gt 0 ]]; do
RECOVERY_STATE_FILE="${2:?missing state file path}"
shift 2
;;
--replica-snapshot-file)
REPLICA_SNAPSHOT_FILE="${2:?missing replica snapshot file path}"
shift 2
;;
--harbor-bundle-file)
HARBOR_BUNDLE_FILE="${2:?missing bundle file path}"
shift 2
@ -198,6 +259,26 @@ while [[ $# -gt 0 ]]; do
API_WAIT_TIMEOUT_SECONDS="${2:?missing api wait timeout}"
shift 2
;;
--flux-ready-timeout)
FLUX_READY_TIMEOUT_SECONDS="${2:?missing flux ready timeout}"
shift 2
;;
--startup-checklist-timeout)
STARTUP_CHECKLIST_TIMEOUT_SECONDS="${2:?missing startup checklist timeout}"
shift 2
;;
--startup-workload-timeout)
STARTUP_WORKLOAD_TIMEOUT_SECONDS="${2:?missing startup workload timeout}"
shift 2
;;
--startup-stability-window)
STARTUP_STABILITY_WINDOW_SECONDS="${2:?missing startup stability window}"
shift 2
;;
--startup-stability-timeout)
STARTUP_STABILITY_TIMEOUT_SECONDS="${2:?missing startup stability timeout}"
shift 2
;;
--drain-timeout)
DRAIN_TIMEOUT_SECONDS="${2:?missing drain timeout}"
shift 2
@ -218,6 +299,19 @@ while [[ $# -gt 0 ]]; do
esac
done
case "${SHUTDOWN_MODE}" in
host-poweroff|cluster-only) ;;
*)
echo "Invalid --shutdown-mode '${SHUTDOWN_MODE}'. Expected host-poweroff or cluster-only." >&2
exit 1
;;
esac
if [[ -n "${FORCE_FLUX_URL}" && "${ALLOW_FLUX_SOURCE_MUTATION}" -ne 1 ]]; then
echo "--force-flux-url requires --allow-flux-source-mutation (breakglass)." >&2
exit 1
fi
require_cmd() {
local cmd="$1"
if ! command -v "${cmd}" >/dev/null 2>&1; then
@ -376,10 +470,335 @@ report_flux_source_state() {
[[ -n "${flux_url}" ]] && log "flux-source-url=${flux_url}" [[ -n "${flux_url}" ]] && log "flux-source-url=${flux_url}"
if [[ -n "${flux_branch}" ]]; then if [[ -n "${flux_branch}" ]]; then
log "flux-source-branch=${flux_branch}" log "flux-source-branch=${flux_branch}"
if [[ "${MODE}" == "startup" && -z "${FORCE_FLUX_BRANCH}" && "${flux_branch}" != "${EXPECTED_FLUX_BRANCH}" ]]; then fi
warn "Flux source branch is '${flux_branch}'. Expected '${EXPECTED_FLUX_BRANCH}' for canonical recovery." }
csv_has_value() {
local csv="$1"
local value="$2"
local needle=",${value},"
local haystack=",${csv},"
[[ "${haystack}" == *"${needle}"* ]]
}
assert_flux_source_expected() {
if [[ "${EXECUTE}" -eq 0 ]]; then
log "DRY-RUN: skipping strict Flux source drift guard"
return 0
fi
local flux_url flux_branch
flux_url="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.url}' 2>/dev/null || true)"
flux_branch="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.ref.branch}' 2>/dev/null || true)"
[[ -n "${flux_url}" ]] || die "Unable to read Flux source URL from flux-system/gitrepository."
[[ -n "${flux_branch}" ]] || die "Unable to read Flux source branch from flux-system/gitrepository."
if [[ -n "${EXPECTED_FLUX_URL}" && "${flux_url}" != "${EXPECTED_FLUX_URL}" ]]; then
die "Flux source URL drift detected: got '${flux_url}', expected '${EXPECTED_FLUX_URL}'. Refusing startup."
fi
if [[ -z "${FORCE_FLUX_BRANCH}" && "${flux_branch}" != "${EXPECTED_FLUX_BRANCH}" ]]; then
die "Flux source branch drift detected: got '${flux_branch}', expected '${EXPECTED_FLUX_BRANCH}'. Use --force-flux-branch to correct."
fi
}
kustomization_is_optional() {
local name="$1"
[[ -n "${STARTUP_OPTIONAL_KUSTOMIZATIONS}" ]] || return 1
csv_has_value "${STARTUP_OPTIONAL_KUSTOMIZATIONS}" "${name}"
}
list_not_ready_kustomizations() {
local rows line name ready message
rows="$(kubectl -n flux-system get kustomizations.kustomize.toolkit.fluxcd.io \
-o 'custom-columns=NAME:.metadata.name,READY:.status.conditions[?(@.type=="Ready")].status,MESSAGE:.status.conditions[?(@.type=="Ready")].message' \
--no-headers 2>/dev/null || true)"
[[ -n "${rows}" ]] || return 0
while IFS= read -r line; do
[[ -n "${line}" ]] || continue
name="$(awk '{print $1}' <<< "${line}")"
ready="$(awk '{print $2}' <<< "${line}")"
message="${line#${name} }"
message="${message#${ready} }"
if kustomization_is_optional "${name}"; then
continue
fi
if [[ "${ready}" != "True" ]]; then
printf '%s|%s\n' "${name}" "${message}"
fi
done <<< "${rows}"
}
trigger_flux_reconcile_all() {
local now
now="$(date --iso-8601=seconds)"
run kubectl -n flux-system annotate kustomizations.kustomize.toolkit.fluxcd.io --all reconcile.fluxcd.io/requestedAt="${now}" --overwrite
if command -v flux >/dev/null 2>&1; then
run flux reconcile source git flux-system -n flux-system --timeout=3m
fi
}
heal_failed_flux_jobs() {
local rows line ns name failed flux_owner helm_owner healed
healed=0
rows="$(kubectl get jobs.batch -A \
-o custom-columns=NS:.metadata.namespace,NAME:.metadata.name,FAILED:.status.failed,FLUX_OWNER:.metadata.labels.kustomize\\.toolkit\\.fluxcd\\.io/name,HELM_OWNER:.metadata.labels.helm\\.toolkit\\.fluxcd\\.io/name \
--no-headers 2>/dev/null || true)"
[[ -n "${rows}" ]] || return 1
while IFS= read -r line; do
[[ -n "${line}" ]] || continue
ns="$(awk '{print $1}' <<< "${line}")"
name="$(awk '{print $2}' <<< "${line}")"
failed="$(awk '{print $3}' <<< "${line}")"
flux_owner="$(awk '{print $4}' <<< "${line}")"
helm_owner="$(awk '{print $5}' <<< "${line}")"
[[ "${failed}" != "<none>" ]] || continue
[[ "${failed}" =~ ^[0-9]+$ ]] || continue
(( failed > 0 )) || continue
if [[ "${flux_owner}" == "<none>" && "${helm_owner}" == "<none>" ]]; then
continue
fi
warn "Deleting failed Flux-managed Job ${ns}/${name} to heal immutable-template drift."
run kubectl -n "${ns}" delete job "${name}" --ignore-not-found
healed=1
done <<< "${rows}"
(( healed == 1 ))
}
wait_for_flux_kustomizations_ready() {
if [[ "${EXECUTE}" -eq 0 ]]; then
log "DRY-RUN: skipping wait for all Flux kustomizations Ready"
return 0
fi
local start now not_ready immutable_hits
start="$(date +%s)"
immutable_hits=0
while true; do
not_ready="$(list_not_ready_kustomizations || true)"
if [[ -z "${not_ready}" ]]; then
log "flux-kustomizations=all-ready"
return 0
fi
log "flux-kustomizations-not-ready:"
while IFS= read -r line; do
[[ -n "${line}" ]] || continue
log " ${line}"
done <<< "${not_ready}"
if grep -Eqi 'immutable|field is immutable|cannot patch.*Job|Job.*invalid' <<< "${not_ready}"; then
if (( immutable_hits < 3 )); then
immutable_hits=$(( immutable_hits + 1 ))
warn "Detected immutable Job failure signal in Flux status. Attempting automated Job cleanup (${immutable_hits}/3)."
if heal_failed_flux_jobs; then
trigger_flux_reconcile_all
fi
fi
fi
now="$(date +%s)"
if (( now - start >= FLUX_READY_TIMEOUT_SECONDS )); then
die "Timed out waiting for Flux kustomizations Ready after ${FLUX_READY_TIMEOUT_SECONDS}s."
fi
sleep "${FLUX_READY_POLL_SECONDS}"
done
}
default_startup_service_checklist() {
cat <<'CHECKS'
gitea|https://scm.bstein.dev/api/healthz|200|"status":"pass"||
grafana|https://metrics.bstein.dev/api/health|200|"database":"ok"||
harbor|https://registry.bstein.dev/v2/|200,401|||
CHECKS
}
list_ingress_hosts() {
kubectl get ingress -A -o jsonpath='{range .items[*]}{range .spec.rules[*]}{.host}{"\n"}{end}{end}' 2>/dev/null \
| sed '/^[[:space:]]*$/d' \
| sort -u
}
generated_ingress_service_checks() {
local host
while IFS= read -r host; do
[[ -n "${host}" ]] || continue
if [[ -n "${STARTUP_IGNORE_INGRESS_HOSTS_REGEX}" ]] && [[ "${host}" =~ ${STARTUP_IGNORE_INGRESS_HOSTS_REGEX} ]]; then
continue
fi
printf 'ingress-%s|https://%s/|%s|||0|%s\n' "${host}" "${host}" "${STARTUP_INGRESS_ALLOWED_STATUSES}" "${STARTUP_INGRESS_CHECK_TIMEOUT_SECONDS}"
done < <(list_ingress_hosts)
}
startup_service_checklist_rows() {
local base
if [[ -n "${STARTUP_SERVICE_CHECKLIST}" ]]; then
base="$(printf '%s' "${STARTUP_SERVICE_CHECKLIST}" | tr ';' '\n')"
else
base="$(default_startup_service_checklist)"
fi
printf '%s\n' "${base}" | sed '/^[[:space:]]*$/d'
if [[ "${STARTUP_INCLUDE_INGRESS_CHECKS}" == "1" || "${STARTUP_INCLUDE_INGRESS_CHECKS}" == "true" ]]; then
generated_ingress_service_checks
fi
}
service_status_allowed() {
local expected_csv="$1"
local got="$2"
local token
IFS=',' read -r -a _statuses <<< "${expected_csv}"
for token in "${_statuses[@]}"; do
if [[ "${token}" == "${got}" ]]; then
return 0
fi
done
return 1
}
check_startup_service_checklist_once() {
local rows row name url expected body_must body_must_not insecure timeout code rc
local body_file failures
failures=0
rows="$(startup_service_checklist_rows)"
while IFS= read -r row; do
[[ -n "${row}" ]] || continue
IFS='|' read -r name url expected body_must body_must_not insecure timeout <<< "${row}"
[[ -n "${name}" && -n "${url}" && -n "${expected}" ]] || continue
[[ -n "${insecure}" ]] || insecure=0
[[ -n "${timeout}" ]] || timeout="${STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS}"
body_file="$(mktemp)"
rc=0
if [[ "${insecure}" == "1" || "${insecure}" == "true" ]]; then
code="$(curl -ksS --max-time "${timeout}" -o "${body_file}" -w '%{http_code}' "${url}" || rc=$?)"
else
code="$(curl -sS --max-time "${timeout}" -o "${body_file}" -w '%{http_code}' "${url}" || rc=$?)"
fi
if (( rc != 0 )); then
warn "startup-check ${name}: request failed (rc=${rc}) url=${url}"
failures=1
rm -f "${body_file}"
continue
fi
if ! service_status_allowed "${expected}" "${code}"; then
warn "startup-check ${name}: expected status ${expected}, got ${code} url=${url}"
failures=1
rm -f "${body_file}"
continue
fi
if [[ -n "${body_must}" ]] && ! grep -Fq -- "${body_must}" "${body_file}"; then
warn "startup-check ${name}: missing required body fragment '${body_must}'"
failures=1
rm -f "${body_file}"
continue
fi
if [[ -n "${body_must_not}" ]] && grep -Fq -- "${body_must_not}" "${body_file}"; then
warn "startup-check ${name}: forbidden body fragment '${body_must_not}' present"
failures=1
rm -f "${body_file}"
continue
fi
rm -f "${body_file}"
done <<< "${rows}"
(( failures == 0 ))
}
wait_for_startup_service_checklist() {
if [[ "${EXECUTE}" -eq 0 ]]; then
log "DRY-RUN: skipping startup external service checklist wait"
return 0
fi
local start now checklist_ok workloads_ok
start="$(date +%s)"
while true; do
checklist_ok=0
workloads_ok=0
if check_startup_service_checklist_once; then
checklist_ok=1
fi
if list_unhealthy_workloads | sed '/^[[:space:]]*$/d' | grep -q .; then
workloads_ok=0
else
workloads_ok=1
fi
if (( checklist_ok == 1 && workloads_ok == 1 )); then
log "startup-checklist=all-passed"
return 0
fi
if (( workloads_ok == 0 )); then
warn "startup-checklist: workloads are not fully ready yet."
fi
now="$(date +%s)"
if (( now - start >= STARTUP_CHECKLIST_TIMEOUT_SECONDS )); then
die "Timed out waiting for startup external checklist after ${STARTUP_CHECKLIST_TIMEOUT_SECONDS}s."
fi
sleep "${STARTUP_CHECKLIST_POLL_SECONDS}"
done
}
collect_unstable_pods() {
local rows
rows="$(kubectl get pods -A --no-headers 2>/dev/null \
| awk '$4 ~ /(CrashLoopBackOff|ImagePullBackOff|ErrImagePull|CreateContainerConfigError|RunContainerError|InvalidImageName)/ {print $1 "/" $2 "|" $4}' || true)"
if [[ -n "${STARTUP_IGNORE_PODS_REGEX}" ]]; then
rows="$(printf '%s\n' "${rows}" | grep -Ev "${STARTUP_IGNORE_PODS_REGEX}" || true)"
fi
printf '%s' "${rows}"
}
wait_for_startup_stability_window() {
if [[ "${EXECUTE}" -eq 0 ]]; then
log "DRY-RUN: skipping startup stability window"
return 0
fi
local hard_deadline stable_since now unstable pods not_ready unhealthy_workloads
stable_since="$(date +%s)"
hard_deadline=$(( stable_since + STARTUP_STABILITY_TIMEOUT_SECONDS ))
while true; do
unstable=0
not_ready="$(list_not_ready_kustomizations || true)"
if [[ -n "${not_ready}" ]]; then
unstable=1
warn "stability-window: Flux kustomizations not ready."
fi
pods="$(collect_unstable_pods || true)"
if [[ -n "${pods}" ]]; then
unstable=1
warn "stability-window: unstable pods detected."
while IFS= read -r line; do
[[ -n "${line}" ]] || continue
warn " ${line}"
done <<< "${pods}"
fi
if ! check_startup_service_checklist_once; then
unstable=1
warn "stability-window: external service checklist failed."
fi
unhealthy_workloads="$(list_unhealthy_workloads || true)"
if [[ -n "${unhealthy_workloads}" ]]; then
unstable=1
warn "stability-window: workloads not fully ready."
while IFS= read -r line; do
[[ -n "${line}" ]] || continue
warn " ${line}"
done <<< "${unhealthy_workloads}"
fi
now="$(date +%s)"
if (( unstable == 0 )); then
if (( now - stable_since >= STARTUP_STABILITY_WINDOW_SECONDS )); then
log "startup-stability-window=passed (${STARTUP_STABILITY_WINDOW_SECONDS}s)"
return 0
fi
else
stable_since="${now}"
fi
if (( now >= hard_deadline )); then
die "Timed out waiting for startup stability window (${STARTUP_STABILITY_WINDOW_SECONDS}s healthy) within ${STARTUP_STABILITY_TIMEOUT_SECONDS}s."
fi
sleep "${STARTUP_STABILITY_POLL_SECONDS}"
done
}
wait_for_api() {
@ -423,13 +842,22 @@ patch_flux_suspend_all() {
done <<< "${hr_list}"
}
shutdown_namespace_excluded() {
local ns="$1"
[[ "${ns}" =~ ${SHUTDOWN_NAMESPACE_EXCLUDES_REGEX} ]]
}
startup_workload_namespace_excluded() {
local ns="$1"
[[ "${ns}" =~ ${STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX} ]]
}
best_effort_scale_down_apps() {
local ns_list ns
ns_list="$(kubectl get ns -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')"
while IFS= read -r ns; do
[[ -z "${ns}" ]] && continue
if shutdown_namespace_excluded "${ns}"; then
continue
fi
run_shell "kubectl -n ${ns} scale deployment --all --replicas=0 || true"
@ -437,11 +865,126 @@ best_effort_scale_down_apps() {
done <<< "${ns_list}"
}
save_workload_replica_snapshot() {
local rows line ns kind name replicas
if [[ "${EXECUTE}" -eq 0 ]]; then
log "DRY-RUN: save workload replica snapshot to ${REPLICA_SNAPSHOT_FILE}"
return 0
fi
rows="$(
{
kubectl get deployment -A -o jsonpath='{range .items[*]}{.metadata.namespace}{"\tdeployment\t"}{.metadata.name}{"\t"}{.spec.replicas}{"\n"}{end}' 2>/dev/null || true
kubectl get statefulset -A -o jsonpath='{range .items[*]}{.metadata.namespace}{"\tstatefulset\t"}{.metadata.name}{"\t"}{.spec.replicas}{"\n"}{end}' 2>/dev/null || true
} | sed '/^[[:space:]]*$/d'
)"
mkdir -p "$(dirname "${REPLICA_SNAPSHOT_FILE}")"
: > "${REPLICA_SNAPSHOT_FILE}"
while IFS=$'\t' read -r ns kind name replicas; do
[[ -n "${ns}" && -n "${kind}" && -n "${name}" && -n "${replicas}" ]] || continue
shutdown_namespace_excluded "${ns}" && continue
[[ "${replicas}" =~ ^[0-9]+$ ]] || continue
(( replicas > 0 )) || continue
printf '%s\t%s\t%s\t%s\n' "${ns}" "${kind}" "${name}" "${replicas}" >> "${REPLICA_SNAPSHOT_FILE}"
done <<< "${rows}"
log "replica-snapshot-file=${REPLICA_SNAPSHOT_FILE}"
log "replica-snapshot-count=$(wc -l < "${REPLICA_SNAPSHOT_FILE}" | tr -d ' ')"
}
restore_workload_replica_snapshot() {
local ns kind name desired current
if [[ "${RECOVERY_PENDING}" -ne 1 ]]; then
log "Skipping replica restore because recovery_pending=0."
return 0
fi
if [[ ! -f "${REPLICA_SNAPSHOT_FILE}" ]]; then
warn "Replica snapshot file not found at ${REPLICA_SNAPSHOT_FILE}; skipping replica restore."
return 0
fi
while IFS=$'\t' read -r ns kind name desired; do
[[ -n "${ns}" && -n "${kind}" && -n "${name}" && -n "${desired}" ]] || continue
[[ "${desired}" =~ ^[0-9]+$ ]] || continue
(( desired > 0 )) || continue
current="$(kubectl -n "${ns}" get "${kind}" "${name}" -o jsonpath='{.spec.replicas}' 2>/dev/null || true)"
[[ -n "${current}" ]] || continue
[[ "${current}" =~ ^[0-9]+$ ]] || current=0
if (( current == desired )); then
continue
fi
run kubectl -n "${ns}" scale "${kind}" "${name}" --replicas="${desired}"
done < "${REPLICA_SNAPSHOT_FILE}"
mark_checkpoint startup_replicas_restored
}
list_unhealthy_workloads() {
local rows line ns name desired ready available
rows="$(kubectl get deployment -A -o custom-columns=NS:.metadata.namespace,NAME:.metadata.name,DESIRED:.spec.replicas,READY:.status.readyReplicas,AVAILABLE:.status.availableReplicas --no-headers 2>/dev/null || true)"
while IFS= read -r line; do
[[ -n "${line}" ]] || continue
ns="$(awk '{print $1}' <<< "${line}")"
name="$(awk '{print $2}' <<< "${line}")"
desired="$(awk '{print $3}' <<< "${line}")"
ready="$(awk '{print $4}' <<< "${line}")"
available="$(awk '{print $5}' <<< "${line}")"
startup_workload_namespace_excluded "${ns}" && continue
[[ -n "${STARTUP_IGNORE_WORKLOADS_REGEX}" && "${ns}/${name}" =~ ${STARTUP_IGNORE_WORKLOADS_REGEX} ]] && continue
[[ "${desired}" =~ ^[0-9]+$ ]] || desired=0
[[ "${ready}" =~ ^[0-9]+$ ]] || ready=0
[[ "${available}" =~ ^[0-9]+$ ]] || available=0
(( desired > 0 )) || continue
if (( ready < desired || available < desired )); then
printf '%s/deployment/%s|ready=%s available=%s desired=%s\n' "${ns}" "${name}" "${ready}" "${available}" "${desired}"
fi
done <<< "${rows}"
rows="$(kubectl get statefulset -A -o custom-columns=NS:.metadata.namespace,NAME:.metadata.name,DESIRED:.spec.replicas,READY:.status.readyReplicas --no-headers 2>/dev/null || true)"
while IFS= read -r line; do
[[ -n "${line}" ]] || continue
ns="$(awk '{print $1}' <<< "${line}")"
name="$(awk '{print $2}' <<< "${line}")"
desired="$(awk '{print $3}' <<< "${line}")"
ready="$(awk '{print $4}' <<< "${line}")"
startup_workload_namespace_excluded "${ns}" && continue
[[ -n "${STARTUP_IGNORE_WORKLOADS_REGEX}" && "${ns}/${name}" =~ ${STARTUP_IGNORE_WORKLOADS_REGEX} ]] && continue
[[ "${desired}" =~ ^[0-9]+$ ]] || desired=0
[[ "${ready}" =~ ^[0-9]+$ ]] || ready=0
(( desired > 0 )) || continue
if (( ready < desired )); then
printf '%s/statefulset/%s|ready=%s desired=%s\n' "${ns}" "${name}" "${ready}" "${desired}"
fi
done <<< "${rows}"
}
wait_for_startup_workloads_ready() {
if [[ "${EXECUTE}" -eq 0 ]]; then
log "DRY-RUN: skipping startup workload readiness checks"
return 0
fi
local start now unhealthy
start="$(date +%s)"
while true; do
unhealthy="$(list_unhealthy_workloads || true)"
if [[ -z "${unhealthy}" ]]; then
log "startup-workloads=all-ready"
return 0
fi
warn "startup-workloads-not-ready:"
while IFS= read -r line; do
[[ -n "${line}" ]] || continue
warn " ${line}"
done <<< "${unhealthy}"
now="$(date +%s)"
if (( now - start >= STARTUP_WORKLOAD_TIMEOUT_SECONDS )); then
die "Timed out waiting for startup workloads Ready after ${STARTUP_WORKLOAD_TIMEOUT_SECONDS}s."
fi
sleep "${STARTUP_WORKLOAD_POLL_SECONDS}"
done
}
discover_workers_csv() {
kubectl get nodes \
-o 'custom-columns=NAME:.metadata.name,CP:.metadata.labels.node-role\.kubernetes\.io/control-plane,MASTER:.metadata.labels.node-role\.kubernetes\.io/master,READY:.status.conditions[?(@.type=="Ready")].status' \
--no-headers \
| awk '$2=="<none>" && $3=="<none>" && $4=="True" {print $1}' \
| paste -sd, -
}
@ -770,12 +1313,37 @@ schedule_host_shutdown_via_helper() {
run_host_command_via_helper "${node}" "shutdown-${node}-${service_name}" 120 "${host_command}"
}
schedule_host_service_stop_via_helper() {
local node="$1"
local service_name="$2"
local delay_seconds="$3"
local host_command
host_command="/usr/bin/systemd-run --unit ananke-stop-${service_name} --on-active=${delay_seconds}s /bin/sh -lc '/usr/bin/systemctl stop ${service_name} || true'"
if run_host_command_via_prewarm_pod "${node}" "${host_command}"; then
return 0
fi
run_host_command_via_helper "${node}" "stop-${node}-${service_name}" 120 "${host_command}"
}
prewarm_node_helper_image() {
local name="${NODE_HELPER_PREWARM_DS}"
local ready_nodes node
local node_affinity_block=""
if [[ "${EXECUTE}" -eq 0 ]]; then if [[ "${EXECUTE}" -eq 0 ]]; then
log "DRY-RUN: prewarm ${NODE_HELPER_IMAGE} via temporary DaemonSet" log "DRY-RUN: prewarm ${NODE_HELPER_IMAGE} via temporary DaemonSet"
return 0 return 0
fi fi
ready_nodes="$(kubectl get nodes -o 'custom-columns=NAME:.metadata.name,READY:.status.conditions[?(@.type=="Ready")].status' --no-headers 2>/dev/null | awk '$2=="True" {print $1}' || true)"
if [[ -n "${ready_nodes}" ]]; then
node_affinity_block=$' affinity:\n nodeAffinity:\n requiredDuringSchedulingIgnoredDuringExecution:\n nodeSelectorTerms:\n - matchExpressions:\n - key: kubernetes.io/hostname\n operator: In\n values:'
while IFS= read -r node; do
[[ -z "${node}" ]] && continue
node_affinity_block+=$'\n'" - ${node}"
done <<< "${ready_nodes}"
log "node-helper-prewarm-targets=$(printf '%s' "${ready_nodes}" | paste -sd, -)"
else
warn "Unable to detect Ready nodes for prewarm targeting; continuing without node affinity."
fi
cat <<DS | kubectl apply -f -
apiVersion: apps/v1
kind: DaemonSet
@ -793,6 +1361,7 @@ spec:
spec:
imagePullSecrets:
- name: ${REGISTRY_PULL_SECRET}
${node_affinity_block}
tolerations:
- operator: Exists
containers:
@ -915,6 +1484,34 @@ bootstrap_local_harbor() {
apply_kustomization services/harbor
}
reconcile_kustomization_with_self_heal() {
local item="$1"
if [[ "${EXECUTE}" -eq 0 ]]; then
run flux reconcile kustomization "${item}" -n flux-system --with-source --timeout=15m
return 0
fi
local attempt output rc
for attempt in 1 2; do
set +e
output="$(flux reconcile kustomization "${item}" -n flux-system --with-source --timeout=15m 2>&1)"
rc=$?
set -e
if (( rc == 0 )); then
[[ -n "${output}" ]] && printf '%s\n' "${output}"
return 0
fi
[[ -n "${output}" ]] && printf '%s\n' "${output}" >&2
if (( attempt == 1 )) && grep -Eqi 'immutable|field is immutable|cannot patch.*Job|Job.*invalid' <<< "${output}"; then
warn "Flux reconcile for '${item}' failed due immutable Job/template signal. Attempting self-heal."
heal_failed_flux_jobs || true
trigger_flux_reconcile_all || true
sleep 5
continue
fi
return "${rc}"
done
}
reconcile_stage() {
local stage_name="$1"
shift
@ -926,7 +1523,7 @@ reconcile_stage() {
fi
local item
for item in "$@"; do
reconcile_kustomization_with_self_heal "${item}"
done
mark_checkpoint "reconciled_${stage_name}"
}
@ -950,11 +1547,22 @@ resume_flux_and_reconcile() {
}
status_report() {
local battery flux_ready flux_url flux_branch flux_url_drift flux_branch_drift harbor_code workers ingress_hosts_count
local effective_target effective_canary
local labeled_nodes
battery="$(read_ups_battery || true)"
flux_ready="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || true)"
flux_url="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.url}' 2>/dev/null || true)"
flux_branch="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.ref.branch}' 2>/dev/null || true)"
flux_url_drift=false
flux_branch_drift=false
if [[ -n "${EXPECTED_FLUX_URL}" && -n "${flux_url}" && "${flux_url}" != "${EXPECTED_FLUX_URL}" ]]; then
flux_url_drift=true
fi
if [[ -n "${EXPECTED_FLUX_BRANCH}" && -n "${flux_branch}" && "${flux_branch}" != "${EXPECTED_FLUX_BRANCH}" ]]; then
flux_branch_drift=true
fi
ingress_hosts_count="$(list_ingress_hosts | sed '/^[[:space:]]*$/d' | wc -l | tr -d ' ')"
harbor_code="$(curl -ksS -o /dev/null -w '%{http_code}' https://registry.bstein.dev/v2/ || true)"
workers="$(discover_workers_csv 2>/dev/null || true)"
effective_target="${HARBOR_TARGET_NODE}"
@ -966,8 +1574,11 @@ status_report() {
effective_canary="${effective_target}"
fi
echo "mode=status"
echo "shutdown_mode=${SHUTDOWN_MODE}"
echo "bundle_file=${HARBOR_BUNDLE_FILE}" echo "bundle_file=${HARBOR_BUNDLE_FILE}"
echo "bundle_present=$([[ -f "${HARBOR_BUNDLE_FILE}" ]] && echo true || echo false)" echo "bundle_present=$([[ -f "${HARBOR_BUNDLE_FILE}" ]] && echo true || echo false)"
echo "replica_snapshot_file=${REPLICA_SNAPSHOT_FILE}"
echo "replica_snapshot_present=$([[ -f "${REPLICA_SNAPSHOT_FILE}" ]] && echo true || echo false)"
echo "node_helper_image=${NODE_HELPER_IMAGE}" echo "node_helper_image=${NODE_HELPER_IMAGE}"
echo "harbor_target_node=${effective_target:-unknown}" echo "harbor_target_node=${effective_target:-unknown}"
echo "harbor_canary_node=${effective_canary:-unknown}" echo "harbor_canary_node=${effective_canary:-unknown}"
@ -981,7 +1592,14 @@ status_report() {
echo "last_checkpoint=${LAST_CHECKPOINT}" echo "last_checkpoint=${LAST_CHECKPOINT}"
echo "ups_host=${UPS_HOST_IN_USE:-${UPS_HOST}}" echo "ups_host=${UPS_HOST_IN_USE:-${UPS_HOST}}"
echo "ups_battery=${battery:-unknown}" echo "ups_battery=${battery:-unknown}"
echo "flux_source_expected_url=${EXPECTED_FLUX_URL}"
echo "flux_source_expected_branch=${EXPECTED_FLUX_BRANCH}"
echo "flux_source_actual_url=${flux_url:-unknown}"
echo "flux_source_actual_branch=${flux_branch:-unknown}"
echo "flux_source_url_drift=${flux_url_drift}"
echo "flux_source_branch_drift=${flux_branch_drift}"
echo "flux_source_ready=${flux_ready:-unknown}" echo "flux_source_ready=${flux_ready:-unknown}"
echo "ingress_hosts_count=${ingress_hosts_count}"
echo "harbor_http=${harbor_code:-unknown}" echo "harbor_http=${harbor_code:-unknown}"
kubectl get ingressclass traefik >/dev/null 2>&1 && echo "traefik_ingressclass=true" || echo "traefik_ingressclass=false" kubectl get ingressclass traefik >/dev/null 2>&1 && echo "traefik_ingressclass=true" || echo "traefik_ingressclass=false"
kubectl -n traefik get deploy traefik >/dev/null 2>&1 && echo "traefik_deploy=true" || echo "traefik_deploy=false" kubectl -n traefik get deploy traefik >/dev/null 2>&1 && echo "traefik_deploy=true" || echo "traefik_deploy=false"
@ -1017,6 +1635,9 @@ planned_shutdown() {
warn "Skipping etcd snapshot by request." warn "Skipping etcd snapshot by request."
fi fi
save_workload_replica_snapshot
mark_checkpoint shutdown_replicas_snapshot
patch_flux_suspend_all true
best_effort_scale_down_apps
mark_checkpoint shutdown_apps_scaled_down
@ -1029,21 +1650,39 @@ planned_shutdown() {
fi
local node
if [[ "${SHUTDOWN_MODE}" == "cluster-only" ]]; then
warn "shutdown-mode=cluster-only: stopping k3s services only; host poweroff is disabled."
else
log "shutdown-mode=host-poweroff: scheduling host poweroff after service stop."
fi
for node in "${WORKER_NODES[@]}"; do for node in "${WORKER_NODES[@]}"; do
[[ -z "${node}" ]] && continue [[ -z "${node}" ]] && continue
if [[ "${SHUTDOWN_MODE}" == "cluster-only" ]]; then
schedule_host_service_stop_via_helper "${node}" k3s-agent 20
else
schedule_host_shutdown_via_helper "${node}" k3s-agent 20 schedule_host_shutdown_via_helper "${node}" k3s-agent 20
fi
done done
mark_checkpoint shutdown_workers_scheduled mark_checkpoint shutdown_workers_scheduled
for node in "${CONTROL_PLANE_NODES[@]}"; do for node in "${CONTROL_PLANE_NODES[@]}"; do
[[ -z "${node}" ]] && continue [[ -z "${node}" ]] && continue
if [[ "${SHUTDOWN_MODE}" == "cluster-only" ]]; then
schedule_host_service_stop_via_helper "${node}" k3s 45
else
schedule_host_shutdown_via_helper "${node}" k3s 45 schedule_host_shutdown_via_helper "${node}" k3s 45
fi
done done
if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then
cleanup_prewarm_daemonset cleanup_prewarm_daemonset
fi fi
mark_checkpoint shutdown_control_planes_scheduled mark_checkpoint shutdown_control_planes_scheduled
log "Shutdown actions scheduled on hosts." if [[ "${SHUTDOWN_MODE}" == "cluster-only" ]]; then
log "Cluster-only shutdown actions scheduled (hosts remain powered on)."
else
log "Shutdown + host poweroff actions scheduled on hosts."
fi
} }
emergency_shutdown_after_outage() {
@ -1081,11 +1720,19 @@ startup_flow() {
ensure_harbor_host_label
mark_checkpoint startup_harbor_host_labeled
if [[ -n "${FORCE_FLUX_URL}" ]]; then
warn "Breakglass: forcing Flux source URL to '${FORCE_FLUX_URL}'."
run kubectl -n flux-system patch gitrepository flux-system --type=merge -p "{\"spec\":{\"url\":\"${FORCE_FLUX_URL}\"}}"
mark_checkpoint startup_flux_url_forced
fi
if [[ -n "${FORCE_FLUX_BRANCH}" ]]; then if [[ -n "${FORCE_FLUX_BRANCH}" ]]; then
run kubectl -n flux-system patch gitrepository flux-system --type=merge -p "{\"spec\":{\"ref\":{\"branch\":\"${FORCE_FLUX_BRANCH}\"}}}" run kubectl -n flux-system patch gitrepository flux-system --type=merge -p "{\"spec\":{\"ref\":{\"branch\":\"${FORCE_FLUX_BRANCH}\"}}}"
mark_checkpoint startup_flux_branch_forced mark_checkpoint startup_flux_branch_forced
fi fi
assert_flux_source_expected
if [[ "${SKIP_LOCAL_BOOTSTRAP}" -eq 0 ]]; then if [[ "${SKIP_LOCAL_BOOTSTRAP}" -eq 0 ]]; then
if ! kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null | grep -q True; then if ! kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null | grep -q True; then
warn "Flux source not Ready; executing local bootstrap fallback path." warn "Flux source not Ready; executing local bootstrap fallback path."
@ -1123,6 +1770,11 @@ startup_flow() {
fi
resume_flux_and_reconcile
wait_for_flux_kustomizations_ready
restore_workload_replica_snapshot
wait_for_startup_workloads_ready
wait_for_startup_service_checklist
wait_for_startup_stability_window
if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then
prewarm_node_helper_image prewarm_node_helper_image
mark_checkpoint startup_helper_prewarmed mark_checkpoint startup_helper_prewarmed
@ -1157,12 +1809,16 @@ harbor_seed_flow() {
load_recovery_state
log "mode=${MODE} execute=${EXECUTE}"
log "shutdown-mode=${SHUTDOWN_MODE}"
log "recovery-state-file=${RECOVERY_STATE_FILE}" log "recovery-state-file=${RECOVERY_STATE_FILE}"
log "bundle-file=${HARBOR_BUNDLE_FILE}" log "bundle-file=${HARBOR_BUNDLE_FILE}"
log "node-helper-image=${NODE_HELPER_IMAGE}" log "node-helper-image=${NODE_HELPER_IMAGE}"
log "harbor-target-node-config=${HARBOR_TARGET_NODE:-auto}" log "harbor-target-node-config=${HARBOR_TARGET_NODE:-auto}"
log "harbor-canary-node-config=${HARBOR_CANARY_NODE:-auto}" log "harbor-canary-node-config=${HARBOR_CANARY_NODE:-auto}"
log "harbor-host-label-key=${HARBOR_HOST_LABEL_KEY}" log "harbor-host-label-key=${HARBOR_HOST_LABEL_KEY}"
log "expected-flux-url=${EXPECTED_FLUX_URL}"
log "expected-flux-branch=${EXPECTED_FLUX_BRANCH}"
log "startup-optional-kustomizations=${STARTUP_OPTIONAL_KUSTOMIZATIONS:-none}"
report_flux_source_state
case "${MODE}" in