diff --git a/README.md b/README.md
index 15dc3778..859ac256 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,80 @@
 # titan-iac
-Flux-managed Kubernetes cluster for bstein.dev services.
+Flux-managed Kubernetes cluster config for bstein.dev.
+
+Canonical repo URL:
+- `ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git`
+
+## Why `ananke`
+
+`Ananke` is the Greek personification of inevitability and constraint, which is exactly what this tooling is built for:
+- power events happen
+- recovery windows are finite
+- bootstrap has to be deterministic
+
+The point is not clever automation. The point is boring, repeatable recovery.
+
+## Power Domains
+
+Two UPS domains matter during shutdown/startup drills:
+- `Statera`: `titan-23`, `titan-24`, `titan-jh`
+- `Pyrphoros`: all other nodes
+
+Default UPS checks in Ananke read from `Pyrphoros` (`pyrphoros@localhost`) unless overridden.
+
+## Breakglass
+
+If primary operator access is lost, breakglass access is available on the remote Magic Mirror.
+
+## Ananke Commands
+
+Ananke is the recovery orchestrator; the Flux desired-state source remains `titan-iac.git`.
+
+Use `titan-db` as the canonical control host. `tethys` (`titan-24`) is the backup operator host.
+
+From `titan-db`:
+
+```bash
+~/ananke-cluster-power status
+~/ananke-cluster-power prepare --execute
+~/ananke-cluster-power shutdown --execute --require-ups-battery
+~/ananke-cluster-power startup --execute --force-flux-branch main --require-ups-battery
+```
+
+From `tethys` / `titan-24` (delegating to `titan-db`):
+
+```bash
+~/ananke-tools/cluster_power_console.sh --delegate-host titan-db status
+~/ananke-tools/cluster_power_console.sh --delegate-host titan-db prepare --execute
+~/ananke-tools/cluster_power_console.sh --delegate-host titan-db shutdown --execute --require-ups-battery
+~/ananke-tools/cluster_power_console.sh --delegate-host titan-db startup --execute --force-flux-branch main --require-ups-battery
+```
+
+## Shutdown Modes
+
+`cluster_power_recovery.sh` supports two shutdown behaviors:
+- `--shutdown-mode host-poweroff` (default): graceful cluster shutdown plus scheduled host poweroff.
+- `--shutdown-mode cluster-only`: graceful cluster shutdown without host poweroff (stops `k3s` / `k3s-agent` only).
+
+## Startup Completion Rules
+
+Ananke startup is not “done” just because Flux reports green once.
+
+Startup now completes only after:
+- Flux source drift checks pass (expected URL and branch)
+- all non-optional Flux kustomizations report `Ready=True`
+- the external service checklist passes (defaults include Gitea, Grafana, Harbor)
+- generated ingress reachability checks pass (default accepted statuses: `200,301,302,307,308,401,403,404`)
+- a stability soak window passes with no `CrashLoopBackOff` / image-pull failures and the checklist still healthy
+
+If you intentionally need to correct the Flux source during recovery, use:
+- `--force-flux-url ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git`
+- `--force-flux-branch main`
+
+`--force-flux-url` is breakglass-only and requires `--allow-flux-source-mutation`.
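+
+For example, a breakglass startup that corrects both the URL and the branch might look like this (a sketch composed from the flags above; adapt it to the incident at hand):
+
+```bash
+# Breakglass only: mutating the Flux source URL requires the explicit opt-in flag.
+~/ananke-cluster-power startup --execute \
+  --allow-flux-source-mutation \
+  --force-flux-url ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git \
+  --force-flux-branch main \
+  --require-ups-battery
+```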
+ +The defaults live in: +- `scripts/bootstrap/recovery-config.env` + +Detailed runbook: +- `knowledge/runbooks/cluster-power-recovery.md` diff --git a/clusters/atlas/flux-system/gotk-sync.yaml b/clusters/atlas/flux-system/gotk-sync.yaml index 7593064c..a793e031 100644 --- a/clusters/atlas/flux-system/gotk-sync.yaml +++ b/clusters/atlas/flux-system/gotk-sync.yaml @@ -9,7 +9,7 @@ metadata: spec: interval: 1m0s ref: - branch: feature/atlasbot + branch: main secretRef: name: flux-system-gitea url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git diff --git a/dockerfiles/Dockerfile.hecate-node-helper b/dockerfiles/Dockerfile.ananke-node-helper similarity index 100% rename from dockerfiles/Dockerfile.hecate-node-helper rename to dockerfiles/Dockerfile.ananke-node-helper diff --git a/knowledge/runbooks/cluster-power-recovery.md b/knowledge/runbooks/cluster-power-recovery.md index eb4ab395..67a10d3d 100644 --- a/knowledge/runbooks/cluster-power-recovery.md +++ b/knowledge/runbooks/cluster-power-recovery.md @@ -45,33 +45,37 @@ Execute examples Manual remote console examples - Canonical operator hosts: - `titan-db` - - `titan-24` + - `tethys` (`titan-24`) - Both hosts now have: - - `~/hecate-tools/cluster_power_recovery.sh` - - `~/hecate-tools/cluster_power_console.sh` - - `~/hecate-tools/bootstrap/recovery-config.env` - - `~/hecate-tools/bootstrap/harbor-bootstrap-images.txt` - - `~/hecate-tools/kubeconfig` - - `~/hecate-cluster-power` - - `~/bin/hecate-cluster-power` - - `~/hecate-repo/{infrastructure,services,scripts}` + - `~/ananke-tools/cluster_power_recovery.sh` + - `~/ananke-tools/cluster_power_console.sh` + - `~/ananke-tools/bootstrap/recovery-config.env` + - `~/ananke-tools/bootstrap/harbor-bootstrap-images.txt` + - `~/ananke-tools/kubeconfig` + - `~/ananke-cluster-power` + - `~/bin/ananke-cluster-power` + - `~/ananke-repo/{infrastructure,services,scripts}` - Both hosts also keep the Harbor bootstrap bundle at: - - `~/.local/share/hecate/bundles/harbor-bootstrap-v2.14.1-arm64.tar.zst` + - `~/.local/share/ananke/bundles/harbor-bootstrap-v2.14.1-arm64.tar.zst` - Remote usage: - `ssh titan-db` - - `~/hecate-cluster-power status` - - `~/hecate-cluster-power prepare --execute` - - `~/hecate-cluster-power shutdown --execute` - - `~/hecate-cluster-power startup --execute --force-flux-branch main` - - `ssh titan-24` - - `~/hecate-cluster-power status` - - `~/hecate-cluster-power prepare --execute` - - `~/hecate-cluster-power shutdown --execute` - - `~/hecate-cluster-power startup --execute --force-flux-branch main` + - `~/ananke-cluster-power status` + - `~/ananke-cluster-power prepare --execute` + - `~/ananke-cluster-power shutdown --execute` + - `~/ananke-cluster-power startup --execute --force-flux-branch main` + - `ssh tethys` + - `~/ananke-cluster-power status` + - `~/ananke-cluster-power prepare --execute` + - `~/ananke-cluster-power shutdown --execute` + - `~/ananke-cluster-power startup --execute --force-flux-branch main` Useful options +- `--shutdown-mode host-poweroff|cluster-only` - `--expected-flux-branch main` +- `--expected-flux-url ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git` +- `--force-flux-url ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git` - `--force-flux-branch main` +- `--allow-flux-source-mutation` (required with `--force-flux-url`; breakglass only) - `--skip-local-bootstrap` (not recommended for cold-start recovery) - `--skip-harbor-bootstrap` (skip the Harbor recovery stage if you know Harbor should stay deferred) - `--skip-harbor-seed` (skip bundle import if 
Harbor images are already cached on the target node) @@ -81,8 +85,12 @@ Useful options - `--require-ups-battery` - `--drain-timeout 180` - `--emergency-drain-timeout 45` -- `--recovery-state-file ~/.local/share/hecate/cluster_power_recovery.state` -- `--harbor-bundle-file ~/.local/share/hecate/bundles/harbor-bootstrap-v2.14.1-arm64.tar.zst` +- `--flux-ready-timeout 1200` +- `--startup-checklist-timeout 900` +- `--startup-stability-window 180` +- `--startup-stability-timeout 900` +- `--recovery-state-file ~/.local/share/ananke/cluster_power_recovery.state` +- `--harbor-bundle-file ~/.local/share/ananke/bundles/harbor-bootstrap-v2.14.1-arm64.tar.zst` Controlled drill checklist (recommended) - Operator host: use `titan-db` as canonical control host for the drill. @@ -91,37 +99,48 @@ Controlled drill checklist (recommended) - Confirm they will manually power cluster nodes back on after shutdown completes. - Confirm who will announce "all nodes powered on" to resume startup. - Preflight on `titan-db`: - - `mkdir -p ~/hecate-logs` - - `~/hecate-cluster-power status` and verify: + - `mkdir -p ~/ananke-logs` + - `~/ananke-cluster-power status` and verify: - `ups_host=pyrphoros@localhost` - `ups_battery` is numeric - `flux_source_ready=True` - Warm helper image just before shutdown: - - `~/hecate-cluster-power prepare --execute` + - `~/ananke-cluster-power prepare --execute` - Run in a persistent shell and capture logs: - - `tmux new -s hecate-drill` - - `script -q -a ~/hecate-logs/hecate-drill-$(date +%Y%m%d-%H%M%S).log` + - `tmux new -s ananke-drill` + - `script -q -a ~/ananke-logs/ananke-drill-$(date +%Y%m%d-%H%M%S).log` - Execute controlled shutdown with telemetry enforcement: - - `~/hecate-cluster-power shutdown --execute --require-ups-battery` + - `~/ananke-cluster-power shutdown --execute --require-ups-battery` - After on-site power-on confirmation, execute startup: - - `~/hecate-cluster-power startup --execute --force-flux-branch main --require-ups-battery` + - `~/ananke-cluster-power startup --execute --force-flux-branch main --require-ups-battery` - Post-check: - - `~/hecate-cluster-power status` + - `~/ananke-cluster-power status` - Verify critical services (`longhorn`, `vault`, `postgres`, `gitea`, `harbor`, `pegasus`) and no widespread pull/crash failures. Operational notes - The flow suspends Flux Kustomizations/HelmReleases during shutdown to prevent churn. +- Shutdown behavior is explicit: + - `host-poweroff` schedules host poweroff after service stop. + - `cluster-only` stops `k3s`/`k3s-agent` without powering hosts off. - Worker drain is no longer best-effort only. The script now escalates from normal drain, to `--force`, to `--disable-eviction` once the configured timeout is exhausted. -- During startup, if Flux source is not `Ready`, local bootstrap fallback is applied first using the repo snapshot under `~/hecate-repo`. +- Startup fails fast if Flux source URL/branch drift from expected values (unless branch override is explicitly requested with `--force-flux-branch`). +- Flux desired-state source remains `titan-iac.git`. Ananke orchestrates runtime recovery and should not be used as the normal Flux source repo. +- During startup, if Flux source is not `Ready`, local bootstrap fallback is applied first using the repo snapshot under `~/ananke-repo`. - Longhorn is reconciled before Vault/Postgres/Gitea so storage-backed services are not racing the volume layer. - Harbor is reconciled after the first critical stateful services. 
- Harbor bootstrap is now designed around a control-host bundle: - Build the Harbor bundle locally with `scripts/build_harbor_bootstrap_bundle.sh`. - - Stage it on the operator host at `~/.local/share/hecate/bundles/harbor-bootstrap-v2.14.1-arm64.tar.zst`. + - Stage it on the operator host at `~/.local/share/ananke/bundles/harbor-bootstrap-v2.14.1-arm64.tar.zst`. - Use `harbor-seed --execute` or a full `startup --execute` to stream/import that bundle onto `titan-05`. -- The Harbor bundle remains arm64-only because Harbor is pinned to arm64 nodes. The node-helper image is multi-arch because Hecate uses it across both arm64 and amd64 nodes during prepare/shutdown operations. -- Hecate uses a temporary privileged helper pod for host-side operations. The helper image is prewarmed with `prepare --execute` so later shutdown/startup steps do not stall on image pulls. -- The script persists outage state in `~/.local/state/cluster_power_recovery.state` by default. If startup is attempted during an outage window and power becomes unstable again, rerunning startup with insufficient UPS charge will flip into the emergency shutdown path instead of continuing to bootstrap. +- The Harbor bundle remains arm64-only because Harbor is pinned to arm64 nodes. The node-helper image is multi-arch because Ananke uses it across both arm64 and amd64 nodes during prepare/shutdown operations. +- Ananke uses a temporary privileged helper pod for host-side operations. The helper image is prewarmed with `prepare --execute` so later shutdown/startup steps do not stall on image pulls. +- The script persists outage state in `~/.local/share/ananke/cluster_power_recovery.state` by default. If startup is attempted during an outage window and power becomes unstable again, rerunning startup with insufficient UPS charge will flip into the emergency shutdown path instead of continuing to bootstrap. +- Startup completion is strict now: + - all non-optional Flux kustomizations must be `Ready=True` + - external service checklist must pass (defaults include Gitea, Grafana, Harbor) + - generated ingress reachability checks must pass (default accepted codes: `200,301,302,307,308,401,403,404`) + - stability soak must pass with no crashloop/pull-failure churn +- If Flux hits immutable one-off Job drift during reconcile, Ananke now attempts self-heal by pruning failed Flux-managed Jobs and retrying reconcile. - In dry-run mode, the script now skips the live API wait step so preview runs do not stall on an offline cluster. - Dry-run mode no longer mutates outage recovery state. 
 - `harbor-seed --execute` was validated by:
diff --git a/scripts/bootstrap/recovery-config.env b/scripts/bootstrap/recovery-config.env
index c2f789d9..a8bae234 100644
--- a/scripts/bootstrap/recovery-config.env
+++ b/scripts/bootstrap/recovery-config.env
@@ -1,14 +1,36 @@
 CANONICAL_CONTROL_HOST="titan-db"
 DEFAULT_FLUX_BRANCH="main"
-STATE_SUBDIR=".local/share/hecate"
+EXPECTED_FLUX_URL="ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git"
+SHUTDOWN_MODE="host-poweroff"
+STATE_SUBDIR=".local/share/ananke"
 HARBOR_BUNDLE_BASENAME="harbor-bootstrap-v2.14.1-arm64.tar.zst"
-HARBOR_TARGET_NODE="titan-05"
-HARBOR_CANARY_NODE="titan-04"
+HARBOR_TARGET_NODE=""
+HARBOR_CANARY_NODE=""
+HARBOR_HOST_LABEL_KEY="ananke.bstein.dev/harbor-bootstrap"
 HARBOR_CANARY_IMAGE="registry.bstein.dev/bstein/kubectl:1.35.0"
-NODE_HELPER_IMAGE="registry.bstein.dev/bstein/hecate-node-helper:0.1.0"
+NODE_HELPER_IMAGE="registry.bstein.dev/bstein/ananke-node-helper:0.1.0"
 NODE_HELPER_NAMESPACE="maintenance"
 NODE_HELPER_SERVICE_ACCOUNT="default"
 REGISTRY_PULL_SECRET="harbor-regcred"
 BUNDLE_HTTP_PORT="8877"
 UPS_HOST="pyrphoros@localhost"
 UPS_BATTERY_KEY="battery.charge"
+FLUX_READY_TIMEOUT_SECONDS="1200"
+FLUX_READY_POLL_SECONDS="10"
+STARTUP_CHECKLIST_TIMEOUT_SECONDS="900"
+STARTUP_CHECKLIST_POLL_SECONDS="10"
+STARTUP_WORKLOAD_TIMEOUT_SECONDS="900"
+STARTUP_WORKLOAD_POLL_SECONDS="10"
+STARTUP_STABILITY_WINDOW_SECONDS="180"
+STARTUP_STABILITY_TIMEOUT_SECONDS="900"
+STARTUP_STABILITY_POLL_SECONDS="10"
+STARTUP_OPTIONAL_KUSTOMIZATIONS=""
+STARTUP_IGNORE_PODS_REGEX=""
+STARTUP_IGNORE_WORKLOADS_REGEX=""
+STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX="^(kube-system|kube-public|kube-node-lease|flux-system)$"
+STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS="10"
+STARTUP_INCLUDE_INGRESS_CHECKS="1"
+STARTUP_INGRESS_ALLOWED_STATUSES="200,301,302,307,308,401,403,404"
+STARTUP_IGNORE_INGRESS_HOSTS_REGEX=""
+STARTUP_INGRESS_CHECK_TIMEOUT_SECONDS="10"
+STARTUP_SERVICE_CHECKLIST='gitea|https://scm.bstein.dev/api/healthz|200|"status":"pass"||;grafana|https://metrics.bstein.dev/api/health|200|"database":"ok"||;harbor|https://registry.bstein.dev/v2/|200,401|||'
diff --git a/scripts/build_hecate_node_helper.sh b/scripts/build_ananke_node_helper.sh
similarity index 83%
rename from scripts/build_hecate_node_helper.sh
rename to scripts/build_ananke_node_helper.sh
index 148f51c5..8d03b22d 100755
--- a/scripts/build_hecate_node_helper.sh
+++ b/scripts/build_ananke_node_helper.sh
@@ -1,10 +1,10 @@
 #!/usr/bin/env bash
 set -euo pipefail
 
-IMAGE="registry.bstein.dev/bstein/hecate-node-helper:0.1.0"
+IMAGE="registry.bstein.dev/bstein/ananke-node-helper:0.1.0"
 DOCKER_CONFIG_PATH=""
 PLATFORMS="linux/amd64,linux/arm64"
-BUILDER_NAME="hecate-node-helper-builder"
+BUILDER_NAME="ananke-node-helper-builder"
 
 while [[ $# -gt 0 ]]; do
   case "$1" in
@@ -26,7 +26,7 @@ while [[ $# -gt 0 ]]; do
       ;;
     -h|--help)
       cat <<USAGE
-Usage: scripts/build_hecate_node_helper.sh [--image <image>] [--docker-config <path>] [--platforms <platforms>] [--builder <name>]
+Usage: scripts/build_ananke_node_helper.sh [--image <image>] [--docker-config <path>] [--platforms <platforms>] [--builder <name>]
 USAGE
       exit 0
       ;;
@@ -50,7 +50,7 @@ fi
 docker buildx inspect --bootstrap >/dev/null
 docker buildx build \
   --platform "${PLATFORMS}" \
-  -f dockerfiles/Dockerfile.hecate-node-helper \
+  -f dockerfiles/Dockerfile.ananke-node-helper \
   -t "${IMAGE}" \
   --push \
   .
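A note on the `STARTUP_SERVICE_CHECKLIST` value added above: judging from the parser in `cluster_power_recovery.sh` (`IFS='|' read -r name url expected body_must body_must_not insecure timeout`), each semicolon-separated entry carries up to seven pipe-delimited fields, with an empty `insecure` or `timeout` falling back to `0` and `STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS`. A hypothetical extra entry might look like:

```bash
# Field order (inferred from check_startup_service_checklist_once):
#   name|url|allowed_statuses|body_must_contain|body_must_not_contain|insecure|timeout
# The vault host and health endpoint below are illustrative, not part of this repo.
STARTUP_SERVICE_CHECKLIST='gitea|https://scm.bstein.dev/api/healthz|200|"status":"pass"||;vault|https://vault.bstein.dev/v1/sys/health|200,429,473|||0|10'
```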
diff --git a/scripts/cluster_power_console.sh b/scripts/cluster_power_console.sh
index 6bc8cd7e..6d8ce679 100755
--- a/scripts/cluster_power_console.sh
+++ b/scripts/cluster_power_console.sh
@@ -7,11 +7,11 @@
 Usage:
   scripts/cluster_power_console.sh [--repo-dir <dir>] [--delegate-host <host>] [--allow-local] [recovery-script-options...]
 
 Purpose:
-  Friendly manual entrypoint for running Hecate from a remote console.
+  Friendly manual entrypoint for running Ananke from a remote console.
   The canonical control host is titan-db by default so bundle/state
   handling stays in one place.
 
 Defaults:
-  --repo-dir \$HOME/Development/titan-iac
+  --repo-dir \$HOME/Development/ananke (fallback: \$HOME/Development/titan-iac)
   --delegate-host titan-db
 
 Examples:
@@ -22,10 +22,14 @@
 USAGE
 }
 
-REPO_DIR="${HOME}/Development/titan-iac"
+if [[ -d "${HOME}/Development/ananke" ]]; then
+  REPO_DIR="${HOME}/Development/ananke"
+else
+  REPO_DIR="${HOME}/Development/titan-iac"
+fi
 DELEGATE_HOST="titan-db"
 ALLOW_LOCAL=0
-REMOTE_REPO_DIR="${HECATE_REMOTE_REPO_DIR:-}"
+REMOTE_REPO_DIR="${ANANKE_REMOTE_REPO_DIR:-}"
 
 while [[ $# -gt 0 ]]; do
   case "$1" in
@@ -73,6 +77,6 @@ fi
 quoted_args="$(printf '%q ' "$@")"
 remote_prefix=""
 if [[ -n "${REMOTE_REPO_DIR}" ]]; then
-  remote_prefix="HECATE_REPO_DIR=$(printf '%q' "${REMOTE_REPO_DIR}") "
+  remote_prefix="ANANKE_REPO_DIR=$(printf '%q' "${REMOTE_REPO_DIR}") "
 fi
-exec ssh -o BatchMode=yes -o ConnectTimeout=8 "${DELEGATE_HOST}" "${remote_prefix}~/hecate-tools/cluster_power_recovery.sh ${quoted_args}"
+exec ssh -o BatchMode=yes -o ConnectTimeout=8 "${DELEGATE_HOST}" "${remote_prefix}~/ananke-tools/cluster_power_recovery.sh ${quoted_args}"
diff --git a/scripts/cluster_power_recovery.sh b/scripts/cluster_power_recovery.sh
index 9efafff1..0081a0e7 100755
--- a/scripts/cluster_power_recovery.sh
+++ b/scripts/cluster_power_recovery.sh
@@ -2,7 +2,7 @@
 set -euo pipefail
 
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-REPO_DIR="${HECATE_REPO_DIR:-$(cd "${SCRIPT_DIR}/.." && pwd)}"
+REPO_DIR="${ANANKE_REPO_DIR:-$(cd "${SCRIPT_DIR}/.." && pwd)}"
 BOOTSTRAP_DIR="${SCRIPT_DIR}/bootstrap"
 CONFIG_FILE="${BOOTSTRAP_DIR}/recovery-config.env"
 if [[ -f "${CONFIG_FILE}" ]]; then
@@ -20,7 +20,11 @@ Usage:
 
 Options:
   --execute                   Actually run commands (default is dry-run)
+  --shutdown-mode <mode>      Shutdown behavior: host-poweroff or cluster-only (default: ${SHUTDOWN_MODE:-host-poweroff})
   --expected-flux-branch <branch>  Expected Flux source branch during startup checks (default: ${DEFAULT_FLUX_BRANCH:-main})
+  --expected-flux-url <url>   Expected Flux source URL during startup checks
+  --allow-flux-source-mutation  Required to allow --force-flux-url during startup
+  --force-flux-url <url>      Startup: patch flux-system GitRepository URL to this value
   --force-flux-branch <branch>  Startup: patch flux-system GitRepository branch to this value
   --skip-etcd-snapshot        Shutdown: skip etcd snapshot before shutdown
   --skip-drain                Shutdown: skip worker drain during shutdown
@@ -32,15 +36,29 @@ Options:
   --ups-host <host>           UPS identifier for upsc (default: ups@localhost)
   --ups-battery-key <key>     UPS battery key for upsc (default: battery.charge)
   --recovery-state-file <path>  Recovery state file for outage-aware restart logic
+  --replica-snapshot-file <path>
+                              File used to persist workload replica snapshot across shutdown/startup
   --harbor-bundle-file <path>   Harbor bootstrap bundle on the control host
-  --harbor-target-node <node>   Node that should host Harbor during bootstrap (default: ${HARBOR_TARGET_NODE:-titan-05})
+  --harbor-target-node <node>   Node that should host Harbor during bootstrap (default: auto)
+  --harbor-canary-node <node>   Node used for Harbor pull canary (default: auto)
+  --harbor-host-label-key <key> Node label key used to pin Harbor bootstrap workloads (default: ${HARBOR_HOST_LABEL_KEY:-ananke.bstein.dev/harbor-bootstrap})
   --harbor-canary-image <image> Harbor-backed image used for pull canary (default: ${HARBOR_CANARY_IMAGE:-registry.bstein.dev/bstein/kubectl:1.35.0})
-  --node-helper-image <image>   Privileged helper image used for host operations (default: ${NODE_HELPER_IMAGE:-registry.bstein.dev/bstein/hecate-node-helper:0.1.0})
+  --node-helper-image <image>   Privileged helper image used for host operations (default: ${NODE_HELPER_IMAGE:-registry.bstein.dev/bstein/ananke-node-helper:0.1.0})
   --bundle-http-port <port>     Temporary HTTP port used to serve bootstrap bundles (default: ${BUNDLE_HTTP_PORT:-8877})
   --api-wait-timeout <seconds>  Startup: Kubernetes API wait timeout (default: 600)
   --drain-timeout <seconds>     Worker drain timeout for normal shutdown (default: 180)
   --emergency-drain-timeout <seconds>  Worker drain timeout for emergency fallback (default: 45)
+  --flux-ready-timeout <seconds>
+                              Startup: max time to wait for Flux kustomizations Ready (default: 1200)
+  --startup-checklist-timeout <seconds>
+                              Startup: max time to wait for external service checklist (default: 900)
+  --startup-workload-timeout <seconds>
+                              Startup: max time to wait for workload readiness checks (default: 900)
+  --startup-stability-window <seconds>
+                              Startup: continuous healthy window required before success (default: 180)
+  --startup-stability-timeout <seconds>
+                              Startup: max time allowed to achieve the healthy window (default: 900)
   --require-ups-battery       Hard-fail startup if UPS battery cannot be read
   -h, --help                  Show help
@@ -70,7 +88,11 @@ case "${MODE}" in
 esac
 
 EXECUTE=0
+SHUTDOWN_MODE="${SHUTDOWN_MODE:-host-poweroff}"
 EXPECTED_FLUX_BRANCH="${DEFAULT_FLUX_BRANCH:-main}"
+EXPECTED_FLUX_URL="${EXPECTED_FLUX_URL:-ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git}"
+ALLOW_FLUX_SOURCE_MUTATION=0
+FORCE_FLUX_URL=""
 FORCE_FLUX_BRANCH=""
 SKIP_ETCD_SNAPSHOT=0
 SKIP_DRAIN=0
@@ -85,17 +107,41 @@ REQUIRE_UPS_BATTERY="${REQUIRE_UPS_BATTERY:-0}"
 DRAIN_TIMEOUT_SECONDS=180
EMERGENCY_DRAIN_TIMEOUT_SECONDS=45 API_WAIT_TIMEOUT_SECONDS=600 +FLUX_READY_TIMEOUT_SECONDS="${FLUX_READY_TIMEOUT_SECONDS:-1200}" +FLUX_READY_POLL_SECONDS="${FLUX_READY_POLL_SECONDS:-10}" +STARTUP_CHECKLIST_TIMEOUT_SECONDS="${STARTUP_CHECKLIST_TIMEOUT_SECONDS:-900}" +STARTUP_CHECKLIST_POLL_SECONDS="${STARTUP_CHECKLIST_POLL_SECONDS:-10}" +STARTUP_WORKLOAD_TIMEOUT_SECONDS="${STARTUP_WORKLOAD_TIMEOUT_SECONDS:-900}" +STARTUP_WORKLOAD_POLL_SECONDS="${STARTUP_WORKLOAD_POLL_SECONDS:-10}" +STARTUP_STABILITY_WINDOW_SECONDS="${STARTUP_STABILITY_WINDOW_SECONDS:-180}" +STARTUP_STABILITY_TIMEOUT_SECONDS="${STARTUP_STABILITY_TIMEOUT_SECONDS:-900}" +STARTUP_STABILITY_POLL_SECONDS="${STARTUP_STABILITY_POLL_SECONDS:-10}" +STARTUP_IGNORE_PODS_REGEX="${STARTUP_IGNORE_PODS_REGEX:-}" +STARTUP_IGNORE_WORKLOADS_REGEX="${STARTUP_IGNORE_WORKLOADS_REGEX:-}" +STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX="${STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX:-^(kube-system|kube-public|kube-node-lease|flux-system)$}" +STARTUP_OPTIONAL_KUSTOMIZATIONS="${STARTUP_OPTIONAL_KUSTOMIZATIONS:-}" +STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS="${STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS:-10}" +STARTUP_SERVICE_CHECKLIST="${STARTUP_SERVICE_CHECKLIST:-}" +STARTUP_INCLUDE_INGRESS_CHECKS="${STARTUP_INCLUDE_INGRESS_CHECKS:-1}" +STARTUP_INGRESS_ALLOWED_STATUSES="${STARTUP_INGRESS_ALLOWED_STATUSES:-200,301,302,307,308,401,403,404}" +STARTUP_IGNORE_INGRESS_HOSTS_REGEX="${STARTUP_IGNORE_INGRESS_HOSTS_REGEX:-}" +STARTUP_INGRESS_CHECK_TIMEOUT_SECONDS="${STARTUP_INGRESS_CHECK_TIMEOUT_SECONDS:-10}" +SHUTDOWN_NAMESPACE_EXCLUDES_REGEX="${SHUTDOWN_NAMESPACE_EXCLUDES_REGEX:-^(kube-system|kube-public|kube-node-lease|flux-system|traefik|metallb-system|cert-manager|longhorn-system|vault|postgres|maintenance)$}" BUNDLE_HTTP_PORT="${BUNDLE_HTTP_PORT:-8877}" -STATE_ROOT="${HOME}/${STATE_SUBDIR:-.local/share/hecate}" +STATE_ROOT="${HOME}/${STATE_SUBDIR:-.local/share/ananke}" RECOVERY_STATE_FILE="${STATE_ROOT}/cluster_power_recovery.state" +REPLICA_SNAPSHOT_FILE="${STATE_ROOT}/desired_workload_replicas.tsv" HARBOR_BUNDLE_FILE="${STATE_ROOT}/bundles/${HARBOR_BUNDLE_BASENAME:-harbor-bootstrap-v2.14.1-arm64.tar.zst}" -HARBOR_TARGET_NODE="${HARBOR_TARGET_NODE:-titan-05}" -HARBOR_CANARY_NODE="${HARBOR_CANARY_NODE:-titan-04}" +HARBOR_TARGET_NODE="${HARBOR_TARGET_NODE:-}" +HARBOR_CANARY_NODE="${HARBOR_CANARY_NODE:-}" +HARBOR_HOST_LABEL_KEY="${HARBOR_HOST_LABEL_KEY:-ananke.bstein.dev/harbor-bootstrap}" HARBOR_CANARY_IMAGE="${HARBOR_CANARY_IMAGE:-registry.bstein.dev/bstein/kubectl:1.35.0}" -NODE_HELPER_IMAGE="${NODE_HELPER_IMAGE:-registry.bstein.dev/bstein/hecate-node-helper:0.1.0}" +NODE_HELPER_IMAGE="${NODE_HELPER_IMAGE:-registry.bstein.dev/bstein/ananke-node-helper:0.1.0}" NODE_HELPER_NAMESPACE="${NODE_HELPER_NAMESPACE:-maintenance}" NODE_HELPER_SERVICE_ACCOUNT="${NODE_HELPER_SERVICE_ACCOUNT:-default}" +NODE_HELPER_PREWARM_DS="${NODE_HELPER_PREWARM_DS:-ananke-node-helper-prewarm}" REGISTRY_PULL_SECRET="${REGISTRY_PULL_SECRET:-harbor-regcred}" +KEEP_PREWARM_DAEMONSET=0 RECOVERY_PENDING=0 STARTUP_ATTEMPTED_DURING_OUTAGE=0 @@ -109,10 +155,26 @@ while [[ $# -gt 0 ]]; do EXECUTE=1 shift ;; + --shutdown-mode) + SHUTDOWN_MODE="${2:?missing shutdown mode}" + shift 2 + ;; --expected-flux-branch) EXPECTED_FLUX_BRANCH="${2:?missing branch}" shift 2 ;; + --expected-flux-url) + EXPECTED_FLUX_URL="${2:?missing flux url}" + shift 2 + ;; + --allow-flux-source-mutation) + ALLOW_FLUX_SOURCE_MUTATION=1 + shift + ;; + --force-flux-url) + FORCE_FLUX_URL="${2:?missing flux url}" + shift 2 + ;; 
--force-flux-branch) FORCE_FLUX_BRANCH="${2:?missing branch}" shift 2 @@ -161,6 +223,10 @@ while [[ $# -gt 0 ]]; do RECOVERY_STATE_FILE="${2:?missing state file path}" shift 2 ;; + --replica-snapshot-file) + REPLICA_SNAPSHOT_FILE="${2:?missing replica snapshot file path}" + shift 2 + ;; --harbor-bundle-file) HARBOR_BUNDLE_FILE="${2:?missing bundle file path}" shift 2 @@ -169,6 +235,14 @@ while [[ $# -gt 0 ]]; do HARBOR_TARGET_NODE="${2:?missing harbor target node}" shift 2 ;; + --harbor-canary-node) + HARBOR_CANARY_NODE="${2:?missing harbor canary node}" + shift 2 + ;; + --harbor-host-label-key) + HARBOR_HOST_LABEL_KEY="${2:?missing harbor host label key}" + shift 2 + ;; --harbor-canary-image) HARBOR_CANARY_IMAGE="${2:?missing canary image}" shift 2 @@ -185,6 +259,26 @@ while [[ $# -gt 0 ]]; do API_WAIT_TIMEOUT_SECONDS="${2:?missing api wait timeout}" shift 2 ;; + --flux-ready-timeout) + FLUX_READY_TIMEOUT_SECONDS="${2:?missing flux ready timeout}" + shift 2 + ;; + --startup-checklist-timeout) + STARTUP_CHECKLIST_TIMEOUT_SECONDS="${2:?missing startup checklist timeout}" + shift 2 + ;; + --startup-workload-timeout) + STARTUP_WORKLOAD_TIMEOUT_SECONDS="${2:?missing startup workload timeout}" + shift 2 + ;; + --startup-stability-window) + STARTUP_STABILITY_WINDOW_SECONDS="${2:?missing startup stability window}" + shift 2 + ;; + --startup-stability-timeout) + STARTUP_STABILITY_TIMEOUT_SECONDS="${2:?missing startup stability timeout}" + shift 2 + ;; --drain-timeout) DRAIN_TIMEOUT_SECONDS="${2:?missing drain timeout}" shift 2 @@ -205,6 +299,19 @@ while [[ $# -gt 0 ]]; do esac done +case "${SHUTDOWN_MODE}" in + host-poweroff|cluster-only) ;; + *) + echo "Invalid --shutdown-mode '${SHUTDOWN_MODE}'. Expected host-poweroff or cluster-only." >&2 + exit 1 + ;; +esac + +if [[ -n "${FORCE_FLUX_URL}" && "${ALLOW_FLUX_SOURCE_MUTATION}" -ne 1 ]]; then + echo "--force-flux-url requires --allow-flux-source-mutation (breakglass)." >&2 + exit 1 +fi + require_cmd() { local cmd="$1" if ! command -v "${cmd}" >/dev/null 2>&1; then @@ -363,12 +470,337 @@ report_flux_source_state() { [[ -n "${flux_url}" ]] && log "flux-source-url=${flux_url}" if [[ -n "${flux_branch}" ]]; then log "flux-source-branch=${flux_branch}" - if [[ "${MODE}" == "startup" && -z "${FORCE_FLUX_BRANCH}" && "${flux_branch}" != "${EXPECTED_FLUX_BRANCH}" ]]; then - warn "Flux source branch is '${flux_branch}'. Expected '${EXPECTED_FLUX_BRANCH}' for canonical recovery." - fi fi } +csv_has_value() { + local csv="$1" + local value="$2" + local needle=",${value}," + local haystack=",${csv}," + [[ "${haystack}" == *"${needle}"* ]] +} + +assert_flux_source_expected() { + if [[ "${EXECUTE}" -eq 0 ]]; then + log "DRY-RUN: skipping strict Flux source drift guard" + return 0 + fi + local flux_url flux_branch + flux_url="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.url}' 2>/dev/null || true)" + flux_branch="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.ref.branch}' 2>/dev/null || true)" + [[ -n "${flux_url}" ]] || die "Unable to read Flux source URL from flux-system/gitrepository." + [[ -n "${flux_branch}" ]] || die "Unable to read Flux source branch from flux-system/gitrepository." + + if [[ -n "${EXPECTED_FLUX_URL}" && "${flux_url}" != "${EXPECTED_FLUX_URL}" ]]; then + die "Flux source URL drift detected: got '${flux_url}', expected '${EXPECTED_FLUX_URL}'. Refusing startup." 
+  fi
+  if [[ -z "${FORCE_FLUX_BRANCH}" && "${flux_branch}" != "${EXPECTED_FLUX_BRANCH}" ]]; then
+    die "Flux source branch drift detected: got '${flux_branch}', expected '${EXPECTED_FLUX_BRANCH}'. Use --force-flux-branch to correct."
+  fi
+}
+
+kustomization_is_optional() {
+  local name="$1"
+  [[ -n "${STARTUP_OPTIONAL_KUSTOMIZATIONS}" ]] || return 1
+  csv_has_value "${STARTUP_OPTIONAL_KUSTOMIZATIONS}" "${name}"
+}
+
+list_not_ready_kustomizations() {
+  local rows line name ready message
+  rows="$(kubectl -n flux-system get kustomizations.kustomize.toolkit.fluxcd.io \
+    -o 'custom-columns=NAME:.metadata.name,READY:.status.conditions[?(@.type=="Ready")].status,MESSAGE:.status.conditions[?(@.type=="Ready")].message' \
+    --no-headers 2>/dev/null || true)"
+  [[ -n "${rows}" ]] || return 0
+  while IFS= read -r line; do
+    [[ -n "${line}" ]] || continue
+    name="$(awk '{print $1}' <<< "${line}")"
+    ready="$(awk '{print $2}' <<< "${line}")"
+    message="${line#${name} }"
+    message="${message#${ready} }"
+    if kustomization_is_optional "${name}"; then
+      continue
+    fi
+    if [[ "${ready}" != "True" ]]; then
+      printf '%s|%s\n' "${name}" "${message}"
+    fi
+  done <<< "${rows}"
+}
+
+trigger_flux_reconcile_all() {
+  local now
+  now="$(date --iso-8601=seconds)"
+  run kubectl -n flux-system annotate kustomizations.kustomize.toolkit.fluxcd.io --all reconcile.fluxcd.io/requestedAt="${now}" --overwrite
+  if command -v flux >/dev/null 2>&1; then
+    run flux reconcile source git flux-system -n flux-system --timeout=3m
+  fi
+}
+
+heal_failed_flux_jobs() {
+  local rows line ns name failed flux_owner helm_owner healed
+  healed=0
+  rows="$(kubectl get jobs.batch -A \
+    -o custom-columns=NS:.metadata.namespace,NAME:.metadata.name,FAILED:.status.failed,FLUX_OWNER:.metadata.labels.kustomize\\.toolkit\\.fluxcd\\.io/name,HELM_OWNER:.metadata.labels.helm\\.toolkit\\.fluxcd\\.io/name \
+    --no-headers 2>/dev/null || true)"
+  [[ -n "${rows}" ]] || return 1
+
+  while IFS= read -r line; do
+    [[ -n "${line}" ]] || continue
+    ns="$(awk '{print $1}' <<< "${line}")"
+    name="$(awk '{print $2}' <<< "${line}")"
+    failed="$(awk '{print $3}' <<< "${line}")"
+    flux_owner="$(awk '{print $4}' <<< "${line}")"
+    helm_owner="$(awk '{print $5}' <<< "${line}")"
+    [[ "${failed}" != "<none>" ]] || continue
+    [[ "${failed}" =~ ^[0-9]+$ ]] || continue
+    (( failed > 0 )) || continue
+    if [[ "${flux_owner}" == "<none>" && "${helm_owner}" == "<none>" ]]; then
+      continue
+    fi
+    warn "Deleting failed Flux-managed Job ${ns}/${name} to heal immutable-template drift."
+    run kubectl -n "${ns}" delete job "${name}" --ignore-not-found
+    healed=1
+  done <<< "${rows}"
+
+  (( healed == 1 ))
+}
+
+wait_for_flux_kustomizations_ready() {
+  if [[ "${EXECUTE}" -eq 0 ]]; then
+    log "DRY-RUN: skipping wait for all Flux kustomizations Ready"
+    return 0
+  fi
+  local start now not_ready immutable_hits
+  start="$(date +%s)"
+  immutable_hits=0
+  while true; do
+    not_ready="$(list_not_ready_kustomizations || true)"
+    if [[ -z "${not_ready}" ]]; then
+      log "flux-kustomizations=all-ready"
+      return 0
+    fi
+
+    log "flux-kustomizations-not-ready:"
+    while IFS= read -r line; do
+      [[ -n "${line}" ]] || continue
+      log "  ${line}"
+    done <<< "${not_ready}"
+
+    if grep -Eqi 'immutable|field is immutable|cannot patch.*Job|Job.*invalid' <<< "${not_ready}"; then
+      if (( immutable_hits < 3 )); then
+        immutable_hits=$(( immutable_hits + 1 ))
+        warn "Detected immutable Job failure signal in Flux status. Attempting automated Job cleanup (${immutable_hits}/3)."
+ if heal_failed_flux_jobs; then + trigger_flux_reconcile_all + fi + fi + fi + + now="$(date +%s)" + if (( now - start >= FLUX_READY_TIMEOUT_SECONDS )); then + die "Timed out waiting for Flux kustomizations Ready after ${FLUX_READY_TIMEOUT_SECONDS}s." + fi + sleep "${FLUX_READY_POLL_SECONDS}" + done +} + +default_startup_service_checklist() { + cat <<'CHECKS' +gitea|https://scm.bstein.dev/api/healthz|200|"status":"pass"|| +grafana|https://metrics.bstein.dev/api/health|200|"database":"ok"|| +harbor|https://registry.bstein.dev/v2/|200,401||| +CHECKS +} + +list_ingress_hosts() { + kubectl get ingress -A -o jsonpath='{range .items[*]}{range .spec.rules[*]}{.host}{"\n"}{end}{end}' 2>/dev/null \ + | sed '/^[[:space:]]*$/d' \ + | sort -u +} + +generated_ingress_service_checks() { + local host + while IFS= read -r host; do + [[ -n "${host}" ]] || continue + if [[ -n "${STARTUP_IGNORE_INGRESS_HOSTS_REGEX}" ]] && [[ "${host}" =~ ${STARTUP_IGNORE_INGRESS_HOSTS_REGEX} ]]; then + continue + fi + printf 'ingress-%s|https://%s/|%s|||0|%s\n' "${host}" "${host}" "${STARTUP_INGRESS_ALLOWED_STATUSES}" "${STARTUP_INGRESS_CHECK_TIMEOUT_SECONDS}" + done < <(list_ingress_hosts) +} + +startup_service_checklist_rows() { + local base + if [[ -n "${STARTUP_SERVICE_CHECKLIST}" ]]; then + base="$(printf '%s' "${STARTUP_SERVICE_CHECKLIST}" | tr ';' '\n')" + else + base="$(default_startup_service_checklist)" + fi + + printf '%s\n' "${base}" | sed '/^[[:space:]]*$/d' + if [[ "${STARTUP_INCLUDE_INGRESS_CHECKS}" == "1" || "${STARTUP_INCLUDE_INGRESS_CHECKS}" == "true" ]]; then + generated_ingress_service_checks + fi +} + +service_status_allowed() { + local expected_csv="$1" + local got="$2" + local token + IFS=',' read -r -a _statuses <<< "${expected_csv}" + for token in "${_statuses[@]}"; do + if [[ "${token}" == "${got}" ]]; then + return 0 + fi + done + return 1 +} + +check_startup_service_checklist_once() { + local rows row name url expected body_must body_must_not insecure timeout code rc + local body_file failures + failures=0 + rows="$(startup_service_checklist_rows)" + while IFS= read -r row; do + [[ -n "${row}" ]] || continue + IFS='|' read -r name url expected body_must body_must_not insecure timeout <<< "${row}" + [[ -n "${name}" && -n "${url}" && -n "${expected}" ]] || continue + [[ -n "${insecure}" ]] || insecure=0 + [[ -n "${timeout}" ]] || timeout="${STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS}" + body_file="$(mktemp)" + rc=0 + if [[ "${insecure}" == "1" || "${insecure}" == "true" ]]; then + code="$(curl -ksS --max-time "${timeout}" -o "${body_file}" -w '%{http_code}' "${url}" || rc=$?)" + else + code="$(curl -sS --max-time "${timeout}" -o "${body_file}" -w '%{http_code}' "${url}" || rc=$?)" + fi + if (( rc != 0 )); then + warn "startup-check ${name}: request failed (rc=${rc}) url=${url}" + failures=1 + rm -f "${body_file}" + continue + fi + if ! service_status_allowed "${expected}" "${code}"; then + warn "startup-check ${name}: expected status ${expected}, got ${code} url=${url}" + failures=1 + rm -f "${body_file}" + continue + fi + if [[ -n "${body_must}" ]] && ! 
grep -Fq -- "${body_must}" "${body_file}"; then + warn "startup-check ${name}: missing required body fragment '${body_must}'" + failures=1 + rm -f "${body_file}" + continue + fi + if [[ -n "${body_must_not}" ]] && grep -Fq -- "${body_must_not}" "${body_file}"; then + warn "startup-check ${name}: forbidden body fragment '${body_must_not}' present" + failures=1 + rm -f "${body_file}" + continue + fi + rm -f "${body_file}" + done <<< "${rows}" + (( failures == 0 )) +} + +wait_for_startup_service_checklist() { + if [[ "${EXECUTE}" -eq 0 ]]; then + log "DRY-RUN: skipping startup external service checklist wait" + return 0 + fi + local start now checklist_ok workloads_ok + start="$(date +%s)" + while true; do + checklist_ok=0 + workloads_ok=0 + if check_startup_service_checklist_once; then + checklist_ok=1 + fi + if list_unhealthy_workloads | sed '/^[[:space:]]*$/d' | grep -q .; then + workloads_ok=0 + else + workloads_ok=1 + fi + if (( checklist_ok == 1 && workloads_ok == 1 )); then + log "startup-checklist=all-passed" + return 0 + fi + if (( workloads_ok == 0 )); then + warn "startup-checklist: workloads are not fully ready yet." + fi + now="$(date +%s)" + if (( now - start >= STARTUP_CHECKLIST_TIMEOUT_SECONDS )); then + die "Timed out waiting for startup external checklist after ${STARTUP_CHECKLIST_TIMEOUT_SECONDS}s." + fi + sleep "${STARTUP_CHECKLIST_POLL_SECONDS}" + done +} + +collect_unstable_pods() { + local rows + rows="$(kubectl get pods -A --no-headers 2>/dev/null \ + | awk '$4 ~ /(CrashLoopBackOff|ImagePullBackOff|ErrImagePull|CreateContainerConfigError|RunContainerError|InvalidImageName)/ {print $1 "/" $2 "|" $4}' || true)" + if [[ -n "${STARTUP_IGNORE_PODS_REGEX}" ]]; then + rows="$(printf '%s\n' "${rows}" | grep -Ev "${STARTUP_IGNORE_PODS_REGEX}" || true)" + fi + printf '%s' "${rows}" +} + +wait_for_startup_stability_window() { + if [[ "${EXECUTE}" -eq 0 ]]; then + log "DRY-RUN: skipping startup stability window" + return 0 + fi + local hard_deadline stable_since now unstable pods not_ready unhealthy_workloads + stable_since="$(date +%s)" + hard_deadline=$(( stable_since + STARTUP_STABILITY_TIMEOUT_SECONDS )) + while true; do + unstable=0 + not_ready="$(list_not_ready_kustomizations || true)" + if [[ -n "${not_ready}" ]]; then + unstable=1 + warn "stability-window: Flux kustomizations not ready." + fi + pods="$(collect_unstable_pods || true)" + if [[ -n "${pods}" ]]; then + unstable=1 + warn "stability-window: unstable pods detected." + while IFS= read -r line; do + [[ -n "${line}" ]] || continue + warn " ${line}" + done <<< "${pods}" + fi + if ! check_startup_service_checklist_once; then + unstable=1 + warn "stability-window: external service checklist failed." + fi + unhealthy_workloads="$(list_unhealthy_workloads || true)" + if [[ -n "${unhealthy_workloads}" ]]; then + unstable=1 + warn "stability-window: workloads not fully ready." + while IFS= read -r line; do + [[ -n "${line}" ]] || continue + warn " ${line}" + done <<< "${unhealthy_workloads}" + fi + + now="$(date +%s)" + if (( unstable == 0 )); then + if (( now - stable_since >= STARTUP_STABILITY_WINDOW_SECONDS )); then + log "startup-stability-window=passed (${STARTUP_STABILITY_WINDOW_SECONDS}s)" + return 0 + fi + else + stable_since="${now}" + fi + + if (( now >= hard_deadline )); then + die "Timed out waiting for startup stability window (${STARTUP_STABILITY_WINDOW_SECONDS}s healthy) within ${STARTUP_STABILITY_TIMEOUT_SECONDS}s." 
+ fi + sleep "${STARTUP_STABILITY_POLL_SECONDS}" + done +} + wait_for_api() { local attempts=$(( API_WAIT_TIMEOUT_SECONDS / 5 )) if (( attempts < 1 )); then @@ -410,13 +842,22 @@ patch_flux_suspend_all() { done <<< "${hr_list}" } +shutdown_namespace_excluded() { + local ns="$1" + [[ "${ns}" =~ ${SHUTDOWN_NAMESPACE_EXCLUDES_REGEX} ]] +} + +startup_workload_namespace_excluded() { + local ns="$1" + [[ "${ns}" =~ ${STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX} ]] +} + best_effort_scale_down_apps() { - local excludes='^(kube-system|kube-public|kube-node-lease|flux-system|traefik|metallb-system|cert-manager|longhorn-system|vault|postgres|maintenance)$' - local ns_list + local ns_list ns ns_list="$(kubectl get ns -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')" while IFS= read -r ns; do [[ -z "${ns}" ]] && continue - if [[ "${ns}" =~ ${excludes} ]]; then + if shutdown_namespace_excluded "${ns}"; then continue fi run_shell "kubectl -n ${ns} scale deployment --all --replicas=0 || true" @@ -424,14 +865,186 @@ best_effort_scale_down_apps() { done <<< "${ns_list}" } +save_workload_replica_snapshot() { + local rows line ns kind name replicas + if [[ "${EXECUTE}" -eq 0 ]]; then + log "DRY-RUN: save workload replica snapshot to ${REPLICA_SNAPSHOT_FILE}" + return 0 + fi + rows="$( + { + kubectl get deployment -A -o jsonpath='{range .items[*]}{.metadata.namespace}{"\tdeployment\t"}{.metadata.name}{"\t"}{.spec.replicas}{"\n"}{end}' 2>/dev/null || true + kubectl get statefulset -A -o jsonpath='{range .items[*]}{.metadata.namespace}{"\tstatefulset\t"}{.metadata.name}{"\t"}{.spec.replicas}{"\n"}{end}' 2>/dev/null || true + } | sed '/^[[:space:]]*$/d' + )" + mkdir -p "$(dirname "${REPLICA_SNAPSHOT_FILE}")" + : > "${REPLICA_SNAPSHOT_FILE}" + while IFS=$'\t' read -r ns kind name replicas; do + [[ -n "${ns}" && -n "${kind}" && -n "${name}" && -n "${replicas}" ]] || continue + shutdown_namespace_excluded "${ns}" && continue + [[ "${replicas}" =~ ^[0-9]+$ ]] || continue + (( replicas > 0 )) || continue + printf '%s\t%s\t%s\t%s\n' "${ns}" "${kind}" "${name}" "${replicas}" >> "${REPLICA_SNAPSHOT_FILE}" + done <<< "${rows}" + log "replica-snapshot-file=${REPLICA_SNAPSHOT_FILE}" + log "replica-snapshot-count=$(wc -l < "${REPLICA_SNAPSHOT_FILE}" | tr -d ' ')" +} + +restore_workload_replica_snapshot() { + local ns kind name desired current + if [[ "${RECOVERY_PENDING}" -ne 1 ]]; then + log "Skipping replica restore because recovery_pending=0." + return 0 + fi + if [[ ! -f "${REPLICA_SNAPSHOT_FILE}" ]]; then + warn "Replica snapshot file not found at ${REPLICA_SNAPSHOT_FILE}; skipping replica restore." 
+ return 0 + fi + while IFS=$'\t' read -r ns kind name desired; do + [[ -n "${ns}" && -n "${kind}" && -n "${name}" && -n "${desired}" ]] || continue + [[ "${desired}" =~ ^[0-9]+$ ]] || continue + (( desired > 0 )) || continue + current="$(kubectl -n "${ns}" get "${kind}" "${name}" -o jsonpath='{.spec.replicas}' 2>/dev/null || true)" + [[ -n "${current}" ]] || continue + [[ "${current}" =~ ^[0-9]+$ ]] || current=0 + if (( current == desired )); then + continue + fi + run kubectl -n "${ns}" scale "${kind}" "${name}" --replicas="${desired}" + done < "${REPLICA_SNAPSHOT_FILE}" + mark_checkpoint startup_replicas_restored +} + +list_unhealthy_workloads() { + local rows line ns name desired ready available + rows="$(kubectl get deployment -A -o custom-columns=NS:.metadata.namespace,NAME:.metadata.name,DESIRED:.spec.replicas,READY:.status.readyReplicas,AVAILABLE:.status.availableReplicas --no-headers 2>/dev/null || true)" + while IFS= read -r line; do + [[ -n "${line}" ]] || continue + ns="$(awk '{print $1}' <<< "${line}")" + name="$(awk '{print $2}' <<< "${line}")" + desired="$(awk '{print $3}' <<< "${line}")" + ready="$(awk '{print $4}' <<< "${line}")" + available="$(awk '{print $5}' <<< "${line}")" + startup_workload_namespace_excluded "${ns}" && continue + [[ -n "${STARTUP_IGNORE_WORKLOADS_REGEX}" && "${ns}/${name}" =~ ${STARTUP_IGNORE_WORKLOADS_REGEX} ]] && continue + [[ "${desired}" =~ ^[0-9]+$ ]] || desired=0 + [[ "${ready}" =~ ^[0-9]+$ ]] || ready=0 + [[ "${available}" =~ ^[0-9]+$ ]] || available=0 + (( desired > 0 )) || continue + if (( ready < desired || available < desired )); then + printf '%s/deployment/%s|ready=%s available=%s desired=%s\n' "${ns}" "${name}" "${ready}" "${available}" "${desired}" + fi + done <<< "${rows}" + + rows="$(kubectl get statefulset -A -o custom-columns=NS:.metadata.namespace,NAME:.metadata.name,DESIRED:.spec.replicas,READY:.status.readyReplicas --no-headers 2>/dev/null || true)" + while IFS= read -r line; do + [[ -n "${line}" ]] || continue + ns="$(awk '{print $1}' <<< "${line}")" + name="$(awk '{print $2}' <<< "${line}")" + desired="$(awk '{print $3}' <<< "${line}")" + ready="$(awk '{print $4}' <<< "${line}")" + startup_workload_namespace_excluded "${ns}" && continue + [[ -n "${STARTUP_IGNORE_WORKLOADS_REGEX}" && "${ns}/${name}" =~ ${STARTUP_IGNORE_WORKLOADS_REGEX} ]] && continue + [[ "${desired}" =~ ^[0-9]+$ ]] || desired=0 + [[ "${ready}" =~ ^[0-9]+$ ]] || ready=0 + (( desired > 0 )) || continue + if (( ready < desired )); then + printf '%s/statefulset/%s|ready=%s desired=%s\n' "${ns}" "${name}" "${ready}" "${desired}" + fi + done <<< "${rows}" +} + +wait_for_startup_workloads_ready() { + if [[ "${EXECUTE}" -eq 0 ]]; then + log "DRY-RUN: skipping startup workload readiness checks" + return 0 + fi + local start now unhealthy + start="$(date +%s)" + while true; do + unhealthy="$(list_unhealthy_workloads || true)" + if [[ -z "${unhealthy}" ]]; then + log "startup-workloads=all-ready" + return 0 + fi + warn "startup-workloads-not-ready:" + while IFS= read -r line; do + [[ -n "${line}" ]] || continue + warn " ${line}" + done <<< "${unhealthy}" + now="$(date +%s)" + if (( now - start >= STARTUP_WORKLOAD_TIMEOUT_SECONDS )); then + die "Timed out waiting for startup workloads Ready after ${STARTUP_WORKLOAD_TIMEOUT_SECONDS}s." 
+    fi
+    sleep "${STARTUP_WORKLOAD_POLL_SECONDS}"
+  done
+}
+
 discover_workers_csv() {
   kubectl get nodes \
-    -o custom-columns=NAME:.metadata.name,CP:.metadata.labels.node-role\\.kubernetes\\.io/control-plane,MASTER:.metadata.labels.node-role\\.kubernetes\\.io/master \
+    -o 'custom-columns=NAME:.metadata.name,CP:.metadata.labels.node-role\.kubernetes\.io/control-plane,MASTER:.metadata.labels.node-role\.kubernetes\.io/master,READY:.status.conditions[?(@.type=="Ready")].status' \
     --no-headers \
-    | awk '$2=="<none>" && $3=="<none>" {print $1}' \
+    | awk '$2=="<none>" && $3=="<none>" && $4=="True" {print $1}' \
     | paste -sd, -
 }
 
+node_is_ready() {
+  local node="$1"
+  [[ -n "${node}" ]] || return 1
+  local ready
+  ready="$(kubectl get node "${node}" -o jsonpath='{range .status.conditions[?(@.type=="Ready")]}{.status}{end}' 2>/dev/null || true)"
+  [[ "${ready}" == "True" ]]
+}
+
+select_ready_arm64_worker() {
+  local rows node
+  rows="$(kubectl get nodes -o 'custom-columns=NAME:.metadata.name,ARCH:.metadata.labels.kubernetes\.io/arch,WORKER:.metadata.labels.node-role\.kubernetes\.io/worker,HARDWARE:.metadata.labels.hardware,READY:.status.conditions[?(@.type=="Ready")].status' --no-headers 2>/dev/null || true)"
+  [[ -n "${rows}" ]] || return 1
+  node="$(printf '%s\n' "${rows}" | awk '$2=="arm64" && $3=="true" && $4=="rpi5" && $5=="True" {print $1; exit}')"
+  if [[ -n "${node}" ]]; then
+    printf '%s' "${node}"
+    return 0
+  fi
+  node="$(printf '%s\n' "${rows}" | awk '$2=="arm64" && $3=="true" && $4=="rpi4" && $5=="True" {print $1; exit}')"
+  if [[ -n "${node}" ]]; then
+    printf '%s' "${node}"
+    return 0
+  fi
+  node="$(printf '%s\n' "${rows}" | awk '$2=="arm64" && $3=="true" && $5=="True" {print $1; exit}')"
+  if [[ -n "${node}" ]]; then
+    printf '%s' "${node}"
+    return 0
+  fi
+  return 1
+}
+
+ensure_harbor_target_node() {
+  if node_is_ready "${HARBOR_TARGET_NODE}"; then
+    return 0
+  fi
+  local fallback
+  fallback="$(select_ready_arm64_worker || true)"
+  [[ -n "${fallback}" ]] || die "No Ready arm64 worker available for Harbor bootstrap target."
+  if [[ -n "${HARBOR_TARGET_NODE}" ]]; then
+    warn "Configured harbor target node '${HARBOR_TARGET_NODE}' is not Ready; using '${fallback}' instead."
+  else
+    log "harbor-target-node auto-selected: ${fallback}"
+  fi
+  HARBOR_TARGET_NODE="${fallback}"
+}
+
+ensure_harbor_host_label() {
+  [[ -n "${HARBOR_TARGET_NODE}" ]] || die "Harbor target node is not set."
+  local labeled node
+  labeled="$(kubectl get nodes -l "${HARBOR_HOST_LABEL_KEY}=true" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null || true)"
+  while IFS= read -r node; do
+    [[ -z "${node}" ]] && continue
+    [[ "${node}" == "${HARBOR_TARGET_NODE}" ]] && continue
+    run kubectl label node "${node}" "${HARBOR_HOST_LABEL_KEY}-"
+  done <<< "${labeled}"
+  run kubectl label node "${HARBOR_TARGET_NODE}" "${HARBOR_HOST_LABEL_KEY}=true" --overwrite
+}
+
 as_array_from_csv() {
   local csv="$1"
   local out_var="$2"
@@ -557,9 +1170,18 @@ harbor_is_ready() {
 }
 
 run_harbor_pull_canary() {
-  local pod="hecate-harbor-canary"
+  local pod="ananke-harbor-canary"
+  local canary_node="${HARBOR_CANARY_NODE}"
+  if ! node_is_ready "${canary_node}"; then
+    ensure_harbor_target_node
+    canary_node="${HARBOR_TARGET_NODE}"
+    if [[ -n "${HARBOR_CANARY_NODE}" ]]; then
+      warn "Configured harbor canary node '${HARBOR_CANARY_NODE}' is not Ready; using '${canary_node}'."
+ fi + HARBOR_CANARY_NODE="${canary_node}" + fi if [[ "${EXECUTE}" -eq 0 ]]; then - log "DRY-RUN: create Harbor pull canary pod with ${HARBOR_CANARY_IMAGE} on ${HARBOR_CANARY_NODE}" + log "DRY-RUN: create Harbor pull canary pod with ${HARBOR_CANARY_IMAGE} on ${canary_node}" return 0 fi timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" delete pod "${pod}" --ignore-not-found --wait=false >/dev/null 2>&1 || true @@ -570,7 +1192,7 @@ metadata: name: ${pod} namespace: ${NODE_HELPER_NAMESPACE} spec: - nodeName: ${HARBOR_CANARY_NODE} + nodeName: ${canary_node} restartPolicy: Never imagePullSecrets: - name: ${REGISTRY_PULL_SECRET} @@ -597,7 +1219,7 @@ run_helper_pod() { local purpose="$2" local timeout_seconds="$3" local script_content="$4" - local pod="hecate-$(sanitize_name "${purpose}")-$(date +%H%M%S)" + local pod="ananke-$(sanitize_name "${purpose}")-$(date +%H%M%S)" local encoded_script encoded_script="$(printf '%s' "${script_content}" | base64 -w0)" @@ -631,9 +1253,9 @@ spec: command: ["/bin/bash", "-ceu"] args: - | - printf '%s' '${encoded_script}' | base64 -d >/tmp/hecate-step.sh - chmod +x /tmp/hecate-step.sh - /tmp/hecate-step.sh + printf '%s' '${encoded_script}' | base64 -d >/tmp/ananke-step.sh + chmod +x /tmp/ananke-step.sh + /tmp/ananke-step.sh POD if ! wait_for_pod_phase "${NODE_HELPER_NAMESPACE}" "${pod}" Succeeded "${timeout_seconds}"; then @@ -663,21 +1285,65 @@ SCRIPT run_helper_pod "${node}" "${purpose}" "${timeout_seconds}" "${script_content}" } +run_host_command_via_prewarm_pod() { + local node="$1" + local host_command="$2" + local pod encoded_command + pod="$(kubectl -n "${NODE_HELPER_NAMESPACE}" get pods -l app="${NODE_HELPER_PREWARM_DS}" --field-selector "spec.nodeName=${node}" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)" + if [[ -z "${pod}" ]]; then + return 1 + fi + encoded_command="$(printf '%s' "${host_command}" | base64 -w0)" + if [[ "${EXECUTE}" -eq 0 ]]; then + log "DRY-RUN: helper exec via ${pod} on ${node}" + return 0 + fi + run kubectl -n "${NODE_HELPER_NAMESPACE}" exec "${pod}" -- /bin/bash -ceu "HOST_COMMAND=\$(printf '%s' '${encoded_command}' | base64 -d); nsenter --target 1 --mount --uts --ipc --net --pid /bin/sh -ceu \"\${HOST_COMMAND}\"" +} + schedule_host_shutdown_via_helper() { local node="$1" local service_name="$2" local delay_seconds="$3" local host_command - host_command="/usr/bin/systemd-run --unit hecate-shutdown-${service_name} --on-active=${delay_seconds}s /bin/sh -lc '/usr/bin/systemctl stop ${service_name} || true; /usr/bin/systemctl poweroff || true'" + host_command="/usr/bin/systemd-run --unit ananke-shutdown-${service_name} --on-active=${delay_seconds}s /bin/sh -lc '/usr/bin/systemctl stop ${service_name} || true; /usr/bin/systemctl poweroff || true'" + if run_host_command_via_prewarm_pod "${node}" "${host_command}"; then + return 0 + fi run_host_command_via_helper "${node}" "shutdown-${node}-${service_name}" 120 "${host_command}" } +schedule_host_service_stop_via_helper() { + local node="$1" + local service_name="$2" + local delay_seconds="$3" + local host_command + host_command="/usr/bin/systemd-run --unit ananke-stop-${service_name} --on-active=${delay_seconds}s /bin/sh -lc '/usr/bin/systemctl stop ${service_name} || true'" + if run_host_command_via_prewarm_pod "${node}" "${host_command}"; then + return 0 + fi + run_host_command_via_helper "${node}" "stop-${node}-${service_name}" 120 "${host_command}" +} + prewarm_node_helper_image() { - local name="hecate-node-helper-prewarm" + local name="${NODE_HELPER_PREWARM_DS}" + 
  local ready_nodes node
+  local node_affinity_block=""
   if [[ "${EXECUTE}" -eq 0 ]]; then
     log "DRY-RUN: prewarm ${NODE_HELPER_IMAGE} via temporary DaemonSet"
     return 0
   fi
+  ready_nodes="$(kubectl get nodes -o 'custom-columns=NAME:.metadata.name,READY:.status.conditions[?(@.type=="Ready")].status' --no-headers 2>/dev/null | awk '$2=="True" {print $1}' || true)"
+  if [[ -n "${ready_nodes}" ]]; then
+    node_affinity_block=$'      affinity:\n        nodeAffinity:\n          requiredDuringSchedulingIgnoredDuringExecution:\n            nodeSelectorTerms:\n            - matchExpressions:\n              - key: kubernetes.io/hostname\n                operator: In\n                values:'
+    while IFS= read -r node; do
+      [[ -z "${node}" ]] && continue
+      node_affinity_block+=$'\n'"                - ${node}"
+    done <<< "${ready_nodes}"
+    log "node-helper-prewarm-targets=$(printf '%s' "${ready_nodes}" | paste -sd, -)"
+  else
+    warn "Unable to detect Ready nodes for prewarm targeting; continuing without node affinity."
+  fi
   cat </dev/null 2>&1 || true
+      if [[ "${KEEP_PREWARM_DAEMONSET}" -eq 0 ]]; then
+        kubectl -n "${NODE_HELPER_NAMESPACE}" delete ds "${name}" --ignore-not-found >/dev/null 2>&1 || true
+      else
+        log "Keeping ${name} DaemonSet running for shutdown helper exec path."
+      fi
       return 0
     fi
     sleep 2
@@ -722,6 +1393,14 @@ DS
   die "Timed out prewarming node helper image ${NODE_HELPER_IMAGE}"
 }
 
+cleanup_prewarm_daemonset() {
+  if [[ "${EXECUTE}" -eq 0 ]]; then
+    log "DRY-RUN: cleanup ${NODE_HELPER_PREWARM_DS} DaemonSet"
+    return 0
+  fi
+  kubectl -n "${NODE_HELPER_NAMESPACE}" delete ds "${NODE_HELPER_PREWARM_DS}" --ignore-not-found >/dev/null 2>&1 || true
+}
+
 start_bundle_server() {
   [[ -f "${HARBOR_BUNDLE_FILE}" ]] || die "Harbor bundle not found at ${HARBOR_BUNDLE_FILE}"
   require_cmd python3
@@ -732,7 +1411,7 @@ start_bundle_server() {
     log "DRY-RUN: serve ${bundle_name} from ${bundle_dir} on port ${BUNDLE_HTTP_PORT}"
     return 0
   fi
-  python3 -m http.server "${BUNDLE_HTTP_PORT}" --bind 0.0.0.0 --directory "${bundle_dir}" >/tmp/hecate-bundle-server.log 2>&1 &
+  python3 -m http.server "${BUNDLE_HTTP_PORT}" --bind 0.0.0.0 --directory "${bundle_dir}" >/tmp/ananke-bundle-server.log 2>&1 &
   BUNDLE_SERVER_PID=$!
   for _ in $(seq 1 20); do
     if curl -fsS "http://127.0.0.1:${BUNDLE_HTTP_PORT}/${bundle_name}" >/dev/null 2>&1; then
@@ -740,7 +1419,7 @@
     fi
     sleep 1
   done
-  die "Temporary bundle server did not become ready; see /tmp/hecate-bundle-server.log"
+  die "Temporary bundle server did not become ready; see /tmp/ananke-bundle-server.log"
 }
 
 stop_bundle_server() {
@@ -762,6 +1441,8 @@ control_host_ip() {
 seed_harbor_images() {
   local images_text control_ip bundle_name script_content seed_rc=0
   [[ -f "${HARBOR_BUNDLE_FILE}" ]] || die "Harbor bundle not found at ${HARBOR_BUNDLE_FILE}"
+  ensure_harbor_target_node
+  ensure_harbor_host_label
   images_text="$(sed '/^[[:space:]]*#/d;/^[[:space:]]*$/d' "${BOOTSTRAP_DIR}/harbor-bootstrap-images.txt")"
   [[ -n "${images_text}" ]] || die "No Harbor images listed in ${BOOTSTRAP_DIR}/harbor-bootstrap-images.txt"
   bundle_name="$(basename "${HARBOR_BUNDLE_FILE}")"
@@ -803,6 +1484,34 @@ bootstrap_local_harbor() {
   apply_kustomization services/harbor
 }
 
+reconcile_kustomization_with_self_heal() {
+  local item="$1"
+  if [[ "${EXECUTE}" -eq 0 ]]; then
+    run flux reconcile kustomization "${item}" -n flux-system --with-source --timeout=15m
+    return 0
+  fi
+  local attempt output rc
+  for attempt in 1 2; do
+    set +e
+    output="$(flux reconcile kustomization "${item}" -n flux-system --with-source --timeout=15m 2>&1)"
+    rc=$?
+    set -e
+    if (( rc == 0 )); then
+      [[ -n "${output}" ]] && printf '%s\n' "${output}"
+      return 0
+    fi
+    [[ -n "${output}" ]] && printf '%s\n' "${output}" >&2
+    if (( attempt == 1 )) && grep -Eqi 'immutable|field is immutable|cannot patch.*Job|Job.*invalid' <<< "${output}"; then
+      warn "Flux reconcile for '${item}' failed due to immutable Job/template signal. Attempting self-heal."
+      heal_failed_flux_jobs || true
+      trigger_flux_reconcile_all || true
+      sleep 5
+      continue
+    fi
+    return "${rc}"
+  done
+}
+
 reconcile_stage() {
   local stage_name="$1"
   shift
@@ -814,7 +1523,7 @@ reconcile_stage() {
   fi
   local item
   for item in "$@"; do
-    run flux reconcile kustomization "${item}" -n flux-system --with-source --timeout=15m
+    reconcile_kustomization_with_self_heal "${item}"
   done
   mark_checkpoint "reconciled_${stage_name}"
 }
@@ -838,23 +1547,59 @@ resume_flux_and_reconcile() {
 }
 
 status_report() {
-  local battery flux_ready harbor_code workers
+  local battery flux_ready flux_url flux_branch flux_url_drift flux_branch_drift harbor_code workers ingress_hosts_count
+  local effective_target effective_canary
+  local labeled_nodes
   battery="$(read_ups_battery || true)"
   flux_ready="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || true)"
+  flux_url="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.url}' 2>/dev/null || true)"
+  flux_branch="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.ref.branch}' 2>/dev/null || true)"
+  flux_url_drift=false
+  flux_branch_drift=false
+  if [[ -n "${EXPECTED_FLUX_URL}" && -n "${flux_url}" && "${flux_url}" != "${EXPECTED_FLUX_URL}" ]]; then
+    flux_url_drift=true
+  fi
+  if [[ -n "${EXPECTED_FLUX_BRANCH}" && -n "${flux_branch}" && "${flux_branch}" != "${EXPECTED_FLUX_BRANCH}" ]]; then
+    flux_branch_drift=true
+  fi
+  ingress_hosts_count="$(list_ingress_hosts | sed '/^[[:space:]]*$/d' | wc -l | tr -d ' ')"
   harbor_code="$(curl -ksS -o /dev/null -w '%{http_code}' https://registry.bstein.dev/v2/ || true)"
   workers="$(discover_workers_csv 2>/dev/null || true)"
+  effective_target="${HARBOR_TARGET_NODE}"
+  if ! node_is_ready "${effective_target}"; then
+    effective_target="$(select_ready_arm64_worker || true)"
+  fi
+  effective_canary="${HARBOR_CANARY_NODE}"
+  if ! 
node_is_ready "${effective_canary}"; then + effective_canary="${effective_target}" + fi echo "mode=status" + echo "shutdown_mode=${SHUTDOWN_MODE}" echo "bundle_file=${HARBOR_BUNDLE_FILE}" echo "bundle_present=$([[ -f "${HARBOR_BUNDLE_FILE}" ]] && echo true || echo false)" + echo "replica_snapshot_file=${REPLICA_SNAPSHOT_FILE}" + echo "replica_snapshot_present=$([[ -f "${REPLICA_SNAPSHOT_FILE}" ]] && echo true || echo false)" echo "node_helper_image=${NODE_HELPER_IMAGE}" - echo "harbor_target_node=${HARBOR_TARGET_NODE}" + echo "harbor_target_node=${effective_target:-unknown}" + echo "harbor_canary_node=${effective_canary:-unknown}" + labeled_nodes="$(kubectl get nodes -l "${HARBOR_HOST_LABEL_KEY}=true" -o jsonpath='{range .items[*]}{.metadata.name}{","}{end}' 2>/dev/null || true)" + labeled_nodes="${labeled_nodes%,}" + echo "harbor_host_label_key=${HARBOR_HOST_LABEL_KEY}" + echo "harbor_host_label_nodes=${labeled_nodes:-none}" echo "workers=${workers}" echo "recovery_pending=${RECOVERY_PENDING}" echo "startup_attempted=${STARTUP_ATTEMPTED_DURING_OUTAGE}" echo "last_checkpoint=${LAST_CHECKPOINT}" echo "ups_host=${UPS_HOST_IN_USE:-${UPS_HOST}}" echo "ups_battery=${battery:-unknown}" + echo "flux_source_expected_url=${EXPECTED_FLUX_URL}" + echo "flux_source_expected_branch=${EXPECTED_FLUX_BRANCH}" + echo "flux_source_actual_url=${flux_url:-unknown}" + echo "flux_source_actual_branch=${flux_branch:-unknown}" + echo "flux_source_url_drift=${flux_url_drift}" + echo "flux_source_branch_drift=${flux_branch_drift}" echo "flux_source_ready=${flux_ready:-unknown}" + echo "ingress_hosts_count=${ingress_hosts_count}" echo "harbor_http=${harbor_code:-unknown}" kubectl get ingressclass traefik >/dev/null 2>&1 && echo "traefik_ingressclass=true" || echo "traefik_ingressclass=false" kubectl -n traefik get deploy traefik >/dev/null 2>&1 && echo "traefik_deploy=true" || echo "traefik_deploy=false" @@ -876,6 +1621,7 @@ planned_shutdown() { save_recovery_state 1 0 shutdown_started if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then + KEEP_PREWARM_DAEMONSET=1 prewarm_node_helper_image mark_checkpoint shutdown_helper_prewarmed fi @@ -889,6 +1635,9 @@ planned_shutdown() { warn "Skipping etcd snapshot by request." fi + save_workload_replica_snapshot + mark_checkpoint shutdown_replicas_snapshot + patch_flux_suspend_all true best_effort_scale_down_apps mark_checkpoint shutdown_apps_scaled_down @@ -901,18 +1650,39 @@ planned_shutdown() { fi local node + if [[ "${SHUTDOWN_MODE}" == "cluster-only" ]]; then + warn "shutdown-mode=cluster-only: stopping k3s services only; host poweroff is disabled." + else + log "shutdown-mode=host-poweroff: scheduling host poweroff after service stop." + fi + for node in "${WORKER_NODES[@]}"; do [[ -z "${node}" ]] && continue - schedule_host_shutdown_via_helper "${node}" k3s-agent 20 + if [[ "${SHUTDOWN_MODE}" == "cluster-only" ]]; then + schedule_host_service_stop_via_helper "${node}" k3s-agent 20 + else + schedule_host_shutdown_via_helper "${node}" k3s-agent 20 + fi done mark_checkpoint shutdown_workers_scheduled for node in "${CONTROL_PLANE_NODES[@]}"; do [[ -z "${node}" ]] && continue - schedule_host_shutdown_via_helper "${node}" k3s 45 + if [[ "${SHUTDOWN_MODE}" == "cluster-only" ]]; then + schedule_host_service_stop_via_helper "${node}" k3s 45 + else + schedule_host_shutdown_via_helper "${node}" k3s 45 + fi done + if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then + cleanup_prewarm_daemonset + fi mark_checkpoint shutdown_control_planes_scheduled - log "Shutdown actions scheduled on hosts." 
+ if [[ "${SHUTDOWN_MODE}" == "cluster-only" ]]; then + log "Cluster-only shutdown actions scheduled (hosts remain powered on)." + else + log "Shutdown + host poweroff actions scheduled on hosts." + fi } emergency_shutdown_after_outage() { @@ -946,11 +1716,23 @@ startup_flow() { fi mark_checkpoint startup_api_ready + ensure_harbor_target_node + ensure_harbor_host_label + mark_checkpoint startup_harbor_host_labeled + + if [[ -n "${FORCE_FLUX_URL}" ]]; then + warn "Breakglass: forcing Flux source URL to '${FORCE_FLUX_URL}'." + run kubectl -n flux-system patch gitrepository flux-system --type=merge -p "{\"spec\":{\"url\":\"${FORCE_FLUX_URL}\"}}" + mark_checkpoint startup_flux_url_forced + fi + if [[ -n "${FORCE_FLUX_BRANCH}" ]]; then run kubectl -n flux-system patch gitrepository flux-system --type=merge -p "{\"spec\":{\"ref\":{\"branch\":\"${FORCE_FLUX_BRANCH}\"}}}" mark_checkpoint startup_flux_branch_forced fi + assert_flux_source_expected + if [[ "${SKIP_LOCAL_BOOTSTRAP}" -eq 0 ]]; then if ! kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null | grep -q True; then warn "Flux source not Ready; executing local bootstrap fallback path." @@ -988,6 +1770,11 @@ startup_flow() { fi resume_flux_and_reconcile + wait_for_flux_kustomizations_ready + restore_workload_replica_snapshot + wait_for_startup_workloads_ready + wait_for_startup_service_checklist + wait_for_startup_stability_window if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then prewarm_node_helper_image mark_checkpoint startup_helper_prewarmed @@ -998,6 +1785,9 @@ startup_flow() { prepare_flow() { [[ -f "${HARBOR_BUNDLE_FILE}" ]] || die "Harbor bundle missing at ${HARBOR_BUNDLE_FILE}. Build and copy it to the canonical control host first." + ensure_harbor_target_node + ensure_harbor_host_label + mark_checkpoint prepare_harbor_host_labeled if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then prewarm_node_helper_image mark_checkpoint prepare_helper_prewarmed @@ -1019,9 +1809,16 @@ harbor_seed_flow() { load_recovery_state log "mode=${MODE} execute=${EXECUTE}" +log "shutdown-mode=${SHUTDOWN_MODE}" log "recovery-state-file=${RECOVERY_STATE_FILE}" log "bundle-file=${HARBOR_BUNDLE_FILE}" log "node-helper-image=${NODE_HELPER_IMAGE}" +log "harbor-target-node-config=${HARBOR_TARGET_NODE:-auto}" +log "harbor-canary-node-config=${HARBOR_CANARY_NODE:-auto}" +log "harbor-host-label-key=${HARBOR_HOST_LABEL_KEY}" +log "expected-flux-url=${EXPECTED_FLUX_URL}" +log "expected-flux-branch=${EXPECTED_FLUX_BRANCH}" +log "startup-optional-kustomizations=${STARTUP_OPTIONAL_KUSTOMIZATIONS:-none}" report_flux_source_state case "${MODE}" in