# titan-iac/scripts/cluster_power_recovery.sh
# (web-export header residue converted to comments so the file stays valid
# shell; original banner: "1044 lines / 34 KiB / Bash / Raw Normal View History")

#!/usr/bin/env bash
# Cluster power-recovery orchestrator: prepare, status, harbor-seed, shutdown
# and startup flows for a k3s cluster around UPS power events. See usage().
set -euo pipefail
# Resolve script/repo locations; HECATE_REPO_DIR overrides repo autodetection.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="${HECATE_REPO_DIR:-$(cd "${SCRIPT_DIR}/.." && pwd)}"
BOOTSTRAP_DIR="${SCRIPT_DIR}/bootstrap"
CONFIG_FILE="${BOOTSTRAP_DIR}/recovery-config.env"
# Optional site-specific overrides, sourced before defaults are computed below.
if [[ -f "${CONFIG_FILE}" ]]; then
# shellcheck disable=SC1090
source "${CONFIG_FILE}"
fi
# Fall back to a kubeconfig shipped next to the script when none is configured.
if [[ -z "${KUBECONFIG:-}" && -f "${SCRIPT_DIR}/kubeconfig" ]]; then
export KUBECONFIG="${SCRIPT_DIR}/kubeconfig"
fi
# Print CLI usage. The heredoc body is emitted verbatim; keep the option list
# in sync with the option parser further down.
usage() {
cat <<USAGE
Usage:
scripts/cluster_power_recovery.sh <prepare|status|harbor-seed|shutdown|startup> [options]
Options:
--execute Actually run commands (default is dry-run)
--expected-flux-branch <name> Expected Flux source branch during startup checks (default: ${DEFAULT_FLUX_BRANCH:-main})
--force-flux-branch <name> Startup: patch flux-system GitRepository branch to this value
--skip-etcd-snapshot Shutdown: skip etcd snapshot before shutdown
--skip-drain Shutdown: skip worker drain during shutdown
--skip-local-bootstrap Startup: skip local bootstrap fallback applies
--skip-harbor-bootstrap Startup: skip Harbor recovery bootstrap stage
--skip-harbor-seed Startup: skip Harbor image seed/import stage
--skip-helper-prewarm Prepare/Shutdown/Startup: skip node-helper prewarm
--min-startup-battery <pct> Minimum UPS percent required before bootstrap (default: 35)
--ups-host <name> UPS identifier for upsc (default: ups@localhost)
--ups-battery-key <key> UPS battery key for upsc (default: battery.charge)
--recovery-state-file <path> Recovery state file for outage-aware restart logic
--harbor-bundle-file <path> Harbor bootstrap bundle on the control host
--harbor-target-node <name> Node that should host Harbor during bootstrap (default: ${HARBOR_TARGET_NODE:-titan-05})
--harbor-canary-image <image> Harbor-backed image used for pull canary (default: ${HARBOR_CANARY_IMAGE:-registry.bstein.dev/bstein/kubectl:1.35.0})
--node-helper-image <image> Privileged helper image used for host operations (default: ${NODE_HELPER_IMAGE:-registry.bstein.dev/bstein/hecate-node-helper:0.1.0})
--bundle-http-port <port> Temporary HTTP port used to serve bootstrap bundles (default: ${BUNDLE_HTTP_PORT:-8877})
--api-wait-timeout <seconds> Startup: Kubernetes API wait timeout (default: 600)
--drain-timeout <seconds> Worker drain timeout for normal shutdown (default: 180)
--emergency-drain-timeout <seconds>
Worker drain timeout for emergency fallback (default: 45)
--require-ups-battery Hard-fail startup if UPS battery cannot be read
-h, --help Show help
Examples:
scripts/cluster_power_recovery.sh prepare --execute
scripts/cluster_power_recovery.sh harbor-seed --execute
scripts/cluster_power_recovery.sh status
scripts/cluster_power_recovery.sh shutdown --execute
scripts/cluster_power_recovery.sh startup --execute --force-flux-branch main
USAGE
}
# First positional argument selects the mode; empty or -h/--help prints usage.
MODE="${1:-}"
if [[ -z "${MODE}" || "${MODE}" == "-h" || "${MODE}" == "--help" ]]; then
usage
exit 0
fi
shift || true
# Validate the mode before parsing the remaining options.
case "${MODE}" in
prepare|status|harbor-seed|shutdown|startup) ;;
*)
echo "Unknown mode: ${MODE}" >&2
usage
exit 1
;;
esac
# Defaults. Most are env-overridable (the sourced recovery-config.env may set
# them) and several can additionally be overridden by CLI flags below.
EXECUTE=0
EXPECTED_FLUX_BRANCH="${DEFAULT_FLUX_BRANCH:-main}"
FORCE_FLUX_BRANCH=""
# Stage-skipping toggles (flipped to 1 by the corresponding --skip-* flags).
SKIP_ETCD_SNAPSHOT=0
SKIP_DRAIN=0
SKIP_LOCAL_BOOTSTRAP=0
SKIP_HARBOR_BOOTSTRAP=0
SKIP_HARBOR_SEED=0
SKIP_HELPER_PREWARM=0
# UPS battery gating (read via upsc).
UPS_HOST="${UPS_HOST:-ups@localhost}"
UPS_BATTERY_KEY="${UPS_BATTERY_KEY:-battery.charge}"
MIN_STARTUP_BATTERY="${MIN_STARTUP_BATTERY:-35}"
REQUIRE_UPS_BATTERY="${REQUIRE_UPS_BATTERY:-0}"
# Timeouts (seconds).
DRAIN_TIMEOUT_SECONDS=180
EMERGENCY_DRAIN_TIMEOUT_SECONDS=45
API_WAIT_TIMEOUT_SECONDS=600
BUNDLE_HTTP_PORT="${BUNDLE_HTTP_PORT:-8877}"
# Persisted state + Harbor bootstrap inputs on the control host.
STATE_ROOT="${HOME}/${STATE_SUBDIR:-.local/share/hecate}"
RECOVERY_STATE_FILE="${STATE_ROOT}/cluster_power_recovery.state"
HARBOR_BUNDLE_FILE="${STATE_ROOT}/bundles/${HARBOR_BUNDLE_BASENAME:-harbor-bootstrap-v2.14.1-arm64.tar.zst}"
HARBOR_TARGET_NODE="${HARBOR_TARGET_NODE:-titan-05}"
HARBOR_CANARY_NODE="${HARBOR_CANARY_NODE:-titan-04}"
HARBOR_CANARY_IMAGE="${HARBOR_CANARY_IMAGE:-registry.bstein.dev/bstein/kubectl:1.35.0}"
NODE_HELPER_IMAGE="${NODE_HELPER_IMAGE:-registry.bstein.dev/bstein/hecate-node-helper:0.1.0}"
NODE_HELPER_NAMESPACE="${NODE_HELPER_NAMESPACE:-maintenance}"
NODE_HELPER_SERVICE_ACCOUNT="${NODE_HELPER_SERVICE_ACCOUNT:-default}"
REGISTRY_PULL_SECRET="${REGISTRY_PULL_SECRET:-harbor-regcred}"
# Mutable runtime state (populated by load_recovery_state and helpers).
RECOVERY_PENDING=0
STARTUP_ATTEMPTED_DURING_OUTAGE=0
LAST_CHECKPOINT="none"
BUNDLE_SERVER_PID=""
UPS_HOST_IN_USE=""
# Option parser. Boolean flags shift once; value-taking flags use "${2:?...}"
# so a missing value aborts with a clear message, then shift twice.
while [[ $# -gt 0 ]]; do
case "$1" in
--execute)
EXECUTE=1
shift
;;
--expected-flux-branch)
EXPECTED_FLUX_BRANCH="${2:?missing branch}"
shift 2
;;
--force-flux-branch)
FORCE_FLUX_BRANCH="${2:?missing branch}"
shift 2
;;
--skip-etcd-snapshot)
SKIP_ETCD_SNAPSHOT=1
shift
;;
--skip-drain)
SKIP_DRAIN=1
shift
;;
--skip-local-bootstrap)
SKIP_LOCAL_BOOTSTRAP=1
shift
;;
--skip-harbor-bootstrap)
SKIP_HARBOR_BOOTSTRAP=1
shift
;;
--skip-harbor-seed)
SKIP_HARBOR_SEED=1
shift
;;
--skip-helper-prewarm)
SKIP_HELPER_PREWARM=1
shift
;;
--ups-host)
UPS_HOST="${2:?missing ups host}"
shift 2
;;
--ups-battery-key)
UPS_BATTERY_KEY="${2:?missing ups key}"
shift 2
;;
--min-startup-battery)
MIN_STARTUP_BATTERY="${2:?missing battery threshold}"
shift 2
;;
--require-ups-battery)
REQUIRE_UPS_BATTERY=1
shift
;;
--recovery-state-file)
RECOVERY_STATE_FILE="${2:?missing state file path}"
shift 2
;;
--harbor-bundle-file)
HARBOR_BUNDLE_FILE="${2:?missing bundle file path}"
shift 2
;;
--harbor-target-node)
HARBOR_TARGET_NODE="${2:?missing harbor target node}"
shift 2
;;
--harbor-canary-image)
HARBOR_CANARY_IMAGE="${2:?missing canary image}"
shift 2
;;
--node-helper-image)
NODE_HELPER_IMAGE="${2:?missing node helper image}"
shift 2
;;
--bundle-http-port)
BUNDLE_HTTP_PORT="${2:?missing bundle http port}"
shift 2
;;
--api-wait-timeout)
API_WAIT_TIMEOUT_SECONDS="${2:?missing api wait timeout}"
shift 2
;;
--drain-timeout)
DRAIN_TIMEOUT_SECONDS="${2:?missing drain timeout}"
shift 2
;;
--emergency-drain-timeout)
EMERGENCY_DRAIN_TIMEOUT_SECONDS="${2:?missing emergency drain timeout}"
shift 2
;;
-h|--help)
usage
exit 0
;;
# Any unrecognized option is fatal.
*)
echo "Unknown option: $1" >&2
usage
exit 1
;;
esac
done
require_cmd() {
  # Abort the script when a required executable is not available on PATH.
  local needed="$1"
  command -v "${needed}" >/dev/null 2>&1 && return 0
  echo "Missing required command: ${needed}" >&2
  exit 1
}
# Hard prerequisites for every mode.
require_cmd kubectl
require_cmd bash
require_cmd base64
require_cmd curl
# Logging helpers: log -> stdout; warn -> stderr; die -> stderr then exit 1.
log() { echo "[cluster-power] $*"; }
warn() { echo "[cluster-power][warn] $*" >&2; }
die() { echo "[cluster-power][error] $*" >&2; exit 1; }
run() {
  # Execute the given argv only in --execute mode; otherwise log the intent.
  # Propagates the command's exit status so callers can react to failures.
  if (( EXECUTE == 1 )); then
    log "EXEC: $*"
    "$@"
    return
  fi
  log "DRY-RUN: $*"
}
run_shell() {
  # Like run(), but the command is a single string evaluated by `bash -lc`,
  # allowing pipes/redirections/"|| true" inside the command text.
  if (( EXECUTE != 1 )); then
    log "DRY-RUN: $*"
    return
  fi
  log "EXEC: $*"
  bash -lc "$*"
}
apply_kustomization() {
  # Render a kustomization located under REPO_DIR/<rel> and pipe it into
  # `kubectl apply`. Honors dry-run mode by only logging the pipeline.
  local rel="$1"
  local target="${REPO_DIR}/${rel}"
  local pipeline="kubectl kustomize ${target} --load-restrictor=LoadRestrictionsNone | kubectl apply -f -"
  if [[ "${EXECUTE}" -ne 1 ]]; then
    log "DRY-RUN: ${pipeline}"
    return 0
  fi
  log "EXEC: ${pipeline}"
  kubectl kustomize "${target}" --load-restrictor=LoadRestrictionsNone | kubectl apply -f -
}
sanitize_name() {
  # Lowercase the input and collapse each run of characters outside
  # [a-z0-9-] into a single '-' (used for pod-name fragments).
  local lowered
  lowered="$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')"
  printf '%s' "${lowered}" | tr -cs 'a-z0-9-' '-'
}
state_dir() {
  # Directory portion of the recovery state file path.
  dirname -- "${RECOVERY_STATE_FILE}"
}
load_recovery_state() {
  # Reset the in-memory recovery flags, then overlay any values found in the
  # persisted state file. Unknown keys are silently ignored.
  RECOVERY_PENDING=0
  STARTUP_ATTEMPTED_DURING_OUTAGE=0
  LAST_CHECKPOINT="none"
  if [[ ! -f "${RECOVERY_STATE_FILE}" ]]; then
    return 0
  fi
  while IFS='=' read -r key value; do
    case "${key}" in
      recovery_pending) RECOVERY_PENDING="${value}" ;;
      startup_attempted) STARTUP_ATTEMPTED_DURING_OUTAGE="${value}" ;;
      last_checkpoint) LAST_CHECKPOINT="${value}" ;;
    esac
  done < "${RECOVERY_STATE_FILE}"
}
save_recovery_state() {
  # Persist the three recovery flags ($1 pending, $2 attempted, $3 checkpoint)
  # to the state file. No-op in dry-run mode.
  if [[ "${EXECUTE}" -ne 1 ]]; then
    return 0
  fi
  mkdir -p "$(state_dir)"
  printf 'recovery_pending=%s\nstartup_attempted=%s\nlast_checkpoint=%s\n' \
    "$1" "$2" "$3" > "${RECOVERY_STATE_FILE}"
}
mark_checkpoint() {
  # Record the most recently completed step and persist the full flag set.
  local checkpoint="$1"
  LAST_CHECKPOINT="${checkpoint}"
  save_recovery_state "${RECOVERY_PENDING}" "${STARTUP_ATTEMPTED_DURING_OUTAGE}" "${checkpoint}"
}
clear_recovery_state() {
  # Remove the persisted state file and reset the in-memory checkpoint.
  # Dry-run leaves both the file and the marker untouched.
  if [[ "${EXECUTE}" -eq 1 ]]; then
    rm -f "${RECOVERY_STATE_FILE}" 2>/dev/null || true
    LAST_CHECKPOINT="none"
  fi
}
sanitize_battery_percent() {
  # Normalize raw upsc output (e.g. "battery.charge: 87.00") to a bare
  # integer percentage on stdout; fails (rc 1) for non-numeric input.
  local value="$1"
  value="${value##*:}"           # drop any "key:" prefix
  value="${value//[[:space:]]/}" # strip all whitespace
  value="${value%%.*}"           # truncate any fractional part
  if [[ ! "${value}" =~ ^[0-9]+$ ]]; then
    return 1
  fi
  printf '%s' "${value}"
}
candidate_ups_hosts() {
  # Emit candidate UPS identifiers, one per line and deduplicated: the
  # configured UPS_HOST first, then each UPS reported by `upsc -l` both as
  # "<name>@localhost" and as the bare "<name>".
  local -A emitted=()
  local ups alias
  if [[ -n "${UPS_HOST}" ]]; then
    emitted["${UPS_HOST}"]=1
    echo "${UPS_HOST}"
  fi
  while IFS= read -r ups; do
    [[ -n "${ups}" ]] || continue
    for alias in "${ups}@localhost" "${ups}"; do
      if [[ -z "${emitted[${alias}]+x}" ]]; then
        emitted["${alias}"]=1
        echo "${alias}"
      fi
    done
  done < <(upsc -l 2>/dev/null || true)
}
read_ups_battery() {
  # Query each candidate UPS host until one yields a parsable battery
  # percentage; prints it and records the answering host in UPS_HOST_IN_USE.
  # Fails when upsc is missing or no candidate produces a usable value.
  command -v upsc >/dev/null 2>&1 || return 1
  local candidate reading percent
  while IFS= read -r candidate; do
    reading="$(upsc "${candidate}" "${UPS_BATTERY_KEY}" 2>/dev/null || true)"
    [[ -n "${reading}" ]] || continue
    percent="$(sanitize_battery_percent "${reading}" || true)"
    [[ -n "${percent}" ]] || continue
    UPS_HOST_IN_USE="${candidate}"
    printf '%s' "${percent}"
    return 0
  done < <(candidate_ups_hosts)
  return 1
}
ensure_minimum_battery_for_bootstrap() {
  # Gate cluster bootstrap on UPS charge. An unreadable battery is fatal
  # only when --require-ups-battery is set; otherwise we proceed and warn.
  local pct
  pct="$(read_ups_battery || true)"
  if [[ -z "${pct}" ]]; then
    if [[ "${REQUIRE_UPS_BATTERY}" -eq 1 ]]; then
      warn "Unable to read UPS battery status and --require-ups-battery is set."
      return 1
    fi
    warn "Unable to read UPS battery status; continuing without hard battery gating."
    return 0
  fi
  log "ups-battery=${pct}% host=${UPS_HOST_IN_USE:-${UPS_HOST}}"
  if (( pct < MIN_STARTUP_BATTERY )); then
    warn "UPS battery ${pct}% below minimum startup threshold ${MIN_STARTUP_BATTERY}%."
    return 1
  fi
  return 0
}
# Log the Flux GitRepository URL/branch (best-effort; kubectl may be down).
# During startup, warn when the live branch differs from the expected one and
# no --force-flux-branch override was requested.
report_flux_source_state() {
local flux_url flux_branch
flux_url="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.url}' 2>/dev/null || true)"
flux_branch="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.ref.branch}' 2>/dev/null || true)"
[[ -n "${flux_url}" ]] && log "flux-source-url=${flux_url}"
if [[ -n "${flux_branch}" ]]; then
log "flux-source-branch=${flux_branch}"
if [[ "${MODE}" == "startup" && -z "${FORCE_FLUX_BRANCH}" && "${flux_branch}" != "${EXPECTED_FLUX_BRANCH}" ]]; then
warn "Flux source branch is '${flux_branch}'. Expected '${EXPECTED_FLUX_BRANCH}' for canonical recovery."
fi
fi
}
wait_for_api() {
  # Poll the Kubernetes API every 5s until it answers or the configured
  # timeout (API_WAIT_TIMEOUT_SECONDS) elapses. Dry-run skips the live wait.
  local max_polls=$(( API_WAIT_TIMEOUT_SECONDS / 5 ))
  (( max_polls >= 1 )) || max_polls=1
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: skipping live Kubernetes API wait"
    return 0
  fi
  local poll
  for (( poll = 0; poll < max_polls; poll++ )); do
    if kubectl version --request-timeout=5s >/dev/null 2>&1; then
      return 0
    fi
    sleep 5
  done
  return 1
}
# Set .spec.suspend on every Flux Kustomization (flux-system namespace) and
# every HelmRelease cluster-wide. $1 must be the JSON boolean "true"/"false".
patch_flux_suspend_all() {
local value="$1"
local patch
patch=$(printf '{"spec":{"suspend":%s}}' "${value}")
local ks_list hr_list
ks_list="$(kubectl -n flux-system get kustomizations.kustomize.toolkit.fluxcd.io -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' || true)"
hr_list="$(kubectl get helmreleases.helm.toolkit.fluxcd.io -A -o jsonpath='{range .items[*]}{.metadata.namespace}{"/"}{.metadata.name}{"\n"}{end}' || true)"
while IFS= read -r k; do
[[ -z "${k}" ]] && continue
run kubectl -n flux-system patch kustomization "${k}" --type=merge -p "${patch}"
done <<< "${ks_list}"
# HelmReleases are namespaced; entries arrive as "<namespace>/<name>".
while IFS= read -r hr; do
[[ -z "${hr}" ]] && continue
local ns="${hr%%/*}"
local name="${hr##*/}"
run kubectl -n "${ns}" patch helmrelease "${name}" --type=merge -p "${patch}"
done <<< "${hr_list}"
}
# Scale every Deployment/StatefulSet to zero replicas in all namespaces except
# the core platform ones matched by the regex below. Each scale is
# best-effort ("|| true") so a single failure never aborts shutdown.
best_effort_scale_down_apps() {
local excludes='^(kube-system|kube-public|kube-node-lease|flux-system|traefik|metallb-system|cert-manager|longhorn-system|vault|postgres|maintenance)$'
local ns_list
ns_list="$(kubectl get ns -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')"
while IFS= read -r ns; do
[[ -z "${ns}" ]] && continue
if [[ "${ns}" =~ ${excludes} ]]; then
continue
fi
run_shell "kubectl -n ${ns} scale deployment --all --replicas=0 || true"
run_shell "kubectl -n ${ns} scale statefulset --all --replicas=0 || true"
done <<< "${ns_list}"
}
# Print a comma-separated list of worker node names: every node carrying
# neither the control-plane nor the (legacy) master role label.
discover_workers_csv() {
kubectl get nodes \
-o custom-columns=NAME:.metadata.name,CP:.metadata.labels.node-role\\.kubernetes\\.io/control-plane,MASTER:.metadata.labels.node-role\\.kubernetes\\.io/master \
--no-headers \
| awk '$2=="<none>" && $3=="<none>" {print $1}' \
| paste -sd, -
}
as_array_from_csv() {
  # Split a comma-separated list into the array variable named by $2.
  # $1 - CSV string (an empty string yields an empty array)
  # $2 - name of the destination array variable (created globally if unset;
  #      must not be named "__hecate_dest")
  # Uses a bash nameref instead of the previous eval-based indirection:
  # no quoting/injection hazard from element content, and IFS is scoped to
  # the read itself so the caller's IFS is never touched.
  local csv="$1"
  local -n __hecate_dest="$2"
  IFS=',' read -r -a __hecate_dest <<< "${csv}"
}
# Cordon then drain each worker with escalating force: a gentle eviction
# first, then --force (covers unmanaged pods), and finally
# --disable-eviction (bypasses PodDisruptionBudgets). Best-effort: the last
# attempt is "|| true" so a stuck node never aborts shutdown.
# $1 per-attempt timeout in seconds; remaining args are node names.
# NOTE(review): in dry-run mode run_shell returns 0, so only the gentle
# attempt is logged for each node.
best_effort_drain_workers() {
local timeout_seconds="$1"
shift || true
local workers=("$@")
local node
for node in "${workers[@]}"; do
[[ -z "${node}" ]] && continue
run kubectl cordon "${node}"
if run_shell "kubectl drain ${node} --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=${timeout_seconds}s"; then
continue
fi
warn "Gentle drain timed out for ${node}; retrying with --force."
if run_shell "kubectl drain ${node} --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=${timeout_seconds}s --force"; then
continue
fi
warn "Force drain timed out for ${node}; final attempt with --disable-eviction."
run_shell "kubectl drain ${node} --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=${timeout_seconds}s --force --disable-eviction || true"
done
}
wait_for_rollout() {
  # Block until the given workload finishes rolling out (execute mode only).
  # $1 namespace, $2 kind (deployment/statefulset/daemonset), $3 name,
  # $4 timeout string accepted by kubectl (e.g. "10m").
  local ns="$1" kind="$2" workload="$3" limit="$4"
  if [[ "${EXECUTE}" -ne 1 ]]; then
    log "DRY-RUN: kubectl -n ${ns} rollout status ${kind}/${workload} --timeout=${limit}"
    return 0
  fi
  kubectl -n "${ns}" rollout status "${kind}/${workload}" --timeout="${limit}"
}
# Post-bootstrap health gates. Each check blocks (via wait_for_rollout) until
# the named workloads report a completed rollout; failures propagate and,
# under set -e, abort the recovery flow.
check_ingress_stack() {
kubectl get ingressclass traefik >/dev/null
wait_for_rollout traefik deployment traefik 5m
}
check_longhorn_stack() {
wait_for_rollout longhorn-system daemonset longhorn-manager 10m
wait_for_rollout longhorn-system deployment longhorn-ui 10m
}
# Vault: rollout plus an in-pod `vault status` probe (execute mode only).
check_vault_stack() {
wait_for_rollout vault statefulset vault 10m
if [[ "${EXECUTE}" -eq 1 ]]; then
kubectl -n vault exec vault-0 -- sh -ceu 'VAULT_ADDR=http://127.0.0.1:8200 vault status >/dev/null'
fi
}
# Postgres: rollout plus an in-pod pg_isready probe (execute mode only).
check_postgres_stack() {
wait_for_rollout postgres statefulset postgres 10m
if [[ "${EXECUTE}" -eq 1 ]]; then
kubectl -n postgres exec postgres-0 -c postgres -- sh -ceu 'pg_isready -h 127.0.0.1 -p 5432 >/dev/null'
fi
}
check_gitea_stack() {
wait_for_rollout gitea deployment gitea 10m
}
check_harbor_stack() {
wait_for_rollout harbor statefulset harbor-redis 10m
wait_for_rollout harbor deployment harbor-core 10m
wait_for_rollout harbor deployment harbor-jobservice 10m
wait_for_rollout harbor deployment harbor-portal 10m
wait_for_rollout harbor deployment harbor-registry 10m
}
check_harbor_endpoint() {
  # Probe Harbor's registry API over HTTPS. 200 and 401 both mean the
  # endpoint is serving (401 = auth required); anything else is fatal.
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: curl -ksS -o /dev/null -w '%{http_code}' https://registry.bstein.dev/v2/"
    return 0
  fi
  local status_code
  status_code="$(curl -ksS -o /dev/null -w '%{http_code}' https://registry.bstein.dev/v2/ || true)"
  if [[ "${status_code}" == "200" || "${status_code}" == "401" ]]; then
    log "harbor-endpoint=http-${status_code}"
  else
    die "Harbor endpoint check failed with HTTP ${status_code:-unknown}"
  fi
}
wait_for_pod_phase() {
  # Poll a pod's .status.phase every 2s until it reaches the expected phase
  # (rc 0), enters Failed (rc 1), or the timeout in seconds elapses (rc 1).
  # $1 namespace, $2 pod name, $3 expected phase, $4 timeout seconds.
  local ns="$1" pod_name="$2" want="$3" limit="$4"
  local deadline current
  deadline=$(( $(date +%s) + limit ))
  while :; do
    current="$(kubectl -n "${ns}" get pod "${pod_name}" -o jsonpath='{.status.phase}' 2>/dev/null || true)"
    [[ "${current}" == "${want}" ]] && return 0
    [[ "${current}" == "Failed" ]] && return 1
    if (( $(date +%s) >= deadline )); then
      return 1
    fi
    sleep 2
  done
}
# True (rc 0) when Harbor's core deployments exist AND the registry endpoint
# answers 200/401 over HTTPS. Used to decide whether seeding/bootstrap of
# Harbor can be skipped during startup.
harbor_is_ready() {
kubectl -n harbor get deploy harbor-core harbor-jobservice harbor-portal harbor-registry >/dev/null 2>&1 || return 1
local code
code="$(curl -ksS -o /dev/null -w '%{http_code}' https://registry.bstein.dev/v2/ || true)"
[[ "${code}" == "200" || "${code}" == "401" ]]
}
# Prove Harbor can serve image pulls: schedule a one-shot pod on the canary
# node with imagePullPolicy Always so the kubelet must pull the Harbor-backed
# canary image. Succeeded phase within 180s = pass; otherwise dump
# diagnostics and fail. The pod is always cleaned up.
run_harbor_pull_canary() {
local pod="hecate-harbor-canary"
if [[ "${EXECUTE}" -eq 0 ]]; then
log "DRY-RUN: create Harbor pull canary pod with ${HARBOR_CANARY_IMAGE} on ${HARBOR_CANARY_NODE}"
return 0
fi
# Remove any leftover canary from a previous run (non-blocking delete).
timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" delete pod "${pod}" --ignore-not-found --wait=false >/dev/null 2>&1 || true
cat <<CANARY | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
name: ${pod}
namespace: ${NODE_HELPER_NAMESPACE}
spec:
nodeName: ${HARBOR_CANARY_NODE}
restartPolicy: Never
imagePullSecrets:
- name: ${REGISTRY_PULL_SECRET}
tolerations:
- operator: Exists
containers:
- name: canary
image: ${HARBOR_CANARY_IMAGE}
imagePullPolicy: Always
command: ["sh", "-ceu", "echo harbor-canary-ok"]
CANARY
if ! wait_for_pod_phase "${NODE_HELPER_NAMESPACE}" "${pod}" Succeeded 180; then
kubectl -n "${NODE_HELPER_NAMESPACE}" describe pod "${pod}" >&2 || true
timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" logs "${pod}" >&2 || true
timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" delete pod "${pod}" --ignore-not-found --wait=false >/dev/null 2>&1 || true
return 1
fi
timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" logs "${pod}" || true
timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" delete pod "${pod}" --ignore-not-found --wait=false >/dev/null 2>&1 || true
}
# Run an ad-hoc privileged helper pod pinned to a node, execute the supplied
# script inside it, stream its logs, and clean up. Returns non-zero on pod
# failure or timeout.
# $1 node name, $2 purpose (pod-name fragment), $3 timeout seconds,
# $4 script body (shipped base64-encoded to survive YAML quoting).
run_helper_pod() {
local node="$1"
local purpose="$2"
local timeout_seconds="$3"
local script_content="$4"
# Unique-ish pod name: sanitized purpose plus current HHMMSS.
local pod="hecate-$(sanitize_name "${purpose}")-$(date +%H%M%S)"
local encoded_script
encoded_script="$(printf '%s' "${script_content}" | base64 -w0)"
if [[ "${EXECUTE}" -eq 0 ]]; then
log "DRY-RUN: helper pod ${pod} on ${node} for ${purpose}"
return 0
fi
# hostNetwork/hostPID + privileged allow the script to nsenter the node's
# PID 1 namespaces (see run_host_command_via_helper).
cat <<POD | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
name: ${pod}
namespace: ${NODE_HELPER_NAMESPACE}
spec:
nodeName: ${node}
restartPolicy: Never
serviceAccountName: ${NODE_HELPER_SERVICE_ACCOUNT}
imagePullSecrets:
- name: ${REGISTRY_PULL_SECRET}
hostNetwork: true
hostPID: true
tolerations:
- operator: Exists
containers:
- name: helper
image: ${NODE_HELPER_IMAGE}
imagePullPolicy: IfNotPresent
securityContext:
privileged: true
command: ["/bin/bash", "-ceu"]
args:
- |
printf '%s' '${encoded_script}' | base64 -d >/tmp/hecate-step.sh
chmod +x /tmp/hecate-step.sh
/tmp/hecate-step.sh
POD
if ! wait_for_pod_phase "${NODE_HELPER_NAMESPACE}" "${pod}" Succeeded "${timeout_seconds}"; then
kubectl -n "${NODE_HELPER_NAMESPACE}" describe pod "${pod}" >&2 || true
timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" logs "${pod}" >&2 || true
timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" delete pod "${pod}" --ignore-not-found --wait=false >/dev/null 2>&1 || true
return 1
fi
timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" logs "${pod}" || true
timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" delete pod "${pod}" --ignore-not-found --wait=false >/dev/null 2>&1 || true
}
# Execute a shell command on the node's HOST (not just in a container) by
# wrapping it in a helper-pod script that nsenters PID 1's namespaces.
# The command is base64-round-tripped so arbitrary quoting survives both the
# heredoc below and the pod's YAML. Escaped \$(...) / \${...} expand inside
# the helper pod; unescaped ${encoded_command} is substituted here.
# $1 node, $2 purpose, $3 timeout seconds, $4 host command string.
run_host_command_via_helper() {
local node="$1"
local purpose="$2"
local timeout_seconds="$3"
local host_command="$4"
local encoded_command
encoded_command="$(printf '%s' "${host_command}" | base64 -w0)"
local script_content
script_content=$(cat <<SCRIPT
set -euo pipefail
HOST_COMMAND="\$(printf '%s' '${encoded_command}' | base64 -d)"
nsenter --target 1 --mount --uts --ipc --net --pid /bin/sh -ceu "\${HOST_COMMAND}"
SCRIPT
)
run_helper_pod "${node}" "${purpose}" "${timeout_seconds}" "${script_content}"
}
schedule_host_shutdown_via_helper() {
  # Arm a delayed poweroff on a node: a transient systemd unit fires after
  # the given delay, stops the named k3s service, then powers the host off.
  # The delay lets the helper pod (and this script) finish cleanly first.
  # $1 node, $2 service name (k3s / k3s-agent), $3 delay in seconds.
  local target_node="$1" unit_service="$2" delay="$3"
  local cmd
  cmd="/usr/bin/systemd-run --unit hecate-shutdown-${unit_service} --on-active=${delay}s /bin/sh -lc '/usr/bin/systemctl stop ${unit_service} || true; /usr/bin/systemctl poweroff || true'"
  run_host_command_via_helper "${target_node}" "shutdown-${target_node}-${unit_service}" 120 "${cmd}"
}
# Pull the node-helper image onto every node via a temporary DaemonSet so
# later helper pods (IfNotPresent) start even when Harbor is down. Waits up
# to ~180s for the DaemonSet to be Ready everywhere, then deletes it; on
# timeout it dumps diagnostics and dies.
prewarm_node_helper_image() {
local name="hecate-node-helper-prewarm"
if [[ "${EXECUTE}" -eq 0 ]]; then
log "DRY-RUN: prewarm ${NODE_HELPER_IMAGE} via temporary DaemonSet"
return 0
fi
cat <<DS | kubectl apply -f -
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: ${name}
namespace: ${NODE_HELPER_NAMESPACE}
spec:
selector:
matchLabels:
app: ${name}
template:
metadata:
labels:
app: ${name}
spec:
imagePullSecrets:
- name: ${REGISTRY_PULL_SECRET}
tolerations:
- operator: Exists
containers:
- name: helper
image: ${NODE_HELPER_IMAGE}
imagePullPolicy: Always
command: ["/bin/sh", "-ceu", "sleep 300"]
DS
# Poll every 2s (90 tries) until desired == ready on all nodes.
local i desired ready
for i in $(seq 1 90); do
desired="$(kubectl -n "${NODE_HELPER_NAMESPACE}" get ds "${name}" -o jsonpath='{.status.desiredNumberScheduled}' 2>/dev/null || echo 0)"
ready="$(kubectl -n "${NODE_HELPER_NAMESPACE}" get ds "${name}" -o jsonpath='{.status.numberReady}' 2>/dev/null || echo 0)"
[[ -n "${desired}" ]] || desired=0
[[ -n "${ready}" ]] || ready=0
if [[ "${desired}" != "0" && "${desired}" == "${ready}" ]]; then
log "node-helper-prewarm=${ready}/${desired}"
kubectl -n "${NODE_HELPER_NAMESPACE}" delete ds "${name}" --ignore-not-found >/dev/null 2>&1 || true
return 0
fi
sleep 2
done
kubectl -n "${NODE_HELPER_NAMESPACE}" describe ds "${name}" >&2 || true
kubectl -n "${NODE_HELPER_NAMESPACE}" get pods -l app="${name}" >&2 || true
kubectl -n "${NODE_HELPER_NAMESPACE}" delete ds "${name}" --ignore-not-found >/dev/null 2>&1 || true
die "Timed out prewarming node helper image ${NODE_HELPER_IMAGE}"
}
# Serve the directory containing the Harbor bundle over plain HTTP on
# BUNDLE_HTTP_PORT via a background python3 http.server, and wait (up to 20s)
# until the bundle URL answers. The PID is recorded for stop_bundle_server
# and the EXIT trap.
start_bundle_server() {
[[ -f "${HARBOR_BUNDLE_FILE}" ]] || die "Harbor bundle not found at ${HARBOR_BUNDLE_FILE}"
require_cmd python3
local bundle_dir bundle_name
bundle_dir="$(dirname "${HARBOR_BUNDLE_FILE}")"
bundle_name="$(basename "${HARBOR_BUNDLE_FILE}")"
if [[ "${EXECUTE}" -eq 0 ]]; then
log "DRY-RUN: serve ${bundle_name} from ${bundle_dir} on port ${BUNDLE_HTTP_PORT}"
return 0
fi
python3 -m http.server "${BUNDLE_HTTP_PORT}" --bind 0.0.0.0 --directory "${bundle_dir}" </dev/null >/tmp/hecate-bundle-server.log 2>&1 &
BUNDLE_SERVER_PID=$!
# NOTE(review): this readiness probe downloads the full bundle each try;
# a HEAD request (curl -I) would be considerably cheaper for large bundles.
for _ in $(seq 1 20); do
if curl -fsS "http://127.0.0.1:${BUNDLE_HTTP_PORT}/${bundle_name}" >/dev/null 2>&1; then
return 0
fi
sleep 1
done
die "Temporary bundle server did not become ready; see /tmp/hecate-bundle-server.log"
}
stop_bundle_server() {
  # Terminate the temporary bundle HTTP server (TERM), waiting up to ~10s
  # for the process to exit. Safe to call when no server was started.
  [[ -n "${BUNDLE_SERVER_PID}" ]] || return 0
  kill "${BUNDLE_SERVER_PID}" >/dev/null 2>&1 || true
  local attempt
  for (( attempt = 0; attempt < 10; attempt++ )); do
    kill -0 "${BUNDLE_SERVER_PID}" >/dev/null 2>&1 || break
    sleep 1
  done
  BUNDLE_SERVER_PID=""
}
# Ensure the temporary bundle server never outlives the script.
trap stop_bundle_server EXIT
# First address reported by Linux `hostname -I` for this host; helper pods
# use it to fetch the bundle from the control host over the host network.
control_host_ip() {
hostname -I | awk '{print $1}'
}
# Import the Harbor bootstrap bundle into containerd on the target node:
# serve the bundle over a temporary HTTP server on this host, stream it
# through zstd into `k3s ctr images import` inside a privileged helper pod,
# then verify every image listed in harbor-bootstrap-images.txt is present.
seed_harbor_images() {
local images_text control_ip bundle_name script_content seed_rc=0
[[ -f "${HARBOR_BUNDLE_FILE}" ]] || die "Harbor bundle not found at ${HARBOR_BUNDLE_FILE}"
# Strip comment and blank lines from the image manifest.
images_text="$(sed '/^[[:space:]]*#/d;/^[[:space:]]*$/d' "${BOOTSTRAP_DIR}/harbor-bootstrap-images.txt")"
[[ -n "${images_text}" ]] || die "No Harbor images listed in ${BOOTSTRAP_DIR}/harbor-bootstrap-images.txt"
bundle_name="$(basename "${HARBOR_BUNDLE_FILE}")"
start_bundle_server
control_ip="$(control_host_ip)"
# Unescaped expansions (${control_ip}, ${images_text}, ...) are substituted
# now; escaped \${image} / \$1 are evaluated inside the helper pod.
script_content=$(cat <<SCRIPT
set -euo pipefail
curl -fsSL "http://${control_ip}:${BUNDLE_HTTP_PORT}/${bundle_name}" \
| zstd -dc \
| nsenter --target 1 --mount --uts --ipc --net --pid /usr/local/bin/k3s ctr images import -
while IFS= read -r image; do
[[ -z "\${image}" ]] && continue
nsenter --target 1 --mount --uts --ipc --net --pid /usr/local/bin/k3s ctr images ls | awk '{print \$1}' | grep -Fx "\${image}" >/dev/null
done <<'IMAGES'
${images_text}
IMAGES
SCRIPT
)
# Always stop the bundle server, then propagate any helper failure.
run_helper_pod "${HARBOR_TARGET_NODE}" "harbor-seed" 900 "${script_content}" || seed_rc=$?
stop_bundle_server
[[ "${seed_rc}" -eq 0 ]] || return "${seed_rc}"
mark_checkpoint startup_harbor_seeded
}
bootstrap_local_minimal() {
  # Apply the minimal platform layers straight from the local repo checkout,
  # in dependency order, for when Flux cannot reconcile (registry/git down).
  local layer
  for layer in \
    infrastructure/core \
    infrastructure/sources/helm \
    infrastructure/longhorn/core \
    infrastructure/metallb \
    infrastructure/traefik \
    infrastructure/vault-csi \
    infrastructure/vault-injector \
    services/vault \
    infrastructure/postgres \
    services/gitea; do
    apply_kustomization "${layer}"
  done
}
bootstrap_local_harbor() {
  # Apply only the Harbor layer from the local checkout (runs post-seed).
  apply_kustomization "services/harbor"
}
reconcile_stage() {
  # Reconcile the named Flux kustomizations for one recovery stage and
  # record a "reconciled_<stage>" checkpoint.
  # $1 - stage label used for the checkpoint; remaining args are
  #      kustomization names in the flux-system namespace.
  local stage_name="$1"
  shift
  if ! command -v flux >/dev/null 2>&1; then
    # Fallback without the flux CLI: request reconciliation of everything
    # via the annotation Flux watches.
    local now
    now="$(date --iso-8601=seconds)"
    run kubectl -n flux-system annotate kustomizations.kustomize.toolkit.fluxcd.io --all reconcile.fluxcd.io/requestedAt="${now}" --overwrite
    # Fix: previously this path returned before mark_checkpoint, so resumed
    # runs on hosts without the flux CLI never recorded stage progress.
    mark_checkpoint "reconciled_${stage_name}"
    return 0
  fi
  local item
  for item in "$@"; do
    run flux reconcile kustomization "${item}" -n flux-system --with-source --timeout=15m
  done
  mark_checkpoint "reconciled_${stage_name}"
}
# Unsuspend all Flux objects and reconcile the platform in dependency
# stages, verifying each stage's workloads before moving on. Ends with a
# live image-pull canary against Harbor.
resume_flux_and_reconcile() {
patch_flux_suspend_all false
if command -v flux >/dev/null 2>&1; then
run flux reconcile source git flux-system -n flux-system --timeout=3m
fi
reconcile_stage core core helm longhorn metallb traefik vault-csi vault-injector
check_ingress_stack
check_longhorn_stack
reconcile_stage stateful vault postgres gitea
check_vault_stack
check_postgres_stack
check_gitea_stack
reconcile_stage registry harbor
check_harbor_stack
check_harbor_endpoint
run_harbor_pull_canary
}
# Emit a key=value status summary (bundle presence, UPS, recovery state,
# Flux readiness, Harbor HTTP code, presence of core workloads). Read-only
# and best-effort; safe to run at any time.
status_report() {
local battery flux_ready harbor_code workers
battery="$(read_ups_battery || true)"
flux_ready="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || true)"
harbor_code="$(curl -ksS -o /dev/null -w '%{http_code}' https://registry.bstein.dev/v2/ || true)"
workers="$(discover_workers_csv 2>/dev/null || true)"
echo "mode=status"
echo "bundle_file=${HARBOR_BUNDLE_FILE}"
echo "bundle_present=$([[ -f "${HARBOR_BUNDLE_FILE}" ]] && echo true || echo false)"
echo "node_helper_image=${NODE_HELPER_IMAGE}"
echo "harbor_target_node=${HARBOR_TARGET_NODE}"
echo "workers=${workers}"
echo "recovery_pending=${RECOVERY_PENDING}"
echo "startup_attempted=${STARTUP_ATTEMPTED_DURING_OUTAGE}"
echo "last_checkpoint=${LAST_CHECKPOINT}"
echo "ups_host=${UPS_HOST_IN_USE:-${UPS_HOST}}"
echo "ups_battery=${battery:-unknown}"
echo "flux_source_ready=${flux_ready:-unknown}"
echo "harbor_http=${harbor_code:-unknown}"
# Existence checks only (not rollout health) for the core workloads.
kubectl get ingressclass traefik >/dev/null 2>&1 && echo "traefik_ingressclass=true" || echo "traefik_ingressclass=false"
kubectl -n traefik get deploy traefik >/dev/null 2>&1 && echo "traefik_deploy=true" || echo "traefik_deploy=false"
kubectl -n longhorn-system get ds longhorn-manager >/dev/null 2>&1 && echo "longhorn_manager=true" || echo "longhorn_manager=false"
kubectl -n vault get sts vault >/dev/null 2>&1 && echo "vault_statefulset=true" || echo "vault_statefulset=false"
kubectl -n postgres get sts postgres >/dev/null 2>&1 && echo "postgres_statefulset=true" || echo "postgres_statefulset=false"
kubectl -n gitea get deploy gitea >/dev/null 2>&1 && echo "gitea_deploy=true" || echo "gitea_deploy=false"
kubectl -n harbor get deploy harbor-core >/dev/null 2>&1 && echo "harbor_deploy=true" || echo "harbor_deploy=false"
}
# Full graceful shutdown: record pending-recovery state, prewarm the helper
# image, snapshot etcd, suspend Flux, scale apps to zero, drain workers,
# then schedule staggered host poweroffs via helper pods (workers after 20s,
# control planes after 45s) so this script's own kubectl access survives
# long enough to finish.
planned_shutdown() {
local workers_csv
workers_csv="$(discover_workers_csv 2>/dev/null || true)"
as_array_from_csv "${workers_csv}" WORKER_NODES
# NOTE(review): control-plane node names are hard-coded here; keep in sync
# with the actual topology (or move them into recovery-config.env).
as_array_from_csv "titan-0a,titan-0b,titan-0c" CONTROL_PLANE_NODES
RECOVERY_PENDING=1
STARTUP_ATTEMPTED_DURING_OUTAGE=0
save_recovery_state 1 0 shutdown_started
if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then
prewarm_node_helper_image
mark_checkpoint shutdown_helper_prewarmed
fi
# Snapshot etcd on the first control plane before anything is torn down.
if [[ "${SKIP_ETCD_SNAPSHOT}" -eq 0 ]]; then
local ts
ts="$(date +%Y%m%d-%H%M%S)"
run_host_command_via_helper "${CONTROL_PLANE_NODES[0]}" "etcd-snapshot" 300 "/usr/local/bin/k3s etcd-snapshot save --name pre-shutdown-${ts}"
mark_checkpoint shutdown_snapshot_complete
else
warn "Skipping etcd snapshot by request."
fi
# Stop Flux from fighting the scale-down, then quiesce workloads.
patch_flux_suspend_all true
best_effort_scale_down_apps
mark_checkpoint shutdown_apps_scaled_down
if [[ "${SKIP_DRAIN}" -eq 0 ]]; then
best_effort_drain_workers "${DRAIN_TIMEOUT_SECONDS}" "${WORKER_NODES[@]}"
mark_checkpoint shutdown_workers_drained
else
warn "Skipping worker drain by request."
fi
local node
for node in "${WORKER_NODES[@]}"; do
[[ -z "${node}" ]] && continue
schedule_host_shutdown_via_helper "${node}" k3s-agent 20
done
mark_checkpoint shutdown_workers_scheduled
# Control planes last, with a longer delay, so the API stays up while the
# worker shutdowns are being scheduled.
for node in "${CONTROL_PLANE_NODES[@]}"; do
[[ -z "${node}" ]] && continue
schedule_host_shutdown_via_helper "${node}" k3s 45
done
mark_checkpoint shutdown_control_planes_scheduled
log "Shutdown actions scheduled on hosts."
}
emergency_shutdown_after_outage() {
  # Second-outage fallback: a startup attempt was already made during this
  # outage and the battery is again too low, so aggressively quiesce the
  # cluster (short drain timeout) and fall into the normal planned
  # shutdown. Every quiesce step is best-effort ("|| true").
  # Fix: corrected the garbled operator-facing message ("due insufficient").
  warn "Entering outage-aware emergency shutdown path due to insufficient startup budget."
  patch_flux_suspend_all true || true
  best_effort_scale_down_apps || true
  local workers_csv
  workers_csv="$(discover_workers_csv 2>/dev/null || true)"
  as_array_from_csv "${workers_csv}" WORKER_NODES
  best_effort_drain_workers "${EMERGENCY_DRAIN_TIMEOUT_SECONDS}" "${WORKER_NODES[@]}" || true
  planned_shutdown
}
# Startup orchestration: battery-gate when recovering from an outage, wait
# for the API, optionally force the Flux branch, run the local bootstrap
# fallback when the Flux source is not Ready, then resume/reconcile Flux and
# clear the recovery state.
startup_flow() {
if [[ "${RECOVERY_PENDING}" -eq 1 ]]; then
if ! ensure_minimum_battery_for_bootstrap; then
# A second low-battery attempt during the same outage means power is
# unstable: shut back down instead of looping on startup.
if [[ "${STARTUP_ATTEMPTED_DURING_OUTAGE}" -eq 1 ]]; then
emergency_shutdown_after_outage
exit 1
fi
warn "Startup deferred due low battery after recent outage; marking for second-outage fallback."
save_recovery_state 1 1 deferred_low_battery
exit 1
fi
STARTUP_ATTEMPTED_DURING_OUTAGE=1
save_recovery_state 1 1 waiting_for_api
fi
if ! wait_for_api; then
die "Kubernetes API did not become reachable in time."
fi
mark_checkpoint startup_api_ready
if [[ -n "${FORCE_FLUX_BRANCH}" ]]; then
run kubectl -n flux-system patch gitrepository flux-system --type=merge -p "{\"spec\":{\"ref\":{\"branch\":\"${FORCE_FLUX_BRANCH}\"}}}"
mark_checkpoint startup_flux_branch_forced
fi
if [[ "${SKIP_LOCAL_BOOTSTRAP}" -eq 0 ]]; then
# Local bootstrap only when the Flux source is not Ready (e.g. Gitea and
# Harbor are down so Flux cannot fetch or pull).
if ! kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null | grep -q True; then
warn "Flux source not Ready; executing local bootstrap fallback path."
bootstrap_local_minimal
mark_checkpoint startup_local_bootstrap_complete
check_ingress_stack
check_longhorn_stack
check_vault_stack
check_postgres_stack
check_gitea_stack
if [[ "${SKIP_HARBOR_BOOTSTRAP}" -eq 0 ]]; then
if harbor_is_ready; then
log "Harbor already healthy; skipping Harbor seed/bootstrap."
else
# Seed images from the local bundle first so Harbor's own images can
# be pulled without a working registry.
if [[ "${SKIP_HARBOR_SEED}" -eq 0 ]]; then
if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then
prewarm_node_helper_image
fi
seed_harbor_images
else
warn "Skipping Harbor seed/import by request."
fi
bootstrap_local_harbor
mark_checkpoint startup_local_harbor_applied
check_harbor_stack
check_harbor_endpoint
fi
else
warn "Skipping Harbor bootstrap fallback by request."
fi
fi
else
warn "Skipping local bootstrap fallback by request."
fi
resume_flux_and_reconcile
# Prewarm again so the helper image is cached for the next shutdown.
if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then
prewarm_node_helper_image
mark_checkpoint startup_helper_prewarmed
fi
clear_recovery_state
log "Startup flow complete."
}
prepare_flow() {
  # Pre-outage preparation: verify the Harbor bootstrap bundle is staged on
  # this host and (unless skipped) prewarm the helper image on every node.
  if [[ ! -f "${HARBOR_BUNDLE_FILE}" ]]; then
    die "Harbor bundle missing at ${HARBOR_BUNDLE_FILE}. Build and copy it to the canonical control host first."
  fi
  if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then
    prewarm_node_helper_image
    mark_checkpoint prepare_helper_prewarmed
  fi
  log "Prepare flow complete."
}
harbor_seed_flow() {
  # Standalone Harbor seed: verify the bundle exists, optionally prewarm the
  # helper image, import images on the target node, then prove Harbor
  # actually serves pulls (endpoint probe + canary pod).
  if [[ ! -f "${HARBOR_BUNDLE_FILE}" ]]; then
    die "Harbor bundle missing at ${HARBOR_BUNDLE_FILE}. Build and copy it to the canonical control host first."
  fi
  if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then
    prewarm_node_helper_image
    mark_checkpoint harbor_seed_helper_prewarmed
  fi
  seed_harbor_images
  check_harbor_endpoint
  run_harbor_pull_canary
  log "Harbor seed flow complete."
}
# ---- Main ----
# Load persisted outage state, announce the run parameters, then dispatch
# to the flow selected by MODE (already validated above).
load_recovery_state
log "mode=${MODE} execute=${EXECUTE}"
log "recovery-state-file=${RECOVERY_STATE_FILE}"
log "bundle-file=${HARBOR_BUNDLE_FILE}"
log "node-helper-image=${NODE_HELPER_IMAGE}"
# Best-effort report; the API may not be reachable yet in startup mode.
report_flux_source_state
case "${MODE}" in
status)
status_report
;;
prepare)
prepare_flow
;;
harbor-seed)
harbor_seed_flow
;;
shutdown)
planned_shutdown
;;
startup)
startup_flow
;;
esac