#!/usr/bin/env bash
set -euo pipefail

# Resolve the directory containing this script, the repo root (overridable via
# ANANKE_REPO_DIR), and the optional bootstrap config that seeds defaults.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="${ANANKE_REPO_DIR:-$(cd "${SCRIPT_DIR}/.." && pwd)}"
BOOTSTRAP_DIR="${SCRIPT_DIR}/bootstrap"
CONFIG_FILE="${BOOTSTRAP_DIR}/recovery-config.env"
# Optional environment file: pre-seeds the default values declared below.
if [[ -f "${CONFIG_FILE}" ]]; then
  # shellcheck disable=SC1090
  source "${CONFIG_FILE}"
fi
# Fall back to a kubeconfig sitting next to the script when none is configured.
if [[ -z "${KUBECONFIG:-}" && -f "${SCRIPT_DIR}/kubeconfig" ]]; then
  export KUBECONFIG="${SCRIPT_DIR}/kubeconfig"
fi
|
|
|
|
# Print CLI usage/help (modes, options with live defaults, examples) to stdout.
usage() {
  cat <<USAGE
Usage:
  scripts/cluster_power_recovery.sh <prepare|status|harbor-seed|shutdown|startup> [options]

Options:
  --execute                      Actually run commands (default is dry-run)
  --shutdown-mode <mode>         Shutdown behavior: host-poweroff or cluster-only (default: ${SHUTDOWN_MODE:-host-poweroff})
  --expected-flux-branch <name>  Expected Flux source branch during startup checks (default: ${DEFAULT_FLUX_BRANCH:-main})
  --expected-flux-url <url>      Expected Flux source URL during startup checks
  --allow-flux-source-mutation   Required to allow --force-flux-url during startup
  --force-flux-url <url>         Startup: patch flux-system GitRepository URL to this value
  --force-flux-branch <name>     Startup: patch flux-system GitRepository branch to this value
  --skip-etcd-snapshot           Shutdown: skip etcd snapshot before shutdown
  --skip-drain                   Shutdown: skip worker drain during shutdown
  --skip-local-bootstrap         Startup: skip local bootstrap fallback applies
  --skip-harbor-bootstrap        Startup: skip Harbor recovery bootstrap stage
  --skip-harbor-seed             Startup: skip Harbor image seed/import stage
  --skip-helper-prewarm          Prepare/Shutdown/Startup: skip node-helper prewarm
  --min-startup-battery <pct>    Minimum UPS percent required before bootstrap (default: 35)
  --ups-host <name>              UPS identifier for upsc (default: ups@localhost)
  --ups-battery-key <key>        UPS battery key for upsc (default: battery.charge)
  --recovery-state-file <path>   Recovery state file for outage-aware restart logic
  --replica-snapshot-file <path>
                                 File used to persist workload replica snapshot across shutdown/startup
  --harbor-bundle-file <path>    Harbor bootstrap bundle on the control host
  --harbor-target-node <name>    Node that should host Harbor during bootstrap (default: auto)
  --harbor-canary-node <name>    Node used for Harbor pull canary (default: auto)
  --harbor-host-label-key <key>  Node label key used to pin Harbor bootstrap workloads (default: ${HARBOR_HOST_LABEL_KEY:-ananke.bstein.dev/harbor-bootstrap})
  --harbor-canary-image <image>  Harbor-backed image used for pull canary (default: ${HARBOR_CANARY_IMAGE:-registry.bstein.dev/bstein/kubectl:1.35.0})
  --node-helper-image <image>    Privileged helper image used for host operations (default: ${NODE_HELPER_IMAGE:-registry.bstein.dev/bstein/ananke-node-helper:0.1.0})
  --bundle-http-port <port>      Temporary HTTP port used to serve bootstrap bundles (default: ${BUNDLE_HTTP_PORT:-8877})
  --api-wait-timeout <seconds>   Startup: Kubernetes API wait timeout (default: 600)
  --drain-timeout <seconds>      Worker drain timeout for normal shutdown (default: 180)
  --emergency-drain-timeout <seconds>
                                 Worker drain timeout for emergency fallback (default: 45)
  --flux-ready-timeout <seconds>
                                 Startup: max time to wait for Flux kustomizations Ready (default: 1200)
  --startup-checklist-timeout <seconds>
                                 Startup: max time to wait for external service checklist (default: 900)
  --startup-workload-timeout <seconds>
                                 Startup: max time to wait for workload readiness checks (default: 900)
  --startup-stability-window <seconds>
                                 Startup: continuous healthy window required before success (default: 180)
  --startup-stability-timeout <seconds>
                                 Startup: max time allowed to achieve the healthy window (default: 900)
  --require-ups-battery          Hard-fail startup if UPS battery cannot be read
  -h, --help                     Show help

Examples:
  scripts/cluster_power_recovery.sh prepare --execute
  scripts/cluster_power_recovery.sh harbor-seed --execute
  scripts/cluster_power_recovery.sh status
  scripts/cluster_power_recovery.sh shutdown --execute
  scripts/cluster_power_recovery.sh startup --execute --force-flux-branch main
USAGE
}
|
|
|
|
# Select the operating mode from the first positional argument. An empty
# argument or a help flag prints usage and exits successfully; anything
# outside the known mode set is rejected.
MODE="${1:-}"
if [[ -z "${MODE}" || "${MODE}" == "-h" || "${MODE}" == "--help" ]]; then
  usage
  exit 0
fi
shift || true

if [[ "${MODE}" != "prepare" && "${MODE}" != "status" && "${MODE}" != "harbor-seed" && "${MODE}" != "shutdown" && "${MODE}" != "startup" ]]; then
  echo "Unknown mode: ${MODE}" >&2
  usage
  exit 1
fi
|
|
|
|
# ---------------------------------------------------------------------------
# Defaults for every tunable. Most honor a pre-set environment variable
# (seeded by recovery-config.env or the caller) and may be overridden by the
# CLI flags parsed below.
# ---------------------------------------------------------------------------
EXECUTE=0
SHUTDOWN_MODE="${SHUTDOWN_MODE:-host-poweroff}"

# Flux source guardrails: expected URL/branch plus breakglass overrides.
EXPECTED_FLUX_BRANCH="${DEFAULT_FLUX_BRANCH:-main}"
EXPECTED_FLUX_URL="${EXPECTED_FLUX_URL:-ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git}"
ALLOW_FLUX_SOURCE_MUTATION=0
FORCE_FLUX_URL=""
FORCE_FLUX_BRANCH=""

# Stage-skip switches (set by the matching --skip-* flags).
SKIP_ETCD_SNAPSHOT=0
SKIP_DRAIN=0
SKIP_LOCAL_BOOTSTRAP=0
SKIP_HARBOR_BOOTSTRAP=0
SKIP_HARBOR_SEED=0
SKIP_HELPER_PREWARM=0

# UPS battery gating for startup/bootstrap.
UPS_HOST="${UPS_HOST:-ups@localhost}"
UPS_BATTERY_KEY="${UPS_BATTERY_KEY:-battery.charge}"
MIN_STARTUP_BATTERY="${MIN_STARTUP_BATTERY:-35}"
REQUIRE_UPS_BATTERY="${REQUIRE_UPS_BATTERY:-0}"

# Timeouts and poll intervals (seconds).
DRAIN_TIMEOUT_SECONDS=180
EMERGENCY_DRAIN_TIMEOUT_SECONDS=45
API_WAIT_TIMEOUT_SECONDS=600
FLUX_READY_TIMEOUT_SECONDS="${FLUX_READY_TIMEOUT_SECONDS:-1200}"
FLUX_READY_POLL_SECONDS="${FLUX_READY_POLL_SECONDS:-10}"
STARTUP_CHECKLIST_TIMEOUT_SECONDS="${STARTUP_CHECKLIST_TIMEOUT_SECONDS:-900}"
STARTUP_CHECKLIST_POLL_SECONDS="${STARTUP_CHECKLIST_POLL_SECONDS:-10}"
STARTUP_WORKLOAD_TIMEOUT_SECONDS="${STARTUP_WORKLOAD_TIMEOUT_SECONDS:-900}"
STARTUP_WORKLOAD_POLL_SECONDS="${STARTUP_WORKLOAD_POLL_SECONDS:-10}"
STARTUP_STABILITY_WINDOW_SECONDS="${STARTUP_STABILITY_WINDOW_SECONDS:-180}"
STARTUP_STABILITY_TIMEOUT_SECONDS="${STARTUP_STABILITY_TIMEOUT_SECONDS:-900}"
STARTUP_STABILITY_POLL_SECONDS="${STARTUP_STABILITY_POLL_SECONDS:-10}"

# Startup health-check filters and external checklist configuration.
STARTUP_IGNORE_PODS_REGEX="${STARTUP_IGNORE_PODS_REGEX:-}"
STARTUP_IGNORE_WORKLOADS_REGEX="${STARTUP_IGNORE_WORKLOADS_REGEX:-}"
STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX="${STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX:-^(kube-system|kube-public|kube-node-lease|flux-system)$}"
STARTUP_OPTIONAL_KUSTOMIZATIONS="${STARTUP_OPTIONAL_KUSTOMIZATIONS:-}"
STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS="${STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS:-10}"
STARTUP_SERVICE_CHECKLIST="${STARTUP_SERVICE_CHECKLIST:-}"
STARTUP_INCLUDE_INGRESS_CHECKS="${STARTUP_INCLUDE_INGRESS_CHECKS:-1}"
STARTUP_INGRESS_ALLOWED_STATUSES="${STARTUP_INGRESS_ALLOWED_STATUSES:-200,301,302,307,308,401,403,404}"
STARTUP_IGNORE_INGRESS_HOSTS_REGEX="${STARTUP_IGNORE_INGRESS_HOSTS_REGEX:-}"
STARTUP_INGRESS_CHECK_TIMEOUT_SECONDS="${STARTUP_INGRESS_CHECK_TIMEOUT_SECONDS:-10}"

# Namespaces whose workloads are never scaled down during shutdown.
SHUTDOWN_NAMESPACE_EXCLUDES_REGEX="${SHUTDOWN_NAMESPACE_EXCLUDES_REGEX:-^(kube-system|kube-public|kube-node-lease|flux-system|traefik|metallb-system|cert-manager|longhorn-system|vault|postgres|maintenance)$}"

# Persistent state locations and Harbor bootstrap settings.
BUNDLE_HTTP_PORT="${BUNDLE_HTTP_PORT:-8877}"
STATE_ROOT="${HOME}/${STATE_SUBDIR:-.local/share/ananke}"
RECOVERY_STATE_FILE="${STATE_ROOT}/cluster_power_recovery.state"
REPLICA_SNAPSHOT_FILE="${STATE_ROOT}/desired_workload_replicas.tsv"
HARBOR_BUNDLE_FILE="${STATE_ROOT}/bundles/${HARBOR_BUNDLE_BASENAME:-harbor-bootstrap-v2.14.1-arm64.tar.zst}"
HARBOR_TARGET_NODE="${HARBOR_TARGET_NODE:-}"
HARBOR_CANARY_NODE="${HARBOR_CANARY_NODE:-}"
HARBOR_HOST_LABEL_KEY="${HARBOR_HOST_LABEL_KEY:-ananke.bstein.dev/harbor-bootstrap}"
HARBOR_CANARY_IMAGE="${HARBOR_CANARY_IMAGE:-registry.bstein.dev/bstein/kubectl:1.35.0}"
NODE_HELPER_IMAGE="${NODE_HELPER_IMAGE:-registry.bstein.dev/bstein/ananke-node-helper:0.1.0}"
NODE_HELPER_NAMESPACE="${NODE_HELPER_NAMESPACE:-maintenance}"
NODE_HELPER_SERVICE_ACCOUNT="${NODE_HELPER_SERVICE_ACCOUNT:-default}"
NODE_HELPER_PREWARM_DS="${NODE_HELPER_PREWARM_DS:-ananke-node-helper-prewarm}"
REGISTRY_PULL_SECRET="${REGISTRY_PULL_SECRET:-harbor-regcred}"
KEEP_PREWARM_DAEMONSET=0

# Runtime bookkeeping, mutated as the run progresses.
RECOVERY_PENDING=0
STARTUP_ATTEMPTED_DURING_OUTAGE=0
LAST_CHECKPOINT="none"
BUNDLE_SERVER_PID=""
UPS_HOST_IN_USE=""
|
|
|
# ---------------------------------------------------------------------------
# Flag parsing. Flags taking a value use "${2:?...}" so a missing operand
# aborts with a clear message, then `shift 2`; boolean flags `shift` once.
# ---------------------------------------------------------------------------
while [[ $# -gt 0 ]]; do
  case "$1" in
    --execute)
      EXECUTE=1
      shift
      ;;
    --shutdown-mode)
      SHUTDOWN_MODE="${2:?missing shutdown mode}"
      shift 2
      ;;
    --expected-flux-branch)
      EXPECTED_FLUX_BRANCH="${2:?missing branch}"
      shift 2
      ;;
    --expected-flux-url)
      EXPECTED_FLUX_URL="${2:?missing flux url}"
      shift 2
      ;;
    --allow-flux-source-mutation)
      ALLOW_FLUX_SOURCE_MUTATION=1
      shift
      ;;
    --force-flux-url)
      FORCE_FLUX_URL="${2:?missing flux url}"
      shift 2
      ;;
    --force-flux-branch)
      FORCE_FLUX_BRANCH="${2:?missing branch}"
      shift 2
      ;;
    --skip-etcd-snapshot)
      SKIP_ETCD_SNAPSHOT=1
      shift
      ;;
    --skip-drain)
      SKIP_DRAIN=1
      shift
      ;;
    --skip-local-bootstrap)
      SKIP_LOCAL_BOOTSTRAP=1
      shift
      ;;
    --skip-harbor-bootstrap)
      SKIP_HARBOR_BOOTSTRAP=1
      shift
      ;;
    --skip-harbor-seed)
      SKIP_HARBOR_SEED=1
      shift
      ;;
    --skip-helper-prewarm)
      SKIP_HELPER_PREWARM=1
      shift
      ;;
    --ups-host)
      UPS_HOST="${2:?missing ups host}"
      shift 2
      ;;
    --ups-battery-key)
      UPS_BATTERY_KEY="${2:?missing ups key}"
      shift 2
      ;;
    --min-startup-battery)
      MIN_STARTUP_BATTERY="${2:?missing battery threshold}"
      shift 2
      ;;
    --require-ups-battery)
      REQUIRE_UPS_BATTERY=1
      shift
      ;;
    --recovery-state-file)
      RECOVERY_STATE_FILE="${2:?missing state file path}"
      shift 2
      ;;
    --replica-snapshot-file)
      REPLICA_SNAPSHOT_FILE="${2:?missing replica snapshot file path}"
      shift 2
      ;;
    --harbor-bundle-file)
      HARBOR_BUNDLE_FILE="${2:?missing bundle file path}"
      shift 2
      ;;
    --harbor-target-node)
      HARBOR_TARGET_NODE="${2:?missing harbor target node}"
      shift 2
      ;;
    --harbor-canary-node)
      HARBOR_CANARY_NODE="${2:?missing harbor canary node}"
      shift 2
      ;;
    --harbor-host-label-key)
      HARBOR_HOST_LABEL_KEY="${2:?missing harbor host label key}"
      shift 2
      ;;
    --harbor-canary-image)
      HARBOR_CANARY_IMAGE="${2:?missing canary image}"
      shift 2
      ;;
    --node-helper-image)
      NODE_HELPER_IMAGE="${2:?missing node helper image}"
      shift 2
      ;;
    --bundle-http-port)
      BUNDLE_HTTP_PORT="${2:?missing bundle http port}"
      shift 2
      ;;
    --api-wait-timeout)
      API_WAIT_TIMEOUT_SECONDS="${2:?missing api wait timeout}"
      shift 2
      ;;
    --flux-ready-timeout)
      FLUX_READY_TIMEOUT_SECONDS="${2:?missing flux ready timeout}"
      shift 2
      ;;
    --startup-checklist-timeout)
      STARTUP_CHECKLIST_TIMEOUT_SECONDS="${2:?missing startup checklist timeout}"
      shift 2
      ;;
    --startup-workload-timeout)
      STARTUP_WORKLOAD_TIMEOUT_SECONDS="${2:?missing startup workload timeout}"
      shift 2
      ;;
    --startup-stability-window)
      STARTUP_STABILITY_WINDOW_SECONDS="${2:?missing startup stability window}"
      shift 2
      ;;
    --startup-stability-timeout)
      STARTUP_STABILITY_TIMEOUT_SECONDS="${2:?missing startup stability timeout}"
      shift 2
      ;;
    --drain-timeout)
      DRAIN_TIMEOUT_SECONDS="${2:?missing drain timeout}"
      shift 2
      ;;
    --emergency-drain-timeout)
      EMERGENCY_DRAIN_TIMEOUT_SECONDS="${2:?missing emergency drain timeout}"
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      usage
      exit 1
      ;;
  esac
done
|
|
|
|
# Validate --shutdown-mode and guard the Flux-URL breakglass combination.
if [[ "${SHUTDOWN_MODE}" != "host-poweroff" && "${SHUTDOWN_MODE}" != "cluster-only" ]]; then
  echo "Invalid --shutdown-mode '${SHUTDOWN_MODE}'. Expected host-poweroff or cluster-only." >&2
  exit 1
fi

# Mutating the Flux source URL is destructive; demand the explicit opt-in flag.
if [[ -n "${FORCE_FLUX_URL}" && "${ALLOW_FLUX_SOURCE_MUTATION}" -ne 1 ]]; then
  echo "--force-flux-url requires --allow-flux-source-mutation (breakglass)." >&2
  exit 1
fi
|
|
|
|
# Abort the script with a clear message unless the named command is on PATH.
require_cmd() {
  local tool="$1"
  command -v "${tool}" >/dev/null 2>&1 && return 0
  echo "Missing required command: ${tool}" >&2
  exit 1
}
|
|
|
|
# Hard dependencies for every mode; fail fast when any is missing.
require_cmd kubectl
require_cmd bash
require_cmd base64
require_cmd curl

# Logging helpers: plain progress lines go to stdout, warnings and fatal
# errors go to stderr; die exits the whole script with status 1.
log() { echo "[cluster-power] $*"; }
warn() { echo "[cluster-power][warn] $*" >&2; }
die() { echo "[cluster-power][error] $*" >&2; exit 1; }
|
|
|
|
# Execute the given command verbatim when --execute was passed; otherwise
# just log what would have run. Arguments are preserved word-for-word.
run() {
  if [[ "${EXECUTE}" -ne 1 ]]; then
    log "DRY-RUN: $*"
    return 0
  fi
  log "EXEC: $*"
  "$@"
}
|
|
|
|
run_shell() {
  # Execute an arbitrary shell command line (all args joined) via `bash -lc`,
  # honoring dry-run mode like run().
  # NOTE(review): "$*" joins the arguments into one string that bash re-parses,
  # so interpolated values are effectively eval'd — callers must only pass
  # trusted, pre-quoted text. The -l (login shell) flag also sources profile
  # files on every invocation; confirm that is intentional.
  if [[ "${EXECUTE}" -eq 1 ]]; then
    log "EXEC: $*"
    bash -lc "$*"
  else
    log "DRY-RUN: $*"
  fi
}
|
|
|
|
# Render the kustomization at REPO_DIR/<path> and apply it to the cluster
# (dry-run mode only logs the pipeline that would have run).
apply_kustomization() {
  local rel_path="$1"
  local target="${REPO_DIR}/${rel_path}"
  local pipeline_desc="kubectl kustomize ${target} --load-restrictor=LoadRestrictionsNone | kubectl apply -f -"
  if [[ "${EXECUTE}" -ne 1 ]]; then
    log "DRY-RUN: ${pipeline_desc}"
    return 0
  fi
  log "EXEC: ${pipeline_desc}"
  kubectl kustomize "${target}" --load-restrictor=LoadRestrictionsNone | kubectl apply -f -
}
|
|
|
|
# Lowercase the input and collapse every run of characters outside
# [a-z0-9-] into a single '-' (Kubernetes-name friendly).
sanitize_name() {
  local lowered
  lowered="$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')"
  printf '%s' "${lowered}" | tr -cs 'a-z0-9-' '-'
}
|
|
|
|
# Print the directory portion of the persisted recovery-state file path.
state_dir() {
  local state_path="${RECOVERY_STATE_FILE}"
  dirname "${state_path}"
}
|
|
|
|
# Reset the three recovery-state globals to their defaults, then overlay any
# key=value pairs found in RECOVERY_STATE_FILE (missing file is fine).
load_recovery_state() {
  RECOVERY_PENDING=0
  STARTUP_ATTEMPTED_DURING_OUTAGE=0
  LAST_CHECKPOINT="none"
  if [[ ! -f "${RECOVERY_STATE_FILE}" ]]; then
    return 0
  fi
  local key value
  while IFS='=' read -r key value; do
    if [[ "${key}" == "recovery_pending" ]]; then
      RECOVERY_PENDING="${value}"
    elif [[ "${key}" == "startup_attempted" ]]; then
      STARTUP_ATTEMPTED_DURING_OUTAGE="${value}"
    elif [[ "${key}" == "last_checkpoint" ]]; then
      LAST_CHECKPOINT="${value}"
    fi
  done < "${RECOVERY_STATE_FILE}"
}
|
|
|
|
# Persist the recovery-state triple ($1=pending, $2=attempted, $3=checkpoint)
# to RECOVERY_STATE_FILE; a no-op unless --execute was given.
save_recovery_state() {
  [[ "${EXECUTE}" -eq 1 ]] || return 0
  mkdir -p "$(state_dir)"
  {
    printf 'recovery_pending=%s\n' "$1"
    printf 'startup_attempted=%s\n' "$2"
    printf 'last_checkpoint=%s\n' "$3"
  } > "${RECOVERY_STATE_FILE}"
}
|
|
|
|
# Record the named checkpoint in memory and flush the full state to disk.
mark_checkpoint() {
  local checkpoint="$1"
  LAST_CHECKPOINT="${checkpoint}"
  save_recovery_state "${RECOVERY_PENDING}" "${STARTUP_ATTEMPTED_DURING_OUTAGE}" "${LAST_CHECKPOINT}"
}
|
|
|
|
# Remove the persisted recovery-state file and reset the in-memory
# checkpoint; a no-op in dry-run mode.
clear_recovery_state() {
  if [[ "${EXECUTE}" -ne 1 ]]; then
    return 0
  fi
  rm -f "${RECOVERY_STATE_FILE}" 2>/dev/null || true
  LAST_CHECKPOINT="none"
}
|
|
|
|
# Normalize a upsc reading (e.g. "battery.charge: 85.4") to a whole-number
# percent on stdout; returns 1 when no integer can be extracted.
sanitize_battery_percent() {
  local value="$1"
  value="${value##*:}"           # drop any "key:" prefix
  value="${value//[[:space:]]/}" # strip all whitespace
  value="${value%%.*}"           # truncate any fractional part
  if [[ ! "${value}" =~ ^[0-9]+$ ]]; then
    return 1
  fi
  printf '%s' "${value}"
}
|
|
|
|
# Emit unique candidate UPS identifiers, one per line: the configured
# UPS_HOST first, then every UPS reported by `upsc -l`, each tried as
# "<name>@localhost" and as the bare name.
candidate_ups_hosts() {
  local detected candidate
  local -A emitted=()
  if [[ -n "${UPS_HOST}" ]]; then
    emitted["${UPS_HOST}"]=1
    echo "${UPS_HOST}"
  fi
  while IFS= read -r detected; do
    [[ -n "${detected}" ]] || continue
    for candidate in "${detected}@localhost" "${detected}"; do
      if [[ -z "${emitted[${candidate}]+x}" ]]; then
        emitted["${candidate}"]=1
        echo "${candidate}"
      fi
    done
  done < <(upsc -l 2>/dev/null || true)
}
|
|
|
|
read_ups_battery() {
  # Read the UPS battery percentage via upsc, trying each candidate host in
  # order until one yields a parseable number.
  # Side effect: sets UPS_HOST_IN_USE to the host that answered.
  # Outputs: integer percent on stdout.
  # Returns: 1 when upsc is unavailable or no candidate produced a reading.
  if ! command -v upsc >/dev/null 2>&1; then
    return 1
  fi
  local host raw parsed
  while IFS= read -r host; do
    raw="$(upsc "${host}" "${UPS_BATTERY_KEY}" 2>/dev/null || true)"
    [[ -n "${raw}" ]] || continue
    parsed="$(sanitize_battery_percent "${raw}" || true)"
    [[ -n "${parsed}" ]] || continue
    UPS_HOST_IN_USE="${host}"
    printf '%s' "${parsed}"
    return 0
  done < <(candidate_ups_hosts)
  return 1
}
|
|
|
|
# Gate bootstrap on UPS charge. An unreadable battery is fatal only when
# --require-ups-battery was given; otherwise it is a soft warning.
# Returns 1 when charge is below MIN_STARTUP_BATTERY.
ensure_minimum_battery_for_bootstrap() {
  local battery
  battery="$(read_ups_battery || true)"
  if [[ -z "${battery}" ]]; then
    if [[ "${REQUIRE_UPS_BATTERY}" -eq 1 ]]; then
      warn "Unable to read UPS battery status and --require-ups-battery is set."
      return 1
    fi
    warn "Unable to read UPS battery status; continuing without hard battery gating."
    return 0
  fi
  log "ups-battery=${battery}% host=${UPS_HOST_IN_USE:-${UPS_HOST}}"
  if (( battery >= MIN_STARTUP_BATTERY )); then
    return 0
  fi
  warn "UPS battery ${battery}% below minimum startup threshold ${MIN_STARTUP_BATTERY}%."
  return 1
}
|
|
|
|
# Log the current Flux GitRepository URL and branch when they are readable;
# silently skips fields the API does not return.
report_flux_source_state() {
  local src_url src_branch
  src_url="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.url}' 2>/dev/null || true)"
  src_branch="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.ref.branch}' 2>/dev/null || true)"
  if [[ -n "${src_url}" ]]; then
    log "flux-source-url=${src_url}"
  fi
  if [[ -n "${src_branch}" ]]; then
    log "flux-source-branch=${src_branch}"
  fi
}
|
|
|
|
# Return 0 iff $2 appears as a complete comma-separated element of list $1
# (padding both with commas prevents substring false-positives).
csv_has_value() {
  local padded=",$1,"
  [[ "${padded}" == *",$2,"* ]]
}
|
|
|
|
assert_flux_source_expected() {
  # Startup guard: refuse to proceed when the live Flux GitRepository URL or
  # branch differs from the expected values. Skipped entirely in dry-run.
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: skipping strict Flux source drift guard"
    return 0
  fi
  local flux_url flux_branch
  flux_url="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.url}' 2>/dev/null || true)"
  flux_branch="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.ref.branch}' 2>/dev/null || true)"
  [[ -n "${flux_url}" ]] || die "Unable to read Flux source URL from flux-system/gitrepository."
  [[ -n "${flux_branch}" ]] || die "Unable to read Flux source branch from flux-system/gitrepository."

  # URL drift is always fatal (an empty EXPECTED_FLUX_URL disables the check).
  if [[ -n "${EXPECTED_FLUX_URL}" && "${flux_url}" != "${EXPECTED_FLUX_URL}" ]]; then
    die "Flux source URL drift detected: got '${flux_url}', expected '${EXPECTED_FLUX_URL}'. Refusing startup."
  fi
  # Branch drift is tolerated when the operator explicitly forces a branch.
  if [[ -z "${FORCE_FLUX_BRANCH}" && "${flux_branch}" != "${EXPECTED_FLUX_BRANCH}" ]]; then
    die "Flux source branch drift detected: got '${flux_branch}', expected '${EXPECTED_FLUX_BRANCH}'. Use --force-flux-branch to correct."
  fi
}
|
|
|
|
# True when the named Flux kustomization is listed (comma-separated) in
# STARTUP_OPTIONAL_KUSTOMIZATIONS; an empty list means nothing is optional.
kustomization_is_optional() {
  local candidate="$1"
  if [[ -z "${STARTUP_OPTIONAL_KUSTOMIZATIONS}" ]]; then
    return 1
  fi
  csv_has_value "${STARTUP_OPTIONAL_KUSTOMIZATIONS}" "${candidate}"
}
|
|
|
|
list_not_ready_kustomizations() {
  # Emit "name|message" for every Flux Kustomization whose Ready condition is
  # not True, skipping names marked optional. Emits nothing (status 0) when
  # everything is Ready or the kubectl query fails.
  local rows line name ready message
  rows="$(kubectl -n flux-system get kustomizations.kustomize.toolkit.fluxcd.io \
    -o 'custom-columns=NAME:.metadata.name,READY:.status.conditions[?(@.type=="Ready")].status,MESSAGE:.status.conditions[?(@.type=="Ready")].message' \
    --no-headers 2>/dev/null || true)"
  [[ -n "${rows}" ]] || return 0
  while IFS= read -r line; do
    [[ -n "${line}" ]] || continue
    name="$(awk '{print $1}' <<< "${line}")"
    ready="$(awk '{print $2}' <<< "${line}")"
    # NOTE(review): the prefix stripping below assumes single-space column
    # separation, but kubectl pads custom-columns with multiple spaces, so
    # `message` may retain leading whitespace or the READY token — confirm
    # against real kubectl output.
    message="${line#${name} }"
    message="${message#${ready} }"
    if kustomization_is_optional "${name}"; then
      continue
    fi
    if [[ "${ready}" != "True" ]]; then
      printf '%s|%s\n' "${name}" "${message}"
    fi
  done <<< "${rows}"
}
|
|
|
|
# Ask Flux to reconcile everything now: stamp every Kustomization with a
# fresh reconcile.fluxcd.io/requestedAt annotation, then (when the flux CLI
# is available) force a reconcile of the git source as well.
trigger_flux_reconcile_all() {
  local now
  # Portable ISO-8601 UTC timestamp: `date --iso-8601=seconds` is a GNU
  # coreutils extension and fails on BSD/macOS date; the strftime form below
  # works everywhere. The value only needs to be fresh and unique per call.
  now="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  run kubectl -n flux-system annotate kustomizations.kustomize.toolkit.fluxcd.io --all reconcile.fluxcd.io/requestedAt="${now}" --overwrite
  if command -v flux >/dev/null 2>&1; then
    run flux reconcile source git flux-system -n flux-system --timeout=3m
  fi
}
|
|
|
|
heal_failed_flux_jobs() {
  # Delete failed Jobs owned by Flux (carrying Kustomization or HelmRelease
  # name labels) so a subsequent reconcile can recreate them — failed Jobs
  # with immutable templates otherwise wedge reconciliation.
  # Returns: 0 iff at least one Job was deleted, 1 when nothing was healed.
  local rows line ns name failed flux_owner helm_owner healed
  healed=0
  # Label keys contain dots, which must be backslash-escaped in
  # custom-columns JSONPath expressions.
  rows="$(kubectl get jobs.batch -A \
    -o custom-columns=NS:.metadata.namespace,NAME:.metadata.name,FAILED:.status.failed,FLUX_OWNER:.metadata.labels.kustomize\\.toolkit\\.fluxcd\\.io/name,HELM_OWNER:.metadata.labels.helm\\.toolkit\\.fluxcd\\.io/name \
    --no-headers 2>/dev/null || true)"
  [[ -n "${rows}" ]] || return 1

  while IFS= read -r line; do
    [[ -n "${line}" ]] || continue
    ns="$(awk '{print $1}' <<< "${line}")"
    name="$(awk '{print $2}' <<< "${line}")"
    failed="$(awk '{print $3}' <<< "${line}")"
    flux_owner="$(awk '{print $4}' <<< "${line}")"
    helm_owner="$(awk '{print $5}' <<< "${line}")"
    # "<none>" means .status.failed is unset; only act on a positive count.
    [[ "${failed}" != "<none>" ]] || continue
    [[ "${failed}" =~ ^[0-9]+$ ]] || continue
    (( failed > 0 )) || continue
    # Only touch Jobs that Flux actually manages.
    if [[ "${flux_owner}" == "<none>" && "${helm_owner}" == "<none>" ]]; then
      continue
    fi
    warn "Deleting failed Flux-managed Job ${ns}/${name} to heal immutable-template drift."
    run kubectl -n "${ns}" delete job "${name}" --ignore-not-found
    healed=1
  done <<< "${rows}"

  (( healed == 1 ))
}
|
|
|
|
wait_for_flux_kustomizations_ready() {
  # Block until every non-optional Flux Kustomization reports Ready=True.
  # While waiting, up to three automated heal attempts run when the not-ready
  # messages look like immutable-Job failures. Dies (fatal) after
  # FLUX_READY_TIMEOUT_SECONDS.
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: skipping wait for all Flux kustomizations Ready"
    return 0
  fi
  local start now not_ready immutable_hits
  start="$(date +%s)"
  immutable_hits=0
  while true; do
    not_ready="$(list_not_ready_kustomizations || true)"
    if [[ -z "${not_ready}" ]]; then
      log "flux-kustomizations=all-ready"
      return 0
    fi

    log "flux-kustomizations-not-ready:"
    while IFS= read -r line; do
      [[ -n "${line}" ]] || continue
      log "  ${line}"
    done <<< "${not_ready}"

    # Heuristic: immutable-Job errors surface in the Ready condition message.
    if grep -Eqi 'immutable|field is immutable|cannot patch.*Job|Job.*invalid' <<< "${not_ready}"; then
      if (( immutable_hits < 3 )); then
        immutable_hits=$(( immutable_hits + 1 ))
        warn "Detected immutable Job failure signal in Flux status. Attempting automated Job cleanup (${immutable_hits}/3)."
        if heal_failed_flux_jobs; then
          trigger_flux_reconcile_all
        fi
      fi
    fi

    now="$(date +%s)"
    if (( now - start >= FLUX_READY_TIMEOUT_SECONDS )); then
      die "Timed out waiting for Flux kustomizations Ready after ${FLUX_READY_TIMEOUT_SECONDS}s."
    fi
    sleep "${FLUX_READY_POLL_SECONDS}"
  done
}
|
|
|
|
default_startup_service_checklist() {
  # Built-in external service checklist, one '|'-separated row per line:
  #   name|url|allowed_status_csv|body_must_contain|body_must_not_contain|...
  # Trailing fields may be empty; parsing/defaults are handled by
  # check_startup_service_checklist_once.
  cat <<'CHECKS'
gitea|https://scm.bstein.dev/api/healthz|200|"status":"pass"||
grafana|https://metrics.bstein.dev/api/health|200|"database":"ok"||
harbor|https://registry.bstein.dev/v2/|200,401|||
CHECKS
}
|
|
|
|
list_ingress_hosts() {
  # Print every host referenced by any Ingress rule in the cluster,
  # de-duplicated, one per line; blank entries are dropped.
  kubectl get ingress -A -o jsonpath='{range .items[*]}{range .spec.rules[*]}{.host}{"\n"}{end}{end}' 2>/dev/null \
    | sed '/^[[:space:]]*$/d' \
    | sort -u
}
|
|
|
|
generated_ingress_service_checks() {
  # Emit one checklist row per discovered ingress host, skipping hosts that
  # match STARTUP_IGNORE_INGRESS_HOSTS_REGEX. Row layout matches the manual
  # checklist (name|url|statuses|body_must|body_must_not|insecure|timeout)
  # with empty body checks and insecure=0.
  local host
  while IFS= read -r host; do
    [[ -n "${host}" ]] || continue
    if [[ -n "${STARTUP_IGNORE_INGRESS_HOSTS_REGEX}" ]] && [[ "${host}" =~ ${STARTUP_IGNORE_INGRESS_HOSTS_REGEX} ]]; then
      continue
    fi
    printf 'ingress-%s|https://%s/|%s|||0|%s\n' "${host}" "${host}" "${STARTUP_INGRESS_ALLOWED_STATUSES}" "${STARTUP_INGRESS_CHECK_TIMEOUT_SECONDS}"
  done < <(list_ingress_hosts)
}
|
|
|
|
startup_service_checklist_rows() {
  # Produce the effective checklist: STARTUP_SERVICE_CHECKLIST (rows joined
  # with ';') when set, otherwise the built-in defaults; optionally followed
  # by one generated row per ingress host.
  local base
  if [[ -n "${STARTUP_SERVICE_CHECKLIST}" ]]; then
    base="$(printf '%s' "${STARTUP_SERVICE_CHECKLIST}" | tr ';' '\n')"
  else
    base="$(default_startup_service_checklist)"
  fi

  printf '%s\n' "${base}" | sed '/^[[:space:]]*$/d'
  if [[ "${STARTUP_INCLUDE_INGRESS_CHECKS}" == "1" || "${STARTUP_INCLUDE_INGRESS_CHECKS}" == "true" ]]; then
    generated_ingress_service_checks
  fi
}
|
|
|
|
# Return 0 iff HTTP status $2 appears in the comma-separated allow-list $1.
service_status_allowed() {
  local expected_csv="$1"
  local got="$2"
  local token
  # Declare the scratch array local: previously it was an undeclared global
  # that leaked each call's parsed statuses into the script's scope.
  local -a _statuses
  IFS=',' read -r -a _statuses <<< "${expected_csv}"
  for token in "${_statuses[@]}"; do
    if [[ "${token}" == "${got}" ]]; then
      return 0
    fi
  done
  return 1
}
|
|
|
|
check_startup_service_checklist_once() {
  # Run every checklist row exactly once. Row format:
  #   name|url|allowed_status_csv|body_must|body_must_not|insecure|timeout
  # A row fails when the HTTP request errors, the status is not in the allow
  # list, a required body fragment is absent, or a forbidden one is present.
  # Returns 0 only when every row passed.
  local rows row name url expected body_must body_must_not insecure timeout code rc
  local body_file failures
  failures=0
  rows="$(startup_service_checklist_rows)"
  while IFS= read -r row; do
    [[ -n "${row}" ]] || continue
    IFS='|' read -r name url expected body_must body_must_not insecure timeout <<< "${row}"
    # Minimum viable row: name, url and allowed statuses.
    [[ -n "${name}" && -n "${url}" && -n "${expected}" ]] || continue
    [[ -n "${insecure}" ]] || insecure=0
    [[ -n "${timeout}" ]] || timeout="${STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS}"
    body_file="$(mktemp)"
    rc=0
    if [[ "${insecure}" == "1" || "${insecure}" == "true" ]]; then
      # -k tolerates invalid/self-signed TLS for this endpoint only.
      code="$(curl -ksS --max-time "${timeout}" -o "${body_file}" -w '%{http_code}' "${url}" || rc=$?)"
    else
      code="$(curl -sS --max-time "${timeout}" -o "${body_file}" -w '%{http_code}' "${url}" || rc=$?)"
    fi
    if (( rc != 0 )); then
      warn "startup-check ${name}: request failed (rc=${rc}) url=${url}"
      failures=1
      rm -f "${body_file}"
      continue
    fi
    if ! service_status_allowed "${expected}" "${code}"; then
      warn "startup-check ${name}: expected status ${expected}, got ${code} url=${url}"
      failures=1
      rm -f "${body_file}"
      continue
    fi
    # Fixed-string body matches; `--` protects fragments starting with '-'.
    if [[ -n "${body_must}" ]] && ! grep -Fq -- "${body_must}" "${body_file}"; then
      warn "startup-check ${name}: missing required body fragment '${body_must}'"
      failures=1
      rm -f "${body_file}"
      continue
    fi
    if [[ -n "${body_must_not}" ]] && grep -Fq -- "${body_must_not}" "${body_file}"; then
      warn "startup-check ${name}: forbidden body fragment '${body_must_not}' present"
      failures=1
      rm -f "${body_file}"
      continue
    fi
    rm -f "${body_file}"
  done <<< "${rows}"
  (( failures == 0 ))
}
|
|
|
|
wait_for_startup_service_checklist() {
  # Poll until both the external service checklist passes and no workload is
  # reported unhealthy, or die after STARTUP_CHECKLIST_TIMEOUT_SECONDS.
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: skipping startup external service checklist wait"
    return 0
  fi
  local start now checklist_ok workloads_ok
  start="$(date +%s)"
  while true; do
    checklist_ok=0
    workloads_ok=0
    if check_startup_service_checklist_once; then
      checklist_ok=1
    fi
    # Any non-blank line from list_unhealthy_workloads means not ready yet.
    if list_unhealthy_workloads | sed '/^[[:space:]]*$/d' | grep -q .; then
      workloads_ok=0
    else
      workloads_ok=1
    fi
    if (( checklist_ok == 1 && workloads_ok == 1 )); then
      log "startup-checklist=all-passed"
      return 0
    fi
    if (( workloads_ok == 0 )); then
      warn "startup-checklist: workloads are not fully ready yet."
    fi
    now="$(date +%s)"
    if (( now - start >= STARTUP_CHECKLIST_TIMEOUT_SECONDS )); then
      die "Timed out waiting for startup external checklist after ${STARTUP_CHECKLIST_TIMEOUT_SECONDS}s."
    fi
    sleep "${STARTUP_CHECKLIST_POLL_SECONDS}"
  done
}
|
|
|
|
collect_unstable_pods() {
  # List pods in known-bad states as "namespace/pod|STATUS" lines. In
  # `kubectl get pods -A` output $1 is the namespace, $2 the name, and $4 the
  # STATUS column. Pods matching STARTUP_IGNORE_PODS_REGEX are filtered out.
  # Output carries no trailing newline (printf '%s').
  local rows
  rows="$(kubectl get pods -A --no-headers 2>/dev/null \
    | awk '$4 ~ /(CrashLoopBackOff|ImagePullBackOff|ErrImagePull|CreateContainerConfigError|RunContainerError|InvalidImageName)/ {print $1 "/" $2 "|" $4}' || true)"
  if [[ -n "${STARTUP_IGNORE_PODS_REGEX}" ]]; then
    rows="$(printf '%s\n' "${rows}" | grep -Ev "${STARTUP_IGNORE_PODS_REGEX}" || true)"
  fi
  printf '%s' "${rows}"
}
|
|
|
|
wait_for_startup_stability_window() {
  # Require STARTUP_STABILITY_WINDOW_SECONDS of continuous health: Flux
  # kustomizations Ready, no unstable pods, external checklist passing, and
  # all workloads ready. Any unhealthy sample resets the window; exceeding
  # STARTUP_STABILITY_TIMEOUT_SECONDS overall is fatal.
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: skipping startup stability window"
    return 0
  fi
  local hard_deadline stable_since now unstable pods not_ready unhealthy_workloads
  stable_since="$(date +%s)"
  hard_deadline=$(( stable_since + STARTUP_STABILITY_TIMEOUT_SECONDS ))
  while true; do
    unstable=0
    not_ready="$(list_not_ready_kustomizations || true)"
    if [[ -n "${not_ready}" ]]; then
      unstable=1
      warn "stability-window: Flux kustomizations not ready."
    fi
    pods="$(collect_unstable_pods || true)"
    if [[ -n "${pods}" ]]; then
      unstable=1
      warn "stability-window: unstable pods detected."
      while IFS= read -r line; do
        [[ -n "${line}" ]] || continue
        warn "  ${line}"
      done <<< "${pods}"
    fi
    if ! check_startup_service_checklist_once; then
      unstable=1
      warn "stability-window: external service checklist failed."
    fi
    unhealthy_workloads="$(list_unhealthy_workloads || true)"
    if [[ -n "${unhealthy_workloads}" ]]; then
      unstable=1
      warn "stability-window: workloads not fully ready."
      while IFS= read -r line; do
        [[ -n "${line}" ]] || continue
        warn "  ${line}"
      done <<< "${unhealthy_workloads}"
    fi

    now="$(date +%s)"
    if (( unstable == 0 )); then
      if (( now - stable_since >= STARTUP_STABILITY_WINDOW_SECONDS )); then
        log "startup-stability-window=passed (${STARTUP_STABILITY_WINDOW_SECONDS}s)"
        return 0
      fi
    else
      # Unhealthy sample: restart the continuous-health clock.
      stable_since="${now}"
    fi

    if (( now >= hard_deadline )); then
      die "Timed out waiting for startup stability window (${STARTUP_STABILITY_WINDOW_SECONDS}s healthy) within ${STARTUP_STABILITY_TIMEOUT_SECONDS}s."
    fi
    sleep "${STARTUP_STABILITY_POLL_SECONDS}"
  done
}
|
|
|
|
# Poll the Kubernetes API (5s request timeout, 5s between attempts) until it
# answers or the API_WAIT_TIMEOUT_SECONDS budget is spent; returns 1 on
# timeout. Dry-run mode skips the live wait entirely.
wait_for_api() {
  local attempts=$(( API_WAIT_TIMEOUT_SECONDS / 5 ))
  (( attempts >= 1 )) || attempts=1
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: skipping live Kubernetes API wait"
    return 0
  fi
  local attempt
  for (( attempt = 1; attempt <= attempts; attempt++ )); do
    if kubectl version --request-timeout=5s >/dev/null 2>&1; then
      return 0
    fi
    sleep 5
  done
  return 1
}
|
|
|
|
patch_flux_suspend_all() {
  # Merge-patch spec.suspend=$1 ("true"/"false") onto every Flux
  # Kustomization in flux-system and every HelmRelease in all namespaces.
  local value="$1"
  local patch
  patch=$(printf '{"spec":{"suspend":%s}}' "${value}")

  local ks_list hr_list
  ks_list="$(kubectl -n flux-system get kustomizations.kustomize.toolkit.fluxcd.io -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' || true)"
  hr_list="$(kubectl get helmreleases.helm.toolkit.fluxcd.io -A -o jsonpath='{range .items[*]}{.metadata.namespace}{"/"}{.metadata.name}{"\n"}{end}' || true)"

  while IFS= read -r k; do
    [[ -z "${k}" ]] && continue
    run kubectl -n flux-system patch kustomization "${k}" --type=merge -p "${patch}"
  done <<< "${ks_list}"

  while IFS= read -r hr; do
    [[ -z "${hr}" ]] && continue
    # Split the "namespace/name" pair emitted above.
    local ns="${hr%%/*}"
    local name="${hr##*/}"
    run kubectl -n "${ns}" patch helmrelease "${name}" --type=merge -p "${patch}"
  done <<< "${hr_list}"
}
|
|
|
|
# True when namespace $1 matches SHUTDOWN_NAMESPACE_EXCLUDES_REGEX, i.e. its
# workloads must be left alone during shutdown.
shutdown_namespace_excluded() {
  local namespace="$1"
  if [[ "${namespace}" =~ ${SHUTDOWN_NAMESPACE_EXCLUDES_REGEX} ]]; then
    return 0
  fi
  return 1
}
|
|
|
|
# True when namespace $1 matches STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX,
# i.e. its workloads are not considered by startup readiness checks.
startup_workload_namespace_excluded() {
  local namespace="$1"
  if [[ "${namespace}" =~ ${STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX} ]]; then
    return 0
  fi
  return 1
}
|
|
|
|
best_effort_scale_down_apps() {
  # Scale every deployment and statefulset to zero in all namespaces except
  # those matching SHUTDOWN_NAMESPACE_EXCLUDES_REGEX. Each scale command is
  # best-effort ("|| true") so a single failure does not stop the sweep.
  local ns_list ns
  ns_list="$(kubectl get ns -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')"
  while IFS= read -r ns; do
    [[ -z "${ns}" ]] && continue
    if shutdown_namespace_excluded "${ns}"; then
      continue
    fi
    # NOTE(review): ${ns} is interpolated into a shell string for run_shell;
    # safe only because Kubernetes namespace names are DNS labels with no
    # shell metacharacters.
    run_shell "kubectl -n ${ns} scale deployment --all --replicas=0 || true"
    run_shell "kubectl -n ${ns} scale statefulset --all --replicas=0 || true"
  done <<< "${ns_list}"
}
|
|
|
|
save_workload_replica_snapshot() {
  # Record desired replica counts for deployments and statefulsets (replicas
  # > 0, excluding shutdown-excluded namespaces) to REPLICA_SNAPSHOT_FILE as
  # TSV: namespace<TAB>kind<TAB>name<TAB>replicas.
  local rows line ns kind name replicas
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: save workload replica snapshot to ${REPLICA_SNAPSHOT_FILE}"
    return 0
  fi
  rows="$(
    {
      kubectl get deployment -A -o jsonpath='{range .items[*]}{.metadata.namespace}{"\tdeployment\t"}{.metadata.name}{"\t"}{.spec.replicas}{"\n"}{end}' 2>/dev/null || true
      kubectl get statefulset -A -o jsonpath='{range .items[*]}{.metadata.namespace}{"\tstatefulset\t"}{.metadata.name}{"\t"}{.spec.replicas}{"\n"}{end}' 2>/dev/null || true
    } | sed '/^[[:space:]]*$/d'
  )"
  mkdir -p "$(dirname "${REPLICA_SNAPSHOT_FILE}")"
  # Truncate before the append loop below.
  : > "${REPLICA_SNAPSHOT_FILE}"
  while IFS=$'\t' read -r ns kind name replicas; do
    [[ -n "${ns}" && -n "${kind}" && -n "${name}" && -n "${replicas}" ]] || continue
    shutdown_namespace_excluded "${ns}" && continue
    [[ "${replicas}" =~ ^[0-9]+$ ]] || continue
    (( replicas > 0 )) || continue
    printf '%s\t%s\t%s\t%s\n' "${ns}" "${kind}" "${name}" "${replicas}" >> "${REPLICA_SNAPSHOT_FILE}"
  done <<< "${rows}"
  log "replica-snapshot-file=${REPLICA_SNAPSHOT_FILE}"
  log "replica-snapshot-count=$(wc -l < "${REPLICA_SNAPSHOT_FILE}" | tr -d ' ')"
}
|
|
|
|
restore_workload_replica_snapshot() {
  # Scale deployments/statefulsets back to the replica counts recorded in
  # REPLICA_SNAPSHOT_FILE. Only runs when a recovery is actually pending and
  # the snapshot exists; entries already at the desired count are skipped.
  local ns kind name desired current
  if [[ "${RECOVERY_PENDING}" -ne 1 ]]; then
    log "Skipping replica restore because recovery_pending=0."
    return 0
  fi
  if [[ ! -f "${REPLICA_SNAPSHOT_FILE}" ]]; then
    warn "Replica snapshot file not found at ${REPLICA_SNAPSHOT_FILE}; skipping replica restore."
    return 0
  fi
  while IFS=$'\t' read -r ns kind name desired; do
    [[ -n "${ns}" && -n "${kind}" && -n "${name}" && -n "${desired}" ]] || continue
    [[ "${desired}" =~ ^[0-9]+$ ]] || continue
    (( desired > 0 )) || continue
    current="$(kubectl -n "${ns}" get "${kind}" "${name}" -o jsonpath='{.spec.replicas}' 2>/dev/null || true)"
    # Empty read: the object no longer exists — nothing to scale.
    [[ -n "${current}" ]] || continue
    [[ "${current}" =~ ^[0-9]+$ ]] || current=0
    if (( current == desired )); then
      continue
    fi
    run kubectl -n "${ns}" scale "${kind}" "${name}" --replicas="${desired}"
  done < "${REPLICA_SNAPSHOT_FILE}"
  mark_checkpoint startup_replicas_restored
}
|
|
|
|
list_unhealthy_workloads() {
  # Print one line per Deployment/StatefulSet whose ready (and, for
  # Deployments, available) replica count is below the desired count,
  # skipping excluded namespaces and workloads matched by
  # STARTUP_IGNORE_WORKLOADS_REGEX.
  # Output: "<ns>/<kind>/<name>|ready=... [available=...] desired=...".
  local rows line ns name desired ready available
  rows="$(kubectl get deployment -A -o custom-columns=NS:.metadata.namespace,NAME:.metadata.name,DESIRED:.spec.replicas,READY:.status.readyReplicas,AVAILABLE:.status.availableReplicas --no-headers 2>/dev/null || true)"
  while IFS= read -r line; do
    [[ -n "${line}" ]] || continue
    # Split all whitespace-delimited columns in one pass instead of five awk
    # invocations per row.
    read -r ns name desired ready available <<< "${line}"
    startup_workload_namespace_excluded "${ns}" && continue
    [[ -n "${STARTUP_IGNORE_WORKLOADS_REGEX}" && "${ns}/${name}" =~ ${STARTUP_IGNORE_WORKLOADS_REGEX} ]] && continue
    # Non-numeric column values (e.g. "<none>") are treated as zero.
    [[ "${desired}" =~ ^[0-9]+$ ]] || desired=0
    [[ "${ready}" =~ ^[0-9]+$ ]] || ready=0
    [[ "${available}" =~ ^[0-9]+$ ]] || available=0
    (( desired > 0 )) || continue
    if (( ready < desired || available < desired )); then
      printf '%s/deployment/%s|ready=%s available=%s desired=%s\n' "${ns}" "${name}" "${ready}" "${available}" "${desired}"
    fi
  done <<< "${rows}"

  rows="$(kubectl get statefulset -A -o custom-columns=NS:.metadata.namespace,NAME:.metadata.name,DESIRED:.spec.replicas,READY:.status.readyReplicas --no-headers 2>/dev/null || true)"
  while IFS= read -r line; do
    [[ -n "${line}" ]] || continue
    read -r ns name desired ready <<< "${line}"
    startup_workload_namespace_excluded "${ns}" && continue
    [[ -n "${STARTUP_IGNORE_WORKLOADS_REGEX}" && "${ns}/${name}" =~ ${STARTUP_IGNORE_WORKLOADS_REGEX} ]] && continue
    [[ "${desired}" =~ ^[0-9]+$ ]] || desired=0
    [[ "${ready}" =~ ^[0-9]+$ ]] || ready=0
    (( desired > 0 )) || continue
    if (( ready < desired )); then
      printf '%s/statefulset/%s|ready=%s desired=%s\n' "${ns}" "${name}" "${ready}" "${desired}"
    fi
  done <<< "${rows}"
}
|
|
|
|
wait_for_startup_workloads_ready() {
  # Block until list_unhealthy_workloads reports nothing, polling every
  # STARTUP_WORKLOAD_POLL_SECONDS and dying via die() after
  # STARTUP_WORKLOAD_TIMEOUT_SECONDS. No-op in dry-run mode (EXECUTE=0).
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: skipping startup workload readiness checks"
    return 0
  fi
  local start now unhealthy line  # 'line' was previously leaking to global scope
  start="$(date +%s)"
  while true; do
    unhealthy="$(list_unhealthy_workloads || true)"
    if [[ -z "${unhealthy}" ]]; then
      log "startup-workloads=all-ready"
      return 0
    fi
    warn "startup-workloads-not-ready:"
    while IFS= read -r line; do
      [[ -n "${line}" ]] || continue
      warn "  ${line}"
    done <<< "${unhealthy}"
    now="$(date +%s)"
    if (( now - start >= STARTUP_WORKLOAD_TIMEOUT_SECONDS )); then
      die "Timed out waiting for startup workloads Ready after ${STARTUP_WORKLOAD_TIMEOUT_SECONDS}s."
    fi
    sleep "${STARTUP_WORKLOAD_POLL_SECONDS}"
  done
}
|
|
|
|
discover_workers_csv() {
  # Emit a comma-separated list of Ready worker nodes: every node carrying
  # neither the control-plane nor the master role label whose Ready
  # condition is True.
  local node_table
  node_table="$(kubectl get nodes \
    -o 'custom-columns=NAME:.metadata.name,CP:.metadata.labels.node-role\.kubernetes\.io/control-plane,MASTER:.metadata.labels.node-role\.kubernetes\.io/master,READY:.status.conditions[?(@.type=="Ready")].status' \
    --no-headers)"
  awk '$2=="<none>" && $3=="<none>" && $4=="True" {print $1}' <<< "${node_table}" \
    | paste -sd, -
}
|
|
|
|
node_is_ready() {
  # Succeed iff the named node exists and its Ready condition reports "True".
  # An empty node name fails immediately without calling kubectl.
  local node="$1" status
  [[ -n "${node}" ]] || return 1
  status="$(kubectl get node "${node}" -o jsonpath='{range .status.conditions[?(@.type=="Ready")]}{.status}{end}' 2>/dev/null || true)"
  [[ "${status}" == "True" ]]
}
|
|
|
|
select_ready_arm64_worker() {
  # Pick a Ready arm64 worker node, preferring hardware=rpi5, then
  # hardware=rpi4, then any Ready arm64 worker. Prints the chosen node name
  # and returns 0; returns 1 when no candidate exists.
  local rows hw_pref candidate
  rows="$(kubectl get nodes -o 'custom-columns=NAME:.metadata.name,ARCH:.metadata.labels.kubernetes\.io/arch,WORKER:.metadata.labels.node-role\.kubernetes\.io/worker,HARDWARE:.metadata.labels.hardware,READY:.status.conditions[?(@.type=="Ready")].status' --no-headers 2>/dev/null || true)"
  [[ -n "${rows}" ]] || return 1
  # '*' is the wildcard tier: any hardware label is acceptable.
  for hw_pref in rpi5 rpi4 '*'; do
    candidate="$(printf '%s\n' "${rows}" \
      | awk -v hw="${hw_pref}" '$2=="arm64" && $3=="true" && (hw=="*" || $4==hw) && $5=="True" {print $1; exit}')"
    if [[ -n "${candidate}" ]]; then
      printf '%s' "${candidate}"
      return 0
    fi
  done
  return 1
}
|
|
|
|
ensure_harbor_target_node() {
  # Make sure the global HARBOR_TARGET_NODE names a Ready node; if it does
  # not (or is empty), fall back to a Ready arm64 worker chosen by
  # select_ready_arm64_worker and mutate the global so later stages
  # (labeling, seeding, canary) agree on the target. Dies when no Ready
  # arm64 worker exists at all.
  if node_is_ready "${HARBOR_TARGET_NODE}"; then
    return 0
  fi
  local fallback
  fallback="$(select_ready_arm64_worker || true)"
  [[ -n "${fallback}" ]] || die "No Ready arm64 worker available for Harbor bootstrap target."
  if [[ -n "${HARBOR_TARGET_NODE}" ]]; then
    warn "Configured harbor target node '${HARBOR_TARGET_NODE}' is not Ready; using '${fallback}' instead."
  else
    log "harbor-target-node auto-selected: ${fallback}"
  fi
  HARBOR_TARGET_NODE="${fallback}"
}
|
|
|
|
ensure_harbor_host_label() {
  # Ensure exactly one node — HARBOR_TARGET_NODE — carries the
  # "${HARBOR_HOST_LABEL_KEY}=true" label: strip the label from every other
  # labeled node, then (re)apply it to the target with --overwrite.
  [[ -n "${HARBOR_TARGET_NODE}" ]] || die "Harbor target node is not set."
  local labeled node
  labeled="$(kubectl get nodes -l "${HARBOR_HOST_LABEL_KEY}=true" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null || true)"
  while IFS= read -r node; do
    [[ -z "${node}" ]] && continue
    [[ "${node}" == "${HARBOR_TARGET_NODE}" ]] && continue
    # A trailing '-' on the label key removes the label.
    run kubectl label node "${node}" "${HARBOR_HOST_LABEL_KEY}-"
  done <<< "${labeled}"
  run kubectl label node "${HARBOR_TARGET_NODE}" "${HARBOR_HOST_LABEL_KEY}=true" --overwrite
}
|
|
|
|
as_array_from_csv() {
  # Split a comma-separated string ($1) into the array variable named by $2.
  # Rewritten with a nameref (bash 4.3+) instead of eval, and the scratch
  # array is now local so it no longer leaks into the caller's scope. The
  # previous manual IFS save/restore was unnecessary: an assignment
  # prefixing 'read' only affects that one command.
  local csv="$1"
  local -n _csv_out_ref="$2"
  local -a _csv_fields=()
  IFS=',' read -r -a _csv_fields <<< "${csv}"
  _csv_out_ref=( "${_csv_fields[@]}" )
}
|
|
|
|
best_effort_drain_workers() {
  # Cordon and drain each worker with escalating force: a gentle drain
  # first, then --force, and finally --force --disable-eviction (which
  # bypasses PodDisruptionBudgets). The final attempt appends "|| true" so
  # shutdown always proceeds regardless of drain outcome.
  local timeout_seconds="$1"
  shift || true
  local workers=("$@")
  local node
  for node in "${workers[@]}"; do
    [[ -z "${node}" ]] && continue
    run kubectl cordon "${node}"
    if run_shell "kubectl drain ${node} --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=${timeout_seconds}s"; then
      continue
    fi
    warn "Gentle drain timed out for ${node}; retrying with --force."
    if run_shell "kubectl drain ${node} --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=${timeout_seconds}s --force"; then
      continue
    fi
    warn "Force drain timed out for ${node}; final attempt with --disable-eviction."
    run_shell "kubectl drain ${node} --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=${timeout_seconds}s --force --disable-eviction || true"
  done
}
|
|
|
|
wait_for_rollout() {
  # Wait for <kind>/<name> in <namespace> to finish rolling out within
  # <timeout>; in dry-run mode (EXECUTE=0) only log the intended command.
  local namespace="$1" kind="$2" name="$3" timeout="$4"
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: kubectl -n ${namespace} rollout status ${kind}/${name} --timeout=${timeout}"
    return 0
  fi
  kubectl -n "${namespace}" rollout status "${kind}/${name}" --timeout="${timeout}"
}
|
|
|
|
check_ingress_stack() {
  # Verify the traefik IngressClass exists, then wait for the traefik
  # Deployment rollout (up to 5 minutes).
  kubectl get ingressclass traefik >/dev/null
  wait_for_rollout traefik deployment traefik 5m
}
|
|
|
|
check_longhorn_stack() {
  # Wait for Longhorn's node-manager DaemonSet and UI Deployment rollouts
  # (10 minutes each).
  wait_for_rollout longhorn-system daemonset longhorn-manager 10m
  wait_for_rollout longhorn-system deployment longhorn-ui 10m
}
|
|
|
|
check_vault_stack() {
  # Wait for the vault StatefulSet rollout, then (only when actually
  # executing) probe the Vault API from inside vault-0.
  # NOTE(review): 'vault status' exits non-zero when Vault is sealed —
  # presumably a sealed Vault is meant to fail this check; confirm.
  wait_for_rollout vault statefulset vault 10m
  if [[ "${EXECUTE}" -eq 1 ]]; then
    kubectl -n vault exec vault-0 -- sh -ceu 'VAULT_ADDR=http://127.0.0.1:8200 vault status >/dev/null'
  fi
}
|
|
|
|
check_postgres_stack() {
  # Wait for the postgres StatefulSet rollout, then (only when actually
  # executing) verify the server accepts connections via pg_isready run
  # inside postgres-0.
  wait_for_rollout postgres statefulset postgres 10m
  if [[ "${EXECUTE}" -eq 1 ]]; then
    kubectl -n postgres exec postgres-0 -c postgres -- sh -ceu 'pg_isready -h 127.0.0.1 -p 5432 >/dev/null'
  fi
}
|
|
|
|
check_gitea_stack() {
  # Wait for the gitea Deployment rollout (up to 10 minutes).
  wait_for_rollout gitea deployment gitea 10m
}
|
|
|
|
check_harbor_stack() {
  # Wait for every Harbor component rollout, redis first, 10 minutes each.
  wait_for_rollout harbor statefulset harbor-redis 10m
  wait_for_rollout harbor deployment harbor-core 10m
  wait_for_rollout harbor deployment harbor-jobservice 10m
  wait_for_rollout harbor deployment harbor-portal 10m
  wait_for_rollout harbor deployment harbor-registry 10m
}
|
|
|
|
check_harbor_endpoint() {
  # Probe the Harbor registry v2 endpoint. HTTP 200 and 401 (auth required)
  # both count as healthy; anything else is fatal. Dry-run only logs the
  # probe that would be issued.
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: curl -ksS -o /dev/null -w '%{http_code}' https://registry.bstein.dev/v2/"
    return 0
  fi
  local http_code
  http_code="$(curl -ksS -o /dev/null -w '%{http_code}' https://registry.bstein.dev/v2/ || true)"
  if [[ "${http_code}" == "200" || "${http_code}" == "401" ]]; then
    log "harbor-endpoint=http-${http_code}"
  else
    die "Harbor endpoint check failed with HTTP ${http_code:-unknown}"
  fi
}
|
|
|
|
wait_for_pod_phase() {
  # Poll a pod every 2s until it reaches the expected phase (return 0),
  # reaches phase Failed (return 1), or the timeout elapses (return 1).
  local namespace="$1" pod="$2" expected_phase="$3" timeout_seconds="$4"
  local deadline phase
  deadline=$(( $(date +%s) + timeout_seconds ))
  while :; do
    phase="$(kubectl -n "${namespace}" get pod "${pod}" -o jsonpath='{.status.phase}' 2>/dev/null || true)"
    if [[ "${phase}" == "${expected_phase}" ]]; then
      return 0
    fi
    if [[ "${phase}" == "Failed" ]]; then
      return 1
    fi
    if (( $(date +%s) >= deadline )); then
      return 1
    fi
    sleep 2
  done
}
|
|
|
|
harbor_is_ready() {
  # Harbor counts as ready when its core Deployments all exist and the
  # registry v2 endpoint answers HTTP 200 or 401 (auth challenge).
  if ! kubectl -n harbor get deploy harbor-core harbor-jobservice harbor-portal harbor-registry >/dev/null 2>&1; then
    return 1
  fi
  local http_code
  http_code="$(curl -ksS -o /dev/null -w '%{http_code}' https://registry.bstein.dev/v2/ || true)"
  case "${http_code}" in
    200|401) return 0 ;;
    *) return 1 ;;
  esac
}
|
|
|
|
run_harbor_pull_canary() {
  # Prove end-to-end Harbor pulls work by running a short-lived pod that
  # pulls HARBOR_CANARY_IMAGE (imagePullPolicy: Always) on a Ready node and
  # prints "harbor-canary-ok". Returns 1 (after dumping describe/logs) if
  # the pod does not reach phase Succeeded within 180s.
  local pod="ananke-harbor-canary"
  local canary_node="${HARBOR_CANARY_NODE}"
  if ! node_is_ready "${canary_node}"; then
    # Fall back to the Harbor target node; this mutates both the
    # HARBOR_TARGET_NODE (via ensure_harbor_target_node) and
    # HARBOR_CANARY_NODE globals.
    ensure_harbor_target_node
    canary_node="${HARBOR_TARGET_NODE}"
    if [[ -n "${HARBOR_CANARY_NODE}" ]]; then
      warn "Configured harbor canary node '${HARBOR_CANARY_NODE}' is not Ready; using '${canary_node}'."
    fi
    HARBOR_CANARY_NODE="${canary_node}"
  fi
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: create Harbor pull canary pod with ${HARBOR_CANARY_IMAGE} on ${canary_node}"
    return 0
  fi
  # Remove any stale canary pod left over from a previous run (best effort).
  timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" delete pod "${pod}" --ignore-not-found --wait=false >/dev/null 2>&1 || true
  cat <<CANARY | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: ${pod}
  namespace: ${NODE_HELPER_NAMESPACE}
spec:
  nodeName: ${canary_node}
  restartPolicy: Never
  imagePullSecrets:
    - name: ${REGISTRY_PULL_SECRET}
  tolerations:
    - operator: Exists
  containers:
    - name: canary
      image: ${HARBOR_CANARY_IMAGE}
      imagePullPolicy: Always
      command: ["sh", "-ceu", "echo harbor-canary-ok"]
CANARY
  if ! wait_for_pod_phase "${NODE_HELPER_NAMESPACE}" "${pod}" Succeeded 180; then
    kubectl -n "${NODE_HELPER_NAMESPACE}" describe pod "${pod}" >&2 || true
    timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" logs "${pod}" >&2 || true
    timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" delete pod "${pod}" --ignore-not-found --wait=false >/dev/null 2>&1 || true
    return 1
  fi
  timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" logs "${pod}" || true
  timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" delete pod "${pod}" --ignore-not-found --wait=false >/dev/null 2>&1 || true
}
|
|
|
|
run_helper_pod() {
  # Run an arbitrary script on a specific node via a one-shot privileged
  # pod (hostNetwork/hostPID) built from NODE_HELPER_IMAGE. The script is
  # shipped base64-encoded so it survives YAML and shell quoting, decoded
  # in-pod to /tmp/ananke-step.sh, and executed. Pod logs are printed and
  # the pod is deleted best-effort. Returns non-zero if the pod does not
  # reach phase Succeeded within timeout_seconds.
  local node="$1"
  local purpose="$2"
  local timeout_seconds="$3"
  local script_content="$4"
  local pod="ananke-$(sanitize_name "${purpose}")-$(date +%H%M%S)"
  local encoded_script
  encoded_script="$(printf '%s' "${script_content}" | base64 -w0)"

  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: helper pod ${pod} on ${node} for ${purpose}"
    return 0
  fi

  cat <<POD | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: ${pod}
  namespace: ${NODE_HELPER_NAMESPACE}
spec:
  nodeName: ${node}
  restartPolicy: Never
  serviceAccountName: ${NODE_HELPER_SERVICE_ACCOUNT}
  imagePullSecrets:
    - name: ${REGISTRY_PULL_SECRET}
  hostNetwork: true
  hostPID: true
  tolerations:
    - operator: Exists
  containers:
    - name: helper
      image: ${NODE_HELPER_IMAGE}
      imagePullPolicy: IfNotPresent
      securityContext:
        privileged: true
      command: ["/bin/bash", "-ceu"]
      args:
        - |
          printf '%s' '${encoded_script}' | base64 -d >/tmp/ananke-step.sh
          chmod +x /tmp/ananke-step.sh
          /tmp/ananke-step.sh
POD

  if ! wait_for_pod_phase "${NODE_HELPER_NAMESPACE}" "${pod}" Succeeded "${timeout_seconds}"; then
    kubectl -n "${NODE_HELPER_NAMESPACE}" describe pod "${pod}" >&2 || true
    timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" logs "${pod}" >&2 || true
    timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" delete pod "${pod}" --ignore-not-found --wait=false >/dev/null 2>&1 || true
    return 1
  fi
  timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" logs "${pod}" || true
  timeout 20 kubectl -n "${NODE_HELPER_NAMESPACE}" delete pod "${pod}" --ignore-not-found --wait=false >/dev/null 2>&1 || true
}
|
|
|
|
run_host_command_via_helper() {
  # Execute a command on <node>'s host by launching a one-off helper pod
  # that nsenter's into PID 1's namespaces. The command is base64-encoded
  # so it passes through the pod-spec and the in-pod shell unmangled; the
  # shell-escaped (\$) pieces below expand inside the helper pod, not here.
  local node="$1"
  local purpose="$2"
  local timeout_seconds="$3"
  local host_command="$4"
  local encoded_command
  encoded_command="$(printf '%s' "${host_command}" | base64 -w0)"
  local script_content
  script_content=$(cat <<SCRIPT
set -euo pipefail
HOST_COMMAND="\$(printf '%s' '${encoded_command}' | base64 -d)"
nsenter --target 1 --mount --uts --ipc --net --pid /bin/sh -ceu "\${HOST_COMMAND}"
SCRIPT
)
  run_helper_pod "${node}" "${purpose}" "${timeout_seconds}" "${script_content}"
}
|
|
|
|
run_host_command_via_prewarm_pod() {
  # Run a host command on <node> by exec'ing into that node's prewarm
  # DaemonSet pod and nsenter'ing into PID 1's namespaces. Returns 1 when
  # no prewarm pod is found on the node so callers can fall back to a
  # one-off helper pod. The command travels base64-encoded to survive the
  # nested quoting layers.
  local node="$1"
  local host_command="$2"
  local pod encoded_command
  pod="$(kubectl -n "${NODE_HELPER_NAMESPACE}" get pods -l app="${NODE_HELPER_PREWARM_DS}" --field-selector "spec.nodeName=${node}" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)"
  if [[ -z "${pod}" ]]; then
    return 1
  fi
  encoded_command="$(printf '%s' "${host_command}" | base64 -w0)"
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: helper exec via ${pod} on ${node}"
    return 0
  fi
  run kubectl -n "${NODE_HELPER_NAMESPACE}" exec "${pod}" -- /bin/bash -ceu "HOST_COMMAND=\$(printf '%s' '${encoded_command}' | base64 -d); nsenter --target 1 --mount --uts --ipc --net --pid /bin/sh -ceu \"\${HOST_COMMAND}\""
}
|
|
|
|
schedule_host_shutdown_via_helper() {
  # Schedule, via a systemd-run timer on the host, a delayed stop of the
  # given k3s service followed by host poweroff (both best-effort on the
  # host side). Prefers exec through the prewarm DaemonSet pod and falls
  # back to launching a one-off helper pod.
  local node="$1"
  local service_name="$2"
  local delay_seconds="$3"
  local host_command
  host_command="/usr/bin/systemd-run --unit ananke-shutdown-${service_name} --on-active=${delay_seconds}s /bin/sh -lc '/usr/bin/systemctl stop ${service_name} || true; /usr/bin/systemctl poweroff || true'"
  if run_host_command_via_prewarm_pod "${node}" "${host_command}"; then
    return 0
  fi
  run_host_command_via_helper "${node}" "shutdown-${node}-${service_name}" 120 "${host_command}"
}
|
|
|
|
schedule_host_service_stop_via_helper() {
  # Like schedule_host_shutdown_via_helper, but only stops the service —
  # the host stays powered on (used by SHUTDOWN_MODE=cluster-only).
  local node="$1"
  local service_name="$2"
  local delay_seconds="$3"
  local host_command
  host_command="/usr/bin/systemd-run --unit ananke-stop-${service_name} --on-active=${delay_seconds}s /bin/sh -lc '/usr/bin/systemctl stop ${service_name} || true'"
  if run_host_command_via_prewarm_pod "${node}" "${host_command}"; then
    return 0
  fi
  run_host_command_via_helper "${node}" "stop-${node}-${service_name}" 120 "${host_command}"
}
|
|
|
|
prewarm_node_helper_image() {
  # Pull NODE_HELPER_IMAGE onto every Ready node ahead of time by running a
  # short-lived DaemonSet (imagePullPolicy: Always) so later helper pods and
  # shutdown execs do not depend on the registry being reachable. Waits up
  # to ~180s (90 x 2s) for desired==ready, then deletes the DaemonSet
  # unless KEEP_PREWARM_DAEMONSET=1 (shutdown keeps it for the exec path).
  # Dies on timeout after dumping diagnostics.
  local name="${NODE_HELPER_PREWARM_DS}"
  local ready_nodes node
  local node_affinity_block=""
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: prewarm ${NODE_HELPER_IMAGE} via temporary DaemonSet"
    return 0
  fi
  ready_nodes="$(kubectl get nodes -o 'custom-columns=NAME:.metadata.name,READY:.status.conditions[?(@.type=="Ready")].status' --no-headers 2>/dev/null | awk '$2=="True" {print $1}' || true)"
  if [[ -n "${ready_nodes}" ]]; then
    # Pin the DaemonSet to currently-Ready nodes so the readiness wait
    # below is not blocked by pods stuck on NotReady nodes.
    node_affinity_block=$'      affinity:\n        nodeAffinity:\n          requiredDuringSchedulingIgnoredDuringExecution:\n            nodeSelectorTerms:\n              - matchExpressions:\n                  - key: kubernetes.io/hostname\n                    operator: In\n                    values:'
    while IFS= read -r node; do
      [[ -z "${node}" ]] && continue
      node_affinity_block+=$'\n'"                      - ${node}"
    done <<< "${ready_nodes}"
    log "node-helper-prewarm-targets=$(printf '%s' "${ready_nodes}" | paste -sd, -)"
  else
    warn "Unable to detect Ready nodes for prewarm targeting; continuing without node affinity."
  fi
  cat <<DS | kubectl apply -f -
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: ${name}
  namespace: ${NODE_HELPER_NAMESPACE}
spec:
  selector:
    matchLabels:
      app: ${name}
  template:
    metadata:
      labels:
        app: ${name}
    spec:
      imagePullSecrets:
        - name: ${REGISTRY_PULL_SECRET}
${node_affinity_block}
      tolerations:
        - operator: Exists
      containers:
        - name: helper
          image: ${NODE_HELPER_IMAGE}
          imagePullPolicy: Always
          command: ["/bin/sh", "-ceu", "sleep 300"]
DS
  local i desired ready
  for i in $(seq 1 90); do
    desired="$(kubectl -n "${NODE_HELPER_NAMESPACE}" get ds "${name}" -o jsonpath='{.status.desiredNumberScheduled}' 2>/dev/null || echo 0)"
    ready="$(kubectl -n "${NODE_HELPER_NAMESPACE}" get ds "${name}" -o jsonpath='{.status.numberReady}' 2>/dev/null || echo 0)"
    [[ -n "${desired}" ]] || desired=0
    [[ -n "${ready}" ]] || ready=0
    if [[ "${desired}" != "0" && "${desired}" == "${ready}" ]]; then
      log "node-helper-prewarm=${ready}/${desired}"
      if [[ "${KEEP_PREWARM_DAEMONSET}" -eq 0 ]]; then
        kubectl -n "${NODE_HELPER_NAMESPACE}" delete ds "${name}" --ignore-not-found >/dev/null 2>&1 || true
      else
        log "Keeping ${name} DaemonSet running for shutdown helper exec path."
      fi
      return 0
    fi
    sleep 2
  done
  kubectl -n "${NODE_HELPER_NAMESPACE}" describe ds "${name}" >&2 || true
  kubectl -n "${NODE_HELPER_NAMESPACE}" get pods -l app="${name}" >&2 || true
  kubectl -n "${NODE_HELPER_NAMESPACE}" delete ds "${name}" --ignore-not-found >/dev/null 2>&1 || true
  die "Timed out prewarming node helper image ${NODE_HELPER_IMAGE}"
}
|
|
|
|
cleanup_prewarm_daemonset() {
  # Best-effort removal of the node-helper prewarm DaemonSet; dry-run only
  # logs the intent.
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: cleanup ${NODE_HELPER_PREWARM_DS} DaemonSet"
    return 0
  fi
  kubectl -n "${NODE_HELPER_NAMESPACE}" delete ds "${NODE_HELPER_PREWARM_DS}" --ignore-not-found >/dev/null 2>&1 || true
}
|
|
|
|
start_bundle_server() {
  # Serve the directory containing HARBOR_BUNDLE_FILE over plain HTTP on
  # BUNDLE_HTTP_PORT so cluster nodes can fetch the image bundle. Records
  # the server PID in the global BUNDLE_SERVER_PID for stop_bundle_server,
  # and waits up to ~20s for the server to answer before dying.
  [[ -f "${HARBOR_BUNDLE_FILE}" ]] || die "Harbor bundle not found at ${HARBOR_BUNDLE_FILE}"
  require_cmd python3
  local bundle_dir bundle_name
  bundle_dir="$(dirname "${HARBOR_BUNDLE_FILE}")"
  bundle_name="$(basename "${HARBOR_BUNDLE_FILE}")"
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: serve ${bundle_name} from ${bundle_dir} on port ${BUNDLE_HTTP_PORT}"
    return 0
  fi
  # </dev/null detaches stdin; server output goes to a fixed log for triage.
  python3 -m http.server "${BUNDLE_HTTP_PORT}" --bind 0.0.0.0 --directory "${bundle_dir}" </dev/null >/tmp/ananke-bundle-server.log 2>&1 &
  BUNDLE_SERVER_PID=$!
  for _ in $(seq 1 20); do
    if curl -fsS "http://127.0.0.1:${BUNDLE_HTTP_PORT}/${bundle_name}" >/dev/null 2>&1; then
      return 0
    fi
    sleep 1
  done
  die "Temporary bundle server did not become ready; see /tmp/ananke-bundle-server.log"
}
|
|
|
|
stop_bundle_server() {
  # Terminate the temporary bundle HTTP server if one was started, waiting
  # up to ~10s for the process to exit, then clear the PID marker.
  # Uses ${BUNDLE_SERVER_PID:-} so the EXIT trap cannot trip 'set -u'
  # when the script exits before any server was launched.
  if [[ -n "${BUNDLE_SERVER_PID:-}" ]]; then
    kill "${BUNDLE_SERVER_PID}" >/dev/null 2>&1 || true
    for _ in $(seq 1 10); do
      kill -0 "${BUNDLE_SERVER_PID}" >/dev/null 2>&1 || break
      sleep 1
    done
    BUNDLE_SERVER_PID=""
  fi
}
|
|
# Always tear down the temporary bundle HTTP server on exit, whatever path
# the script takes.
trap stop_bundle_server EXIT
|
|
|
|
control_host_ip() {
  # First IP address reported by 'hostname -I' on this control host.
  # NOTE(review): 'hostname -I' is Linux-specific, and this assumes the
  # first listed address is the one reachable from cluster nodes — confirm.
  hostname -I | awk '{print $1}'
}
|
|
|
|
seed_harbor_images() {
  # Seed Harbor's bootstrap images onto HARBOR_TARGET_NODE: serve the local
  # zstd-compressed image bundle over HTTP, have a helper pod stream it
  # into the node's containerd (k3s ctr images import), then verify every
  # image listed in bootstrap/harbor-bootstrap-images.txt is present.
  # The HTTP server is stopped even when the helper pod fails; the helper's
  # exit code is propagated before the checkpoint is recorded.
  local images_text control_ip bundle_name script_content seed_rc=0
  [[ -f "${HARBOR_BUNDLE_FILE}" ]] || die "Harbor bundle not found at ${HARBOR_BUNDLE_FILE}"
  ensure_harbor_target_node
  ensure_harbor_host_label
  # Strip comments and blank lines from the expected-image manifest.
  images_text="$(sed '/^[[:space:]]*#/d;/^[[:space:]]*$/d' "${BOOTSTRAP_DIR}/harbor-bootstrap-images.txt")"
  [[ -n "${images_text}" ]] || die "No Harbor images listed in ${BOOTSTRAP_DIR}/harbor-bootstrap-images.txt"
  bundle_name="$(basename "${HARBOR_BUNDLE_FILE}")"
  start_bundle_server
  control_ip="$(control_host_ip)"
  # Unquoted heredoc: ${...} expands here on the control host; \$-escaped
  # pieces expand inside the helper pod on the node.
  script_content=$(cat <<SCRIPT
set -euo pipefail
curl -fsSL "http://${control_ip}:${BUNDLE_HTTP_PORT}/${bundle_name}" \
| zstd -dc \
| nsenter --target 1 --mount --uts --ipc --net --pid /usr/local/bin/k3s ctr images import -
while IFS= read -r image; do
  [[ -z "\${image}" ]] && continue
  nsenter --target 1 --mount --uts --ipc --net --pid /usr/local/bin/k3s ctr images ls | awk '{print \$1}' | grep -Fx "\${image}" >/dev/null
done <<'IMAGES'
${images_text}
IMAGES
SCRIPT
)
  run_helper_pod "${HARBOR_TARGET_NODE}" "harbor-seed" 900 "${script_content}" || seed_rc=$?
  stop_bundle_server
  [[ "${seed_rc}" -eq 0 ]] || return "${seed_rc}"
  mark_checkpoint startup_harbor_seeded
}
|
|
|
|
bootstrap_local_minimal() {
  # Fallback used when the Flux git source is unavailable: apply the core
  # platform kustomizations directly from the local checkout, in dependency
  # order (core/sources first, then storage/network/secrets, then the
  # stateful services).
  apply_kustomization infrastructure/core
  apply_kustomization infrastructure/sources/helm
  apply_kustomization infrastructure/longhorn/core
  apply_kustomization infrastructure/metallb
  apply_kustomization infrastructure/traefik
  apply_kustomization infrastructure/vault-csi
  apply_kustomization infrastructure/vault-injector
  apply_kustomization services/vault
  apply_kustomization infrastructure/postgres
  apply_kustomization services/gitea
}
|
|
|
|
bootstrap_local_harbor() {
  # Apply the Harbor service kustomization from the local checkout
  # (companion to bootstrap_local_minimal, run after image seeding).
  apply_kustomization services/harbor
}
|
|
|
|
reconcile_kustomization_with_self_heal() {
  # Reconcile one Flux kustomization (with source) and, if the first
  # attempt fails with an immutable-field/Job signal, run the self-heal
  # helpers and retry exactly once. In dry-run mode only the intended
  # command is logged via 'run'. Returns flux's exit code on failure.
  local item="$1"
  if [[ "${EXECUTE}" -eq 0 ]]; then
    run flux reconcile kustomization "${item}" -n flux-system --with-source --timeout=15m
    return 0
  fi
  local attempt output rc
  for attempt in 1 2; do
    # Capture output and status without tripping 'set -e'.
    set +e
    output="$(flux reconcile kustomization "${item}" -n flux-system --with-source --timeout=15m 2>&1)"
    rc=$?
    set -e
    if (( rc == 0 )); then
      # Use a full 'if' rather than '[[ ... ]] && printf': with empty
      # output the &&-list would return non-zero and kill the script
      # under 'set -e'.
      if [[ -n "${output}" ]]; then
        printf '%s\n' "${output}"
      fi
      return 0
    fi
    if [[ -n "${output}" ]]; then
      printf '%s\n' "${output}" >&2
    fi
    if (( attempt == 1 )) && grep -Eqi 'immutable|field is immutable|cannot patch.*Job|Job.*invalid' <<< "${output}"; then
      warn "Flux reconcile for '${item}' failed due to immutable Job/template signal. Attempting self-heal."
      heal_failed_flux_jobs || true
      trigger_flux_reconcile_all || true
      sleep 5
      continue
    fi
    return "${rc}"
  done
}
|
|
|
|
reconcile_stage() {
  # Reconcile the named Flux kustomizations in order, then record a
  # "reconciled_<stage>" checkpoint. Without the flux CLI installed, fall
  # back to annotating every kustomization with a reconcile request
  # timestamp instead.
  # NOTE(review): the CLI-less fallback returns before mark_checkpoint, so
  # no stage checkpoint is recorded on that path — confirm intended.
  local stage_name="$1"
  shift
  if ! command -v flux >/dev/null 2>&1; then
    local now
    now="$(date --iso-8601=seconds)"
    run kubectl -n flux-system annotate kustomizations.kustomize.toolkit.fluxcd.io --all reconcile.fluxcd.io/requestedAt="${now}" --overwrite
    return 0
  fi
  local item
  for item in "$@"; do
    reconcile_kustomization_with_self_heal "${item}"
  done
  mark_checkpoint "reconciled_${stage_name}"
}
|
|
|
|
resume_flux_and_reconcile() {
  # Unsuspend all Flux kustomizations, then reconcile and health-check the
  # platform in dependency order: core infrastructure, then the stateful
  # services, then Harbor — finishing with an end-to-end image pull canary.
  patch_flux_suspend_all false
  if command -v flux >/dev/null 2>&1; then
    run flux reconcile source git flux-system -n flux-system --timeout=3m
  fi
  reconcile_stage core core helm longhorn metallb traefik vault-csi vault-injector
  check_ingress_stack
  check_longhorn_stack
  reconcile_stage stateful vault postgres gitea
  check_vault_stack
  check_postgres_stack
  check_gitea_stack
  reconcile_stage registry harbor
  check_harbor_stack
  check_harbor_endpoint
  run_harbor_pull_canary
}
|
|
|
|
status_report() {
  # Print a flat key=value report of recovery-relevant state: UPS battery,
  # Flux source drift against expectations, Harbor endpoint and nodes,
  # worker list, recovery/checkpoint flags, and existence of key platform
  # workloads. Read-only; safe to run at any time.
  local battery flux_ready flux_url flux_branch flux_url_drift flux_branch_drift harbor_code workers ingress_hosts_count
  local effective_target effective_canary
  local labeled_nodes
  battery="$(read_ups_battery || true)"
  flux_ready="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || true)"
  flux_url="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.url}' 2>/dev/null || true)"
  flux_branch="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.ref.branch}' 2>/dev/null || true)"
  flux_url_drift=false
  flux_branch_drift=false
  # Drift is only flagged when both the expectation and the live value exist.
  if [[ -n "${EXPECTED_FLUX_URL}" && -n "${flux_url}" && "${flux_url}" != "${EXPECTED_FLUX_URL}" ]]; then
    flux_url_drift=true
  fi
  if [[ -n "${EXPECTED_FLUX_BRANCH}" && -n "${flux_branch}" && "${flux_branch}" != "${EXPECTED_FLUX_BRANCH}" ]]; then
    flux_branch_drift=true
  fi
  ingress_hosts_count="$(list_ingress_hosts | sed '/^[[:space:]]*$/d' | wc -l | tr -d ' ')"
  harbor_code="$(curl -ksS -o /dev/null -w '%{http_code}' https://registry.bstein.dev/v2/ || true)"
  workers="$(discover_workers_csv 2>/dev/null || true)"
  # Resolve Harbor target/canary nodes the same way startup would, without
  # mutating the globals.
  effective_target="${HARBOR_TARGET_NODE}"
  if ! node_is_ready "${effective_target}"; then
    effective_target="$(select_ready_arm64_worker || true)"
  fi
  effective_canary="${HARBOR_CANARY_NODE}"
  if ! node_is_ready "${effective_canary}"; then
    effective_canary="${effective_target}"
  fi
  echo "mode=status"
  echo "shutdown_mode=${SHUTDOWN_MODE}"
  echo "bundle_file=${HARBOR_BUNDLE_FILE}"
  echo "bundle_present=$([[ -f "${HARBOR_BUNDLE_FILE}" ]] && echo true || echo false)"
  echo "replica_snapshot_file=${REPLICA_SNAPSHOT_FILE}"
  echo "replica_snapshot_present=$([[ -f "${REPLICA_SNAPSHOT_FILE}" ]] && echo true || echo false)"
  echo "node_helper_image=${NODE_HELPER_IMAGE}"
  echo "harbor_target_node=${effective_target:-unknown}"
  echo "harbor_canary_node=${effective_canary:-unknown}"
  labeled_nodes="$(kubectl get nodes -l "${HARBOR_HOST_LABEL_KEY}=true" -o jsonpath='{range .items[*]}{.metadata.name}{","}{end}' 2>/dev/null || true)"
  labeled_nodes="${labeled_nodes%,}"
  echo "harbor_host_label_key=${HARBOR_HOST_LABEL_KEY}"
  echo "harbor_host_label_nodes=${labeled_nodes:-none}"
  echo "workers=${workers}"
  echo "recovery_pending=${RECOVERY_PENDING}"
  echo "startup_attempted=${STARTUP_ATTEMPTED_DURING_OUTAGE}"
  echo "last_checkpoint=${LAST_CHECKPOINT}"
  echo "ups_host=${UPS_HOST_IN_USE:-${UPS_HOST}}"
  echo "ups_battery=${battery:-unknown}"
  echo "flux_source_expected_url=${EXPECTED_FLUX_URL}"
  echo "flux_source_expected_branch=${EXPECTED_FLUX_BRANCH}"
  echo "flux_source_actual_url=${flux_url:-unknown}"
  echo "flux_source_actual_branch=${flux_branch:-unknown}"
  echo "flux_source_url_drift=${flux_url_drift}"
  echo "flux_source_branch_drift=${flux_branch_drift}"
  echo "flux_source_ready=${flux_ready:-unknown}"
  echo "ingress_hosts_count=${ingress_hosts_count}"
  echo "harbor_http=${harbor_code:-unknown}"
  # Existence-only checks (true/false); no rollout waits here.
  kubectl get ingressclass traefik >/dev/null 2>&1 && echo "traefik_ingressclass=true" || echo "traefik_ingressclass=false"
  kubectl -n traefik get deploy traefik >/dev/null 2>&1 && echo "traefik_deploy=true" || echo "traefik_deploy=false"
  kubectl -n longhorn-system get ds longhorn-manager >/dev/null 2>&1 && echo "longhorn_manager=true" || echo "longhorn_manager=false"
  kubectl -n vault get sts vault >/dev/null 2>&1 && echo "vault_statefulset=true" || echo "vault_statefulset=false"
  kubectl -n postgres get sts postgres >/dev/null 2>&1 && echo "postgres_statefulset=true" || echo "postgres_statefulset=false"
  kubectl -n gitea get deploy gitea >/dev/null 2>&1 && echo "gitea_deploy=true" || echo "gitea_deploy=false"
  kubectl -n harbor get deploy harbor-core >/dev/null 2>&1 && echo "harbor_deploy=true" || echo "harbor_deploy=false"
}
|
|
|
|
planned_shutdown() {
  # Orchestrate a graceful cluster shutdown: record recovery state, prewarm
  # the helper image, snapshot etcd and workload replica counts, suspend
  # Flux, scale down and drain workloads, then schedule delayed k3s service
  # stops — plus host poweroff unless SHUTDOWN_MODE=cluster-only — on every
  # node via systemd-run timers, so this script can disconnect cleanly
  # before the nodes go away.
  local workers_csv
  workers_csv="$(discover_workers_csv 2>/dev/null || true)"
  as_array_from_csv "${workers_csv}" WORKER_NODES
  # Control-plane node names are fixed for this cluster.
  as_array_from_csv "titan-0a,titan-0b,titan-0c" CONTROL_PLANE_NODES

  RECOVERY_PENDING=1
  STARTUP_ATTEMPTED_DURING_OUTAGE=0
  save_recovery_state 1 0 shutdown_started

  if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then
    # Keep the DaemonSet alive so later host commands can exec through it.
    KEEP_PREWARM_DAEMONSET=1
    prewarm_node_helper_image
    mark_checkpoint shutdown_helper_prewarmed
  fi

  if [[ "${SKIP_ETCD_SNAPSHOT}" -eq 0 ]]; then
    local ts
    ts="$(date +%Y%m%d-%H%M%S)"
    # Snapshot etcd on the first control-plane node before anything stops.
    run_host_command_via_helper "${CONTROL_PLANE_NODES[0]}" "etcd-snapshot" 300 "/usr/local/bin/k3s etcd-snapshot save --name pre-shutdown-${ts}"
    mark_checkpoint shutdown_snapshot_complete
  else
    warn "Skipping etcd snapshot by request."
  fi

  save_workload_replica_snapshot
  mark_checkpoint shutdown_replicas_snapshot

  # Suspend Flux first so it cannot scale workloads back up mid-shutdown.
  patch_flux_suspend_all true
  best_effort_scale_down_apps
  mark_checkpoint shutdown_apps_scaled_down

  if [[ "${SKIP_DRAIN}" -eq 0 ]]; then
    best_effort_drain_workers "${DRAIN_TIMEOUT_SECONDS}" "${WORKER_NODES[@]}"
    mark_checkpoint shutdown_workers_drained
  else
    warn "Skipping worker drain by request."
  fi

  local node
  if [[ "${SHUTDOWN_MODE}" == "cluster-only" ]]; then
    warn "shutdown-mode=cluster-only: stopping k3s services only; host poweroff is disabled."
  else
    log "shutdown-mode=host-poweroff: scheduling host poweroff after service stop."
  fi

  # Worker actions fire after 20s, control-plane actions after 45s.
  for node in "${WORKER_NODES[@]}"; do
    [[ -z "${node}" ]] && continue
    if [[ "${SHUTDOWN_MODE}" == "cluster-only" ]]; then
      schedule_host_service_stop_via_helper "${node}" k3s-agent 20
    else
      schedule_host_shutdown_via_helper "${node}" k3s-agent 20
    fi
  done
  mark_checkpoint shutdown_workers_scheduled

  for node in "${CONTROL_PLANE_NODES[@]}"; do
    [[ -z "${node}" ]] && continue
    if [[ "${SHUTDOWN_MODE}" == "cluster-only" ]]; then
      schedule_host_service_stop_via_helper "${node}" k3s 45
    else
      schedule_host_shutdown_via_helper "${node}" k3s 45
    fi
  done
  if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then
    cleanup_prewarm_daemonset
  fi
  mark_checkpoint shutdown_control_planes_scheduled
  if [[ "${SHUTDOWN_MODE}" == "cluster-only" ]]; then
    log "Cluster-only shutdown actions scheduled (hosts remain powered on)."
  else
    log "Shutdown + host poweroff actions scheduled on hosts."
  fi
}
|
|
|
|
emergency_shutdown_after_outage() {
  # Fallback when the battery budget is insufficient after an outage:
  # suspend Flux, scale everything down, drain workers with the shorter
  # EMERGENCY_DRAIN_TIMEOUT_SECONDS, then run the normal planned shutdown.
  # Every preparatory step is best-effort so the shutdown always proceeds.
  warn "Entering outage-aware emergency shutdown path due insufficient startup budget."
  patch_flux_suspend_all true || true
  best_effort_scale_down_apps || true
  local workers_csv
  workers_csv="$(discover_workers_csv 2>/dev/null || true)"
  as_array_from_csv "${workers_csv}" WORKER_NODES
  best_effort_drain_workers "${EMERGENCY_DRAIN_TIMEOUT_SECONDS}" "${WORKER_NODES[@]}" || true
  planned_shutdown
}
|
|
|
|
#######################################
# Full cluster startup orchestration after an outage or planned shutdown:
#   battery gate -> API wait -> Harbor host prep -> optional Flux-source
#   breakglass -> local bootstrap fallback (incl. Harbor seed) ->
#   Flux resume -> workload restore -> stability checks -> helper prewarm.
# Step order matters: each stage depends on the previous one, and
# mark_checkpoint calls record progress for resumability.
# Globals (read): RECOVERY_PENDING, STARTUP_ATTEMPTED_DURING_OUTAGE,
#   FORCE_FLUX_URL, FORCE_FLUX_BRANCH, SKIP_LOCAL_BOOTSTRAP,
#   SKIP_HARBOR_BOOTSTRAP, SKIP_HARBOR_SEED, SKIP_HELPER_PREWARM
# Exits: 1 when startup is deferred or escalated to emergency shutdown.
#######################################
startup_flow() {
  # Battery gate: only enforced when we are recovering from a recorded outage.
  if [[ "${RECOVERY_PENDING}" -eq 1 ]]; then
    if ! ensure_minimum_battery_for_bootstrap; then
      # Second failed attempt during the same outage: give up and shut
      # everything down cleanly rather than brown-out mid-bootstrap.
      if [[ "${STARTUP_ATTEMPTED_DURING_OUTAGE}" -eq 1 ]]; then
        emergency_shutdown_after_outage
        exit 1
      fi
      # First failure: persist a deferred state so the next invocation
      # knows a prior attempt already happened.
      warn "Startup deferred due low battery after recent outage; marking for second-outage fallback."
      save_recovery_state 1 1 deferred_low_battery
      exit 1
    fi
    # Battery OK: record that an attempt is in flight before touching the API.
    STARTUP_ATTEMPTED_DURING_OUTAGE=1
    save_recovery_state 1 1 waiting_for_api
  fi

  if ! wait_for_api; then
    die "Kubernetes API did not become reachable in time."
  fi
  mark_checkpoint startup_api_ready

  # Pin and label the Harbor host before anything that might schedule Harbor.
  ensure_harbor_target_node
  ensure_harbor_host_label
  mark_checkpoint startup_harbor_host_labeled

  # Breakglass: operator-forced Flux source URL/branch (guarded by
  # --allow-flux-source-mutation upstream; see usage text).
  if [[ -n "${FORCE_FLUX_URL}" ]]; then
    warn "Breakglass: forcing Flux source URL to '${FORCE_FLUX_URL}'."
    run kubectl -n flux-system patch gitrepository flux-system --type=merge -p "{\"spec\":{\"url\":\"${FORCE_FLUX_URL}\"}}"
    mark_checkpoint startup_flux_url_forced
  fi

  if [[ -n "${FORCE_FLUX_BRANCH}" ]]; then
    run kubectl -n flux-system patch gitrepository flux-system --type=merge -p "{\"spec\":{\"ref\":{\"branch\":\"${FORCE_FLUX_BRANCH}\"}}}"
    mark_checkpoint startup_flux_branch_forced
  fi

  # Abort early if the (possibly patched) Flux source is not what we expect.
  assert_flux_source_expected

  if [[ "${SKIP_LOCAL_BOOTSTRAP}" -eq 0 ]]; then
    # Fallback path only fires when the Flux GitRepository is not Ready —
    # i.e. Git hosting itself is down and must be bootstrapped locally.
    if ! kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null | grep -q True; then
      warn "Flux source not Ready; executing local bootstrap fallback path."
      bootstrap_local_minimal
      mark_checkpoint startup_local_bootstrap_complete
      # Core stack health checks, in dependency order.
      check_ingress_stack
      check_longhorn_stack
      check_vault_stack
      check_postgres_stack
      check_gitea_stack

      if [[ "${SKIP_HARBOR_BOOTSTRAP}" -eq 0 ]]; then
        if harbor_is_ready; then
          log "Harbor already healthy; skipping Harbor seed/bootstrap."
        else
          # Seed images into nodes before Harbor itself is bootstrapped so
          # its own pods can start without an external registry.
          if [[ "${SKIP_HARBOR_SEED}" -eq 0 ]]; then
            if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then
              prewarm_node_helper_image
            fi
            seed_harbor_images
          else
            warn "Skipping Harbor seed/import by request."
          fi
          bootstrap_local_harbor
          mark_checkpoint startup_local_harbor_applied
          check_harbor_stack
          check_harbor_endpoint
        fi
      else
        warn "Skipping Harbor bootstrap fallback by request."
      fi
    fi
  else
    warn "Skipping local bootstrap fallback by request."
  fi

  # Hand control back to GitOps and wait for the cluster to settle.
  resume_flux_and_reconcile
  wait_for_flux_kustomizations_ready
  restore_workload_replica_snapshot
  wait_for_startup_workloads_ready
  wait_for_startup_service_checklist
  wait_for_startup_stability_window
  # Re-prewarm the helper image so a future shutdown does not need a pull.
  if [[ "${SKIP_HELPER_PREWARM}" -eq 0 ]]; then
    prewarm_node_helper_image
    mark_checkpoint startup_helper_prewarmed
  fi
  # Only clear persisted recovery state once the whole flow succeeded.
  clear_recovery_state
  log "Startup flow complete."
}
|
|
|
|
#######################################
# Pre-stage the cluster for a future recovery: require the Harbor bundle
# to be present, pin and label the Harbor host, and (unless skipped)
# prewarm the node-helper image so later flows need no image pulls.
# Globals: HARBOR_BUNDLE_FILE, SKIP_HELPER_PREWARM (read)
#######################################
prepare_flow() {
  if [[ ! -f "${HARBOR_BUNDLE_FILE}" ]]; then
    die "Harbor bundle missing at ${HARBOR_BUNDLE_FILE}. Build and copy it to the canonical control host first."
  fi

  ensure_harbor_target_node
  ensure_harbor_host_label
  mark_checkpoint prepare_harbor_host_labeled

  if (( SKIP_HELPER_PREWARM == 0 )); then
    prewarm_node_helper_image
    mark_checkpoint prepare_helper_prewarmed
  fi

  log "Prepare flow complete."
}
|
|
|
|
#######################################
# Stand-alone Harbor seeding: require the bundle, optionally prewarm the
# node helper, import the images, then validate the Harbor endpoint with
# a pull canary.
# Globals: HARBOR_BUNDLE_FILE, SKIP_HELPER_PREWARM (read)
#######################################
harbor_seed_flow() {
  if [[ ! -f "${HARBOR_BUNDLE_FILE}" ]]; then
    die "Harbor bundle missing at ${HARBOR_BUNDLE_FILE}. Build and copy it to the canonical control host first."
  fi

  if (( SKIP_HELPER_PREWARM == 0 )); then
    prewarm_node_helper_image
    mark_checkpoint harbor_seed_helper_prewarmed
  fi

  seed_harbor_images
  check_harbor_endpoint
  run_harbor_pull_canary

  log "Harbor seed flow complete."
}
|
|
|
|
# ---- Entry point: load persisted recovery state, report the effective ----
# ---- configuration, then dispatch to the requested mode.              ----
load_recovery_state

log "mode=${MODE} execute=${EXECUTE}"
log "shutdown-mode=${SHUTDOWN_MODE}"
log "recovery-state-file=${RECOVERY_STATE_FILE}"
log "bundle-file=${HARBOR_BUNDLE_FILE}"
log "node-helper-image=${NODE_HELPER_IMAGE}"
log "harbor-target-node-config=${HARBOR_TARGET_NODE:-auto}"
log "harbor-canary-node-config=${HARBOR_CANARY_NODE:-auto}"
log "harbor-host-label-key=${HARBOR_HOST_LABEL_KEY}"
log "expected-flux-url=${EXPECTED_FLUX_URL}"
log "expected-flux-branch=${EXPECTED_FLUX_BRANCH}"
log "startup-optional-kustomizations=${STARTUP_OPTIONAL_KUSTOMIZATIONS:-none}"
report_flux_source_state

case "${MODE}" in
  status)
    status_report
    ;;
  prepare)
    prepare_flow
    ;;
  harbor-seed)
    harbor_seed_flow
    ;;
  shutdown)
    planned_shutdown
    ;;
  startup)
    startup_flow
    ;;
  *)
    # Defensive default: without this, an unrecognized MODE fell through
    # the case silently and the script exited 0 having done nothing.
    die "Unknown mode '${MODE}' (expected prepare|status|harbor-seed|shutdown|startup)."
    ;;
esac
|