#!/usr/bin/env bash
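# cluster_power_recovery.sh
# UPS-aware shutdown and cold-start recovery flow for the k3s cluster
# (dry-run by default; pass --execute to actually run commands).
#   shutdown: snapshot etcd, suspend Flux, scale down apps, drain workers, stop k3s
#   startup:  gate on UPS battery, start k3s, bootstrap from the local checkout
#             when the Flux source is unavailable, then resume Flux and reconcile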
set -euo pipefail
usage() {
  cat <<'USAGE'
Usage:
  scripts/cluster_power_recovery.sh shutdown [options]
  scripts/cluster_power_recovery.sh startup [options]

Options:
  --execute                       Actually run commands (default is dry-run)
  --ssh-user <user>               SSH user for node commands (default: current SSH config user)
  --control-planes <csv>          Control plane hosts (default: titan-0a,titan-0b,titan-0c)
  --workers <csv>                 Worker hosts (default: static atlas inventory, with API discovery when available)
  --expected-flux-branch <name>   Expected Flux source branch during startup checks (default: main)
  --skip-etcd-snapshot            Skip etcd snapshot before shutdown
  --skip-drain                    Skip worker drain during shutdown
  --skip-local-bootstrap          Startup: skip local bootstrap fallback applies
  --skip-harbor-bootstrap         Startup: skip Harbor recovery bootstrap stage
  --force-flux-branch <name>      Startup: patch flux-system GitRepository branch to this value
  --min-startup-battery <pct>     Minimum UPS percent required before bootstrap (default: 35)
  --ups-host <name>               UPS identifier for upsc (default: ups@localhost)
  --ups-battery-key <key>         UPS battery key for upsc (default: battery.charge)
  --recovery-state-file <path>    Recovery state file for second-outage detection
  --drain-timeout <seconds>       Worker drain timeout for normal shutdown (default: 180)
  --emergency-drain-timeout <seconds>
                                  Worker drain timeout for emergency fallback (default: 45)
  --require-ups-battery           Hard-fail startup if UPS battery cannot be read
  -h, --help                      Show help

Examples:
  scripts/cluster_power_recovery.sh shutdown --execute
  scripts/cluster_power_recovery.sh startup --execute --force-flux-branch main
USAGE
}
MODE="${1:-}"
if [[ -z "${MODE}" || "${MODE}" == "-h" || "${MODE}" == "--help" ]]; then
  usage
  exit 0
fi
shift || true
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
if [[ -z "${KUBECONFIG:-}" && -f "${SCRIPT_DIR}/kubeconfig" ]]; then
  export KUBECONFIG="${SCRIPT_DIR}/kubeconfig"
fi
if [[ "${MODE}" != "shutdown" && "${MODE}" != "startup" ]]; then
  echo "Unknown mode: ${MODE}" >&2
  usage
  exit 1
fi
EXECUTE=0
SSH_USER=""
CONTROL_PLANES="titan-0a,titan-0b,titan-0c"
WORKERS=""
DEFAULT_WORKERS="titan-04,titan-05,titan-06,titan-07,titan-08,titan-09,titan-10,titan-11,titan-12,titan-13,titan-14,titan-15,titan-17,titan-18,titan-19,titan-20,titan-21,titan-22,titan-24"
EXPECTED_FLUX_BRANCH="main"
SKIP_ETCD_SNAPSHOT=0
SKIP_DRAIN=0
SKIP_LOCAL_BOOTSTRAP=0
SKIP_HARBOR_BOOTSTRAP=0
FORCE_FLUX_BRANCH=""
UPS_HOST="ups@localhost"
UPS_BATTERY_KEY="battery.charge"
RECOVERY_STATE_FILE="${HOME}/.local/state/cluster_power_recovery.state"
MIN_STARTUP_BATTERY=35
DRAIN_TIMEOUT_SECONDS=180
EMERGENCY_DRAIN_TIMEOUT_SECONDS=45
REQUIRE_UPS_BATTERY=0
RECOVERY_PENDING=0
STARTUP_ATTEMPTED_DURING_OUTAGE=0
while [[ $# -gt 0 ]]; do
  case "$1" in
    --execute)
      EXECUTE=1
      shift
      ;;
    --ssh-user)
      SSH_USER="${2:-}"
      shift 2
      ;;
    --control-planes)
      CONTROL_PLANES="${2:-}"
      shift 2
      ;;
    --workers)
      WORKERS="${2:-}"
      shift 2
      ;;
    --expected-flux-branch)
      EXPECTED_FLUX_BRANCH="${2:-}"
      shift 2
      ;;
    --skip-etcd-snapshot)
      SKIP_ETCD_SNAPSHOT=1
      shift
      ;;
    --skip-drain)
      SKIP_DRAIN=1
      shift
      ;;
    --skip-local-bootstrap)
      SKIP_LOCAL_BOOTSTRAP=1
      shift
      ;;
    --skip-harbor-bootstrap)
      SKIP_HARBOR_BOOTSTRAP=1
      shift
      ;;
    --force-flux-branch)
      FORCE_FLUX_BRANCH="${2:-}"
      shift 2
      ;;
    --ups-host)
      UPS_HOST="${2:-}"
      shift 2
      ;;
    --ups-battery-key)
      UPS_BATTERY_KEY="${2:-}"
      shift 2
      ;;
    --min-startup-battery)
      MIN_STARTUP_BATTERY="${2:-}"
      shift 2
      ;;
    --recovery-state-file)
      RECOVERY_STATE_FILE="${2:-}"
      shift 2
      ;;
    --drain-timeout)
      DRAIN_TIMEOUT_SECONDS="${2:-}"
      shift 2
      ;;
    --emergency-drain-timeout)
      EMERGENCY_DRAIN_TIMEOUT_SECONDS="${2:-}"
      shift 2
      ;;
    --require-ups-battery)
      REQUIRE_UPS_BATTERY=1
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      usage
      exit 1
      ;;
  esac
done
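
# Fail fast when a required CLI dependency is missing.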
require_cmd() {
  local cmd="$1"
  if ! command -v "${cmd}" >/dev/null 2>&1; then
    echo "Missing required command: ${cmd}" >&2
    exit 1
  fi
}
require_cmd kubectl
require_cmd ssh
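
# Logging and dry-run execution helpers. `run` executes its arguments directly;
# `run_shell` passes a single string through `bash -lc` so pipelines and
# trailing `|| true` guards work. Both only execute when --execute was given.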
log() { echo "[cluster-power] $*"; }
warn() { echo "[cluster-power][warn] $*" >&2; }
run() {
  if [[ "${EXECUTE}" -eq 1 ]]; then
    log "EXEC: $*"
    "$@"
  else
    log "DRY-RUN: $*"
  fi
}

run_shell() {
  if [[ "${EXECUTE}" -eq 1 ]]; then
    log "EXEC: $*"
    bash -lc "$*"
  else
    log "DRY-RUN: $*"
  fi
}
as_array_from_csv() {
  local csv="$1"
  local out_var="$2"
  local old_ifs="${IFS}"
  IFS=',' read -r -a _tmp <<< "${csv}"
  IFS="${old_ifs}"
  eval "${out_var}"'=("${_tmp[@]}")'
}
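
# Build the SSH target for a node: "user@node" when --ssh-user was given,
# otherwise the bare hostname (deferring to the local SSH config).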
ssh_target() {
  local node="$1"
  if [[ -n "${SSH_USER}" ]]; then
    printf "%s@%s" "${SSH_USER}" "${node}"
  else
    printf "%s" "${node}"
  fi
}
discover_workers_csv() {
  # Include every non-control-plane node by default (workers + accelerators).
  kubectl get nodes \
    -o custom-columns=NAME:.metadata.name,CP:.metadata.labels.node-role\\.kubernetes\\.io/control-plane,MASTER:.metadata.labels.node-role\\.kubernetes\\.io/master \
    --no-headers \
    | awk '$2=="<none>" && $3=="<none>" {print $1}' \
    | paste -sd, -
}
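
# Recovery state handling. The state file is a simple key=value file used to
# detect a second outage before a recovery has finished, e.g.:
#   recovery_pending=1
#   startup_attempted=1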
load_recovery_state() {
  if [[ ! -f "${RECOVERY_STATE_FILE}" ]]; then
    RECOVERY_PENDING=0
    STARTUP_ATTEMPTED_DURING_OUTAGE=0
    return 0
  fi
  while IFS='=' read -r key value; do
    case "${key}" in
      recovery_pending)
        RECOVERY_PENDING="${value}"
        ;;
      startup_attempted)
        STARTUP_ATTEMPTED_DURING_OUTAGE="${value}"
        ;;
    esac
  done < "${RECOVERY_STATE_FILE}"
}
save_recovery_state() {
  mkdir -p "$(dirname "${RECOVERY_STATE_FILE}")"
  cat > "${RECOVERY_STATE_FILE}" <<EOF
recovery_pending=${1}
startup_attempted=${2}
EOF
}
clear_recovery_state() {
  if [[ -f "${RECOVERY_STATE_FILE}" ]]; then
    rm -f "${RECOVERY_STATE_FILE}"
  fi
}
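
# Read the UPS battery charge (integer percent) via NUT's upsc, e.g.
# `upsc ups@localhost battery.charge`. Returns non-zero when upsc is missing
# or the value cannot be parsed.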
read_ups_battery() {
  if ! command -v upsc >/dev/null 2>&1; then
    return 1
  fi
  local raw
  raw="$(upsc "${UPS_HOST}" "${UPS_BATTERY_KEY}" 2>/dev/null || true)"
  if [[ -z "${raw}" ]]; then
    return 1
  fi
  # battery.charge can include units/decimals in some setups; normalize.
  raw="${raw%%.*}"
  if ! [[ "${raw}" =~ ^[0-9]+$ ]]; then
    return 1
  fi
  echo "${raw}"
}
ensure_minimum_battery_for_bootstrap() {
  local battery
  battery="$(read_ups_battery || true)"
  if [[ -z "${battery}" ]]; then
    if [[ "${REQUIRE_UPS_BATTERY}" -eq 1 ]]; then
      warn "Unable to read UPS battery status and --require-ups-battery is set."
      return 1
    fi
    warn "Unable to read UPS battery status; continuing without hard battery gating."
    return 0
  fi
  log "ups-battery=${battery}%"
  if (( battery < MIN_STARTUP_BATTERY )); then
    warn "UPS battery ${battery}% below minimum startup threshold ${MIN_STARTUP_BATTERY}%."
    return 1
  fi
  return 0
}
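
# Second-outage fallback: if a startup was already attempted while recovery was
# pending and the battery budget is still insufficient, shut everything back
# down with a short drain window rather than risk an unclean power loss.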
emergency_shutdown_after_outage() {
  warn "Entering outage-aware emergency shutdown path due to insufficient startup budget."
  patch_flux_suspend_all true || true
  best_effort_scale_down_apps
  # Give the cluster one short chance to drain, then force progress.
  best_effort_drain_workers "${EMERGENCY_DRAIN_TIMEOUT_SECONDS}" "${WORKER_NODES[@]}"
  stop_workers_agents "${WORKER_NODES[@]}"
  stop_control_planes "${CONTROL_PLANE_NODES[@]}"
}
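
# Toggle spec.suspend on every Flux Kustomization and HelmRelease so Flux does
# not fight the manual scale-down/scale-up (true = suspend, false = resume).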
patch_flux_suspend_all() {
  local value="$1"
  local patch
  patch=$(printf '{"spec":{"suspend":%s}}' "${value}")
  local ks_list hr_list
  ks_list="$(kubectl -n flux-system get kustomizations.kustomize.toolkit.fluxcd.io -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' || true)"
  hr_list="$(kubectl get helmreleases.helm.toolkit.fluxcd.io -A -o jsonpath='{range .items[*]}{.metadata.namespace}{"/"}{.metadata.name}{"\n"}{end}' || true)"
  while IFS= read -r k; do
    [[ -z "${k}" ]] && continue
    run kubectl -n flux-system patch kustomization "${k}" --type=merge -p "${patch}"
  done <<< "${ks_list}"
  while IFS= read -r hr; do
    [[ -z "${hr}" ]] && continue
    local ns="${hr%%/*}"
    local name="${hr##*/}"
    run kubectl -n "${ns}" patch helmrelease "${name}" --type=merge -p "${patch}"
  done <<< "${hr_list}"
}
report_flux_source_state() {
  local flux_url flux_branch
  flux_url="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.url}' 2>/dev/null || true)"
  flux_branch="$(kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.spec.ref.branch}' 2>/dev/null || true)"
  if [[ -n "${flux_url}" ]]; then
    log "flux-source-url=${flux_url}"
  fi
  if [[ -n "${flux_branch}" ]]; then
    log "flux-source-branch=${flux_branch}"
    if [[ "${MODE}" == "startup" && -z "${FORCE_FLUX_BRANCH}" && "${flux_branch}" != "${EXPECTED_FLUX_BRANCH}" ]]; then
      warn "Flux source branch is '${flux_branch}'. Expected '${EXPECTED_FLUX_BRANCH}' for canonical cold-start recovery. Use --force-flux-branch ${EXPECTED_FLUX_BRANCH} if needed."
    fi
  fi
}
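
# Poll the Kubernetes API until it answers, up to attempts * sleep_s seconds.
# Skipped entirely in dry-run mode since nothing was actually started.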
wait_for_api() {
  local attempts="${1:-90}"
  local sleep_s="${2:-2}"
  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: skipping live Kubernetes API wait"
    return 0
  fi
  local i
  for i in $(seq 1 "${attempts}"); do
    if kubectl version --request-timeout=5s >/dev/null 2>&1; then
      return 0
    fi
    sleep "${sleep_s}"
  done
  return 1
}
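
# Scale application Deployments/StatefulSets to zero in every namespace except
# the core infrastructure namespaces, so workloads stop writing before the drain.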
best_effort_scale_down_apps() {
  local excludes='^(kube-system|kube-public|kube-node-lease|flux-system|traefik|metallb-system|cert-manager|longhorn-system|vault|postgres|maintenance)$'
  local ns_list
  ns_list="$(kubectl get ns -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')"
  while IFS= read -r ns; do
    [[ -z "${ns}" ]] && continue
    if [[ "${ns}" =~ ${excludes} ]]; then
      continue
    fi
    run_shell "kubectl -n ${ns} scale deployment --all --replicas=0 || true"
    run_shell "kubectl -n ${ns} scale statefulset --all --replicas=0 || true"
  done <<< "${ns_list}"
}
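
# Drain workers with escalating pressure: cordon, gentle drain, then --force,
# and finally --disable-eviction so PodDisruptionBudgets cannot block shutdown.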
best_effort_drain_workers() {
  local timeout_seconds="$1"
  shift || true
  local workers=("$@")
  local node
  for node in "${workers[@]}"; do
    [[ -z "${node}" ]] && continue
    run kubectl cordon "${node}"
    if run_shell "kubectl drain ${node} --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=${timeout_seconds}s"; then
      continue
    fi
    warn "Gentle drain timed out for ${node}; retrying with --force."
    if run_shell "kubectl drain ${node} --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=${timeout_seconds}s --force"; then
      continue
    fi
    warn "Force drain timed out for ${node}; final attempt with --disable-eviction."
    run_shell "kubectl drain ${node} --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=${timeout_seconds}s --force --disable-eviction || true"
  done
}
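
# Node service helpers: workers run the k3s-agent unit, control planes run the
# k3s server unit. All calls use BatchMode SSH with a short connect timeout.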
stop_workers_agents() {
  local workers=("$@")
  local node target
  for node in "${workers[@]}"; do
    [[ -z "${node}" ]] && continue
    target="$(ssh_target "${node}")"
    run ssh -o BatchMode=yes -o ConnectTimeout=8 "${target}" "sudo systemctl stop k3s-agent || true"
  done
}

start_workers_agents() {
  local workers=("$@")
  local node target
  for node in "${workers[@]}"; do
    [[ -z "${node}" ]] && continue
    target="$(ssh_target "${node}")"
    run ssh -o BatchMode=yes -o ConnectTimeout=8 "${target}" "sudo systemctl start k3s-agent || true"
  done
}

stop_control_planes() {
  local cps=("$@")
  local node target
  for node in "${cps[@]}"; do
    [[ -z "${node}" ]] && continue
    target="$(ssh_target "${node}")"
    run ssh -o BatchMode=yes -o ConnectTimeout=8 "${target}" "sudo systemctl stop k3s || true"
  done
}

start_control_planes() {
  local cps=("$@")
  local node target
  for node in "${cps[@]}"; do
    [[ -z "${node}" ]] && continue
    target="$(ssh_target "${node}")"
    run ssh -o BatchMode=yes -o ConnectTimeout=8 "${target}" "sudo systemctl start k3s || true"
  done
}
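
# Take a timestamped etcd snapshot on one control plane before shutdown
# (k3s writes it to its snapshot directory on that node).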
take_etcd_snapshot() {
  local cp="$1"
  local target
  target="$(ssh_target "${cp}")"
  local ts
  ts="$(date +%Y%m%d-%H%M%S)"
  run ssh -o BatchMode=yes -o ConnectTimeout=8 "${target}" \
    "sudo k3s etcd-snapshot save --name pre-shutdown-${ts}"
}
bootstrap_local_minimal() {
  # Local apply path to break Flux<->Gitea boot deadlock during cold-start recovery.
  # Longhorn is applied before stateful workloads so astreae-backed PVCs can bind.
  run kubectl apply -k infrastructure/core
  run kubectl apply -k infrastructure/sources/helm
  run kubectl apply -k infrastructure/longhorn/core
  run kubectl apply -k infrastructure/metallb
  run kubectl apply -k infrastructure/traefik
  run kubectl apply -k infrastructure/vault-csi
  run kubectl apply -k infrastructure/vault-injector
  run kubectl apply -k services/vault
  run kubectl apply -k infrastructure/postgres
  run kubectl apply -k services/gitea
}
bootstrap_local_harbor() {
  # Optional Harbor bootstrap stage for environments where Harbor is authoritative for images.
  run kubectl apply -k services/harbor
}
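
# Resume Flux and reconcile in roughly the same order as the local bootstrap
# (core, sources, storage, networking, secrets, databases, git/registry).
# Falls back to a requestedAt annotation when the flux CLI is not installed.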
resume_flux_and_reconcile() {
  patch_flux_suspend_all false
  if command -v flux >/dev/null 2>&1; then
    run flux reconcile source git flux-system -n flux-system --timeout=3m
    run flux reconcile kustomization core -n flux-system --with-source --timeout=5m
    run flux reconcile kustomization helm -n flux-system --with-source --timeout=5m
    run flux reconcile kustomization longhorn -n flux-system --with-source --timeout=15m
    run flux reconcile kustomization metallb -n flux-system --with-source --timeout=5m
    run flux reconcile kustomization traefik -n flux-system --with-source --timeout=5m
    run flux reconcile kustomization vault-csi -n flux-system --with-source --timeout=5m
    run flux reconcile kustomization vault-injector -n flux-system --with-source --timeout=5m
    run flux reconcile kustomization vault -n flux-system --with-source --timeout=10m
    run flux reconcile kustomization postgres -n flux-system --with-source --timeout=10m
    run flux reconcile kustomization gitea -n flux-system --with-source --timeout=10m
    run flux reconcile kustomization harbor -n flux-system --with-source --timeout=15m
  else
    local now
    now="$(date --iso-8601=seconds)"
    run kubectl -n flux-system annotate kustomizations.kustomize.toolkit.fluxcd.io --all reconcile.fluxcd.io/requestedAt="${now}" --overwrite
  fi
}
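
# Main flow: resolve node inventory, load recovery state, then dispatch on MODE.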
as_array_from_csv "${CONTROL_PLANES}" CONTROL_PLANE_NODES
if [[ -z "${WORKERS}" ]]; then
  WORKERS="$(discover_workers_csv 2>/dev/null || true)"
  if [[ -z "${WORKERS}" ]]; then
    warn "Unable to auto-discover workers from the API; falling back to static atlas worker inventory."
    WORKERS="${DEFAULT_WORKERS}"
  fi
fi
as_array_from_csv "${WORKERS}" WORKER_NODES

load_recovery_state
log "mode=${MODE} execute=${EXECUTE}"
log "control-planes=${CONTROL_PLANES}"
log "workers=${WORKERS}"
log "recovery-state-file=${RECOVERY_STATE_FILE}"
log "recovery_pending=${RECOVERY_PENDING} startup_attempted=${STARTUP_ATTEMPTED_DURING_OUTAGE}"
report_flux_source_state
if [[ "${MODE}" == "shutdown" ]]; then
  save_recovery_state 1 0
  if [[ "${SKIP_ETCD_SNAPSHOT}" -eq 0 ]]; then
    take_etcd_snapshot "${CONTROL_PLANE_NODES[0]}"
  else
    warn "Skipping etcd snapshot by request."
  fi
  patch_flux_suspend_all true
  best_effort_scale_down_apps
  if [[ "${SKIP_DRAIN}" -eq 0 ]]; then
    best_effort_drain_workers "${DRAIN_TIMEOUT_SECONDS}" "${WORKER_NODES[@]}"
  else
    warn "Skipping worker drain by request."
  fi
  stop_workers_agents "${WORKER_NODES[@]}"
  stop_control_planes "${CONTROL_PLANE_NODES[@]}"
  log "Shutdown flow complete."
  exit 0
fi
# Startup mode
if [[ "${RECOVERY_PENDING}" -eq 1 ]]; then
  if ! ensure_minimum_battery_for_bootstrap; then
    if [[ "${STARTUP_ATTEMPTED_DURING_OUTAGE}" -eq 1 ]]; then
      emergency_shutdown_after_outage
      exit 1
    fi
    warn "Startup deferred due to low battery after a recent outage; marking for second-outage fallback."
    save_recovery_state 1 1
    exit 1
  fi
  save_recovery_state 1 1
fi
start_control_planes "${CONTROL_PLANE_NODES[@]}"
start_workers_agents "${WORKER_NODES[@]}"
if ! wait_for_api 120 2; then
  warn "Kubernetes API did not become reachable in time."
  exit 1
fi

if [[ -n "${FORCE_FLUX_BRANCH}" ]]; then
  run kubectl -n flux-system patch gitrepository flux-system --type=merge \
    -p "{\"spec\":{\"ref\":{\"branch\":\"${FORCE_FLUX_BRANCH}\"}}}"
fi

if [[ "${SKIP_LOCAL_BOOTSTRAP}" -eq 0 ]]; then
  # If source is not ready, bootstrap critical pieces from local checkout first.
  if ! kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null | grep -q True; then
    warn "Flux source not Ready; executing local bootstrap fallback path."
    bootstrap_local_minimal
    if [[ "${SKIP_HARBOR_BOOTSTRAP}" -eq 0 ]]; then
      bootstrap_local_harbor
    else
      warn "Skipping Harbor bootstrap fallback by request."
    fi
  fi
else
  warn "Skipping local bootstrap fallback by request."
fi

resume_flux_and_reconcile
clear_recovery_state
log "Startup flow complete."