drills: add optional coordinator relay hop for startup tests

This commit is contained in:
Brad Stein 2026-04-05 16:18:54 -03:00
parent 73b1c2063b
commit 56df211261

View File

@ -5,6 +5,7 @@ KUBECTL="${KUBECTL:-kubectl}"
# ssh destination used for every coordinator command.
HECATE_COORDINATOR_HOST="${HECATE_COORDINATOR_HOST:-titan-db}"
# Path of the hecate binary that is run (via sudo) on the remote side for startup.
HECATE_BIN="${HECATE_BIN:-/usr/local/bin/hecate}"
# Config file handed to hecate through --config.
HECATE_CONFIG="${HECATE_CONFIG:-/etc/hecate/hecate.yaml}"
# Optional command prefix (e.g. "ssh titan-db") inserted between the ssh
# destination and the remote command; empty (the default) means no extra hop.
HECATE_COORDINATOR_RELAY="${HECATE_COORDINATOR_RELAY:-}"
# Overridable through HECATE_DRILL_LOG_DIR.
# NOTE(review): presumably where drill logs are written — usage is not visible here; confirm.
LOG_DIR="${HECATE_DRILL_LOG_DIR:-/tmp/hecate-drills}"
# Limit, in seconds, given to `timeout` around the remote hecate startup call.
STARTUP_TIMEOUT_SECONDS="${HECATE_DRILL_STARTUP_TIMEOUT_SECONDS:-1800}"
# 0 = only log the planned actions; non-zero = run them for real.
# NOTE(review): presumably switched on by the --execute flag — confirm in the argument parser.
EXECUTE=0
@ -24,6 +25,7 @@ Drills:
Notes:
- Drills are intentionally disruptive and are not part of regular `make test`.
- Use --execute to run live changes. Without it, this script prints planned actions only.
- Optional relay: set HECATE_COORDINATOR_RELAY="ssh titan-db" to run coordinator commands via a jump host.
EOF
}
@ -76,10 +78,29 @@ run_hecate_startup() {
local reason="$1"
local cmd=(sudo "${HECATE_BIN}" startup --config "${HECATE_CONFIG}" --execute --force-flux-branch main --reason "${reason}")
if [[ "${EXECUTE}" -eq 0 ]]; then
if [[ -n "${HECATE_COORDINATOR_RELAY}" ]]; then
log "plan: ssh ${HECATE_COORDINATOR_HOST} ${HECATE_COORDINATOR_RELAY} '${cmd[*]}'"
else
log "plan: ssh ${HECATE_COORDINATOR_HOST} '${cmd[*]}'"
fi
return 0
fi
if [[ -n "${HECATE_COORDINATOR_RELAY}" ]]; then
# shellcheck disable=SC2086
timeout "${STARTUP_TIMEOUT_SECONDS}" ssh "${HECATE_COORDINATOR_HOST}" ${HECATE_COORDINATOR_RELAY} "${cmd[@]}"
else
timeout "${STARTUP_TIMEOUT_SECONDS}" ssh "${HECATE_COORDINATOR_HOST}" "${cmd[@]}"
fi
}
#######################################
# Run a bash script on the coordinator by streaming it to a remote `bash -se`.
# Globals:
#   HECATE_COORDINATOR_HOST  (read) ssh destination for the first hop
#   HECATE_COORDINATOR_RELAY (read) optional whitespace-separated command
#                            prefix (e.g. "ssh titan-db") placed between the
#                            destination and `bash -se`; empty = no extra hop
# Arguments:
#   $1 - script text; written to the remote shell's stdin plus a trailing newline
# Returns:
#   Exit status of the `printf | ssh` pipeline.
#######################################
run_coordinator_bash() {
  local script="$1"
  local -a ssh_args=("${HECATE_COORDINATOR_HOST}")
  local -a relay_words=()

  if [[ -n "${HECATE_COORDINATOR_RELAY}" ]]; then
    # Split the relay into words with `read -a` instead of an unquoted
    # expansion: same IFS splitting, but glob characters in the relay are never
    # expanded against the local working directory.
    # `-d ''` consumes the whole value (not just its first line); read then
    # reports EOF with a non-zero status even though the array is populated,
    # hence the `|| true`.
    read -r -d '' -a relay_words <<<"${HECATE_COORDINATOR_RELAY}" || true
    # The `+` guard keeps an all-whitespace relay from tripping `set -u` on
    # older bash releases that treat an empty array as unset.
    ssh_args+=(${relay_words[@]+"${relay_words[@]}"})
  fi

  printf '%s\n' "${script}" | ssh "${ssh_args[@]}" "bash -se"
}
declare -A SNAPSHOT_REPLICAS=()
@ -299,12 +320,12 @@ fi
return 0
fi
ssh "${HECATE_COORDINATOR_HOST}" "bash -lc ${inject_cmd@Q}"
if ssh "${HECATE_COORDINATOR_HOST}" "bash -lc ${startup_cmd@Q}"; then
ssh "${HECATE_COORDINATOR_HOST}" "bash -lc ${restore_cmd@Q}" || true
run_coordinator_bash "${inject_cmd}"
if run_coordinator_bash "${startup_cmd}"; then
run_coordinator_bash "${restore_cmd}" || true
die "startup-intent-guard failed: startup unexpectedly succeeded while shutdown intent was active"
fi
ssh "${HECATE_COORDINATOR_HOST}" "bash -lc ${restore_cmd@Q}"
run_coordinator_bash "${restore_cmd}"
log "pass: startup-intent-guard"
}