From 56df211261db03dfb23ea3e82b165007c8c6f138 Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Sun, 5 Apr 2026 16:18:54 -0300
Subject: [PATCH] drills: add optional coordinator relay hop for startup tests

When HECATE_COORDINATOR_RELAY is set, it is run on HECATE_COORDINATOR_HOST
and prefixes every coordinator command, so the drills can reach the
coordinator through an extra hop. The startup-intent-guard drill now feeds
its scripts to "bash -se" over stdin via run_coordinator_bash instead of
passing them as a quoted "bash -lc" argument.

---
Reviewer notes (text in this section is ignored by git am):
- The line structure of this patch was reconstructed; indentation (2 spaces)
  and blank context lines are inferred from the hunk line counts. If the
  target file is indented differently, apply with
  "git apply --ignore-whitespace".
- The added lines differ from commit 56df2112 (argument quoting, comments,
  help wording), so the post-image hash on the "index" line is stale.

 scripts/hecate-drills.sh | 43 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 37 insertions(+), 6 deletions(-)

diff --git a/scripts/hecate-drills.sh b/scripts/hecate-drills.sh
index 0e2ba11..611f606 100755
--- a/scripts/hecate-drills.sh
+++ b/scripts/hecate-drills.sh
@@ -5,6 +5,7 @@ KUBECTL="${KUBECTL:-kubectl}"
 HECATE_COORDINATOR_HOST="${HECATE_COORDINATOR_HOST:-titan-db}"
 HECATE_BIN="${HECATE_BIN:-/usr/local/bin/hecate}"
 HECATE_CONFIG="${HECATE_CONFIG:-/etc/hecate/hecate.yaml}"
+HECATE_COORDINATOR_RELAY="${HECATE_COORDINATOR_RELAY:-}"
 LOG_DIR="${HECATE_DRILL_LOG_DIR:-/tmp/hecate-drills}"
 STARTUP_TIMEOUT_SECONDS="${HECATE_DRILL_STARTUP_TIMEOUT_SECONDS:-1800}"
 EXECUTE=0
@@ -24,6 +25,7 @@ Drills:
 Notes:
   - Drills are intentionally disruptive and are not part of regular `make test`.
   - Use --execute to run live changes. Without it, this script prints planned actions only.
+  - Optional relay: set HECATE_COORDINATOR_RELAY="ssh titan-db" to run coordinator commands through an extra hop; HECATE_COORDINATOR_HOST is then the jump host.
 EOF
 }
 
@@ -76,10 +78,39 @@ run_hecate_startup() {
   local reason="$1"
   local cmd=(sudo "${HECATE_BIN}" startup --config "${HECATE_CONFIG}" --execute --force-flux-branch main --reason "${reason}")
   if [[ "${EXECUTE}" -eq 0 ]]; then
-    log "plan: ssh ${HECATE_COORDINATOR_HOST} '${cmd[*]}'"
+    if [[ -n "${HECATE_COORDINATOR_RELAY}" ]]; then
+      log "plan: ssh ${HECATE_COORDINATOR_HOST} ${HECATE_COORDINATOR_RELAY} '${cmd[*]}'"
+    else
+      log "plan: ssh ${HECATE_COORDINATOR_HOST} '${cmd[*]}'"
+    fi
     return 0
   fi
-  timeout "${STARTUP_TIMEOUT_SECONDS}" ssh "${HECATE_COORDINATOR_HOST}" "${cmd[@]}"
+  # ssh joins its arguments with spaces and the remote shell re-parses the
+  # result, so shell-quote each argument to keep values such as a reason
+  # containing spaces intact.
+  local remote_cmd
+  printf -v remote_cmd '%q ' "${cmd[@]}"
+  if [[ -n "${HECATE_COORDINATOR_RELAY}" ]]; then
+    # With a relay the command is parsed by two shells in turn (assuming an
+    # ssh-style relay such as "ssh titan-db"), so add a second layer of
+    # quoting.
+    timeout "${STARTUP_TIMEOUT_SECONDS}" ssh "${HECATE_COORDINATOR_HOST}" "${HECATE_COORDINATOR_RELAY} ${remote_cmd@Q}"
+  else
+    timeout "${STARTUP_TIMEOUT_SECONDS}" ssh "${HECATE_COORDINATOR_HOST}" "${remote_cmd}"
+  fi
+}
+
+# Run a bash script on the coordinator, through HECATE_COORDINATOR_RELAY when set.
+# The script ($1) is streamed over stdin to "bash -se", so it needs no extra
+# quoting per hop; -e stops at the first failing command. Commands inside the
+# script share that stdin and must not read from it.
+run_coordinator_bash() {
+  local script="$1"
+  if [[ -n "${HECATE_COORDINATOR_RELAY}" ]]; then
+    printf '%s\n' "${script}" | ssh "${HECATE_COORDINATOR_HOST}" "${HECATE_COORDINATOR_RELAY} bash -se"
+  else
+    printf '%s\n' "${script}" | ssh "${HECATE_COORDINATOR_HOST}" "bash -se"
+  fi
 }
 
 declare -A SNAPSHOT_REPLICAS=()
@@ -299,12 +330,12 @@ fi
     return 0
   fi
 
-  ssh "${HECATE_COORDINATOR_HOST}" "bash -lc ${inject_cmd@Q}"
-  if ssh "${HECATE_COORDINATOR_HOST}" "bash -lc ${startup_cmd@Q}"; then
-    ssh "${HECATE_COORDINATOR_HOST}" "bash -lc ${restore_cmd@Q}" || true
+  run_coordinator_bash "${inject_cmd}"
+  if run_coordinator_bash "${startup_cmd}"; then
+    run_coordinator_bash "${restore_cmd}" || true
     die "startup-intent-guard failed: startup unexpectedly succeeded while shutdown intent was active"
   fi
-  ssh "${HECATE_COORDINATOR_HOST}" "bash -lc ${restore_cmd@Q}"
+  run_coordinator_bash "${restore_cmd}"
   log "pass: startup-intent-guard"
 }
 