diff --git a/scripts/cluster_power_recovery.sh b/scripts/cluster_power_recovery.sh
index ea6319fb..eb8d1451 100755
--- a/scripts/cluster_power_recovery.sh
+++ b/scripts/cluster_power_recovery.sh
@@ -1456,6 +1456,38 @@ wait_for_terminating_running_pods_to_clear() {
   done
 }
 
+# Runs a shell command in the host's namespaces on a node by exec'ing into the
+# Running k3s-agent-restart helper pod scheduled there and entering PID 1's
+# namespaces with nsenter.
+# Globals:   NODE_HELPER_NAMESPACE (read), EXECUTE (read)
+# Arguments: $1 - node name; $2 - command line to run on the host
+# Returns:   1 if no helper pod is found or the command cannot be encoded,
+#            0 in dry-run mode, otherwise the status of `run kubectl exec`.
+run_host_command_via_agent_restart_pod() {
+  local node="$1"
+  local host_command="$2"
+  local pod encoded_command
+  # Best-effort lookup: errors are discarded so that an empty name simply means
+  # "no usable helper pod" and the caller can try another transport.
+  pod="$(kubectl -n "${NODE_HELPER_NAMESPACE}" get pods -l app=k3s-agent-restart --field-selector "spec.nodeName=${node},status.phase=Running" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)"
+  if [[ -z "${pod}" ]]; then
+    return 1
+  fi
+  # base64 keeps the command intact through the nested shell quoting below.
+  # Newlines are stripped with tr because the -w0 flag is a GNU extension that
+  # is not available in every base64 implementation.
+  encoded_command="$(printf '%s' "${host_command}" | base64 | tr -d '\n')" || return 1
+  # Never report success for a payload that failed to encode (or was empty).
+  if [[ -z "${encoded_command}" ]]; then
+    return 1
+  fi
+  if [[ "${EXECUTE}" -eq 0 ]]; then
+    log "DRY-RUN: k3s-agent-restart exec via ${pod} on ${node}"
+    return 0
+  fi
+  run kubectl -n "${NODE_HELPER_NAMESPACE}" exec "${pod}" -- /bin/sh -ceu "HOST_COMMAND=\$(printf '%s' '${encoded_command}' | base64 -d); nsenter --target 1 --mount --uts --ipc --net --pid /bin/sh -ceu \"\${HOST_COMMAND}\""
+}
+
 schedule_host_service_restart_via_helper() {
   local node="$1"
   local service_name="$2"
@@ -1463,6 +1495,10 @@ schedule_host_service_restart_via_helper() {
   local unit_name host_command
   unit_name="ananke-restart-${service_name}-$(date +%s)"
   host_command="/usr/bin/systemd-run --unit ${unit_name} --on-active=${delay_seconds}s /bin/sh -lc '/usr/bin/systemctl restart ${service_name} || /bin/systemctl restart ${service_name}'"
+  # Try the k3s-agent-restart helper pod first; fall through to the prewarm pod.
+  if run_host_command_via_agent_restart_pod "${node}" "${host_command}"; then
+    return 0
+  fi
   if run_host_command_via_prewarm_pod "${node}" "${host_command}"; then
     return 0
   fi