recovery(ananke): use resident restart helper

This commit is contained in:
jenkins 2026-06-18 18:04:11 -03:00
parent 0f58aa16a9
commit 8c45f9509e

View File

@ -1456,6 +1456,22 @@ wait_for_terminating_running_pods_to_clear() {
done done
} }
#######################################
# Run a command line on a node through the resident k3s-agent-restart pod.
#
# Finds a Running pod labelled app=k3s-agent-restart scheduled on the node,
# then `kubectl exec`s into it and uses `nsenter --target 1` to run the
# command with /bin/sh -ceu inside the namespaces of PID 1.
# NOTE(review): this only reaches the host if the helper pod shares the host
# PID namespace and is privileged enough for nsenter - confirm in its manifest.
#
# Globals:
#   NODE_HELPER_NAMESPACE (read) - namespace searched for the helper pod
#   EXECUTE (read)               - 0 means dry-run: log only, do not exec
# Arguments:
#   $1 - node name the helper pod must be scheduled on
#   $2 - command line to run on the node
# Returns:
#   1 if no matching pod is found or the command could not be encoded,
#   0 in dry-run mode, otherwise whatever the `run kubectl exec` call returns.
#######################################
run_host_command_via_agent_restart_pod() {
  local node="$1"
  local host_command="$2"
  local pod encoded_command

  # Lookup failures (API error, or jsonpath failing on an empty item list) are
  # deliberately folded into "no pod" so the caller can try another helper.
  pod="$(kubectl -n "${NODE_HELPER_NAMESPACE}" get pods -l app=k3s-agent-restart --field-selector "spec.nodeName=${node},status.phase=Running" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)"
  if [[ -z "${pod}" ]]; then
    return 1
  fi

  # Base64 keeps the command free of quoting hazards while it is embedded in
  # the single-quoted remote script below. `tr -d '\n'` replaces the GNU-only
  # `base64 -w0` and yields the same single-line output.
  encoded_command="$(printf '%s' "${host_command}" | base64 | tr -d '\n')" || return 1
  # An empty payload means encoding failed (or there was nothing to run).
  # Executing it would be a remote no-op that still exits 0, which would make
  # the caller believe the command ran and skip its fallback path.
  if [[ -z "${encoded_command}" ]]; then
    return 1
  fi

  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "DRY-RUN: k3s-agent-restart exec via ${pod} on ${node}"
    return 0
  fi

  # \$ and \" are escaped so that decoding and expansion of HOST_COMMAND
  # happen in the pod's shell, not in this one.
  run kubectl -n "${NODE_HELPER_NAMESPACE}" exec "${pod}" -- /bin/sh -ceu "HOST_COMMAND=\$(printf '%s' '${encoded_command}' | base64 -d); nsenter --target 1 --mount --uts --ipc --net --pid /bin/sh -ceu \"\${HOST_COMMAND}\""
}
schedule_host_service_restart_via_helper() { schedule_host_service_restart_via_helper() {
local node="$1" local node="$1"
local service_name="$2" local service_name="$2"
@ -1463,6 +1479,9 @@ schedule_host_service_restart_via_helper() {
local unit_name host_command local unit_name host_command
unit_name="ananke-restart-${service_name}-$(date +%s)" unit_name="ananke-restart-${service_name}-$(date +%s)"
host_command="/usr/bin/systemd-run --unit ${unit_name} --on-active=${delay_seconds}s /bin/sh -lc '/usr/bin/systemctl restart ${service_name} || /bin/systemctl restart ${service_name}'" host_command="/usr/bin/systemd-run --unit ${unit_name} --on-active=${delay_seconds}s /bin/sh -lc '/usr/bin/systemctl restart ${service_name} || /bin/systemctl restart ${service_name}'"
if run_host_command_via_agent_restart_pod "${node}" "${host_command}"; then
return 0
fi
if run_host_command_via_prewarm_pod "${node}" "${host_command}"; then if run_host_command_via_prewarm_pod "${node}" "${host_command}"; then
return 0 return 0
fi fi