ops: stage rpi reservations without auto restart

This commit is contained in:
jenkins 2026-05-19 15:51:05 -03:00
parent 8806739d3d
commit e3e8a046e4
2 changed files with 21 additions and 6 deletions

View File

@@ -15,7 +15,7 @@ spec:
labels:
app: rpi-resource-reservation
annotations:
atlas.bstein.dev/reservation-revision: "2026-05-19-4"
atlas.bstein.dev/reservation-revision: "2026-05-19-5"
spec:
hostPID: true
serviceAccountName: node-nofile
@@ -46,6 +46,9 @@ spec:
image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
command: ["/usr/bin/env", "bash"]
args: ["/scripts/rpi_resource_reservation.sh"]
env:
- name: ATLAS_RPI_AUTO_RESTART
value: "false"
resources:
requests:
cpu: 10m

View File

@@ -10,6 +10,7 @@ kubelet_config_dir="${host_root}/var/lib/rancher/k3s/agent/etc/kubelet.conf.d"
kubelet_config_file="${kubelet_config_dir}/90-atlas-rpi-reservations.conf"
systemd_override_dir="${host_root}/etc/systemd/system/${unit}.service.d"
systemd_override_file="${systemd_override_dir}/90-atlas-rpi-reservations.conf"
auto_restart="${ATLAS_RPI_AUTO_RESTART:-false}"
if [ ! -f "${unit_file}" ]; then
echo "k3s-agent unit not found; this guardrail only manages worker agents"
@@ -74,7 +75,7 @@ rm -f "${kubelet_tmp_file}"
override_tmp_file="$(mktemp)"
cat > "${override_tmp_file}" <<'EOF'
# Managed by Flux via rpi_resource_reservation.sh revision 2026-05-19-4.
# Managed by Flux via rpi_resource_reservation.sh revision 2026-05-19-5.
[Service]
UnsetEnvironment=K3S_KUBELET_ARG
ExecStart=
@@ -96,11 +97,22 @@ fi
rm -f "${override_tmp_file}"
if [ "${changed}" -eq 1 ]; then
delay="$(( (RANDOM % 420) + 30 ))"
echo "updated RPi kubelet reservations; restarting ${unit} after ${delay}s"
sleep "${delay}"
echo "updated RPi kubelet reservations"
nsenter --target 1 --mount --uts --ipc --net --pid -- systemctl daemon-reload
nsenter --target 1 --mount --uts --ipc --net --pid -- systemctl restart "${unit}"
# Restart gate: the unit is restarted from here only when auto_restart is
# exactly "true" (it is read from ATLAS_RPI_AUTO_RESTART and defaults to
# "false"); any other value just logs that the restart is deferred.
if [ "${auto_restart}" = "true" ]; then
# Read how full the filesystem holding ${host_root} is. `df -P` prints the
# POSIX one-line-per-filesystem format; awk takes field 5 (capacity) of the
# first data row and strips the trailing '%', leaving a bare number.
# The `|| root_usage=""` fallback only fires if the pipeline reports failure.
# NOTE(review): whether a failing `df` is reported depends on `pipefail`,
# which is not visible in this hunk; awk printing nothing yields "" either way.
root_usage="$(df -P "${host_root}" | awk 'NR==2 {gsub(/%/,"",$5); print $5}')" || root_usage=""
# Skip the restart only when usage is known AND at least 80%. An empty
# (unknown) reading fails the -n test and falls through to the restart branch.
if [ -n "${root_usage}" ] && [ "${root_usage}" -ge 80 ]; then
echo "root filesystem is ${root_usage}% full; leaving ${unit} restart to operator"
else
# Random wait of 30..449 seconds before restarting
# (presumably to stagger restarts across nodes; confirm intent).
delay="$(( (RANDOM % 420) + 30 ))"
echo "restarting ${unit} after ${delay}s"
sleep "${delay}"
# Run systemctl inside the mount/uts/ipc/net/pid namespaces of PID 1
# (presumably the host's init, given hostPID: true in the manifest).
nsenter --target 1 --mount --uts --ipc --net --pid -- systemctl restart "${unit}"
fi
else
# No restart is attempted from this branch; it only logs.
echo "auto restart disabled; ${unit} will pick up reservations on the next controlled restart"
fi
else
echo "${config_file} already up to date"
fi