From e3e8a046e46514f7ff0b822e25a58bfb86c2f41b Mon Sep 17 00:00:00 2001 From: jenkins Date: Tue, 19 May 2026 15:51:05 -0300 Subject: [PATCH] ops: stage rpi reservations without auto restart --- .../rpi-resource-reservation-daemonset.yaml | 5 ++++- .../scripts/rpi_resource_reservation.sh | 22 ++++++++++++++----- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/services/maintenance/rpi-resource-reservation-daemonset.yaml b/services/maintenance/rpi-resource-reservation-daemonset.yaml index 58543ed5..85165193 100644 --- a/services/maintenance/rpi-resource-reservation-daemonset.yaml +++ b/services/maintenance/rpi-resource-reservation-daemonset.yaml @@ -15,7 +15,7 @@ spec: labels: app: rpi-resource-reservation annotations: - atlas.bstein.dev/reservation-revision: "2026-05-19-4" + atlas.bstein.dev/reservation-revision: "2026-05-19-5" spec: hostPID: true serviceAccountName: node-nofile @@ -46,6 +46,9 @@ spec: image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 command: ["/usr/bin/env", "bash"] args: ["/scripts/rpi_resource_reservation.sh"] + env: + - name: ATLAS_RPI_AUTO_RESTART + value: "false" resources: requests: cpu: 10m diff --git a/services/maintenance/scripts/rpi_resource_reservation.sh b/services/maintenance/scripts/rpi_resource_reservation.sh index 008bc7b2..7040f064 100644 --- a/services/maintenance/scripts/rpi_resource_reservation.sh +++ b/services/maintenance/scripts/rpi_resource_reservation.sh @@ -10,6 +10,7 @@ kubelet_config_dir="${host_root}/var/lib/rancher/k3s/agent/etc/kubelet.conf.d" kubelet_config_file="${kubelet_config_dir}/90-atlas-rpi-reservations.conf" systemd_override_dir="${host_root}/etc/systemd/system/${unit}.service.d" systemd_override_file="${systemd_override_dir}/90-atlas-rpi-reservations.conf" +auto_restart="${ATLAS_RPI_AUTO_RESTART:-false}" if [ ! -f "${unit_file}" ]; then echo "k3s-agent unit not found; this guardrail only manages worker agents" @@ -74,7 +75,7 @@ rm -f "${kubelet_tmp_file}" override_tmp_file="$(mktemp)" cat > "${override_tmp_file}" <<'EOF' -# Managed by Flux via rpi_resource_reservation.sh revision 2026-05-19-4. +# Managed by Flux via rpi_resource_reservation.sh revision 2026-05-19-5. [Service] UnsetEnvironment=K3S_KUBELET_ARG ExecStart= @@ -96,11 +97,22 @@ fi rm -f "${override_tmp_file}" if [ "${changed}" -eq 1 ]; then - delay="$(( (RANDOM % 420) + 30 ))" - echo "updated RPi kubelet reservations; restarting ${unit} after ${delay}s" - sleep "${delay}" + echo "updated RPi kubelet reservations" nsenter --target 1 --mount --uts --ipc --net --pid -- systemctl daemon-reload - nsenter --target 1 --mount --uts --ipc --net --pid -- systemctl restart "${unit}" + + if [ "${auto_restart}" = "true" ]; then + root_usage="$(df -P "${host_root}" | awk 'NR==2 {gsub(/%/,"",$5); print $5}')" || root_usage="" + if [ -n "${root_usage}" ] && [ "${root_usage}" -ge 80 ]; then + echo "root filesystem is ${root_usage}% full; leaving ${unit} restart to operator" + else + delay="$(( (RANDOM % 420) + 30 ))" + echo "restarting ${unit} after ${delay}s" + sleep "${delay}" + nsenter --target 1 --mount --uts --ipc --net --pid -- systemctl restart "${unit}" + fi + else + echo "auto restart disabled; ${unit} will pick up reservations on the next controlled restart" + fi else echo "${config_file} already up to date" fi