titan-iac/services/maintenance/scripts/rpi_resource_reservation.sh
2026-05-19 13:35:56 -03:00

84 lines
2.5 KiB
Bash

#!/usr/bin/env bash
# Writes kubelet resource-reservation and eviction settings for the k3s-agent
# unit of a host that is addressed through the /host prefix, restarts the agent
# when those settings changed, and then idles forever.
set -euo pipefail

# Prefix under which every host path below is resolved.
host_root="/host"

# systemd unit managed by this script, and the unit file that proves it exists.
unit="k3s-agent"
unit_file="${host_root}/etc/systemd/system/${unit}.service"

# k3s configuration drop-in that carries the kubelet-arg list.
config_dir="${host_root}/etc/rancher/k3s/config.yaml.d"
config_file="${config_dir}/90-atlas-rpi-reservations.yaml"

# KubeletConfiguration drop-in that carries the same values.
kubelet_config_dir="${host_root}/var/lib/rancher/k3s/agent/etc/kubelet.conf.d"
kubelet_config_file="${kubelet_config_dir}/90-atlas-rpi-reservations.conf"

# Hosts without the agent unit are not managed: log once and park the process
# instead of exiting.
[[ -f "${unit_file}" ]] || {
  echo "k3s-agent unit not found; this guardrail only manages worker agents"
  sleep infinity
}
# Render the desired k3s drop-in into a scratch file so it can be compared
# byte-for-byte with whatever is currently installed.
tmp_file="$(mktemp)"
# With `set -e` a failing mkdir/install aborts the script before the explicit
# rm below is reached; the EXIT trap keeps the scratch file from being left
# behind in that case.
trap 'rm -f -- "${tmp_file}"' EXIT
cat > "${tmp_file}" <<'EOF'
# Managed by Flux via services/maintenance/scripts/rpi_resource_reservation.sh.
# Keep RPi workers below saturation so kubelet and the OS keep enough headroom
# to evict or recover before the board wedges.
kubelet-arg:
- "system-reserved=cpu=250m,memory=384Mi,ephemeral-storage=1Gi"
- "kube-reserved=cpu=150m,memory=256Mi,ephemeral-storage=1Gi"
- "eviction-hard=memory.available<512Mi,nodefs.available<10%,imagefs.available<10%"
- "eviction-soft=memory.available<768Mi,nodefs.available<15%,imagefs.available<15%"
- "eviction-soft-grace-period=memory.available=1m,nodefs.available=2m,imagefs.available=2m"
- "eviction-max-pod-grace-period=60"
EOF

# `changed` is read further down to decide whether the agent must be restarted.
changed=0
# Only touch the installed file when it is missing or its content differs, so
# repeated runs do not trigger needless restarts.
if [[ ! -f "${config_file}" ]] || ! cmp -s "${tmp_file}" "${config_file}"; then
  mkdir -p "${config_dir}"
  install -m 0644 "${tmp_file}" "${config_file}"
  changed=1
fi

# Normal-path cleanup; the trap is cleared because the file is already gone.
rm -f -- "${tmp_file}"
trap - EXIT
# Render the desired KubeletConfiguration drop-in into a scratch file so it can
# be compared byte-for-byte with whatever is currently installed.
kubelet_tmp_file="$(mktemp)"
# With `set -e` a failing mkdir/install aborts the script before the explicit
# rm below is reached; the EXIT trap keeps the scratch file from being left
# behind in that case.
trap 'rm -f -- "${kubelet_tmp_file}"' EXIT
# The resource and eviction entries must be nested under their parent keys:
# written flat, the parents are empty and the children become unknown (and
# repeated) top-level fields of the KubeletConfiguration document.
cat > "${kubelet_tmp_file}" <<'EOF'
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
systemReserved:
  cpu: 250m
  memory: 384Mi
  ephemeral-storage: 1Gi
kubeReserved:
  cpu: 150m
  memory: 256Mi
  ephemeral-storage: 1Gi
evictionHard:
  memory.available: 512Mi
  nodefs.available: 10%
  imagefs.available: 10%
evictionSoft:
  memory.available: 768Mi
  nodefs.available: 15%
  imagefs.available: 15%
evictionSoftGracePeriod:
  memory.available: 1m
  nodefs.available: 2m
  imagefs.available: 2m
evictionMaxPodGracePeriod: 60
EOF

# Only touch the installed file when it is missing or its content differs;
# `changed` is read further down to decide whether the agent must be restarted.
if [[ ! -f "${kubelet_config_file}" ]] || ! cmp -s "${kubelet_tmp_file}" "${kubelet_config_file}"; then
  mkdir -p "${kubelet_config_dir}"
  install -m 0644 "${kubelet_tmp_file}" "${kubelet_config_file}"
  changed=1
fi

# Normal-path cleanup; the trap is cleared because the file is already gone.
rm -f -- "${kubelet_tmp_file}"
trap - EXIT
# A rewritten drop-in only takes effect once the agent has been restarted, and
# the restart happens after a long sleep. If this process is stopped during
# that sleep, or systemctl fails and `set -e` aborts, the next run finds the
# files already up to date and would never restart the agent. A marker file
# next to the drop-in records that a restart is still owed.
# NOTE(review): k3s is expected to load only *.yaml/*.yml from config.yaml.d,
# so this marker should be inert there — confirm against the k3s version in use.
restart_marker="${config_dir}/.90-atlas-rpi-reservations.restart-pending"

if [[ "${changed}" -eq 1 || -e "${restart_marker}" ]]; then
  # Persist the pending restart before sleeping so it survives an interruption.
  : > "${restart_marker}"
  # Random delay of 30-449 seconds; presumably spreads restarts across nodes
  # so they do not all bounce at once.
  delay="$(( (RANDOM % 420) + 30 ))"
  echo "updated RPi kubelet reservations; restarting ${unit} after ${delay}s"
  sleep "${delay}"
  # systemctl is run from inside the host root so it acts on the host's systemd.
  chroot "${host_root}" /bin/systemctl daemon-reload
  chroot "${host_root}" /bin/systemctl restart "${unit}"
  # Reached only when both systemctl calls succeeded.
  rm -f -- "${restart_marker}"
else
  echo "${config_file} already up to date"
fi

# Keep the process alive after the work is done instead of exiting.
sleep infinity