#!/usr/bin/env bash
#
# Reconciles a USB "scratch" disk on a k3s agent node: resolves which block
# device to use, maintains a managed block in the host's /etc/fstab, installs
# a k3s-agent start guard, and bind-mounts write-heavy paths onto the disk.
#
# Required env: NODE_NAME. Optional env: see the defaults assigned below.
set -euo pipefail

NODE_NAME=${NODE_NAME:?NODE_NAME is required}
HOST_ROOT=${HOST_ROOT:-/host}
CONFIG_ENV=${CONFIG_ENV:-/config/usb_scratch.env}
INVENTORY_FILE=${INVENTORY_FILE:-/config/usb_scratch_inventory.tsv}
FSTAB_PATH="${HOST_ROOT}/etc/fstab"
STATE_DIR="${HOST_ROOT}/var/lib/maintenance/pi-usb-scratch"
MANAGED_BEGIN="# BEGIN maintenance.bstein.dev usb-scratch"
MANAGED_END="# END maintenance.bstein.dev usb-scratch"
ONE_SHOT=${ONE_SHOT:-false}
DEFAULT_PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"

# Assigns every USB_SCRATCH_* tunable its default when it is unset or empty.
# Called once at start-up and again by load_config after the env file has
# been sourced, so values from the environment or the file take precedence.
apply_usb_scratch_defaults() {
  USB_SCRATCH_DEFAULT_ENABLED=${USB_SCRATCH_DEFAULT_ENABLED:-true}
  USB_SCRATCH_DEFAULT_LABEL=${USB_SCRATCH_DEFAULT_LABEL:-}
  USB_SCRATCH_DEFAULT_FSTYPE=${USB_SCRATCH_DEFAULT_FSTYPE:-ext4}
  USB_SCRATCH_MOUNTPOINT=${USB_SCRATCH_MOUNTPOINT:-/mnt/astraios}
  USB_SCRATCH_ENFORCE_TMPFS_TMP=${USB_SCRATCH_ENFORCE_TMPFS_TMP:-true}
  USB_SCRATCH_AUTO_SELECT_REMOVABLE=${USB_SCRATCH_AUTO_SELECT_REMOVABLE:-true}
  USB_SCRATCH_AUTO_MIN_SIZE_GIB=${USB_SCRATCH_AUTO_MIN_SIZE_GIB:-50}
  USB_SCRATCH_AUTO_FORMAT_REMOVABLE=${USB_SCRATCH_AUTO_FORMAT_REMOVABLE:-true}
  USB_SCRATCH_AUTO_FORMAT_LABEL=${USB_SCRATCH_AUTO_FORMAT_LABEL:-astraios}
  USB_SCRATCH_SEED_K3S_AGENT_DIRS=${USB_SCRATCH_SEED_K3S_AGENT_DIRS:-false}
  USB_SCRATCH_REQUIRED_FREE_GIB=${USB_SCRATCH_REQUIRED_FREE_GIB:-20}
  USB_SCRATCH_RECONCILE_INTERVAL_SEC=${USB_SCRATCH_RECONCILE_INTERVAL_SEC:-900}
  USB_SCRATCH_CUTOVER_JITTER_MAX_SEC=${USB_SCRATCH_CUTOVER_JITTER_MAX_SEC:-900}
}
apply_usb_scratch_defaults

# Host paths that get bind-mounted from ${USB_SCRATCH_MOUNTPOINT}<path>.
TARGET_PATHS=(
  "/var/log/pods"
  "/var/log/containers"
  "/var/lib/rancher/k3s/agent/containerd"
  "/var/lib/rancher/k3s/agent/kubelet"
  "/var/lib/rancher/k3s/agent/images"
  "/var/tmp"
)

# 1 while k3s-agent has been stopped by this script and not yet restarted.
agent_stopped=0

# Prints a UTC-timestamped message on stdout.
log() {
  printf '[%s] %s\n' "$(date -u +%FT%TZ)" "$*"
}

# Echoes $1 with spaces turned into underscores and every character outside
# [:alnum:] . _ : / = - removed.
sanitize_annotation_value() {
  printf '%s' "$1" | tr ' ' '_' | tr -cd '[:alnum:]._:/=-'
}

# Best-effort: records the reconcile outcome as node annotations, under both
# the astraios-* and usb-scratch-* key families. kubectl failures are ignored.
# Arguments: $1 status, $2 detail, $3 selector.
annotate_node() {
  local status="$1"
  local detail="$2"
  local selector="$3"
  local timestamp safe_status safe_detail safe_selector safe_mountpoint safe_paths
  local prefix
  local -a annotations=()

  timestamp="$(date -u +%FT%TZ)"
  safe_status="$(sanitize_annotation_value "${status}")"
  safe_detail="$(sanitize_annotation_value "${detail}")"
  safe_selector="$(sanitize_annotation_value "${selector}")"
  safe_mountpoint="$(sanitize_annotation_value "${USB_SCRATCH_MOUNTPOINT}")"
  safe_paths="$(sanitize_annotation_value "${TARGET_PATHS[*]}")"

  for prefix in astraios usb-scratch; do
    annotations+=(
      "maintenance.bstein.dev/${prefix}-status=${safe_status}"
      "maintenance.bstein.dev/${prefix}-detail=${safe_detail}"
      "maintenance.bstein.dev/${prefix}-selector=${safe_selector}"
      "maintenance.bstein.dev/${prefix}-mountpoint=${safe_mountpoint}"
      "maintenance.bstein.dev/${prefix}-managed-paths=${safe_paths}"
      "maintenance.bstein.dev/${prefix}-last-apply=${timestamp}"
    )
  done

  kubectl annotate --overwrite node "${NODE_NAME}" "${annotations[@]}" \
    >/dev/null 2>&1 || true
}

# Runs the shell snippet $1 with `/bin/sh -ceu` on the host: through nsenter
# into PID 1's namespaces when an nsenter binary is available (container PATH
# first, then the host's copies under HOST_ROOT), otherwise through chroot
# into HOST_ROOT. PATH is forced to DEFAULT_PATH inside the snippet.
host_sh() {
  local snippet="PATH=${DEFAULT_PATH}; $1"
  local candidate nsenter_bin=""

  if command -v nsenter >/dev/null 2>&1; then
    nsenter_bin="nsenter"
  else
    for candidate in "${HOST_ROOT}/usr/bin/nsenter" "${HOST_ROOT}/bin/nsenter"; do
      if [ -x "${candidate}" ]; then
        nsenter_bin="${candidate}"
        break
      fi
    done
  fi

  if [ -n "${nsenter_bin}" ]; then
    "${nsenter_bin}" -t 1 -m -u -i -n -p -- /bin/sh -ceu "${snippet}"
  else
    chroot "${HOST_ROOT}" /bin/sh -ceu "${snippet}"
  fi
}

# EXIT trap: restarts k3s-agent if the script exits while it is stopped.
cleanup() {
  if [ "${agent_stopped}" -eq 1 ]; then
    log "starting k3s-agent after interrupted cutover"
    host_sh "systemctl start k3s-agent || true"
    agent_stopped=0
  fi
}
trap cleanup EXIT

# Sources CONFIG_ENV when it exists, then re-applies defaults for anything
# the file left unset or empty.
load_config() {
  if [ -f "${CONFIG_ENV}" ]; then
    # shellcheck disable=SC1090
    . "${CONFIG_ENV}"
  fi
  apply_usb_scratch_defaults
}

# Prints the first non-comment inventory row (>= 4 fields) whose first field
# equals NODE_NAME; prints nothing when the file or the row is missing.
lookup_inventory() {
  local line=""
  if [ -f "${INVENTORY_FILE}" ]; then
    line="$(awk -v node="${NODE_NAME}" \
      'NF >= 4 && $1 !~ /^#/ && $1 == node { print; exit }' \
      "${INVENTORY_FILE}" || true)"
  fi
  printf '%s' "${line}"
}

# Prints file $1 without the MANAGED_BEGIN..MANAGED_END block (markers included).
strip_managed_block() {
  local source_file="$1"
  awk -v begin="${MANAGED_BEGIN}" -v end="${MANAGED_END}" '
    $0 == begin { skip=1; next }
    $0 == end { skip=0; next }
    skip != 1 { print }
  ' "${source_file}"
}

# Rebuilds the managed fstab block (scratch mount, optional tmpfs /tmp, one
# bind mount per TARGET_PATHS entry) and installs it when it differs.
# Arguments: $1 fstab device selector, $2 filesystem type.
# Returns:   0 when FSTAB_PATH was rewritten, 1 when it was already current.
ensure_fstab_block() {
  local selector="$1"
  local fstype="$2"
  local tmp_base tmp_candidate bind_source target

  mkdir -p "${STATE_DIR}"
  tmp_base="${STATE_DIR}/fstab.base"
  tmp_candidate="${STATE_DIR}/fstab.candidate"

  strip_managed_block "${FSTAB_PATH}" > "${tmp_base}"
  if [ "${USB_SCRATCH_ENFORCE_TMPFS_TMP}" = "true" ]; then
    # Drop any pre-existing /tmp entry; the managed block supplies its own.
    awk '$1 ~ /^#/ || $2 != "/tmp" { print }' "${tmp_base}" > "${tmp_base}.tmpfs"
    mv "${tmp_base}.tmpfs" "${tmp_base}"
  fi

  cp "${tmp_base}" "${tmp_candidate}"
  {
    printf '%s\n' "${MANAGED_BEGIN}"
    printf '%s %s %s defaults,noatime,lazytime,commit=60,x-systemd.device-timeout=15s,x-systemd.mount-timeout=30s 0 2\n' \
      "${selector}" "${USB_SCRATCH_MOUNTPOINT}" "${fstype}"
    if [ "${USB_SCRATCH_ENFORCE_TMPFS_TMP}" = "true" ]; then
      printf '%s\n' 'tmpfs /tmp tmpfs defaults,nosuid,nodev,mode=1777 0 0'
    fi
    for target in "${TARGET_PATHS[@]}"; do
      bind_source="${USB_SCRATCH_MOUNTPOINT}${target}"
      printf '%s %s none bind,x-systemd.requires-mounts-for=%s 0 0\n' \
        "${bind_source}" "${target}" "${USB_SCRATCH_MOUNTPOINT}"
    done
    printf '%s\n' "${MANAGED_END}"
  } >> "${tmp_candidate}"

  if ! cmp -s "${FSTAB_PATH}" "${tmp_candidate}"; then
    cp "${tmp_candidate}" "${FSTAB_PATH}"
    log "updated ${FSTAB_PATH} managed block"
    return 0
  fi
  return 1
}

# Writes the mount-verification script and the k3s-agent systemd drop-in.
# Returns: 0 when the drop-in was (re)written, 1 when it was already current
#          (the caller runs daemon-reload on 0).
#
# NOTE(review): in the copy of this file that was reviewed, the here-document
# bodies of this function had been stripped. Confirm against the deployed
# version before relying on the reconstructed parts:
#   - the guard script header (interpreter line and `set` options);
#   - the `src=` probe (taken from find_existing_mount_source's findmnt call);
#   - the drop-in contents (built from requires_mounts and verify_cmd);
#   - the compare-and-install tail (mirrors ensure_fstab_block).
# The per-target comparison and its error message are original.
ensure_k3s_agent_guard() {
  local dropin_dir dropin_file guard_dir guard_file target verify_cmd requires_mounts
  local tmp_dropin

  dropin_dir="${HOST_ROOT}/etc/systemd/system/k3s-agent.service.d"
  dropin_file="${dropin_dir}/20-astraios-guard.conf"
  guard_dir="${HOST_ROOT}/usr/local/lib/maintenance"
  guard_file="${guard_dir}/verify_astraios_mounts.sh"
  tmp_dropin="${STATE_DIR}/k3s-agent-astraios-dropin.conf"
  mkdir -p "${dropin_dir}" "${guard_dir}" "${STATE_DIR}"

  # bash, because the generated checks use [[ ... ]].
  cat > "${guard_file}" <<EOF
#!/usr/bin/env bash
set -euo pipefail
EOF
  # One check per managed path. \$ is expanded when the guard runs; the
  # unescaped \${...} values are baked in now.
  for target in "${TARGET_PATHS[@]}"; do
    cat >> "${guard_file}" <<EOF
src=\$(findmnt -T '${target}' -n -o SOURCE 2>/dev/null || true)
if [[ "\${src}" != '${USB_SCRATCH_MOUNTPOINT}${target}' ]]; then
  echo "astraios guard: ${target} is not bound to ${USB_SCRATCH_MOUNTPOINT}${target}" >&2
  exit 1
fi
EOF
  done
  chmod 0755 "${guard_file}"

  requires_mounts="${USB_SCRATCH_MOUNTPOINT}"
  for target in "${TARGET_PATHS[@]}"; do
    requires_mounts="${requires_mounts} ${target}"
  done
  # Path of the guard script as seen from the host (HOST_ROOT prefix removed).
  verify_cmd="${guard_file#"${HOST_ROOT}"}"

  cat > "${tmp_dropin}" <<EOF
[Unit]
RequiresMountsFor=${requires_mounts}

[Service]
ExecStartPre=${verify_cmd}
EOF

  if ! cmp -s "${tmp_dropin}" "${dropin_file}"; then
    cp "${tmp_dropin}" "${dropin_file}"
    return 0
  fi
  return 1
}

# Succeeds when the filesystem backing the host's /tmp is tmpfs.
# NOTE(review): the head of this function was stripped together with the
# here-documents above; the findmnt call is reconstructed from the surviving
# tail and from ensure_tmp_tmpfs_live.
tmp_is_tmpfs() {
  local fstype
  fstype="$(host_sh "findmnt -T /tmp -n -o FSTYPE 2>/dev/null || true")"
  [ "${fstype}" = "tmpfs" ]
}

# Mounts a tmpfs on the host's /tmp unless /tmp is already backed by tmpfs.
# Tries the fstab entry first and falls back to an explicit tmpfs mount.
ensure_tmp_tmpfs_live() {
  host_sh "mkdir -p /tmp; chmod 1777 /tmp; fstype=\$(findmnt -T /tmp -n -o FSTYPE 2>/dev/null || true); if [ \"\${fstype}\" != \"tmpfs\" ]; then mount /tmp 2>/dev/null || mount -t tmpfs -o defaults,nosuid,nodev,mode=1777 tmpfs /tmp; fi"
}

# Prints the findmnt SOURCE of host path $1 when it is a mountpoint; prints
# nothing otherwise.
find_existing_mount_source() {
  local target="$1"
  host_sh "if mountpoint -q '${target}'; then findmnt -T '${target}' -n -o SOURCE 2>/dev/null || true; fi"
}
# Prints the first removable (RM=1) partition on the host whose size is at
# least USB_SCRATCH_AUTO_MIN_SIZE_GIB GiB; prints nothing when none matches.
auto_discover_removable_partition() {
  local min_bytes
  min_bytes=$(( USB_SCRATCH_AUTO_MIN_SIZE_GIB * 1024 * 1024 * 1024 ))
  host_sh "lsblk -brnpo NAME,TYPE,SIZE,RM | awk '\$2==\"part\" && \$4==\"1\" && \$3>=${min_bytes} {print \$1; exit}'"
}

# DESTRUCTIVE: unmounts device $1 if it is mounted, wipes its signatures and
# creates an ext4 filesystem labelled $2.
# Returns: non-zero when $1 is empty or any host command fails.
format_device_ext4() {
  local device="$1"
  local label="$2"
  # Never hand an empty device path to wipefs/mkfs.
  if [ -z "${device}" ]; then
    return 1
  fi
  host_sh "mountpoint=\$(findmnt -S '${device}' -n -o TARGET 2>/dev/null || true); if [ -n \"\${mountpoint}\" ]; then umount \"\${mountpoint}\"; fi; wipefs -a '${device}'; mkfs.ext4 -F -L '${label}' '${device}'"
}

# Decides which device backs the scratch mount, in this order of precedence:
# the node's inventory row, then USB_SCRATCH_DEFAULT_LABEL, then
# auto-discovery of a removable partition.
#
# Globals written (all reset on every call, so nothing leaks from a previous
# reconcile cycle):
#   SELECTOR_KIND        disabled | missing | invalid | fs-mismatch |
#                        uuid | label | device | auto
#   SELECTOR_VALUE       configured value (the selector on fs-mismatch)
#   SELECTOR_SPEC        fstab device field (UUID=..., LABEL=..., or a path)
#   SELECTOR_MATCH_KIND  configured kind, once a lookup was attempted
#   DEVICE_PATH          resolved block device, empty when not present
#   DEVICE_FSTYPE        expected fstype (actual fstype on fs-mismatch)
#   DEVICE_UUID, DEVICE_LABEL  blkid values, set on a successful resolve only
# Returns: always 0; the outcome is reported through SELECTOR_KIND.
resolve_selector() {
  local inventory_line enabled kind value fstype
  local actual_device actual_fstype actual_uuid actual_label selector expected_fstype

  SELECTOR_KIND=""
  SELECTOR_VALUE=""
  SELECTOR_SPEC=""
  SELECTOR_MATCH_KIND=""
  DEVICE_PATH=""
  DEVICE_FSTYPE=""
  DEVICE_UUID=""
  DEVICE_LABEL=""

  inventory_line="$(lookup_inventory)"
  enabled="${USB_SCRATCH_DEFAULT_ENABLED}"
  kind=""
  value=""
  fstype="${USB_SCRATCH_DEFAULT_FSTYPE}"

  if [ -n "${inventory_line}" ]; then
    # Row layout: node enabled kind value fstype [ignored...]
    read -r _ enabled kind value fstype _ <<<"${inventory_line}"
  elif [ -n "${USB_SCRATCH_DEFAULT_LABEL}" ]; then
    kind="label"
    value="${USB_SCRATCH_DEFAULT_LABEL}"
  elif [ "${USB_SCRATCH_AUTO_SELECT_REMOVABLE}" = "true" ]; then
    kind="auto"
    value="removable-${USB_SCRATCH_AUTO_MIN_SIZE_GIB}Gi-plus"
  fi

  DEVICE_FSTYPE="${fstype}"

  if [ "${enabled}" != "true" ]; then
    SELECTOR_KIND="disabled"
    return 0
  fi

  if [ -z "${kind}" ] || [ -z "${value}" ]; then
    SELECTOR_KIND="missing"
    return 0
  fi

  case "${kind}" in
    uuid)
      selector="UUID=${value}"
      actual_device="$(host_sh "blkid -U '${value}' 2>/dev/null || true")"
      ;;
    label)
      selector="LABEL=${value}"
      actual_device="$(host_sh "blkid -L '${value}' 2>/dev/null || true")"
      ;;
    device)
      selector="${value}"
      actual_device="$(host_sh "if [ -b '${value}' ]; then printf '%s' '${value}'; fi")"
      ;;
    auto)
      actual_device="$(auto_discover_removable_partition)"
      selector="${actual_device}"
      ;;
    *)
      SELECTOR_KIND="invalid"
      SELECTOR_VALUE="${value}"
      return 0
      ;;
  esac

  SELECTOR_MATCH_KIND="${kind}"

  # Auto-discovery has no selector to fall back on when nothing is plugged in.
  if [ "${kind}" = "auto" ] && [ -z "${actual_device}" ]; then
    SELECTOR_KIND="missing"
    SELECTOR_VALUE="${value}"
    return 0
  fi

  actual_fstype=""
  actual_uuid=""
  actual_label=""
  if [ -n "${actual_device}" ]; then
    actual_fstype="$(host_sh "blkid -o value -s TYPE '${actual_device}' 2>/dev/null || true")"
    actual_uuid="$(host_sh "blkid -o value -s UUID '${actual_device}' 2>/dev/null || true")"
    actual_label="$(host_sh "blkid -o value -s LABEL '${actual_device}' 2>/dev/null || true")"
  fi

  # Prefer a UUID selector over the discovered device path when one exists.
  if [ "${kind}" = "auto" ] && [ -n "${actual_uuid}" ]; then
    selector="UUID=${actual_uuid}"
  fi

  expected_fstype="${fstype:-${USB_SCRATCH_DEFAULT_FSTYPE}}"

  if [ -n "${actual_fstype}" ] && [ -n "${expected_fstype}" ] \
    && [ "${actual_fstype}" != "${expected_fstype}" ]; then
    SELECTOR_KIND="fs-mismatch"
    SELECTOR_VALUE="${selector}"
    SELECTOR_SPEC="${selector}"
    DEVICE_PATH="${actual_device}"
    DEVICE_FSTYPE="${actual_fstype}"
    return 0
  fi

  SELECTOR_KIND="${kind}"
  SELECTOR_VALUE="${value}"
  SELECTOR_SPEC="${selector}"
  DEVICE_PATH="${actual_device}"
  DEVICE_FSTYPE="${expected_fstype}"
  DEVICE_UUID="${actual_uuid}"
  DEVICE_LABEL="${actual_label}"
  return 0
}

# Creates, through the HOST_ROOT view of the host filesystem, the state
# directory, the scratch mountpoint, every managed target directory and its
# counterpart under the scratch mountpoint.
ensure_directories() {
  local target
  mkdir -p "${STATE_DIR}" "${HOST_ROOT}${USB_SCRATCH_MOUNTPOINT}"
  for target in "${TARGET_PATHS[@]}"; do
    mkdir -p "${HOST_ROOT}${target}" "${HOST_ROOT}${USB_SCRATCH_MOUNTPOINT}${target}"
  done
}

# Makes sure the scratch filesystem is mounted at USB_SCRATCH_MOUNTPOINT.
# Returns: 1 when something other than DEVICE_PATH / SELECTOR_SPEC is already
#          mounted there, or when the mount itself fails; 0 otherwise.
ensure_usb_mount_live() {
  local existing_source
  existing_source="$(find_existing_mount_source "${USB_SCRATCH_MOUNTPOINT}")"
  if [ -n "${existing_source}" ] && [ -n "${DEVICE_PATH}" ] \
    && [ "${existing_source}" != "${DEVICE_PATH}" ] \
    && [ "${existing_source}" != "${SELECTOR_SPEC}" ]; then
    log "usb scratch already mounted from unexpected source ${existing_source}"
    return 1
  fi
  # A failed mount must not be reported as success: callers would otherwise
  # go on to measure and populate the directory on the root filesystem.
  if ! host_sh "mkdir -p '${USB_SCRATCH_MOUNTPOINT}'; mountpoint -q '${USB_SCRATCH_MOUNTPOINT}' || mount '${USB_SCRATCH_MOUNTPOINT}'"; then
    log "failed to mount usb scratch at ${USB_SCRATCH_MOUNTPOINT}"
    return 1
  fi
  return 0
}
# Prints the free space under USB_SCRATCH_MOUNTPOINT in whole GiB, rounded
# down so that a value just below a threshold never passes it.
free_space_gib() {
  host_sh "df -Pk '${USB_SCRATCH_MOUNTPOINT}' | awk 'NR==2 { printf \"%d\", \$4 / 1024 / 1024 }'"
}

# Succeeds when host path $1 is a mountpoint whose findmnt SOURCE equals
# ${USB_SCRATCH_MOUNTPOINT}$1.
# NOTE(review): findmnt(8) documents that SOURCE is printed as "device[/dir]"
# for bind mounts unless --nofsroot is given; confirm on a cut-over node that
# this comparison can actually match.
target_bound_to_scratch() {
  local target="$1"
  local current_source
  current_source="$(find_existing_mount_source "${target}")"
  [ "${current_source}" = "${USB_SCRATCH_MOUNTPOINT}${target}" ]
}

# Copies the current contents of host path $1 into its scratch counterpart,
# using rsync when the host has it and a tar pipe otherwise.
seed_target_data() {
  local target="$1"
  local source_dir="${USB_SCRATCH_MOUNTPOINT}${target}"
  host_sh "mkdir -p '${source_dir}' '${target}'; if command -v rsync >/dev/null 2>&1; then rsync -aHAX --numeric-ids '${target}/' '${source_dir}/'; else tar -C '${target}' -cf - . | tar -C '${source_dir}' -xf -; fi"
}

# Succeeds when path $1 should be seeded before it is bound. The three k3s
# agent directories are seeded only when USB_SCRATCH_SEED_K3S_AGENT_DIRS is
# "true"; every other path is always seeded.
should_seed_target() {
  local target="$1"
  case "${target}" in
    /var/lib/rancher/k3s/agent/containerd | \
      /var/lib/rancher/k3s/agent/kubelet | \
      /var/lib/rancher/k3s/agent/images)
      if [ "${USB_SCRATCH_SEED_K3S_AGENT_DIRS}" = "true" ]; then
        return 0
      fi
      return 1
      ;;
  esac
  return 0
}

# Bind-mounts host path $1 from its fstab entry unless it is already mounted.
# The bind source is created on the host first: for unseeded targets nothing
# else creates it on the mounted scratch filesystem, and mount(8) fails when
# the source directory is missing.
mount_target_live() {
  local target="$1"
  host_sh "mkdir -p '${USB_SCRATCH_MOUNTPOINT}${target}' '${target}'; mountpoint -q '${target}' || mount '${target}'"
}

# Succeeds when at least one managed path is not yet bound to the scratch disk.
cutover_needed() {
  local target
  for target in "${TARGET_PATHS[@]}"; do
    if ! target_bound_to_scratch "${target}"; then
      return 0
    fi
  done
  return 1
}

# Stops k3s-agent, seeds and bind-mounts every unbound managed path, then
# starts k3s-agent again. A random delay of up to
# USB_SCRATCH_CUTOVER_JITTER_MAX_SEC seconds precedes the stop.
#
# Each step is checked explicitly: this function runs underneath
# `reconcile_once || true`, and bash ignores errexit inside a command list
# joined by ||, so `set -e` offers no protection here.
# Returns: 0 when nothing had to be done or every step succeeded, 1 otherwise.
perform_cutover() {
  local jitter=0 target failed=0

  if ! cutover_needed; then
    return 0
  fi

  if [ "${USB_SCRATCH_CUTOVER_JITTER_MAX_SEC}" -gt 0 ]; then
    jitter=$(( RANDOM % (USB_SCRATCH_CUTOVER_JITTER_MAX_SEC + 1) ))
  fi
  if [ "${jitter}" -gt 0 ]; then
    log "sleeping ${jitter}s before first live cutover"
    sleep "${jitter}"
  fi

  log "stopping k3s-agent for Astraios cutover"
  # Flag first, so the EXIT trap restarts the agent even if we are
  # interrupted while the stop is in progress.
  agent_stopped=1
  if ! host_sh "systemctl stop k3s-agent"; then
    log "failed to stop k3s-agent; skipping Astraios cutover"
    host_sh "systemctl start k3s-agent || true"
    agent_stopped=0
    return 1
  fi

  for target in "${TARGET_PATHS[@]}"; do
    if target_bound_to_scratch "${target}"; then
      continue
    fi
    if should_seed_target "${target}"; then
      log "seeding ${target} into ${USB_SCRATCH_MOUNTPOINT}${target}"
      if ! seed_target_data "${target}"; then
        # Do not bind an incompletely seeded directory over live data.
        log "seeding ${target} failed; leaving it unbound"
        failed=1
        continue
      fi
    else
      log "skipping seed for ${target}; binding fresh Astraios path"
    fi
    log "mounting bind target ${target}"
    if ! mount_target_live "${target}"; then
      log "bind mount of ${target} failed"
      failed=1
    fi
  done

  log "starting k3s-agent after Astraios cutover"
  if host_sh "systemctl start k3s-agent"; then
    agent_stopped=0
  else
    # agent_stopped stays 1 so the EXIT trap retries the start.
    log "failed to start k3s-agent after Astraios cutover"
    failed=1
  fi
  return "${failed}"
}

# Runs one reconcile pass: resolve the device, refresh fstab and the
# k3s-agent guard, mount the scratch disk, check free space, cut the managed
# paths over, enforce tmpfs /tmp, and publish the outcome as node annotations.
# Returns: 0 on every handled outcome (errors are reported via annotations).
reconcile_once() {
  local fstab_changed=false guard_changed=false free_gib selector_detail tmp_detail

  load_config
  resolve_selector
  ensure_directories

  case "${SELECTOR_KIND}" in
    disabled)
      annotate_node "disabled" "inventory-disabled" "none"
      log "inventory disables Astraios on ${NODE_NAME}"
      return 0
      ;;
    missing)
      annotate_node "pending" "missing-inventory" "none"
      log "no inventory entry or default selector for ${NODE_NAME}"
      return 0
      ;;
    invalid)
      annotate_node "error" "invalid-selector" "${SELECTOR_VALUE}"
      log "invalid selector configured for ${NODE_NAME}"
      return 0
      ;;
    fs-mismatch)
      # Only auto-discovered devices are ever reformatted.
      if [ "${USB_SCRATCH_AUTO_FORMAT_REMOVABLE}" = "true" ] \
        && [ "${SELECTOR_MATCH_KIND:-}" = "auto" ] && [ -n "${DEVICE_PATH}" ]; then
        log "formatting auto-discovered device ${DEVICE_PATH} as ext4 label=${USB_SCRATCH_AUTO_FORMAT_LABEL}"
        if format_device_ext4 "${DEVICE_PATH}" "${USB_SCRATCH_AUTO_FORMAT_LABEL}"; then
          resolve_selector
        fi
      fi
      if [ "${SELECTOR_KIND}" = "fs-mismatch" ]; then
        annotate_node "error" "filesystem-mismatch" "${SELECTOR_SPEC}"
        log "filesystem mismatch on ${DEVICE_PATH}: expected ${USB_SCRATCH_DEFAULT_FSTYPE}, got ${DEVICE_FSTYPE}"
        return 0
      fi
      ;;
  esac

  selector_detail="${SELECTOR_SPEC}"

  # The re-resolve after a format can come back without a selector (device
  # gone); never write an fstab entry with an empty device field.
  if [ -z "${SELECTOR_SPEC}" ]; then
    annotate_node "pending" "device-not-found" "none"
    log "Astraios device not present yet for selector ${selector_detail}"
    return 0
  fi

  if ensure_fstab_block "${SELECTOR_SPEC}" "${DEVICE_FSTYPE}"; then
    fstab_changed=true
    host_sh "systemctl daemon-reload || true"
  fi

  if [ -z "${DEVICE_PATH}" ]; then
    annotate_node "pending" "device-not-found" "${selector_detail}"
    log "Astraios device not present yet for selector ${selector_detail}"
    return 0
  fi

  if ! ensure_usb_mount_live; then
    annotate_node "error" "mount-conflict" "${selector_detail}"
    return 0
  fi

  free_gib="$(free_space_gib || true)"
  if [ -z "${free_gib}" ]; then
    annotate_node "error" "free-space-check-failed" "${selector_detail}"
    return 0
  fi
  if [ "${free_gib}" -lt "${USB_SCRATCH_REQUIRED_FREE_GIB}" ]; then
    annotate_node "error" "insufficient-free-space-${free_gib}Gi" "${selector_detail}"
    log "Astraios free space ${free_gib}Gi below required ${USB_SCRATCH_REQUIRED_FREE_GIB}Gi"
    return 0
  fi

  if ensure_k3s_agent_guard; then
    guard_changed=true
    host_sh "systemctl daemon-reload || true"
  fi

  if host_sh "systemctl list-unit-files | grep -q '^k3s-agent.service'"; then
    if ! perform_cutover; then
      log "Astraios cutover did not complete cleanly on ${NODE_NAME}"
    fi
  else
    annotate_node "error" "missing-k3s-agent-service" "${selector_detail}"
    log "k3s-agent.service missing on ${NODE_NAME}"
    return 0
  fi

  if cutover_needed; then
    annotate_node "error" "bind-mount-incomplete" "${selector_detail}"
    return 0
  fi

  tmp_detail="tmpfs-ok"
  if [ "${USB_SCRATCH_ENFORCE_TMPFS_TMP}" = "true" ]; then
    if ! ensure_tmp_tmpfs_live || ! tmp_is_tmpfs; then
      annotate_node "error" "tmpfs-tmp-enforce-failed" "${selector_detail}"
      log "failed to enforce /tmp tmpfs on ${NODE_NAME}"
      return 0
    fi
  elif ! tmp_is_tmpfs; then
    tmp_detail="tmp-not-tmpfs"
    log "warning: /tmp is not tmpfs on ${NODE_NAME}; SD wear reduction is lower than expected"
  fi

  if [ "${fstab_changed}" = true ]; then
    log "Astraios fstab refreshed for ${NODE_NAME}"
  fi
  if [ "${guard_changed}" = true ]; then
    log "k3s-agent Astraios guard refreshed for ${NODE_NAME}"
  fi

  annotate_node "ready" "astraios-online-${free_gib}Gi-${tmp_detail}" "${selector_detail}"
  log "Astraios ready on ${NODE_NAME} via ${selector_detail} mounted at ${USB_SCRATCH_MOUNTPOINT}"
}

# Reconciles forever, sleeping USB_SCRATCH_RECONCILE_INTERVAL_SEC between
# passes; exits after the first pass when ONE_SHOT is "true". A failing pass
# never terminates the loop.
main() {
  while true; do
    # `|| true` keeps the loop alive, and also means errexit is not in
    # effect anywhere inside reconcile_once.
    reconcile_once || true
    if [ "${ONE_SHOT}" = "true" ]; then
      exit 0
    fi
    sleep "${USB_SCRATCH_RECONCILE_INTERVAL_SEC}"
  done
}

main