From 616c6308b1e3c3cbb3c7ab31449e699861e07bf8 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 12 Apr 2026 01:02:41 -0300 Subject: [PATCH] maintenance: remove pi-usb-scratch guard rollout --- services/maintenance/kustomization.yaml | 10 - .../maintenance/pi-usb-scratch-configmap.yaml | 51 -- .../maintenance/pi-usb-scratch-daemonset.yaml | 68 --- services/maintenance/pi-usb-scratch-rbac.yaml | 26 - .../pi-usb-scratch-serviceaccount.yaml | 6 - .../maintenance/scripts/pi_usb_scratch.sh | 572 ------------------ 6 files changed, 733 deletions(-) delete mode 100644 services/maintenance/pi-usb-scratch-configmap.yaml delete mode 100644 services/maintenance/pi-usb-scratch-daemonset.yaml delete mode 100644 services/maintenance/pi-usb-scratch-rbac.yaml delete mode 100644 services/maintenance/pi-usb-scratch-serviceaccount.yaml delete mode 100755 services/maintenance/scripts/pi_usb_scratch.sh diff --git a/services/maintenance/kustomization.yaml b/services/maintenance/kustomization.yaml index f99efd65..5ff09a17 100644 --- a/services/maintenance/kustomization.yaml +++ b/services/maintenance/kustomization.yaml @@ -3,7 +3,6 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - namespace.yaml - - pi-usb-scratch-configmap.yaml - image.yaml - secretproviderclass.yaml - metis-configmap.yaml @@ -22,9 +21,7 @@ resources: - metis-rbac.yaml - metis-token-sync-serviceaccount.yaml - node-nofile-serviceaccount.yaml - - pi-usb-scratch-serviceaccount.yaml - pod-cleaner-rbac.yaml - - pi-usb-scratch-rbac.yaml - ariadne-deployment.yaml - metis-deployment.yaml - soteria-deployment.yaml @@ -34,7 +31,6 @@ resources: - disable-k3s-traefik-daemonset.yaml - oneoffs/k3s-traefik-cleanup-job.yaml - node-nofile-daemonset.yaml - - pi-usb-scratch-daemonset.yaml - metis-sentinel-amd64-daemonset.yaml - metis-sentinel-arm64-daemonset.yaml - k3s-agent-restart-daemonset.yaml @@ -76,9 +72,3 @@ configMapGenerator: - node_image_sweeper.sh=scripts/node_image_sweeper.sh options: disableNameSuffixHash: true - - name: pi-usb-scratch-script - namespace: maintenance - files: - - pi_usb_scratch.sh=scripts/pi_usb_scratch.sh - options: - disableNameSuffixHash: true diff --git a/services/maintenance/pi-usb-scratch-configmap.yaml b/services/maintenance/pi-usb-scratch-configmap.yaml deleted file mode 100644 index 16aa2bc4..00000000 --- a/services/maintenance/pi-usb-scratch-configmap.yaml +++ /dev/null @@ -1,51 +0,0 @@ -# services/maintenance/pi-usb-scratch-configmap.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: pi-usb-scratch-config - namespace: maintenance -data: - usb_scratch.env: | - USB_SCRATCH_DEFAULT_ENABLED=true - # Leave empty to avoid label-based fallback selection. - USB_SCRATCH_DEFAULT_LABEL= - USB_SCRATCH_DEFAULT_FSTYPE=ext4 - USB_SCRATCH_MOUNTPOINT=/mnt/astraios - # Auto-select the removable 64GB USB partition on each worker. - USB_SCRATCH_AUTO_SELECT_REMOVABLE=true - USB_SCRATCH_AUTO_MIN_SIZE_GIB=50 - # One-time bootstrap for new sticks that ship exfat/fat32. - USB_SCRATCH_AUTO_FORMAT_REMOVABLE=true - USB_SCRATCH_AUTO_FORMAT_LABEL=astraios - # Keep this false to avoid long rsync hangs on k3s runtime trees. - USB_SCRATCH_SEED_K3S_AGENT_DIRS=false - # Keep /tmp in RAM to reduce SD-card writes. - USB_SCRATCH_ENFORCE_TMPFS_TMP=true - USB_SCRATCH_REQUIRED_FREE_GIB=20 - USB_SCRATCH_RECONCILE_INTERVAL_SEC=900 - # Keep this at 0 so the initial cluster-wide cutover completes quickly. - USB_SCRATCH_CUTOVER_JITTER_MAX_SEC=0 - usb_scratch_inventory.tsv: | - # node_name enabled match_kind match_value fstype - # match_kind: uuid | label | device - # Astraios policy: - # - use UUID entries per worker node (preferred) - # - avoid shared labels to prevent accidental wrong-device mounts - # - mountpoint is /mnt/astraios on every worker node - # Example: - # titan-04 true uuid 11111111-2222-3333-4444-555555555555 ext4 - # titan-05 true uuid ext4 - # titan-06 true uuid ext4 - # titan-07 true uuid ext4 - # titan-08 true uuid ext4 - # titan-09 true uuid ext4 - # titan-10 true uuid ext4 - # titan-11 true uuid ext4 - # titan-12 true uuid ext4 - # titan-13 true uuid ext4 - # titan-14 true uuid ext4 - # titan-15 true uuid ext4 - # titan-16 true uuid ext4 - # titan-17 true uuid ext4 - # titan-18 true uuid ext4 - # titan-19 true uuid ext4 diff --git a/services/maintenance/pi-usb-scratch-daemonset.yaml b/services/maintenance/pi-usb-scratch-daemonset.yaml deleted file mode 100644 index 26fa7cf1..00000000 --- a/services/maintenance/pi-usb-scratch-daemonset.yaml +++ /dev/null @@ -1,68 +0,0 @@ -# services/maintenance/pi-usb-scratch-daemonset.yaml -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: pi-usb-scratch - namespace: maintenance -spec: - selector: - matchLabels: - app: pi-usb-scratch - updateStrategy: - type: RollingUpdate - rollingUpdate: - maxUnavailable: 1 - template: - metadata: - labels: - app: pi-usb-scratch - spec: - serviceAccountName: pi-usb-scratch - hostPID: true - nodeSelector: - kubernetes.io/arch: arm64 - node-role.kubernetes.io/worker: "true" - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: hardware - operator: In - values: - - rpi4 - - rpi5 - containers: - - name: pi-usb-scratch - image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 - command: ["/usr/bin/env", "bash"] - args: ["/scripts/pi_usb_scratch.sh"] - env: - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - securityContext: - privileged: true - runAsUser: 0 - volumeMounts: - - name: host-root - mountPath: /host - - name: script - mountPath: /scripts - readOnly: true - - name: config - mountPath: /config - readOnly: true - volumes: - - name: host-root - hostPath: - path: / - - name: script - configMap: - name: pi-usb-scratch-script - defaultMode: 0555 - - name: config - configMap: - name: pi-usb-scratch-config - defaultMode: 0444 diff --git a/services/maintenance/pi-usb-scratch-rbac.yaml b/services/maintenance/pi-usb-scratch-rbac.yaml deleted file mode 100644 index 97b9cd9a..00000000 --- a/services/maintenance/pi-usb-scratch-rbac.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# services/maintenance/pi-usb-scratch-rbac.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: pi-usb-scratch -rules: - - apiGroups: [""] - resources: - - nodes - verbs: - - get - - list - - patch ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: pi-usb-scratch -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: pi-usb-scratch -subjects: - - kind: ServiceAccount - name: pi-usb-scratch - namespace: maintenance diff --git a/services/maintenance/pi-usb-scratch-serviceaccount.yaml b/services/maintenance/pi-usb-scratch-serviceaccount.yaml deleted file mode 100644 index 7ac84fb4..00000000 --- a/services/maintenance/pi-usb-scratch-serviceaccount.yaml +++ /dev/null @@ -1,6 +0,0 @@ -# services/maintenance/pi-usb-scratch-serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: pi-usb-scratch - namespace: maintenance diff --git a/services/maintenance/scripts/pi_usb_scratch.sh b/services/maintenance/scripts/pi_usb_scratch.sh deleted file mode 100755 index c8f67814..00000000 --- a/services/maintenance/scripts/pi_usb_scratch.sh +++ /dev/null @@ -1,572 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -NODE_NAME=${NODE_NAME:?NODE_NAME is required} -HOST_ROOT=${HOST_ROOT:-/host} -CONFIG_ENV=${CONFIG_ENV:-/config/usb_scratch.env} -INVENTORY_FILE=${INVENTORY_FILE:-/config/usb_scratch_inventory.tsv} -FSTAB_PATH="${HOST_ROOT}/etc/fstab" -STATE_DIR="${HOST_ROOT}/var/lib/maintenance/pi-usb-scratch" -MANAGED_BEGIN="# BEGIN maintenance.bstein.dev usb-scratch" -MANAGED_END="# END maintenance.bstein.dev usb-scratch" -ONE_SHOT=${ONE_SHOT:-false} -DEFAULT_PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" - -USB_SCRATCH_DEFAULT_ENABLED=${USB_SCRATCH_DEFAULT_ENABLED:-true} -USB_SCRATCH_DEFAULT_LABEL=${USB_SCRATCH_DEFAULT_LABEL:-} -USB_SCRATCH_DEFAULT_FSTYPE=${USB_SCRATCH_DEFAULT_FSTYPE:-ext4} -USB_SCRATCH_MOUNTPOINT=${USB_SCRATCH_MOUNTPOINT:-/mnt/astraios} -USB_SCRATCH_ENFORCE_TMPFS_TMP=${USB_SCRATCH_ENFORCE_TMPFS_TMP:-true} -USB_SCRATCH_AUTO_SELECT_REMOVABLE=${USB_SCRATCH_AUTO_SELECT_REMOVABLE:-true} -USB_SCRATCH_AUTO_MIN_SIZE_GIB=${USB_SCRATCH_AUTO_MIN_SIZE_GIB:-50} -USB_SCRATCH_AUTO_FORMAT_REMOVABLE=${USB_SCRATCH_AUTO_FORMAT_REMOVABLE:-true} -USB_SCRATCH_AUTO_FORMAT_LABEL=${USB_SCRATCH_AUTO_FORMAT_LABEL:-astraios} -USB_SCRATCH_SEED_K3S_AGENT_DIRS=${USB_SCRATCH_SEED_K3S_AGENT_DIRS:-false} -USB_SCRATCH_REQUIRED_FREE_GIB=${USB_SCRATCH_REQUIRED_FREE_GIB:-20} -USB_SCRATCH_RECONCILE_INTERVAL_SEC=${USB_SCRATCH_RECONCILE_INTERVAL_SEC:-900} -USB_SCRATCH_CUTOVER_JITTER_MAX_SEC=${USB_SCRATCH_CUTOVER_JITTER_MAX_SEC:-900} - -TARGET_PATHS=( - "/var/log/pods" - "/var/log/containers" - "/var/lib/rancher/k3s/agent/containerd" - "/var/lib/rancher/k3s/agent/kubelet" - "/var/lib/rancher/k3s/agent/images" - "/var/tmp" -) - -agent_stopped=0 - -log() { - printf '[%s] %s\n' "$(date -u +%FT%TZ)" "$*" -} - -sanitize_annotation_value() { - printf '%s' "$1" | tr ' ' '_' | tr -cd '[:alnum:]._:/=-' -} - -annotate_node() { - local status="$1" - local detail="$2" - local selector="$3" - local timestamp - timestamp="$(date -u +%FT%TZ)" - kubectl annotate --overwrite node "${NODE_NAME}" \ - maintenance.bstein.dev/astraios-status="$(sanitize_annotation_value "${status}")" \ - maintenance.bstein.dev/astraios-detail="$(sanitize_annotation_value "${detail}")" \ - maintenance.bstein.dev/astraios-selector="$(sanitize_annotation_value "${selector}")" \ - maintenance.bstein.dev/astraios-mountpoint="$(sanitize_annotation_value "${USB_SCRATCH_MOUNTPOINT}")" \ - maintenance.bstein.dev/astraios-managed-paths="$(sanitize_annotation_value "${TARGET_PATHS[*]}")" \ - maintenance.bstein.dev/astraios-last-apply="${timestamp}" \ - maintenance.bstein.dev/usb-scratch-status="$(sanitize_annotation_value "${status}")" \ - maintenance.bstein.dev/usb-scratch-detail="$(sanitize_annotation_value "${detail}")" \ - maintenance.bstein.dev/usb-scratch-selector="$(sanitize_annotation_value "${selector}")" \ - maintenance.bstein.dev/usb-scratch-mountpoint="$(sanitize_annotation_value "${USB_SCRATCH_MOUNTPOINT}")" \ - maintenance.bstein.dev/usb-scratch-managed-paths="$(sanitize_annotation_value "${TARGET_PATHS[*]}")" \ - maintenance.bstein.dev/usb-scratch-last-apply="${timestamp}" \ - >/dev/null 2>&1 || true -} - -host_sh() { - local command="$1" - if command -v nsenter >/dev/null 2>&1; then - nsenter -t 1 -m -u -i -n -p -- /bin/sh -ceu "PATH=${DEFAULT_PATH}; ${command}" - elif [ -x "${HOST_ROOT}/usr/bin/nsenter" ]; then - "${HOST_ROOT}/usr/bin/nsenter" -t 1 -m -u -i -n -p -- /bin/sh -ceu "PATH=${DEFAULT_PATH}; ${command}" - elif [ -x "${HOST_ROOT}/bin/nsenter" ]; then - "${HOST_ROOT}/bin/nsenter" -t 1 -m -u -i -n -p -- /bin/sh -ceu "PATH=${DEFAULT_PATH}; ${command}" - else - chroot "${HOST_ROOT}" /bin/sh -ceu "PATH=${DEFAULT_PATH}; ${command}" - fi -} - -cleanup() { - if [ "${agent_stopped}" -eq 1 ]; then - log "starting k3s-agent after interrupted cutover" - host_sh "systemctl start k3s-agent || true" - agent_stopped=0 - fi -} -trap cleanup EXIT - -load_config() { - if [ -f "${CONFIG_ENV}" ]; then - # shellcheck disable=SC1090 - . "${CONFIG_ENV}" - fi - - USB_SCRATCH_DEFAULT_ENABLED=${USB_SCRATCH_DEFAULT_ENABLED:-true} - USB_SCRATCH_DEFAULT_LABEL=${USB_SCRATCH_DEFAULT_LABEL:-} - USB_SCRATCH_DEFAULT_FSTYPE=${USB_SCRATCH_DEFAULT_FSTYPE:-ext4} - USB_SCRATCH_MOUNTPOINT=${USB_SCRATCH_MOUNTPOINT:-/mnt/astraios} - USB_SCRATCH_ENFORCE_TMPFS_TMP=${USB_SCRATCH_ENFORCE_TMPFS_TMP:-true} - USB_SCRATCH_AUTO_SELECT_REMOVABLE=${USB_SCRATCH_AUTO_SELECT_REMOVABLE:-true} - USB_SCRATCH_AUTO_MIN_SIZE_GIB=${USB_SCRATCH_AUTO_MIN_SIZE_GIB:-50} - USB_SCRATCH_AUTO_FORMAT_REMOVABLE=${USB_SCRATCH_AUTO_FORMAT_REMOVABLE:-true} - USB_SCRATCH_AUTO_FORMAT_LABEL=${USB_SCRATCH_AUTO_FORMAT_LABEL:-astraios} - USB_SCRATCH_SEED_K3S_AGENT_DIRS=${USB_SCRATCH_SEED_K3S_AGENT_DIRS:-false} - USB_SCRATCH_REQUIRED_FREE_GIB=${USB_SCRATCH_REQUIRED_FREE_GIB:-20} - USB_SCRATCH_RECONCILE_INTERVAL_SEC=${USB_SCRATCH_RECONCILE_INTERVAL_SEC:-900} - USB_SCRATCH_CUTOVER_JITTER_MAX_SEC=${USB_SCRATCH_CUTOVER_JITTER_MAX_SEC:-900} -} - -lookup_inventory() { - local line="" - if [ -f "${INVENTORY_FILE}" ]; then - line="$(awk -v node="${NODE_NAME}" 'NF >= 4 && $1 !~ /^#/ && $1 == node { print; exit }' "${INVENTORY_FILE}" || true)" - fi - printf '%s' "${line}" -} - -strip_managed_block() { - local source_file="$1" - awk -v begin="${MANAGED_BEGIN}" -v end="${MANAGED_END}" ' - $0 == begin { skip=1; next } - $0 == end { skip=0; next } - skip != 1 { print } - ' "${source_file}" -} - -ensure_fstab_block() { - local selector="$1" - local fstype="$2" - local tmp_base tmp_candidate bind_source target - - mkdir -p "${STATE_DIR}" - tmp_base="${STATE_DIR}/fstab.base" - tmp_candidate="${STATE_DIR}/fstab.candidate" - - strip_managed_block "${FSTAB_PATH}" > "${tmp_base}" - if [ "${USB_SCRATCH_ENFORCE_TMPFS_TMP}" = "true" ]; then - awk '$1 ~ /^#/ || $2 != "/tmp" { print }' "${tmp_base}" > "${tmp_base}.tmpfs" - mv "${tmp_base}.tmpfs" "${tmp_base}" - fi - cp "${tmp_base}" "${tmp_candidate}" - - { - printf '%s\n' "${MANAGED_BEGIN}" - printf '%s %s %s defaults,noatime,lazytime,commit=60,x-systemd.device-timeout=15s,x-systemd.mount-timeout=30s 0 2\n' \ - "${selector}" "${USB_SCRATCH_MOUNTPOINT}" "${fstype}" - if [ "${USB_SCRATCH_ENFORCE_TMPFS_TMP}" = "true" ]; then - printf '%s\n' 'tmpfs /tmp tmpfs defaults,nosuid,nodev,mode=1777 0 0' - fi - for target in "${TARGET_PATHS[@]}"; do - bind_source="${USB_SCRATCH_MOUNTPOINT}${target}" - printf '%s %s none bind,x-systemd.requires-mounts-for=%s 0 0\n' \ - "${bind_source}" "${target}" "${USB_SCRATCH_MOUNTPOINT}" - done - printf '%s\n' "${MANAGED_END}" - } >> "${tmp_candidate}" - - if ! cmp -s "${FSTAB_PATH}" "${tmp_candidate}"; then - cp "${tmp_candidate}" "${FSTAB_PATH}" - log "updated ${FSTAB_PATH} managed block" - return 0 - fi - - return 1 -} - -ensure_k3s_agent_guard() { - local dropin_dir dropin_file guard_dir guard_file target verify_cmd requires_mounts - local tmp_dropin - - dropin_dir="${HOST_ROOT}/etc/systemd/system/k3s-agent.service.d" - dropin_file="${dropin_dir}/20-astraios-guard.conf" - guard_dir="${HOST_ROOT}/usr/local/lib/maintenance" - guard_file="${guard_dir}/verify_astraios_mounts.sh" - tmp_dropin="${STATE_DIR}/k3s-agent-astraios-dropin.conf" - - mkdir -p "${dropin_dir}" "${guard_dir}" "${STATE_DIR}" - - cat > "${guard_file}" <> "${guard_file}" </dev/null || true) -if [[ "\${src}" != '${USB_SCRATCH_MOUNTPOINT}${target}' ]]; then - echo "astraios guard: ${target} is not bound to ${USB_SCRATCH_MOUNTPOINT}${target}" >&2 - exit 1 -fi -EOF - done - chmod 0755 "${guard_file}" - - requires_mounts="${USB_SCRATCH_MOUNTPOINT}" - for target in "${TARGET_PATHS[@]}"; do - requires_mounts="${requires_mounts} ${target}" - done - verify_cmd="${guard_file#${HOST_ROOT}}" - - cat > "${tmp_dropin}" </dev/null || true")" - [ "${fstype}" = "tmpfs" ] -} - -ensure_tmp_tmpfs_live() { - host_sh "mkdir -p /tmp; chmod 1777 /tmp; fstype=\$(findmnt -T /tmp -n -o FSTYPE 2>/dev/null || true); if [ \"\${fstype}\" != \"tmpfs\" ]; then mount /tmp 2>/dev/null || mount -t tmpfs -o defaults,nosuid,nodev,mode=1777 tmpfs /tmp; fi" -} - -find_existing_mount_source() { - local target="$1" - host_sh "if mountpoint -q '${target}'; then findmnt -T '${target}' -n -o SOURCE 2>/dev/null || true; fi" -} - -auto_discover_removable_partition() { - local min_bytes - min_bytes=$(( USB_SCRATCH_AUTO_MIN_SIZE_GIB * 1024 * 1024 * 1024 )) - host_sh "lsblk -brnpo NAME,TYPE,SIZE,RM | awk '\$2==\"part\" && \$4==\"1\" && \$3>=${min_bytes} {print \$1; exit}'" -} - -format_device_ext4() { - local device="$1" - local label="$2" - host_sh "mountpoint=\$(findmnt -S '${device}' -n -o TARGET 2>/dev/null || true); if [ -n \"\${mountpoint}\" ]; then umount \"\${mountpoint}\"; fi; wipefs -a '${device}'; mkfs.ext4 -F -L '${label}' '${device}'" -} - -resolve_selector() { - local inventory_line enabled kind value fstype actual_device actual_fstype actual_uuid actual_label selector expected_fstype - inventory_line="$(lookup_inventory)" - enabled="${USB_SCRATCH_DEFAULT_ENABLED}" - kind="" - value="" - fstype="${USB_SCRATCH_DEFAULT_FSTYPE}" - - if [ -n "${inventory_line}" ]; then - read -r _ enabled kind value fstype _ <<<"${inventory_line}" - elif [ -n "${USB_SCRATCH_DEFAULT_LABEL}" ]; then - kind="label" - value="${USB_SCRATCH_DEFAULT_LABEL}" - elif [ "${USB_SCRATCH_AUTO_SELECT_REMOVABLE}" = "true" ]; then - kind="auto" - value="removable-${USB_SCRATCH_AUTO_MIN_SIZE_GIB}Gi-plus" - fi - - if [ "${enabled}" != "true" ]; then - SELECTOR_KIND="disabled" - SELECTOR_VALUE="" - SELECTOR_SPEC="" - DEVICE_PATH="" - DEVICE_FSTYPE="${fstype}" - return 0 - fi - - if [ -z "${kind}" ] || [ -z "${value}" ]; then - SELECTOR_KIND="missing" - SELECTOR_VALUE="" - SELECTOR_SPEC="" - DEVICE_PATH="" - DEVICE_FSTYPE="${fstype}" - return 0 - fi - - case "${kind}" in - uuid) - selector="UUID=${value}" - actual_device="$(host_sh "blkid -U '${value}' 2>/dev/null || true")" - ;; - label) - selector="LABEL=${value}" - actual_device="$(host_sh "blkid -L '${value}' 2>/dev/null || true")" - ;; - device) - selector="${value}" - actual_device="$(host_sh "if [ -b '${value}' ]; then printf '%s' '${value}'; fi")" - ;; - auto) - actual_device="$(auto_discover_removable_partition)" - selector="${actual_device}" - ;; - *) - SELECTOR_KIND="invalid" - SELECTOR_VALUE="${value}" - SELECTOR_SPEC="" - DEVICE_PATH="" - DEVICE_FSTYPE="${fstype}" - return 0 - ;; - esac - - actual_fstype="" - actual_uuid="" - actual_label="" - if [ "${kind}" = "auto" ] && [ -z "${actual_device}" ]; then - SELECTOR_KIND="missing" - SELECTOR_VALUE="${value}" - SELECTOR_SPEC="" - DEVICE_PATH="" - DEVICE_FSTYPE="${fstype}" - SELECTOR_MATCH_KIND="${kind}" - return 0 - fi - if [ -n "${actual_device}" ]; then - actual_fstype="$(host_sh "blkid -o value -s TYPE '${actual_device}' 2>/dev/null || true")" - actual_uuid="$(host_sh "blkid -o value -s UUID '${actual_device}' 2>/dev/null || true")" - actual_label="$(host_sh "blkid -o value -s LABEL '${actual_device}' 2>/dev/null || true")" - fi - if [ "${kind}" = "auto" ] && [ -n "${actual_uuid}" ]; then - selector="UUID=${actual_uuid}" - fi - - expected_fstype="${fstype:-${USB_SCRATCH_DEFAULT_FSTYPE}}" - SELECTOR_MATCH_KIND="${kind}" - if [ -n "${actual_fstype}" ] && [ -n "${expected_fstype}" ] && [ "${actual_fstype}" != "${expected_fstype}" ]; then - SELECTOR_KIND="fs-mismatch" - SELECTOR_VALUE="${selector}" - SELECTOR_SPEC="${selector}" - DEVICE_PATH="${actual_device}" - DEVICE_FSTYPE="${actual_fstype}" - return 0 - fi - - SELECTOR_KIND="${kind}" - SELECTOR_VALUE="${value}" - SELECTOR_SPEC="${selector}" - DEVICE_PATH="${actual_device}" - DEVICE_FSTYPE="${expected_fstype}" - DEVICE_UUID="${actual_uuid}" - DEVICE_LABEL="${actual_label}" -} - -ensure_directories() { - local target source_dir - mkdir -p "${STATE_DIR}" "${HOST_ROOT}${USB_SCRATCH_MOUNTPOINT}" - for target in "${TARGET_PATHS[@]}"; do - mkdir -p "${HOST_ROOT}${target}" "${HOST_ROOT}${USB_SCRATCH_MOUNTPOINT}${target}" - done -} - -ensure_usb_mount_live() { - local existing_source - existing_source="$(find_existing_mount_source "${USB_SCRATCH_MOUNTPOINT}")" - if [ -n "${existing_source}" ] && [ -n "${DEVICE_PATH}" ] && [ "${existing_source}" != "${DEVICE_PATH}" ] && [ "${existing_source}" != "${SELECTOR_SPEC}" ]; then - log "usb scratch already mounted from unexpected source ${existing_source}" - return 1 - fi - - host_sh "mkdir -p '${USB_SCRATCH_MOUNTPOINT}'; mountpoint -q '${USB_SCRATCH_MOUNTPOINT}' || mount '${USB_SCRATCH_MOUNTPOINT}'" - return 0 -} - -free_space_gib() { - host_sh "df -Pk '${USB_SCRATCH_MOUNTPOINT}' | awk 'NR==2 { printf \"%.0f\", \$4 / 1024 / 1024 }'" -} - -target_bound_to_scratch() { - local target="$1" - local current_source - current_source="$(find_existing_mount_source "${target}")" - [ "${current_source}" = "${USB_SCRATCH_MOUNTPOINT}${target}" ] -} - -seed_target_data() { - local target="$1" - local source_dir - source_dir="${USB_SCRATCH_MOUNTPOINT}${target}" - host_sh "mkdir -p '${source_dir}' '${target}'; if command -v rsync >/dev/null 2>&1; then rsync -aHAX --numeric-ids '${target}/' '${source_dir}/'; else tar -C '${target}' -cf - . | tar -C '${source_dir}' -xf -; fi" -} - -should_seed_target() { - local target="$1" - case "${target}" in - /var/lib/rancher/k3s/agent/containerd|/var/lib/rancher/k3s/agent/kubelet|/var/lib/rancher/k3s/agent/images) - [ "${USB_SCRATCH_SEED_K3S_AGENT_DIRS}" = "true" ] - return - ;; - esac - return 0 -} - -mount_target_live() { - local target="$1" - host_sh "mountpoint -q '${target}' || mount '${target}'" -} - -cutover_needed() { - local target - for target in "${TARGET_PATHS[@]}"; do - if ! target_bound_to_scratch "${target}"; then - return 0 - fi - done - return 1 -} - -perform_cutover() { - local jitter target - - if ! cutover_needed; then - return 0 - fi - - jitter=0 - if [ "${USB_SCRATCH_CUTOVER_JITTER_MAX_SEC}" -gt 0 ]; then - jitter=$(( RANDOM % (USB_SCRATCH_CUTOVER_JITTER_MAX_SEC + 1) )) - fi - if [ "${jitter}" -gt 0 ]; then - log "sleeping ${jitter}s before first live cutover" - sleep "${jitter}" - fi - - log "stopping k3s-agent for Astraios cutover" - host_sh "systemctl stop k3s-agent" - agent_stopped=1 - - for target in "${TARGET_PATHS[@]}"; do - if ! target_bound_to_scratch "${target}"; then - if should_seed_target "${target}"; then - log "seeding ${target} into ${USB_SCRATCH_MOUNTPOINT}${target}" - seed_target_data "${target}" - else - log "skipping seed for ${target}; binding fresh Astraios path" - fi - log "mounting bind target ${target}" - mount_target_live "${target}" - fi - done - - log "starting k3s-agent after Astraios cutover" - host_sh "systemctl start k3s-agent" - agent_stopped=0 -} - -reconcile_once() { - local fstab_changed=false guard_changed=false free_gib selector_detail tmp_detail - - load_config - resolve_selector - ensure_directories - - case "${SELECTOR_KIND}" in - disabled) - annotate_node "disabled" "inventory-disabled" "none" - log "inventory disables Astraios on ${NODE_NAME}" - return 0 - ;; - missing) - annotate_node "pending" "missing-inventory" "none" - log "no inventory entry or default selector for ${NODE_NAME}" - return 0 - ;; - invalid) - annotate_node "error" "invalid-selector" "${SELECTOR_VALUE}" - log "invalid selector configured for ${NODE_NAME}" - return 0 - ;; - fs-mismatch) - if [ "${USB_SCRATCH_AUTO_FORMAT_REMOVABLE}" = "true" ] && [ "${SELECTOR_MATCH_KIND:-}" = "auto" ] && [ -n "${DEVICE_PATH}" ]; then - log "formatting auto-discovered device ${DEVICE_PATH} as ext4 label=${USB_SCRATCH_AUTO_FORMAT_LABEL}" - if format_device_ext4 "${DEVICE_PATH}" "${USB_SCRATCH_AUTO_FORMAT_LABEL}"; then - resolve_selector - fi - fi - if [ "${SELECTOR_KIND}" = "fs-mismatch" ]; then - annotate_node "error" "filesystem-mismatch" "${SELECTOR_SPEC}" - log "filesystem mismatch on ${DEVICE_PATH}: expected ${USB_SCRATCH_DEFAULT_FSTYPE}, got ${DEVICE_FSTYPE}" - return 0 - fi - ;; - esac - - selector_detail="${SELECTOR_SPEC}" - - if ensure_fstab_block "${SELECTOR_SPEC}" "${DEVICE_FSTYPE}"; then - fstab_changed=true - host_sh "systemctl daemon-reload || true" - fi - - if [ -z "${DEVICE_PATH}" ]; then - annotate_node "pending" "device-not-found" "${selector_detail}" - log "Astraios device not present yet for selector ${selector_detail}" - return 0 - fi - - if ! ensure_usb_mount_live; then - annotate_node "error" "mount-conflict" "${selector_detail}" - return 0 - fi - - free_gib="$(free_space_gib || true)" - if [ -z "${free_gib}" ]; then - annotate_node "error" "free-space-check-failed" "${selector_detail}" - return 0 - fi - - if [ "${free_gib}" -lt "${USB_SCRATCH_REQUIRED_FREE_GIB}" ]; then - annotate_node "error" "insufficient-free-space-${free_gib}Gi" "${selector_detail}" - log "Astraios free space ${free_gib}Gi below required ${USB_SCRATCH_REQUIRED_FREE_GIB}Gi" - return 0 - fi - - if ensure_k3s_agent_guard; then - guard_changed=true - host_sh "systemctl daemon-reload || true" - fi - - if host_sh "systemctl list-unit-files | grep -q '^k3s-agent.service'"; then - perform_cutover - else - annotate_node "error" "missing-k3s-agent-service" "${selector_detail}" - log "k3s-agent.service missing on ${NODE_NAME}" - return 0 - fi - - if cutover_needed; then - annotate_node "error" "bind-mount-incomplete" "${selector_detail}" - return 0 - fi - - tmp_detail="tmpfs-ok" - if [ "${USB_SCRATCH_ENFORCE_TMPFS_TMP}" = "true" ]; then - if ! ensure_tmp_tmpfs_live || ! tmp_is_tmpfs; then - annotate_node "error" "tmpfs-tmp-enforce-failed" "${selector_detail}" - log "failed to enforce /tmp tmpfs on ${NODE_NAME}" - return 0 - fi - elif ! tmp_is_tmpfs; then - tmp_detail="tmp-not-tmpfs" - log "warning: /tmp is not tmpfs on ${NODE_NAME}; SD wear reduction is lower than expected" - fi - - if [ "${fstab_changed}" = true ]; then - log "Astraios fstab refreshed for ${NODE_NAME}" - fi - if [ "${guard_changed}" = true ]; then - log "k3s-agent Astraios guard refreshed for ${NODE_NAME}" - fi - annotate_node "ready" "astraios-online-${free_gib}Gi-${tmp_detail}" "${selector_detail}" - log "Astraios ready on ${NODE_NAME} via ${selector_detail} mounted at ${USB_SCRATCH_MOUNTPOINT}" -} - -main() { - while true; do - reconcile_once || true - if [ "${ONE_SHOT}" = "true" ]; then - exit 0 - fi - sleep "${USB_SCRATCH_RECONCILE_INTERVAL_SEC}" - done -} - -main