diff --git a/services/maintenance/kustomization.yaml b/services/maintenance/kustomization.yaml
index 5ff09a17..f99efd65 100644
--- a/services/maintenance/kustomization.yaml
+++ b/services/maintenance/kustomization.yaml
@@ -3,6 +3,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
   - namespace.yaml
+  - pi-usb-scratch-configmap.yaml
   - image.yaml
   - secretproviderclass.yaml
   - metis-configmap.yaml
@@ -21,7 +22,9 @@ resources:
   - metis-rbac.yaml
   - metis-token-sync-serviceaccount.yaml
   - node-nofile-serviceaccount.yaml
+  - pi-usb-scratch-serviceaccount.yaml
   - pod-cleaner-rbac.yaml
+  - pi-usb-scratch-rbac.yaml
   - ariadne-deployment.yaml
   - metis-deployment.yaml
   - soteria-deployment.yaml
@@ -31,6 +34,7 @@ resources:
   - disable-k3s-traefik-daemonset.yaml
   - oneoffs/k3s-traefik-cleanup-job.yaml
   - node-nofile-daemonset.yaml
+  - pi-usb-scratch-daemonset.yaml
   - metis-sentinel-amd64-daemonset.yaml
   - metis-sentinel-arm64-daemonset.yaml
   - k3s-agent-restart-daemonset.yaml
@@ -72,3 +76,9 @@ configMapGenerator:
       - node_image_sweeper.sh=scripts/node_image_sweeper.sh
     options:
       disableNameSuffixHash: true
+  - name: pi-usb-scratch-script
+    namespace: maintenance
+    files:
+      - pi_usb_scratch.sh=scripts/pi_usb_scratch.sh
+    options:
+      disableNameSuffixHash: true
diff --git a/services/maintenance/pi-usb-scratch-configmap.yaml b/services/maintenance/pi-usb-scratch-configmap.yaml
new file mode 100644
index 00000000..18fc6712
--- /dev/null
+++ b/services/maintenance/pi-usb-scratch-configmap.yaml
@@ -0,0 +1,35 @@
+# services/maintenance/pi-usb-scratch-configmap.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: pi-usb-scratch-config
+  namespace: maintenance
+data:
+  usb_scratch.env: |
+    USB_SCRATCH_DEFAULT_ENABLED=true
+    USB_SCRATCH_DEFAULT_LABEL=atlas-scratch
+    USB_SCRATCH_DEFAULT_FSTYPE=ext4
+    USB_SCRATCH_MOUNTPOINT=/mnt/usb-scratch
+    USB_SCRATCH_REQUIRED_FREE_GIB=20
+    USB_SCRATCH_RECONCILE_INTERVAL_SEC=900
+    USB_SCRATCH_CUTOVER_JITTER_MAX_SEC=900
+  usb_scratch_inventory.tsv: |
+    # node_name enabled match_kind match_value fstype
+    # match_kind: uuid | label | device
+    # Prefer UUID entries for the first rollout. A shared label works too if every Pi USB stick is formatted consistently.
+    # Example:
+    # titan-04 true uuid 11111111-2222-3333-4444-555555555555 ext4
+    # titan-05 true label atlas-scratch ext4
+    # titan-06 true label atlas-scratch ext4
+    # titan-07 true label atlas-scratch ext4
+    # titan-08 true label atlas-scratch ext4
+    # titan-09 true label atlas-scratch ext4
+    # titan-10 true label atlas-scratch ext4
+    # titan-11 true label atlas-scratch ext4
+    # titan-12 true label atlas-scratch ext4
+    # titan-13 true label atlas-scratch ext4
+    # titan-14 true label atlas-scratch ext4
+    # titan-15 true label atlas-scratch ext4
+    # titan-17 true label atlas-scratch ext4
+    # titan-18 true label atlas-scratch ext4
+    # titan-19 true label atlas-scratch ext4
diff --git a/services/maintenance/pi-usb-scratch-daemonset.yaml b/services/maintenance/pi-usb-scratch-daemonset.yaml
new file mode 100644
index 00000000..26fa7cf1
--- /dev/null
+++ b/services/maintenance/pi-usb-scratch-daemonset.yaml
@@ -0,0 +1,70 @@
+# services/maintenance/pi-usb-scratch-daemonset.yaml
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: pi-usb-scratch
+  namespace: maintenance
+spec:
+  selector:
+    matchLabels:
+      app: pi-usb-scratch
+  updateStrategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxUnavailable: 1
+  template:
+    metadata:
+      labels:
+        app: pi-usb-scratch
+    spec:
+      serviceAccountName: pi-usb-scratch
+      # hostPID lets the script nsenter into PID 1's namespaces on the node.
+      hostPID: true
+      nodeSelector:
+        kubernetes.io/arch: arm64
+        node-role.kubernetes.io/worker: "true"
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              - matchExpressions:
+                  - key: hardware
+                    operator: In
+                    values:
+                      - rpi4
+                      - rpi5
+      containers:
+        - name: pi-usb-scratch
+          image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
+          command: ["/usr/bin/env", "bash"]
+          args: ["/scripts/pi_usb_scratch.sh"]
+          env:
+            - name: NODE_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: spec.nodeName
+          securityContext:
+            # Root + privileged: the script mounts filesystems and edits /etc/fstab on the host.
+            privileged: true
+            runAsUser: 0
+          volumeMounts:
+            - name: host-root
+              mountPath: /host
+            - name: script
+              mountPath: /scripts
+              readOnly: true
+            - name: config
+              mountPath: /config
+              readOnly: true
+      volumes:
+        - name: host-root
+          hostPath:
+            path: /
+        - name: script
+          configMap:
+            name: pi-usb-scratch-script
+            defaultMode: 0555
+        - name: config
+          configMap:
+            name: pi-usb-scratch-config
+            defaultMode: 0444
diff --git a/services/maintenance/pi-usb-scratch-rbac.yaml b/services/maintenance/pi-usb-scratch-rbac.yaml
new file mode 100644
index 00000000..97b9cd9a
--- /dev/null
+++ b/services/maintenance/pi-usb-scratch-rbac.yaml
@@ -0,0 +1,26 @@
+# services/maintenance/pi-usb-scratch-rbac.yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: pi-usb-scratch
+rules:
+  - apiGroups: [""]
+    resources:
+      - nodes
+    verbs:
+      - get
+      - list
+      - patch
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: pi-usb-scratch
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: pi-usb-scratch
+subjects:
+  - kind: ServiceAccount
+    name: pi-usb-scratch
+    namespace: maintenance
diff --git a/services/maintenance/pi-usb-scratch-serviceaccount.yaml b/services/maintenance/pi-usb-scratch-serviceaccount.yaml
new file mode 100644
index 00000000..7ac84fb4
--- /dev/null
+++ b/services/maintenance/pi-usb-scratch-serviceaccount.yaml
@@ -0,0 +1,6 @@
+# services/maintenance/pi-usb-scratch-serviceaccount.yaml
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: pi-usb-scratch
+  namespace: maintenance
diff --git a/services/maintenance/scripts/pi_usb_scratch.sh b/services/maintenance/scripts/pi_usb_scratch.sh
new file mode 100755
index 00000000..fad30339
--- /dev/null
+++ b/services/maintenance/scripts/pi_usb_scratch.sh
@@ -0,0 +1,519 @@
+#!/usr/bin/env bash
+# Reconciles a USB scratch disk on a k3s agent node. Each pass keeps a managed
+# block in the host's /etc/fstab, mounts the disk, and bind-mounts the
+# directories in TARGET_PATHS onto it (stopping k3s-agent while data is copied).
+# Progress is reported as maintenance.bstein.dev/usb-scratch-* node annotations.
+set -euo pipefail
+
+NODE_NAME=${NODE_NAME:?NODE_NAME is required}
+HOST_ROOT=${HOST_ROOT:-/host}
+CONFIG_ENV=${CONFIG_ENV:-/config/usb_scratch.env}
+INVENTORY_FILE=${INVENTORY_FILE:-/config/usb_scratch_inventory.tsv}
+FSTAB_PATH="${HOST_ROOT}/etc/fstab"
+STATE_DIR="${HOST_ROOT}/var/lib/maintenance/pi-usb-scratch"
+MANAGED_BEGIN="# BEGIN maintenance.bstein.dev usb-scratch"
+MANAGED_END="# END maintenance.bstein.dev usb-scratch"
+ONE_SHOT=${ONE_SHOT:-false}
+DEFAULT_PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+
+# Built-in defaults; load_config re-applies them after sourcing CONFIG_ENV.
+USB_SCRATCH_DEFAULT_ENABLED=${USB_SCRATCH_DEFAULT_ENABLED:-true}
+USB_SCRATCH_DEFAULT_LABEL=${USB_SCRATCH_DEFAULT_LABEL:-atlas-scratch}
+USB_SCRATCH_DEFAULT_FSTYPE=${USB_SCRATCH_DEFAULT_FSTYPE:-ext4}
+USB_SCRATCH_MOUNTPOINT=${USB_SCRATCH_MOUNTPOINT:-/mnt/usb-scratch}
+USB_SCRATCH_REQUIRED_FREE_GIB=${USB_SCRATCH_REQUIRED_FREE_GIB:-20}
+USB_SCRATCH_RECONCILE_INTERVAL_SEC=${USB_SCRATCH_RECONCILE_INTERVAL_SEC:-900}
+USB_SCRATCH_CUTOVER_JITTER_MAX_SEC=${USB_SCRATCH_CUTOVER_JITTER_MAX_SEC:-900}
+
+# Host directories that get bind-mounted from the scratch disk.
+TARGET_PATHS=(
+  "/var/log/pods"
+  "/var/log/containers"
+  "/var/lib/rancher/k3s/agent/containerd"
+  "/var/lib/rancher/k3s/agent/images"
+)
+
+# Set to 1 while perform_cutover has k3s-agent stopped; cleanup restarts it.
+agent_stopped=0
+
+# Outputs of resolve_selector, pre-declared so `set -u` never trips on them.
+SELECTOR_KIND=""
+SELECTOR_VALUE=""
+SELECTOR_SPEC=""
+DEVICE_PATH=""
+DEVICE_FSTYPE=""
+DEVICE_UUID=""
+DEVICE_LABEL=""
+EXPECTED_FSTYPE=""
+
+# Print a UTC-timestamped message on stdout.
+log() {
+  printf '[%s] %s\n' "$(date -u +%FT%TZ)" "$*"
+}
+
+# Make a value safe for `kubectl annotate key=value`: spaces become
+# underscores and every character outside [:alnum:]._:/=- is dropped.
+sanitize_annotation_value() {
+  printf '%s' "$1" | tr ' ' '_' | tr -cd '[:alnum:]._:/=-'
+}
+
+# Publish status, detail and selector as annotations on this node. Best effort:
+# a failed API call is logged but never aborts the reconcile.
+annotate_node() {
+  local status="$1"
+  local detail="$2"
+  local selector="$3"
+  local timestamp
+  timestamp="$(date -u +%FT%TZ)"
+  if ! kubectl annotate --overwrite node "${NODE_NAME}" \
+    maintenance.bstein.dev/usb-scratch-status="$(sanitize_annotation_value "${status}")" \
+    maintenance.bstein.dev/usb-scratch-detail="$(sanitize_annotation_value "${detail}")" \
+    maintenance.bstein.dev/usb-scratch-selector="$(sanitize_annotation_value "${selector}")" \
+    maintenance.bstein.dev/usb-scratch-mountpoint="$(sanitize_annotation_value "${USB_SCRATCH_MOUNTPOINT}")" \
+    maintenance.bstein.dev/usb-scratch-managed-paths="$(sanitize_annotation_value "${TARGET_PATHS[*]}")" \
+    maintenance.bstein.dev/usb-scratch-last-apply="${timestamp}" \
+    >/dev/null 2>&1; then
+    log "warning: failed to annotate node ${NODE_NAME} (status=${status} detail=${detail})"
+  fi
+}
+
+# Run a shell snippet on the host under `sh -ceu` with PATH=DEFAULT_PATH. Uses
+# nsenter into PID 1's namespaces (container binary first, then the host's own
+# copy) and falls back to a chroot into HOST_ROOT when no nsenter is available.
+host_sh() {
+  local snippet="$1"
+  local candidate nsenter_bin=""
+
+  if command -v nsenter >/dev/null 2>&1; then
+    nsenter_bin="nsenter"
+  else
+    for candidate in "${HOST_ROOT}/usr/bin/nsenter" "${HOST_ROOT}/bin/nsenter"; do
+      if [ -x "${candidate}" ]; then
+        nsenter_bin="${candidate}"
+        break
+      fi
+    done
+  fi
+
+  if [ -n "${nsenter_bin}" ]; then
+    "${nsenter_bin}" -t 1 -m -u -i -n -p -- /bin/sh -ceu "PATH=${DEFAULT_PATH}; ${snippet}"
+  else
+    chroot "${HOST_ROOT}" /bin/sh -ceu "PATH=${DEFAULT_PATH}; ${snippet}"
+  fi
+}
+
+# Restart k3s-agent if a cutover left it stopped. Runs from the EXIT trap and at
+# the start of every reconcile pass.
+cleanup() {
+  if [ "${agent_stopped}" -eq 1 ]; then
+    log "starting k3s-agent after interrupted cutover"
+    host_sh "systemctl start k3s-agent || true"
+    agent_stopped=0
+  fi
+}
+trap cleanup EXIT
+
+# Source CONFIG_ENV when present, then fill any unset USB_SCRATCH_* setting
+# with its built-in default.
+load_config() {
+  if [ -f "${CONFIG_ENV}" ]; then
+    # shellcheck disable=SC1090
+    . "${CONFIG_ENV}"
+  fi
+
+  USB_SCRATCH_DEFAULT_ENABLED=${USB_SCRATCH_DEFAULT_ENABLED:-true}
+  USB_SCRATCH_DEFAULT_LABEL=${USB_SCRATCH_DEFAULT_LABEL:-atlas-scratch}
+  USB_SCRATCH_DEFAULT_FSTYPE=${USB_SCRATCH_DEFAULT_FSTYPE:-ext4}
+  USB_SCRATCH_MOUNTPOINT=${USB_SCRATCH_MOUNTPOINT:-/mnt/usb-scratch}
+  USB_SCRATCH_REQUIRED_FREE_GIB=${USB_SCRATCH_REQUIRED_FREE_GIB:-20}
+  USB_SCRATCH_RECONCILE_INTERVAL_SEC=${USB_SCRATCH_RECONCILE_INTERVAL_SEC:-900}
+  USB_SCRATCH_CUTOVER_JITTER_MAX_SEC=${USB_SCRATCH_CUTOVER_JITTER_MAX_SEC:-900}
+}
+
+# Print the first non-comment inventory row (at least four fields) whose first
+# field is NODE_NAME; prints nothing when there is no such row.
+lookup_inventory() {
+  local line=""
+  if [ -f "${INVENTORY_FILE}" ]; then
+    line="$(awk -v node="${NODE_NAME}" 'NF >= 4 && $1 !~ /^#/ && $1 == node { print; exit }' "${INVENTORY_FILE}" || true)"
+  fi
+  printf '%s' "${line}"
+}
+
+# Print a file with the MANAGED_BEGIN..MANAGED_END block (markers included)
+# removed.
+strip_managed_block() {
+  local source_file="$1"
+  awk -v begin="${MANAGED_BEGIN}" -v end="${MANAGED_END}" '
+    $0 == begin { skip=1; next }
+    $0 == end { skip=0; next }
+    skip != 1 { print }
+  ' "${source_file}"
+}
+
+# Rewrite the managed fstab block: one entry for the scratch disk plus one bind
+# entry per TARGET_PATHS item. Returns 0 when FSTAB_PATH was rewritten and 1
+# when it was already up to date or could not be read.
+ensure_fstab_block() {
+  local selector="$1"
+  local fstype="$2"
+  local tmp_base tmp_candidate bind_source target
+
+  mkdir -p "${STATE_DIR}"
+  tmp_base="${STATE_DIR}/fstab.base"
+  tmp_candidate="${STATE_DIR}/fstab.candidate"
+
+  # errexit is inactive here (main calls reconcile_once under `||`), so check
+  # explicitly: never build a candidate from a base that failed to read.
+  if ! strip_managed_block "${FSTAB_PATH}" > "${tmp_base}"; then
+    log "cannot read ${FSTAB_PATH}; leaving it untouched"
+    return 1
+  fi
+  cp "${tmp_base}" "${tmp_candidate}"
+
+  {
+    printf '%s\n' "${MANAGED_BEGIN}"
+    printf '%s %s %s defaults,nofail,noatime,lazytime,commit=60,x-systemd.device-timeout=15s,x-systemd.mount-timeout=30s 0 2\n' \
+      "${selector}" "${USB_SCRATCH_MOUNTPOINT}" "${fstype}"
+    for target in "${TARGET_PATHS[@]}"; do
+      bind_source="${USB_SCRATCH_MOUNTPOINT}${target}"
+      printf '%s %s none bind,nofail,x-systemd.requires-mounts-for=%s 0 0\n' \
+        "${bind_source}" "${target}" "${USB_SCRATCH_MOUNTPOINT}"
+    done
+    printf '%s\n' "${MANAGED_END}"
+  } >> "${tmp_candidate}"
+
+  if cmp -s "${FSTAB_PATH}" "${tmp_candidate}"; then
+    return 1
+  fi
+
+  # Keep the previous fstab so a bad rewrite can be rolled back by hand.
+  cp "${FSTAB_PATH}" "${STATE_DIR}/fstab.previous"
+  cp "${tmp_candidate}" "${FSTAB_PATH}"
+  log "updated ${FSTAB_PATH} managed block"
+  return 0
+}
+
+# Print the SOURCE of the filesystem mounted exactly at the given host path, or
+# nothing when the path is not a mountpoint. --mountpoint is required here:
+# --target would fall back to whatever filesystem contains the path.
+find_existing_mount_source() {
+  local target="$1"
+  host_sh "findmnt --mountpoint '${target}' -n -o SOURCE 2>/dev/null || true"
+}
+
+# Store the outcome of resolve_selector in the SELECTOR_*/DEVICE_* globals.
+# Arguments: kind, value, fstab spec, device path, filesystem type.
+set_selector_result() {
+  SELECTOR_KIND="$1"
+  SELECTOR_VALUE="$2"
+  SELECTOR_SPEC="$3"
+  DEVICE_PATH="$4"
+  DEVICE_FSTYPE="$5"
+}
+
+# Decide which block device backs the scratch mount for this node, from the
+# inventory row or the default label, and probe it with blkid. SELECTOR_KIND
+# becomes uuid|label|device on success (DEVICE_PATH is empty while the device
+# is absent) or disabled|missing|invalid|fs-mismatch otherwise.
+resolve_selector() {
+  local inventory_line enabled kind value fstype actual_device actual_fstype selector
+
+  inventory_line="$(lookup_inventory)"
+  enabled="${USB_SCRATCH_DEFAULT_ENABLED}"
+  kind=""
+  value=""
+  fstype="${USB_SCRATCH_DEFAULT_FSTYPE}"
+  DEVICE_UUID=""
+  DEVICE_LABEL=""
+
+  if [ -n "${inventory_line}" ]; then
+    read -r _ enabled kind value fstype _ <<<"${inventory_line}"
+  elif [ -n "${USB_SCRATCH_DEFAULT_LABEL}" ]; then
+    kind="label"
+    value="${USB_SCRATCH_DEFAULT_LABEL}"
+  fi
+  # A four-column inventory row leaves fstype empty; use the default then.
+  EXPECTED_FSTYPE="${fstype:-${USB_SCRATCH_DEFAULT_FSTYPE}}"
+
+  if [ "${enabled}" != "true" ]; then
+    set_selector_result "disabled" "" "" "" "${EXPECTED_FSTYPE}"
+    return 0
+  fi
+
+  if [ -z "${kind}" ] || [ -z "${value}" ]; then
+    set_selector_result "missing" "" "" "" "${EXPECTED_FSTYPE}"
+    return 0
+  fi
+
+  case "${kind}" in
+    uuid)
+      selector="UUID=${value}"
+      actual_device="$(host_sh "blkid -U '${value}' 2>/dev/null || true")"
+      ;;
+    label)
+      selector="LABEL=${value}"
+      actual_device="$(host_sh "blkid -L '${value}' 2>/dev/null || true")"
+      ;;
+    device)
+      selector="${value}"
+      actual_device="$(host_sh "if [ -b '${value}' ]; then printf '%s' '${value}'; fi")"
+      ;;
+    *)
+      set_selector_result "invalid" "${value}" "" "" "${EXPECTED_FSTYPE}"
+      return 0
+      ;;
+  esac
+
+  actual_fstype=""
+  if [ -n "${actual_device}" ]; then
+    actual_fstype="$(host_sh "blkid -o value -s TYPE '${actual_device}' 2>/dev/null || true")"
+    DEVICE_UUID="$(host_sh "blkid -o value -s UUID '${actual_device}' 2>/dev/null || true")"
+    DEVICE_LABEL="$(host_sh "blkid -o value -s LABEL '${actual_device}' 2>/dev/null || true")"
+  fi
+
+  if [ -n "${actual_fstype}" ] && [ -n "${EXPECTED_FSTYPE}" ] && [ "${actual_fstype}" != "${EXPECTED_FSTYPE}" ]; then
+    set_selector_result "fs-mismatch" "${selector}" "${selector}" "${actual_device}" "${actual_fstype}"
+    return 0
+  fi
+
+  set_selector_result "${kind}" "${value}" "${selector}" "${actual_device}" "${EXPECTED_FSTYPE}"
+}
+
+# Create the state directory, the scratch mountpoint, and each target path with
+# its counterpart below the mountpoint, all addressed through HOST_ROOT.
+ensure_directories() {
+  local target
+  mkdir -p "${STATE_DIR}" "${HOST_ROOT}${USB_SCRATCH_MOUNTPOINT}"
+  for target in "${TARGET_PATHS[@]}"; do
+    mkdir -p "${HOST_ROOT}${target}" "${HOST_ROOT}${USB_SCRATCH_MOUNTPOINT}${target}"
+  done
+}
+
+# Mount the scratch filesystem at USB_SCRATCH_MOUNTPOINT unless it already is.
+# Returns 0 when mounted, 1 when something other than the expected device is
+# mounted there, and 2 when the mount itself fails.
+ensure_usb_mount_live() {
+  local existing_source
+  existing_source="$(find_existing_mount_source "${USB_SCRATCH_MOUNTPOINT}")"
+  if [ -n "${existing_source}" ] && [ -n "${DEVICE_PATH}" ] && [ "${existing_source}" != "${DEVICE_PATH}" ] && [ "${existing_source}" != "${SELECTOR_SPEC}" ]; then
+    log "usb scratch already mounted from unexpected source ${existing_source}"
+    return 1
+  fi
+
+  if ! host_sh "mkdir -p '${USB_SCRATCH_MOUNTPOINT}'; mountpoint -q '${USB_SCRATCH_MOUNTPOINT}' || mount '${USB_SCRATCH_MOUNTPOINT}'"; then
+    log "failed to mount ${USB_SCRATCH_MOUNTPOINT}"
+    return 2
+  fi
+  return 0
+}
+
+# Print the free space on the scratch mount in whole GiB, rounded down so a
+# disk just under the threshold cannot pass the check.
+free_space_gib() {
+  host_sh "df -Pk '${USB_SCRATCH_MOUNTPOINT}' | awk 'NR==2 { printf \"%d\", \$4 / 1024 / 1024 }'"
+}
+
+# Succeed when target is a mountpoint that is the same directory (device and
+# inode) as its counterpart on the mounted scratch disk, i.e. the bind mount is
+# live. findmnt's SOURCE column is not compared because for bind mounts it
+# reads like "/dev/sda1[/var/log/pods]", never like the bind source path.
+target_bound_to_scratch() {
+  local target="$1"
+  local scratch_dir="${USB_SCRATCH_MOUNTPOINT}${target}"
+  host_sh "mountpoint -q '${USB_SCRATCH_MOUNTPOINT}' && mountpoint -q '${target}' && [ '${target}' -ef '${scratch_dir}' ]"
+}
+
+# Copy the current contents of target into its directory on the scratch disk,
+# with rsync when the host has it and a tar pipe otherwise.
+seed_target_data() {
+  local target="$1"
+  local source_dir
+  source_dir="${USB_SCRATCH_MOUNTPOINT}${target}"
+  host_sh "mkdir -p '${source_dir}' '${target}'; if command -v rsync >/dev/null 2>&1; then rsync -aHAX --numeric-ids '${target}/' '${source_dir}/'; else tar -C '${target}' -cf - . | tar -C '${source_dir}' -xf -; fi"
+}
+
+# Mount target from its fstab entry unless it is already a mountpoint.
+mount_target_live() {
+  local target="$1"
+  host_sh "mountpoint -q '${target}' || mount '${target}'"
+}
+
+# Succeed when at least one TARGET_PATHS entry is not yet bound to the scratch disk.
+cutover_needed() {
+  local target
+  for target in "${TARGET_PATHS[@]}"; do
+    if ! target_bound_to_scratch "${target}"; then
+      return 0
+    fi
+  done
+  return 1
+}
+
+# Move every unbound target onto the scratch disk: wait a random jitter, stop
+# k3s-agent, seed and bind-mount each target, then start k3s-agent again.
+# Returns non-zero when any step fails. Each step is checked explicitly because
+# errexit is inactive here, and a target is never mounted over after a failed
+# seed.
+perform_cutover() {
+  local jitter target failed=0
+
+  if ! cutover_needed; then
+    return 0
+  fi
+
+  jitter=0
+  if [ "${USB_SCRATCH_CUTOVER_JITTER_MAX_SEC}" -gt 0 ]; then
+    jitter=$(( RANDOM % (USB_SCRATCH_CUTOVER_JITTER_MAX_SEC + 1) ))
+  fi
+  if [ "${jitter}" -gt 0 ]; then
+    log "sleeping ${jitter}s before first live cutover"
+    sleep "${jitter}"
+  fi
+
+  log "stopping k3s-agent for USB scratch cutover"
+  if ! host_sh "systemctl stop k3s-agent"; then
+    log "failed to stop k3s-agent; skipping cutover"
+    return 1
+  fi
+  agent_stopped=1
+
+  for target in "${TARGET_PATHS[@]}"; do
+    if target_bound_to_scratch "${target}"; then
+      continue
+    fi
+    log "seeding ${target} into ${USB_SCRATCH_MOUNTPOINT}${target}"
+    if ! seed_target_data "${target}"; then
+      log "seeding ${target} failed; aborting cutover before it is mounted over"
+      failed=1
+      break
+    fi
+    log "mounting bind target ${target}"
+    if ! mount_target_live "${target}"; then
+      log "bind mount of ${target} failed; aborting cutover"
+      failed=1
+      break
+    fi
+  done
+
+  log "starting k3s-agent after USB scratch cutover"
+  if host_sh "systemctl start k3s-agent"; then
+    agent_stopped=0
+  else
+    # agent_stopped stays 1 so cleanup retries on the next pass or at exit.
+    log "failed to start k3s-agent"
+    failed=1
+  fi
+  return "${failed}"
+}
+
+# Run one reconcile pass and record the outcome on the node. Always returns 0
+# for handled conditions; the annotation carries the error detail.
+reconcile_once() {
+  local fstab_changed=false free_gib selector_detail mount_rc=0
+
+  # A previous cutover may have left k3s-agent stopped; retry the start first.
+  cleanup
+  load_config
+  resolve_selector
+
+  case "${SELECTOR_KIND}" in
+    disabled)
+      annotate_node "disabled" "inventory-disabled" "none"
+      log "inventory disables USB scratch on ${NODE_NAME}"
+      return 0
+      ;;
+    missing)
+      annotate_node "pending" "missing-inventory" "none"
+      log "no inventory entry or default selector for ${NODE_NAME}"
+      return 0
+      ;;
+    invalid)
+      annotate_node "error" "invalid-selector" "${SELECTOR_VALUE}"
+      log "invalid selector configured for ${NODE_NAME}"
+      return 0
+      ;;
+    fs-mismatch)
+      annotate_node "error" "filesystem-mismatch" "${SELECTOR_SPEC}"
+      log "filesystem mismatch on ${DEVICE_PATH}: expected ${EXPECTED_FSTYPE}, got ${DEVICE_FSTYPE}"
+      return 0
+      ;;
+  esac
+
+  # Only touch the host filesystem once the node is known to be in scope.
+  ensure_directories
+  selector_detail="${SELECTOR_SPEC}"
+
+  if ensure_fstab_block "${SELECTOR_SPEC}" "${DEVICE_FSTYPE}"; then
+    fstab_changed=true
+    host_sh "systemctl daemon-reload || true"
+  fi
+
+  if [ -z "${DEVICE_PATH}" ]; then
+    annotate_node "pending" "device-not-found" "${selector_detail}"
+    log "scratch device not present yet for selector ${selector_detail}"
+    return 0
+  fi
+
+  ensure_usb_mount_live || mount_rc=$?
+  if [ "${mount_rc}" -eq 1 ]; then
+    annotate_node "error" "mount-conflict" "${selector_detail}"
+    return 0
+  elif [ "${mount_rc}" -ne 0 ]; then
+    annotate_node "error" "mount-failed" "${selector_detail}"
+    return 0
+  fi
+
+  free_gib="$(free_space_gib || true)"
+  case "${free_gib}" in
+    '' | *[!0-9]*)
+      annotate_node "error" "free-space-check-failed" "${selector_detail}"
+      return 0
+      ;;
+  esac
+
+  if [ "${free_gib}" -lt "${USB_SCRATCH_REQUIRED_FREE_GIB}" ]; then
+    annotate_node "error" "insufficient-free-space-${free_gib}Gi" "${selector_detail}"
+    log "usb scratch free space ${free_gib}Gi below required ${USB_SCRATCH_REQUIRED_FREE_GIB}Gi"
+    return 0
+  fi
+
+  if host_sh "systemctl list-unit-files | grep -q '^k3s-agent.service'"; then
+    if ! perform_cutover; then
+      annotate_node "error" "cutover-failed" "${selector_detail}"
+      log "usb scratch cutover failed on ${NODE_NAME}"
+      return 0
+    fi
+  else
+    annotate_node "error" "missing-k3s-agent-service" "${selector_detail}"
+    log "k3s-agent.service missing on ${NODE_NAME}"
+    return 0
+  fi
+
+  if cutover_needed; then
+    annotate_node "error" "bind-mount-incomplete" "${selector_detail}"
+    return 0
+  fi
+
+  if [ "${fstab_changed}" = true ]; then
+    log "usb scratch fstab refreshed for ${NODE_NAME}"
+  fi
+  annotate_node "ready" "scratch-online-${free_gib}Gi" "${selector_detail}"
+  log "usb scratch ready on ${NODE_NAME} via ${selector_detail} mounted at ${USB_SCRATCH_MOUNTPOINT}"
+}
+
+# Reconcile forever, sleeping USB_SCRATCH_RECONCILE_INTERVAL_SEC between passes,
+# or exactly once when ONE_SHOT=true.
+main() {
+  while true; do
+    # `|| true` keeps the loop alive, but it also disables errexit inside
+    # reconcile_once, so the functions it calls check failures explicitly.
+    reconcile_once || true
+    if [ "${ONE_SHOT}" = "true" ]; then
+      exit 0
+    fi
+    sleep "${USB_SCRATCH_RECONCILE_INTERVAL_SEC}"
+  done
+}
+
+main