maintenance(titan-24): add desktop helper and rootfs sweep
This commit is contained in:
parent
fb43b02b2a
commit
c55d5ac3b5
@ -26,6 +26,9 @@ resources:
|
|||||||
- metis-deployment.yaml
|
- metis-deployment.yaml
|
||||||
- soteria-deployment.yaml
|
- soteria-deployment.yaml
|
||||||
- oneoffs/ariadne-migrate-job.yaml
|
- oneoffs/ariadne-migrate-job.yaml
|
||||||
|
- oneoffs/titan-24-rootfs-sweep-job.yaml
|
||||||
|
- oneoffs/titan-24-lesavka-desktop-helper-job.yaml
|
||||||
|
- oneoffs/titan-24-lesavka-desktop-helper-cleanup-job.yaml
|
||||||
- ariadne-service.yaml
|
- ariadne-service.yaml
|
||||||
- soteria-service.yaml
|
- soteria-service.yaml
|
||||||
- disable-k3s-traefik-daemonset.yaml
|
- disable-k3s-traefik-daemonset.yaml
|
||||||
|
|||||||
@ -0,0 +1,61 @@
|
|||||||
|
# services/maintenance/oneoffs/titan-24-lesavka-desktop-helper-cleanup-job.yaml
# One-off cleanup for the temporary Lesavka paste-test desktop on titan-24.
# Run by setting spec.suspend to false, reconcile, then set it back to true.
#
# The pod removes the SDDM autologin drop-in and the lesavka-test user from
# the host (via chroot into the host root mounted at /host), then restarts the
# display manager in the host's namespaces.
apiVersion: batch/v1
kind: Job
metadata:
  name: titan-24-lesavka-desktop-helper-cleanup
  namespace: maintenance
  annotations:
    kustomize.toolkit.fluxcd.io/force: "true"
spec:
  # Kept suspended in git so the cleanup only runs when explicitly released.
  suspend: true
  backoffLimit: 1
  ttlSecondsAfterFinished: 3600
  template:
    metadata:
      labels:
        app: titan-24-lesavka-desktop-helper-cleanup
    spec:
      restartPolicy: Never
      nodeSelector:
        kubernetes.io/hostname: titan-24
      tolerations:
        - key: node.kubernetes.io/not-ready
          operator: Exists
          effect: NoExecute
          tolerationSeconds: 300
        - key: node.kubernetes.io/unreachable
          operator: Exists
          effect: NoExecute
          tolerationSeconds: 300
      # hostPID is required so `nsenter -t 1` targets the host's init process.
      hostPID: true
      containers:
        - name: cleanup
          image: debian:13-slim
          securityContext:
            privileged: true
            runAsUser: 0
          # bash, not /bin/sh: Debian's /bin/sh is dash, which aborts on
          # `set -o pipefail` before any cleanup step runs.
          command: ["/bin/bash", "-c"]
          args:
            - |
              set -euo pipefail
              chroot /host /usr/bin/env bash <<'EOS'
              set -euo pipefail
              username="lesavka-test"
              rm -f /etc/sddm.conf.d/60-lesavka-test-autologin.conf
              if id "${username}" >/dev/null 2>&1; then
                loginctl terminate-user "${username}" >/dev/null 2>&1 || true
                userdel -r "${username}" >/dev/null 2>&1 || true
              fi
              EOS

              nsenter -t 1 -m -u -i -n -p -- systemctl restart sddm || \
                nsenter -t 1 -m -u -i -n -p -- systemctl restart display-manager
          volumeMounts:
            - name: host-root
              mountPath: /host
      volumes:
        - name: host-root
          hostPath:
            path: /
|
||||||
@ -0,0 +1,118 @@
|
|||||||
|
# services/maintenance/oneoffs/titan-24-lesavka-desktop-helper-job.yaml
# One-off job to create a temporary autologin desktop for Lesavka paste testing on titan-24.
# Safe to delete the finished Job/pod after it succeeds.
#
# The pod chroots into the host root (mounted at /host) to create the
# lesavka-test user, write an SDDM autologin drop-in and an autostart entry
# that opens an editor on ~/lesavka-paste-test.txt, then restarts the display
# manager in the host's namespaces.
apiVersion: batch/v1
kind: Job
metadata:
  name: titan-24-lesavka-desktop-helper
  namespace: maintenance
  annotations:
    kustomize.toolkit.fluxcd.io/force: "true"
spec:
  backoffLimit: 1
  ttlSecondsAfterFinished: 3600
  template:
    metadata:
      labels:
        app: titan-24-lesavka-desktop-helper
    spec:
      restartPolicy: OnFailure
      nodeSelector:
        kubernetes.io/hostname: titan-24
      tolerations:
        - key: node.kubernetes.io/not-ready
          operator: Exists
          effect: NoExecute
          tolerationSeconds: 300
        - key: node.kubernetes.io/unreachable
          operator: Exists
          effect: NoExecute
          tolerationSeconds: 300
      # hostPID is required so `nsenter -t 1` targets the host's init process.
      hostPID: true
      containers:
        - name: setup
          image: debian:13-slim
          securityContext:
            privileged: true
            runAsUser: 0
          # bash, not /bin/sh: Debian's /bin/sh is dash, which aborts on
          # `set -o pipefail` before any setup step runs.
          command: ["/bin/bash", "-c"]
          args:
            - |
              set -euo pipefail
              chroot /host /usr/bin/env bash <<'EOS'
              set -euo pipefail
              username="lesavka-test"
              home="/home/${username}"
              session_name="plasmax11.desktop"

              if ! id "${username}" >/dev/null 2>&1; then
                useradd -m -s /bin/bash "${username}"
              fi
              passwd -l "${username}" >/dev/null 2>&1 || true

              for group in audio video render input netdev plugdev; do
                if getent group "${group}" >/dev/null 2>&1; then
                  usermod -a -G "${group}" "${username}"
                fi
              done

              install -d -m 755 /etc/sddm.conf.d
              printf '%s\n' \
                '[Autologin]' \
                "User=${username}" \
                "Session=${session_name}" \
                'Relogin=false' \
                >/etc/sddm.conf.d/60-lesavka-test-autologin.conf

              install -d -o "${username}" -g "${username}" -m 700 \
                "${home}/.config/autostart" \
                "${home}/.local/bin"
              install -o "${username}" -g "${username}" -m 644 /dev/null "${home}/lesavka-paste-test.txt"

              printf '%s\n' \
                '#!/usr/bin/env bash' \
                'set -euo pipefail' \
                'cd "${HOME}"' \
                'touch "${HOME}/lesavka-paste-test.txt"' \
                'if command -v kate >/dev/null 2>&1; then' \
                ' exec kate "${HOME}/lesavka-paste-test.txt"' \
                'fi' \
                'if command -v kwrite >/dev/null 2>&1; then' \
                ' exec kwrite "${HOME}/lesavka-paste-test.txt"' \
                'fi' \
                'if command -v gedit >/dev/null 2>&1; then' \
                ' exec gedit "${HOME}/lesavka-paste-test.txt"' \
                'fi' \
                'if command -v mousepad >/dev/null 2>&1; then' \
                ' exec mousepad "${HOME}/lesavka-paste-test.txt"' \
                'fi' \
                'if command -v xterm >/dev/null 2>&1; then' \
                ' exec xterm -fa Monospace -fs 14 -e sh -lc "exec ${EDITOR:-vi} '\''${HOME}/lesavka-paste-test.txt'\''"' \
                'fi' \
                'exit 0' \
                >"${home}/.local/bin/lesavka-test-launch.sh"
              chmod 755 "${home}/.local/bin/lesavka-test-launch.sh"

              printf '%s\n' \
                '[Desktop Entry]' \
                'Type=Application' \
                'Version=1.0' \
                'Name=Lesavka Paste Test' \
                'Comment=Open a visible editor for Lesavka clipboard testing' \
                "Exec=${home}/.local/bin/lesavka-test-launch.sh" \
                'Terminal=false' \
                'X-GNOME-Autostart-enabled=true' \
                >"${home}/.config/autostart/lesavka-test.desktop"

              chown -R "${username}:${username}" "${home}/.config" "${home}/.local" "${home}/lesavka-paste-test.txt"
              EOS

              nsenter -t 1 -m -u -i -n -p -- systemctl restart sddm || \
                nsenter -t 1 -m -u -i -n -p -- systemctl restart display-manager
          volumeMounts:
            - name: host-root
              mountPath: /host
      volumes:
        - name: host-root
          hostPath:
            path: /
|
||||||
64
services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml
Normal file
64
services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
# services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml
# One-off emergency cleanup for titan-24 rootfs pressure.
# Safe to delete the finished Job/pod after it succeeds.
#
# Runs node_image_sweeper.sh (mounted from the node-image-sweeper-script
# ConfigMap) once against the host root filesystem mounted at /host.
apiVersion: batch/v1
kind: Job
metadata:
  name: titan-24-rootfs-sweep
  namespace: maintenance
  annotations:
    kustomize.toolkit.fluxcd.io/force: "true"
spec:
  backoffLimit: 1
  ttlSecondsAfterFinished: 3600
  template:
    metadata:
      labels:
        app: titan-24-rootfs-sweep
    spec:
      restartPolicy: OnFailure
      nodeSelector:
        kubernetes.io/hostname: titan-24
      tolerations:
        - key: node.kubernetes.io/not-ready
          operator: Exists
          effect: NoExecute
          tolerationSeconds: 300
        - key: node.kubernetes.io/unreachable
          operator: Exists
          effect: NoExecute
          tolerationSeconds: 300
      volumes:
        - name: host-root
          hostPath:
            path: /
        - name: script
          configMap:
            name: node-image-sweeper-script
            defaultMode: 0555
      containers:
        - name: sweep
          image: python:3.12.9-alpine3.20
          securityContext:
            privileged: true
            runAsUser: 0
          command:
            - /bin/sh
            - /scripts/node_image_sweeper.sh
          # Tuning passed to the sweeper script; values are strings because
          # they are consumed as environment variables.
          env:
            - name: ONE_SHOT
              value: "true"
            - name: HIGH_USAGE_PERCENT
              value: "0"
            - name: EMERGENCY_USAGE_PERCENT
              value: "0"
            - name: LOG_RETENTION_DAYS
              value: "1"
            - name: ORPHAN_POD_RETENTION_DAYS
              value: "0"
            - name: JOURNAL_MAX_SIZE
              value: "100M"
          volumeMounts:
            - name: host-root
              mountPath: /host
            - name: script
              mountPath: /scripts
              readOnly: true
|
||||||
@ -51,6 +51,48 @@ for name in hdd_names:
|
|||||||
PY
|
PY
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Delete pod log directories under /host/var/log/pods that belong to pods the
# kubelet no longer tracks under /host/var/lib/kubelet/pods and whose mtime is
# older than ORPHAN_POD_RETENTION_DAYS days (default 3). Each removed path is
# printed. Does nothing if either directory is missing or the active pod list
# cannot be read.
cleanup_orphaned_root_pod_logs() {
  if [ ! -d /host/var/log/pods ] || [ ! -d /host/var/lib/kubelet/pods ]; then
    return 0
  fi

  ORPHAN_POD_RETENTION_DAYS="${ORPHAN_POD_RETENTION_DAYS:-3}" python3 - <<'PY'
import os
import shutil
import sys
import time

root_pods = "/host/var/log/pods"
active_pods = "/host/var/lib/kubelet/pods"

# An empty or malformed value falls back to the default instead of crashing.
raw_retention = os.environ.get("ORPHAN_POD_RETENTION_DAYS", "").strip()
try:
    retention_days = int(raw_retention) if raw_retention else 3
except ValueError:
    print(
        "invalid ORPHAN_POD_RETENTION_DAYS=%r; using 3" % raw_retention,
        file=sys.stderr,
    )
    retention_days = 3
cutoff = time.time() - (retention_days * 86400)

# Without the active pod list every log directory would look orphaned, so
# skip the cleanup entirely rather than deleting logs of running pods.
try:
    active_names = set(os.listdir(active_pods))
except OSError as exc:
    print("cannot list %s: %s; skipping" % (active_pods, exc), file=sys.stderr)
    raise SystemExit(0)

try:
    root_names = os.listdir(root_pods)
except OSError:
    root_names = []

for name in root_names:
    path = os.path.join(root_pods, name)
    if not os.path.isdir(path):
        continue
    # kubelet names log directories "<namespace>_<pod>_<uid>" while
    # /var/lib/kubelet/pods entries are the bare "<uid>"; compare on the UID.
    # The plain-name check is kept for layouts that use the UID directly.
    pod_uid = name.rsplit("_", 1)[-1]
    if name in active_names or pod_uid in active_names:
        continue
    try:
        mtime = os.path.getmtime(path)
    except OSError:
        continue
    if mtime > cutoff:
        continue
    print(path)
    shutil.rmtree(path, ignore_errors=True)
PY
}
|
||||||
|
|
||||||
sweep_once() {
|
sweep_once() {
|
||||||
usage=$(df -P /host | awk 'NR==2 {gsub(/%/,"",$5); print $5}') || usage=""
|
usage=$(df -P /host | awk 'NR==2 {gsub(/%/,"",$5); print $5}') || usage=""
|
||||||
|
|
||||||
@ -61,6 +103,7 @@ sweep_once() {
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
cleanup_orphaned_hdd_pod_logs
|
cleanup_orphaned_hdd_pod_logs
|
||||||
|
cleanup_orphaned_root_pod_logs
|
||||||
|
|
||||||
if [ -d /host/var/log.hdd/pods ]; then
|
if [ -d /host/var/log.hdd/pods ]; then
|
||||||
find /host/var/log.hdd/pods -type f -name "*.log" -mtime +"${LOG_RETENTION_DAYS}" -print -delete 2>/dev/null || true
|
find /host/var/log.hdd/pods -type f -name "*.log" -mtime +"${LOG_RETENTION_DAYS}" -print -delete 2>/dev/null || true
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user