titan-iac/services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml

71 lines
2.0 KiB
YAML
Raw Normal View History

# services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml
# One-off emergency cleanup for titan-24 rootfs pressure.
# Safe to delete the finished Job/pod after it succeeds.
apiVersion: batch/v1
kind: Job
metadata:
  name: titan-24-rootfs-sweep
  namespace: maintenance
  annotations:
    # Flux only honours the value "enabled" for this per-object opt-in. It lets
    # kustomize-controller delete and recreate the Job when an apply fails on
    # immutable fields (a Job's pod template cannot be patched in place).
    kustomize.toolkit.fluxcd.io/force: enabled
spec:
  backoffLimit: 6
  # The finished Job and its pod are garbage-collected one hour after completion.
  ttlSecondsAfterFinished: 3600
  template:
    metadata:
      labels:
        app: titan-24-rootfs-sweep
    spec:
      restartPolicy: OnFailure
      # Pin the pod to the affected node.
      nodeSelector:
        kubernetes.io/hostname: titan-24
      # Allow scheduling while the node is tainted not-ready/unreachable, and
      # stay bound for up to 300s once the matching NoExecute taints appear.
      # NOTE(review): node.kubernetes.io/disk-pressure is not tolerated; if
      # titan-24 carries that taint the pod will stay Pending - confirm.
      tolerations:
        - key: node.kubernetes.io/not-ready
          operator: Exists
          effect: NoSchedule
        - key: node.kubernetes.io/unreachable
          operator: Exists
          effect: NoSchedule
        - key: node.kubernetes.io/not-ready
          operator: Exists
          effect: NoExecute
          tolerationSeconds: 300
        - key: node.kubernetes.io/unreachable
          operator: Exists
          effect: NoExecute
          tolerationSeconds: 300
      containers:
        - name: sweep
          image: python:3.12.9-alpine3.20
          # The script comes from the ConfigMap volume mounted at /scripts.
          command: ["/bin/sh", "/scripts/node_image_sweeper.sh"]
          # Env values must be strings, hence the quoting of numbers/booleans.
          # NOTE(review): the 0% thresholds and 0-day retention presumably
          # force the most aggressive sweep regardless of current usage;
          # confirm against node_image_sweeper.sh.
          env:
            - name: ONE_SHOT
              value: "true"
            - name: HIGH_USAGE_PERCENT
              value: "0"
            - name: EMERGENCY_USAGE_PERCENT
              value: "0"
            - name: LOG_RETENTION_DAYS
              value: "1"
            - name: ORPHAN_POD_RETENTION_DAYS
              value: "0"
            - name: JOURNAL_MAX_SIZE
              value: "100M"
          # Runs as privileged root with the node's root filesystem mounted
          # read-write at /host.
          securityContext:
            privileged: true
            runAsUser: 0
          volumeMounts:
            - name: host-root
              mountPath: /host
            - name: script
              mountPath: /scripts
              readOnly: true
      volumes:
        - name: host-root
          hostPath:
            path: /
        - name: script
          configMap:
            name: node-image-sweeper-script
            # Octal r-xr-xr-x. Must stay an unquoted integer: the field is an
            # int32 and Kubernetes reads the leading-zero form as octal.
            defaultMode: 0555