maintenance: sweep unused images on arm workers
This commit is contained in:
parent
e2efeeacba
commit
99a6b4c054
@ -0,0 +1,14 @@
|
|||||||
|
# clusters/atlas/flux-system/platform/maintenance/kustomization.yaml
# Flux Kustomization wiring ./services/maintenance into the atlas cluster.
---
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: maintenance
  namespace: flux-system
spec:
  interval: 10m
  path: ./services/maintenance
  # Remove cluster objects when they disappear from git.
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
  # Don't block reconciliation on resource readiness.
  wait: false
44
services/maintenance/image-sweeper-cronjob.yaml
Normal file
44
services/maintenance/image-sweeper-cronjob.yaml
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
# services/maintenance/image-sweeper-cronjob.yaml
# Weekly one-shot image sweep (Sunday 04:30) on arm64 workers, reusing the
# node-image-sweeper script/ServiceAccount; ONE_SHOT=true makes the script
# exit instead of sleeping forever.
---
apiVersion: batch/v1
kind: CronJob
metadata:
  name: image-sweeper
  namespace: maintenance
spec:
  schedule: "30 4 * * 0"
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 2
  failedJobsHistoryLimit: 2
  jobTemplate:
    spec:
      template:
        spec:
          serviceAccountName: node-image-sweeper
          restartPolicy: OnFailure
          nodeSelector:
            kubernetes.io/arch: arm64
            node-role.kubernetes.io/worker: "true"
          containers:
            - name: image-sweeper
              image: python:3.12.9-alpine3.20
              command: ["/bin/sh", "/scripts/node_image_sweeper.sh"]
              env:
                - name: ONE_SHOT
                  value: "true"
              # Privileged + host chroot are required to drive crictl on the node.
              securityContext:
                privileged: true
                runAsUser: 0
              volumeMounts:
                - name: host-root
                  mountPath: /host
                - name: script
                  mountPath: /scripts
                  readOnly: true
          volumes:
            - name: host-root
              hostPath:
                path: /
            - name: script
              configMap:
                name: node-image-sweeper-script
                defaultMode: 0555
15
services/maintenance/kustomization.yaml
Normal file
15
services/maintenance/kustomization.yaml
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
# services/maintenance/kustomization.yaml
# Aggregates all maintenance workloads (namespace first so dependents apply cleanly).
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - node-nofile-serviceaccount.yaml
  - pod-cleaner-rbac.yaml
  - node-nofile-script.yaml
  - pod-cleaner-script.yaml
  - node-nofile-daemonset.yaml
  - pod-cleaner-cronjob.yaml
  - node-image-sweeper-serviceaccount.yaml
  - node-image-sweeper-script.yaml
  - node-image-sweeper-daemonset.yaml
  - image-sweeper-cronjob.yaml
5
services/maintenance/namespace.yaml
Normal file
5
services/maintenance/namespace.yaml
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# services/maintenance/namespace.yaml
---
apiVersion: v1
kind: Namespace
metadata:
  name: maintenance
42
services/maintenance/node-image-sweeper-daemonset.yaml
Normal file
42
services/maintenance/node-image-sweeper-daemonset.yaml
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
# services/maintenance/node-image-sweeper-daemonset.yaml
# Long-running sweeper on every arm64 worker: runs the sweep once at pod start
# (ONE_SHOT unset, so the script sleeps afterwards instead of exiting).
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-image-sweeper
  namespace: maintenance
spec:
  selector:
    matchLabels:
      app: node-image-sweeper
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: node-image-sweeper
    spec:
      serviceAccountName: node-image-sweeper
      nodeSelector:
        kubernetes.io/arch: arm64
        node-role.kubernetes.io/worker: "true"
      containers:
        - name: node-image-sweeper
          image: python:3.12.9-alpine3.20
          command: ["/bin/sh", "/scripts/node_image_sweeper.sh"]
          # Privileged + host chroot are required to drive crictl on the node.
          securityContext:
            privileged: true
            runAsUser: 0
          volumeMounts:
            - name: host-root
              mountPath: /host
            - name: script
              mountPath: /scripts
              readOnly: true
      volumes:
        - name: host-root
          hostPath:
            path: /
        - name: script
          configMap:
            name: node-image-sweeper-script
            defaultMode: 0555
96
services/maintenance/node-image-sweeper-script.yaml
Normal file
96
services/maintenance/node-image-sweeper-script.yaml
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
# services/maintenance/node-image-sweeper-script.yaml
# Sweeper script: removes container images older than a threshold (tightened
# when the node disk is nearly full), then prunes stale k3s airgap tarballs.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: node-image-sweeper-script
  namespace: maintenance
data:
  node_image_sweeper.sh: |
    #!/bin/sh
    set -eu

    ONE_SHOT=${ONE_SHOT:-false}
    THRESHOLD_DAYS=14

    # Tighten the age threshold when the host root filesystem is >= 70% full.
    usage=$(df -P /host | awk 'NR==2 {gsub(/%/,"",$5); print $5}') || usage=""
    if [ -n "${usage}" ] && [ "${usage}" -ge 70 ]; then
      THRESHOLD_DAYS=3
    fi

    # GNU date syntax first, BSD date (-v) as fallback.
    cutoff=$(date -d "${THRESHOLD_DAYS} days ago" +%s 2>/dev/null || date -v -"${THRESHOLD_DAYS}"d +%s)

    RUNNING=$(chroot /host /bin/sh -c "crictl ps -a --quiet 2>/dev/null" | tr -s ' ' '\n' | sort -u | tr '\n' ' ')
    IMAGES_JSON=$(chroot /host /bin/sh -c "crictl images -o json 2>/dev/null" || echo '{}')

    # Never remove pause images, whichever registry mirror they came from.
    SKIP="registry.k8s.io/pause k8s.gcr.io/pause rancher/mirrored-pause"

    prune_list=$(printf "%s" "${IMAGES_JSON}" | CUTOFF="${cutoff}" RUNNING="${RUNNING}" SKIP="${SKIP}" python3 - <<'PY'
    import json, os, sys, time

    try:
        data = json.load(sys.stdin)
    except Exception:
        print("", end="")
        sys.exit(0)

    cutoff = int(os.environ.get("CUTOFF", "0"))
    running = set(os.environ.get("RUNNING", "").split())
    skip = os.environ.get("SKIP", "").split()
    now = int(time.time())
    prune = []


    def is_skip(tags):
        # True when any repo tag starts with a protected prefix.
        if not tags:
            return False
        for t in tags:
            for prefix in skip:
                if prefix and t.startswith(prefix):
                    return True
        return False


    for img in data.get("images", []):
        image_id = img.get("id", "")
        if not image_id:
            continue
        if image_id in running:
            continue
        tags = img.get("repoTags") or []
        if is_skip(tags):
            continue
        # NOTE(review): assumes createdAt is integer nanoseconds; a missing or
        # non-numeric value falls through to 0 and that image is then never
        # pruned — confirm against the crictl version deployed on the nodes.
        created = img.get("createdAt") or 0
        try:
            created = int(str(created)) // 1000000000
        except Exception:
            created = 0
        if created and created > now:
            created = now
        if cutoff and created and created < cutoff:
            prune.append(image_id)

    seen = set()
    for p in prune:
        if p in seen:
            continue
        seen.add(p)
        print(p)
    PY
    )

    if [ -n "${prune_list}" ]; then
      # The trailing "\n" matters: command substitution strips the final
      # newline, and without re-adding it "read" hits EOF on the last image
      # id and the loop body would silently skip it.
      printf "%s\n" "${prune_list}" | while read -r image_id; do
        if [ -n "${image_id}" ]; then
          chroot /host /bin/sh -c "crictl rmi --prune ${image_id}" || true
        fi
      done
    fi

    # Drop stale k3s airgap tarballs and leftover containerd files (>7 days).
    find /host/var/lib/rancher/k3s/agent/images -type f -name "*.tar" -mtime +7 -print -delete 2>/dev/null || true
    find /host/var/lib/rancher/k3s/agent/containerd -maxdepth 1 -type f -mtime +7 -print -delete 2>/dev/null || true

    if [ "${ONE_SHOT}" = "true" ]; then
      exit 0
    fi

    sleep infinity
@ -0,0 +1,6 @@
|
|||||||
|
# services/maintenance/node-image-sweeper-serviceaccount.yaml
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: node-image-sweeper
  namespace: maintenance
47
services/maintenance/node-nofile-daemonset.yaml
Normal file
47
services/maintenance/node-nofile-daemonset.yaml
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
# services/maintenance/node-nofile-daemonset.yaml
# Runs on every node (control-plane tolerated) to raise the k3s LimitNOFILE
# via a systemd drop-in; see node-nofile-script.yaml for the logic.
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-nofile
  namespace: maintenance
spec:
  selector:
    matchLabels:
      app: node-nofile
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: node-nofile
    spec:
      serviceAccountName: node-nofile
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
        - key: node-role.kubernetes.io/master
          operator: Exists
          effect: NoSchedule
      containers:
        - name: node-nofile
          image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
          command: ["/usr/bin/env", "bash"]
          args: ["/scripts/node_nofile.sh"]
          # Privileged + host chroot are required for systemctl on the node.
          securityContext:
            privileged: true
            runAsUser: 0
          volumeMounts:
            - name: host-root
              mountPath: /host
            - name: script
              mountPath: /scripts
              readOnly: true
      volumes:
        - name: host-root
          hostPath:
            path: /
        - name: script
          configMap:
            name: node-nofile-script
            defaultMode: 0555
38
services/maintenance/node-nofile-script.yaml
Normal file
38
services/maintenance/node-nofile-script.yaml
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
# services/maintenance/node-nofile-script.yaml
# Installs a systemd drop-in raising LimitNOFILE for k3s/k3s-agent and
# restarts the unit once when the drop-in was added or changed.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: node-nofile-script
  namespace: maintenance
data:
  node_nofile.sh: |
    #!/usr/bin/env bash
    set -euo pipefail

    limit_line="LimitNOFILE=1048576"
    changed=0

    for unit in k3s k3s-agent; do
      unit_file="/host/etc/systemd/system/${unit}.service"
      if [ -f "${unit_file}" ]; then
        dropin_dir="/host/etc/systemd/system/${unit}.service.d"
        dropin_file="${dropin_dir}/99-nofile.conf"
        # Only (re)write when the drop-in is missing or lacks the limit line.
        if [ ! -f "${dropin_file}" ] || ! grep -q "${limit_line}" "${dropin_file}"; then
          mkdir -p "${dropin_dir}"
          printf "[Service]\n%s\n" "${limit_line}" > "${dropin_file}"
          changed=1
        fi
      fi
    done

    if [ "${changed}" -eq 1 ]; then
      # Random jitter (10-309s) so all nodes don't restart k3s simultaneously.
      # NOTE(review): restarting k3s may evict this very pod mid-script; the
      # drop-in is already written by then, so a rerun is a no-op — confirm.
      sleep "$(( (RANDOM % 300) + 10 ))"
      chroot /host /bin/systemctl daemon-reload
      for unit in k3s k3s-agent; do
        if [ -f "/host/etc/systemd/system/${unit}.service" ]; then
          chroot /host /bin/systemctl restart "${unit}"
        fi
      done
    fi

    sleep infinity
6
services/maintenance/node-nofile-serviceaccount.yaml
Normal file
6
services/maintenance/node-nofile-serviceaccount.yaml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
# services/maintenance/node-nofile-serviceaccount.yaml
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: node-nofile
  namespace: maintenance
32
services/maintenance/pod-cleaner-cronjob.yaml
Normal file
32
services/maintenance/pod-cleaner-cronjob.yaml
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
# services/maintenance/pod-cleaner-cronjob.yaml
# Hourly cleanup of completed (Succeeded/Failed) pods across all namespaces.
---
apiVersion: batch/v1
kind: CronJob
metadata:
  name: pod-cleaner
  namespace: maintenance
spec:
  schedule: "0 * * * *"
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 1
      template:
        spec:
          serviceAccountName: pod-cleaner
          restartPolicy: Never
          containers:
            - name: cleaner
              image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
              command: ["/usr/bin/env", "bash"]
              args: ["/scripts/pod_cleaner.sh"]
              volumeMounts:
                - name: script
                  mountPath: /scripts
                  readOnly: true
          volumes:
            - name: script
              configMap:
                name: pod-cleaner-script
                defaultMode: 0555
32
services/maintenance/pod-cleaner-rbac.yaml
Normal file
32
services/maintenance/pod-cleaner-rbac.yaml
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
# services/maintenance/pod-cleaner-rbac.yaml
# ServiceAccount plus cluster-wide read/delete access on pods for the cleaner.
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: pod-cleaner
  namespace: maintenance
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: pod-cleaner
rules:
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: pod-cleaner
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: pod-cleaner
subjects:
  - kind: ServiceAccount
    name: pod-cleaner
    namespace: maintenance
20
services/maintenance/pod-cleaner-script.yaml
Normal file
20
services/maintenance/pod-cleaner-script.yaml
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# services/maintenance/pod-cleaner-script.yaml
# Deletes pods stuck in Succeeded/Failed phase across all namespaces.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: pod-cleaner-script
  namespace: maintenance
data:
  pod_cleaner.sh: |
    #!/usr/bin/env bash
    set -euo pipefail

    for phase in Succeeded Failed; do
      kubectl get pods -A --field-selector="status.phase=${phase}" \
        -o jsonpath='{range .items[*]}{.metadata.namespace}{" "}{.metadata.name}{"\n"}{end}' \
        | while read -r namespace name; do
          if [ -n "${namespace}" ] && [ -n "${name}" ]; then
            # --grace-period=0 forces immediate removal; the pod has already
            # terminated, so no workload is interrupted.
            kubectl delete pod -n "${namespace}" "${name}" --ignore-not-found --grace-period=0 --wait=false
          fi
        done
    done
Loading…
x
Reference in New Issue
Block a user