maintenance: sweep unused images on arm workers

parent e2efeeacba
commit 99a6b4c054

clusters/atlas/flux-system/platform/maintenance/kustomization.yaml (new file, 14 lines)
@@ -0,0 +1,14 @@
# clusters/atlas/flux-system/platform/maintenance/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: maintenance
  namespace: flux-system
spec:
  interval: 10m
  path: ./services/maintenance
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
  wait: false
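
Once this Kustomization is in the repo, Flux applies everything under services/maintenance on a 10-minute interval. A quick way to force a sync and confirm the result (a sketch using the stock flux CLI; names taken from the manifest above):

  flux reconcile kustomization maintenance -n flux-system --with-source
  flux get kustomizations -n flux-system maintenance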

services/maintenance/image-sweeper-cronjob.yaml (new file, 44 lines)
@@ -0,0 +1,44 @@
# services/maintenance/image-sweeper-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: image-sweeper
  namespace: maintenance
spec:
  schedule: "30 4 * * 0"
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 2
  failedJobsHistoryLimit: 2
  jobTemplate:
    spec:
      template:
        spec:
          serviceAccountName: node-image-sweeper
          restartPolicy: OnFailure
          nodeSelector:
            kubernetes.io/arch: arm64
            node-role.kubernetes.io/worker: "true"
          containers:
            - name: image-sweeper
              image: python:3.12.9-alpine3.20
              command: ["/bin/sh", "/scripts/node_image_sweeper.sh"]
              env:
                - name: ONE_SHOT
                  value: "true"
              securityContext:
                privileged: true
                runAsUser: 0
              volumeMounts:
                - name: host-root
                  mountPath: /host
                - name: script
                  mountPath: /scripts
                  readOnly: true
          volumes:
            - name: host-root
              hostPath:
                path: /
            - name: script
              configMap:
                name: node-image-sweeper-script
                defaultMode: 0555
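
The sweep fires Sundays at 04:30. To exercise it without waiting for the schedule, a one-off Job can be cut from the CronJob (a sketch; the job name is arbitrary):

  kubectl -n maintenance create job --from=cronjob/image-sweeper image-sweeper-manual
  kubectl -n maintenance logs -f job/image-sweeper-manual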

services/maintenance/kustomization.yaml (new file, 15 lines)
@@ -0,0 +1,15 @@
# services/maintenance/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - node-nofile-serviceaccount.yaml
  - pod-cleaner-rbac.yaml
  - node-nofile-script.yaml
  - pod-cleaner-script.yaml
  - node-nofile-daemonset.yaml
  - pod-cleaner-cronjob.yaml
  - node-image-sweeper-serviceaccount.yaml
  - node-image-sweeper-script.yaml
  - node-image-sweeper-daemonset.yaml
  - image-sweeper-cronjob.yaml
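
Before pushing, the whole overlay can be rendered and validated against the API server (a sketch, run from the repo root; kubectl 1.21+ bundles kustomize):

  kubectl kustomize services/maintenance | kubectl apply --dry-run=server -f -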

services/maintenance/namespace.yaml (new file, 5 lines)
@@ -0,0 +1,5 @@
# services/maintenance/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
  name: maintenance

services/maintenance/node-image-sweeper-daemonset.yaml (new file, 42 lines)
@@ -0,0 +1,42 @@
# services/maintenance/node-image-sweeper-daemonset.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-image-sweeper
  namespace: maintenance
spec:
  selector:
    matchLabels:
      app: node-image-sweeper
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: node-image-sweeper
    spec:
      serviceAccountName: node-image-sweeper
      nodeSelector:
        kubernetes.io/arch: arm64
        node-role.kubernetes.io/worker: "true"
      containers:
        - name: node-image-sweeper
          image: python:3.12.9-alpine3.20
          command: ["/bin/sh", "/scripts/node_image_sweeper.sh"]
          securityContext:
            privileged: true
            runAsUser: 0
          volumeMounts:
            - name: host-root
              mountPath: /host
            - name: script
              mountPath: /scripts
              readOnly: true
      volumes:
        - name: host-root
          hostPath:
            path: /
        - name: script
          configMap:
            name: node-image-sweeper-script
            defaultMode: 0555
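
The nodeSelector pins the sweeper to arm64 workers only; placement is easy to confirm once the DaemonSet is up (a sketch):

  kubectl -n maintenance get pods -l app=node-image-sweeper -o wide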

services/maintenance/node-image-sweeper-script.yaml (new file, 96 lines)
@@ -0,0 +1,96 @@
# services/maintenance/node-image-sweeper-script.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: node-image-sweeper-script
  namespace: maintenance
data:
  node_image_sweeper.sh: |
    #!/bin/sh
    set -eu

    ONE_SHOT=${ONE_SHOT:-false}
    THRESHOLD_DAYS=14

    # When the node's root filesystem is 70% full or more, sweep more aggressively.
    usage=$(df -P /host | awk 'NR==2 {gsub(/%/,"",$5); print $5}') || usage=""
    if [ -n "${usage}" ] && [ "${usage}" -ge 70 ]; then
      THRESHOLD_DAYS=3
    fi

    # Plain arithmetic: BusyBox date in Alpine supports neither the GNU
    # "-d '14 days ago'" form nor the BSD "-v" fallback.
    cutoff=$(( $(date +%s) - THRESHOLD_DAYS * 86400 ))

    # crictl ps --quiet prints container IDs, not image IDs; collect each
    # container's imageRef instead so in-use images match the filter below.
    RUNNING=$(chroot /host /bin/sh -c "crictl ps -a -o json 2>/dev/null" \
      | python3 -c 'import json,sys; print(" ".join(c.get("imageRef", "") for c in json.load(sys.stdin).get("containers", [])))' \
      2>/dev/null || echo "")
    IMAGES_JSON=$(chroot /host /bin/sh -c "crictl images -o json 2>/dev/null" || echo '{}')

    # Never remove pause images, whichever registry they were pulled from.
    SKIP="registry.k8s.io/pause k8s.gcr.io/pause rancher/mirrored-pause"

    prune_list=$(printf "%s" "${IMAGES_JSON}" | CUTOFF="${cutoff}" RUNNING="${RUNNING}" SKIP="${SKIP}" python3 - <<'PY'
    import json, os, sys, time

    try:
        data = json.load(sys.stdin)
    except Exception:
        print("", end="")
        sys.exit(0)

    cutoff = int(os.environ.get("CUTOFF", "0"))
    running = set(os.environ.get("RUNNING", "").split())
    skip = os.environ.get("SKIP", "").split()
    now = int(time.time())
    prune = []


    def is_skip(tags):
        if not tags:
            return False
        for t in tags:
            for prefix in skip:
                if prefix and t.startswith(prefix):
                    return True
        return False


    for img in data.get("images", []):
        image_id = img.get("id", "")
        if not image_id:
            continue
        if image_id in running:
            continue
        tags = img.get("repoTags") or []
        if is_skip(tags):
            continue
        # createdAt is nanoseconds; some CRI implementations omit it, in
        # which case created stays 0 and the image is left alone below.
        created = img.get("createdAt") or 0
        try:
            created = int(str(created)) // 1000000000
        except Exception:
            created = 0
        if created and created > now:
            created = now
        if cutoff and created and created < cutoff:
            prune.append(image_id)

    seen = set()
    for p in prune:
        if p in seen:
            continue
        seen.add(p)
        print(p)
    PY
    )

    if [ -n "${prune_list}" ]; then
      printf "%s" "${prune_list}" | while read -r image_id; do
        if [ -n "${image_id}" ]; then
          # Remove only this aged image; "crictl rmi --prune" would delete
          # every unused image regardless of age.
          chroot /host /bin/sh -c "crictl rmi ${image_id}" || true
        fi
      done
    fi

    # Drop stale k3s airgap image tarballs and leftover import files.
    find /host/var/lib/rancher/k3s/agent/images -type f -name "*.tar" -mtime +7 -print -delete 2>/dev/null || true
    find /host/var/lib/rancher/k3s/agent/containerd -maxdepth 1 -type f -mtime +7 -print -delete 2>/dev/null || true

    if [ "${ONE_SHOT}" = "true" ]; then
      exit 0
    fi

    # DaemonSet mode: stay resident after the one-time sweep.
    sleep infinity
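
Before the first scheduled run, the sweeper's inputs can be sanity-checked on a worker itself (a sketch; k3s ships crictl on the node):

  df -P / | awk 'NR==2 {print $5}'   # 70% or more drops the age threshold to 3 days
  crictl images | wc -l              # rough count of images the sweeper will consider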

services/maintenance/node-image-sweeper-serviceaccount.yaml (new file, 6 lines)
@@ -0,0 +1,6 @@
# services/maintenance/node-image-sweeper-serviceaccount.yaml
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: node-image-sweeper
|
||||
namespace: maintenance
|
||||

services/maintenance/node-nofile-daemonset.yaml (new file, 47 lines)
@@ -0,0 +1,47 @@
# services/maintenance/node-nofile-daemonset.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-nofile
  namespace: maintenance
spec:
  selector:
    matchLabels:
      app: node-nofile
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: node-nofile
    spec:
      serviceAccountName: node-nofile
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
        - key: node-role.kubernetes.io/master
          operator: Exists
          effect: NoSchedule
      containers:
        - name: node-nofile
          image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
          command: ["/usr/bin/env", "bash"]
          args: ["/scripts/node_nofile.sh"]
          securityContext:
            privileged: true
            runAsUser: 0
          volumeMounts:
            - name: host-root
              mountPath: /host
            - name: script
              mountPath: /scripts
              readOnly: true
      volumes:
        - name: host-root
          hostPath:
            path: /
        - name: script
          configMap:
            name: node-nofile-script
            defaultMode: 0555
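
Unlike the image sweeper, this DaemonSet tolerates control-plane taints, so every node receives the drop-in. A placement check (a sketch):

  kubectl -n maintenance get pods -l app=node-nofile -o wide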

services/maintenance/node-nofile-script.yaml (new file, 38 lines)
@@ -0,0 +1,38 @@
# services/maintenance/node-nofile-script.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: node-nofile-script
  namespace: maintenance
data:
  node_nofile.sh: |
    #!/usr/bin/env bash
    set -euo pipefail

    limit_line="LimitNOFILE=1048576"
    changed=0

    # Install a systemd drop-in for whichever k3s unit exists on this node
    # (servers run k3s, agents run k3s-agent); skip if already in place.
    for unit in k3s k3s-agent; do
      unit_file="/host/etc/systemd/system/${unit}.service"
      if [ -f "${unit_file}" ]; then
        dropin_dir="/host/etc/systemd/system/${unit}.service.d"
        dropin_file="${dropin_dir}/99-nofile.conf"
        if [ ! -f "${dropin_file}" ] || ! grep -q "${limit_line}" "${dropin_file}"; then
          mkdir -p "${dropin_dir}"
          printf "[Service]\n%s\n" "${limit_line}" > "${dropin_file}"
          changed=1
        fi
      fi
    done

    if [ "${changed}" -eq 1 ]; then
      # Random 10-309 s jitter so the fleet does not restart k3s all at once.
      sleep "$(( (RANDOM % 300) + 10 ))"
      chroot /host /bin/systemctl daemon-reload
      for unit in k3s k3s-agent; do
        if [ -f "/host/etc/systemd/system/${unit}.service" ]; then
          chroot /host /bin/systemctl restart "${unit}"
        fi
      done
    fi

    sleep infinity
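
After a unit restart, the raised ceiling can be verified against the live k3s process on a node (a sketch; assumes a procps-style pgrep on the node):

  pid=$(pgrep -fo 'k3s (server|agent)')
  grep 'open files' "/proc/${pid}/limits"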

services/maintenance/node-nofile-serviceaccount.yaml (new file, 6 lines)
@@ -0,0 +1,6 @@
# services/maintenance/node-nofile-serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: node-nofile
  namespace: maintenance

services/maintenance/pod-cleaner-cronjob.yaml (new file, 32 lines)
@@ -0,0 +1,32 @@
# services/maintenance/pod-cleaner-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: pod-cleaner
  namespace: maintenance
spec:
  schedule: "0 * * * *"
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 1
      template:
        spec:
          serviceAccountName: pod-cleaner
          restartPolicy: Never
          containers:
            - name: cleaner
              image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
              command: ["/usr/bin/env", "bash"]
              args: ["/scripts/pod_cleaner.sh"]
              volumeMounts:
                - name: script
                  mountPath: /scripts
                  readOnly: true
          volumes:
            - name: script
              configMap:
                name: pod-cleaner-script
                defaultMode: 0555

services/maintenance/pod-cleaner-rbac.yaml (new file, 32 lines)
@@ -0,0 +1,32 @@
# services/maintenance/pod-cleaner-rbac.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: pod-cleaner
  namespace: maintenance

---

apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: pod-cleaner
rules:
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list", "delete"]

---

apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: pod-cleaner
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: pod-cleaner
subjects:
  - kind: ServiceAccount
    name: pod-cleaner
    namespace: maintenance
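
The binding can be verified with impersonation before the first run (a sketch; the caller needs impersonate rights):

  kubectl auth can-i delete pods --all-namespaces --as=system:serviceaccount:maintenance:pod-cleaner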

services/maintenance/pod-cleaner-script.yaml (new file, 20 lines)
@@ -0,0 +1,20 @@
# services/maintenance/pod-cleaner-script.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: pod-cleaner-script
  namespace: maintenance
data:
  pod_cleaner.sh: |
    #!/usr/bin/env bash
    set -euo pipefail

    # Delete completed and failed pods cluster-wide; --wait=false keeps the
    # hourly job fast, and --ignore-not-found tolerates races with kubelet GC.
    for phase in Succeeded Failed; do
      kubectl get pods -A --field-selector="status.phase=${phase}" \
        -o jsonpath='{range .items[*]}{.metadata.namespace}{" "}{.metadata.name}{"\n"}{end}' \
        | while read -r namespace name; do
          if [ -n "${namespace}" ] && [ -n "${name}" ]; then
            kubectl delete pod -n "${namespace}" "${name}" --ignore-not-found --grace-period=0 --wait=false
          fi
        done
    done
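
A read-only preview of what the next run would delete (a sketch):

  kubectl get pods -A --field-selector=status.phase=Succeeded
  kubectl get pods -A --field-selector=status.phase=Failed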