# titan-iac/infrastructure/core/node-prefer-noschedule-cronjob.yaml
#
# 67 lines
# 2.6 KiB
# YAML

# infrastructure/core/node-prefer-noschedule-cronjob.yaml
# CronJob that periodically runs the kubectl script below to re-assert node
# labels and taints.
apiVersion: batch/v1
kind: CronJob
metadata:
name: node-prefer-noschedule
namespace: kube-system
spec:
# Standard five-field cron expression: fire every minute.
schedule: "* * * * *"
# Replace: a run that is still in progress is replaced by the next scheduled run.
concurrencyPolicy: Replace
# Keep only the most recent successful Job, but the last three failed ones.
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
# No Job-level retries; together with restartPolicy: Never, a failed run is
# simply superseded by the next scheduled one.
backoffLimit: 0
template:
spec:
# NOTE(review): this ServiceAccount and its RBAC are not defined in this
# section; the script gets, labels, taints and uncordons nodes — confirm the
# bound role allows that.
serviceAccountName: node-prefer-noschedule
restartPolicy: Never
containers:
- name: taint
# Image is pinned by digest rather than by a mutable tag.
image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
command:
- /usr/bin/env
- bash
# -c: run the script string that follows; -e: exit on an unguarded failing
# command; -u: treat expansion of an unset variable as an error.
- -ceu
- |
# Invoke kubectl with a fixed 10-second request timeout.
# Arguments: passed through to kubectl unchanged.
# Returns:   kubectl's exit status.
k() {
  local -a base_cmd=(kubectl --request-timeout=10s)
  "${base_cmd[@]}" "$@"
}
# Put a worker node back into normal scheduling.
# Arguments: $1 - node name
#            $2 - value for the node's "hardware" label
# Outputs:   prints "skipping missing node <name>" when `k get node` fails.
# Each mutating call is best-effort (`|| true`): a failure in one step does
# not stop the remaining steps or abort the script under `set -e`.
clear_worker() {
  local target="${1}"
  local hw_class="${2}"

  # Guard: nothing to do when the node lookup fails.
  if ! k get node "${target}" >/dev/null 2>&1; then
    echo "skipping missing node ${target}"
    return
  fi

  local -a worker_labels=(node-role.kubernetes.io/worker=true "hardware=${hw_class}")
  k label node "${target}" "${worker_labels[@]}" --overwrite=true || true
  # A trailing "-" removes the label / taint instead of setting it.
  k label node "${target}" atlas.bstein.dev/spillover- || true
  k taint node "${target}" node.kubernetes.io/unschedulable:NoSchedule- || true
  k uncordon "${target}" || true
}
# Worker nodes to reset, written as "<node>:<hardware label value>".
workers=(
  titan-04:rpi5
  titan-05:rpi5
  titan-07:rpi5
  titan-08:rpi5
  titan-11:rpi5
  titan-12:rpi4
  titan-14:rpi4
  titan-18:rpi4
  titan-22:amd64
)
for entry in "${workers[@]}"; do
  # Split each pair on its colon into node name and hardware value.
  clear_worker "${entry%%:*}" "${entry#*:}"
done

# titan-22 additionally gets the general-compute=last-resort label (best-effort).
if k get node titan-22 >/dev/null 2>&1; then
  k label node titan-22 atlas.bstein.dev/general-compute=last-resort --overwrite=true || true
fi

# Spillover nodes: label them and apply two PreferNoSchedule taints.
# NOTE(review): presumably these are the Longhorn storage hosts — confirm.
spillover_nodes=(titan-13 titan-15 titan-17 titan-19)
for node in "${spillover_nodes[@]}"; do
  if ! k get node "${node}" >/dev/null 2>&1; then
    echo "skipping missing node ${node}"
    continue
  fi
  k label node "${node}" atlas.bstein.dev/spillover=true longhorn-host=true --overwrite=true || true
  k taint node "${node}" longhorn=true:PreferNoSchedule --overwrite=true || true
  k taint node "${node}" atlas.bstein.dev/spillover=true:PreferNoSchedule --overwrite=true || true
done