core: repair node role reconciler

This commit is contained in:
jenkins 2026-06-19 15:44:22 -03:00
parent 36f335c4c4
commit f6167b4ac1
2 changed files with 19 additions and 5 deletions

View File

@@ -24,8 +24,17 @@ spec:
- bash - bash
- -ceu - -ceu
- | - |
KUBE_TOKEN_PATH="/var/run/secrets/kubernetes.io/serviceaccount/token"
KUBE_CA_PATH="/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
KUBE_SERVER="https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS:-443}"
k() { k() {
kubectl --request-timeout=10s "$@" kubectl \
--server="${KUBE_SERVER}" \
--certificate-authority="${KUBE_CA_PATH}" \
--token="$(cat "${KUBE_TOKEN_PATH}")" \
--request-timeout=10s \
"$@"
} }
clear_worker() { clear_worker() {
@@ -33,9 +42,9 @@ spec:
local hardware="${2}" local hardware="${2}"
if k get node "${node}" >/dev/null 2>&1; then if k get node "${node}" >/dev/null 2>&1; then
k label node "${node}" node-role.kubernetes.io/worker=true "hardware=${hardware}" --overwrite=true || true k label node "${node}" node-role.kubernetes.io/worker=true "hardware=${hardware}" --overwrite=true || true
k label node "${node}" node-role.kubernetes.io/storage-backbone- || true
k label node "${node}" atlas.bstein.dev/spillover- || true k label node "${node}" atlas.bstein.dev/spillover- || true
k taint node "${node}" node.kubernetes.io/unschedulable:NoSchedule- || true # Recovery cordons are owned by Ananke, not this role reconciler.
k uncordon "${node}" || true
else else
echo "skipping missing node ${node}" echo "skipping missing node ${node}"
fi fi
@@ -71,7 +80,12 @@ spec:
for node in titan-13 titan-15 titan-17 titan-19; do for node in titan-13 titan-15 titan-17 titan-19; do
if k get node "${node}" >/dev/null 2>&1; then if k get node "${node}" >/dev/null 2>&1; then
k label node "${node}" atlas.bstein.dev/spillover=true longhorn-host=true --overwrite=true || true k label node "${node}" \
atlas.bstein.dev/spillover=true \
longhorn-host=true \
node-role.kubernetes.io/worker=true \
node-role.kubernetes.io/storage-backbone=true \
--overwrite=true || true
k taint node "${node}" longhorn=true:PreferNoSchedule --overwrite=true || true k taint node "${node}" longhorn=true:PreferNoSchedule --overwrite=true || true
k taint node "${node}" atlas.bstein.dev/spillover=true:PreferNoSchedule --overwrite=true || true k taint node "${node}" atlas.bstein.dev/spillover=true:PreferNoSchedule --overwrite=true || true
else else

View File

@@ -6,7 +6,7 @@ metadata:
rules: rules:
- apiGroups: [""] - apiGroups: [""]
resources: ["nodes"] resources: ["nodes"]
verbs: ["get", "list", "patch"] verbs: ["get", "list", "patch", "update"]
--- ---
apiVersion: rbac.authorization.k8s.io/v1 apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding kind: ClusterRoleBinding