titan-iac/infrastructure/longhorn/core/longhorn-csi-toleration-ensure-job.yaml

107 lines
4.1 KiB
YAML
Raw Normal View History

2026-06-09 01:53:04 -03:00
# infrastructure/longhorn/core/longhorn-csi-toleration-ensure-job.yaml
#
# One-shot Job: runs an inline shell script (the block scalar under `args`)
# with kubectl + jq against the longhorn-system namespace.
apiVersion: batch/v1
kind: Job
metadata:
  name: longhorn-csi-toleration-ensure-4
  namespace: longhorn-system
spec:
  # No retries: a failed run stays failed.
  backoffLimit: 0
  # NOTE(review): the script below polls for up to 90 x 2 s for CSI
  # registration plus 90 x 2 s per engine-image DaemonSet, which is longer
  # than this 240 s deadline. Confirm the deadline is the intended cap.
  activeDeadlineSeconds: 240
  ttlSecondsAfterFinished: 3600
  template:
    spec:
      serviceAccountName: longhorn-service-account
      restartPolicy: Never
      # NOTE(review): the pod is pinned to titan-11 while the script verifies
      # titan-23; presumably intentional (run elsewhere, check the target).
      nodeSelector:
        kubernetes.io/hostname: titan-11
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              # Both expressions belong to one term, so both must match.
              - matchExpressions:
                  - key: kubernetes.io/arch
                    operator: In
                    values:
                      - "arm64"
                  - key: node-role.kubernetes.io/worker
                    operator: Exists
      containers:
        - name: patch
          image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
          command:
            - "/bin/sh"
            - "-c"
          args:
            - |
# Strict mode. The script is started with /bin/sh, and not every sh accepts
# `-o pipefail`; asking for it unconditionally would abort the script on its
# first line in such a shell. Probe in a subshell and enable it only where
# it is supported.
set -eu
if (set -o pipefail) 2>/dev/null; then
  set -o pipefail
fi

# Namespace and name of the CSI plugin DaemonSet to patch.
ns="longhorn-system"
ds="longhorn-csi-plugin"

# Taint (key=value:effect) that the patched DaemonSets must tolerate.
key="veles.bstein.dev/simulation"
value="true"
effect="NoSchedule"
# Ensure a DaemonSet's pod template tolerates the ${key}=${value}:${effect}
# taint, appending the toleration when it is missing.
#
# Globals read:    ns, key, value, effect
# Globals written: target, current, patch (plain assignments; POSIX sh has
#                  no `local`)
# Arguments:       $1 - name of the DaemonSet inside ${ns}
# Outputs:         a status line when the toleration already exists,
#                  otherwise whatever `kubectl patch` prints
# Returns:         0 when the toleration is present or the patch succeeded
patch_daemonset() {
  target="$1"
  current="$(kubectl -n "${ns}" get daemonset "${target}" -o json)"

  # printf, not echo: some sh implementations expand backslash escapes in
  # echo, which would turn a `\n` inside a JSON string into a raw newline
  # and hand jq an invalid document.
  if printf '%s\n' "${current}" | jq -e \
    --arg key "${key}" \
    --arg value "${value}" \
    --arg effect "${effect}" \
    '.spec.template.spec.tolerations[]? | select(.key == $key and .value == $value and .effect == $effect)' >/dev/null; then
    echo "${target} already tolerates ${key}=${value}:${effect}"
    return 0
  fi

  # A merge patch replaces the whole tolerations array, so the patch carries
  # the existing entries plus the new one.
  patch="$(printf '%s\n' "${current}" | jq -c \
    --arg key "${key}" \
    --arg value "${value}" \
    --arg effect "${effect}" \
    '{
      spec: {
        template: {
          spec: {
            tolerations: ((.spec.template.spec.tolerations // []) + [
              {key: $key, operator: "Equal", value: $value, effect: $effect}
            ])
          }
        }
      }
    }')"
  kubectl -n "${ns}" patch daemonset "${target}" --type=merge -p "${patch}"
}
2026-06-09 01:53:04 -03:00
# Patch the CSI plugin DaemonSet and every engine-image DaemonSet, then wait
# until the result is visible on the target node.
#
# Uses names defined earlier in this script: ns, ds, patch_daemonset.
# Exits 1 when a check does not pass within its polling budget.
node="titan-23"
max_attempts=90
poll_seconds=2

patch_daemonset "${ds}"

# One DaemonSet name per line.
engine_daemonsets="$(kubectl -n "${ns}" get daemonset -l longhorn.io/component=engine-image -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')"
# Deliberately unquoted: word-splitting turns the lines into loop items.
for engine_ds in ${engine_daemonsets}; do
  patch_daemonset "${engine_ds}"
done

# Wait for driver.longhorn.io to be listed in the node's CSINode object.
csi_ready="false"
for attempt in $(seq 1 "${max_attempts}"); do
  if kubectl get csinode "${node}" -o json | jq -e '.spec.drivers[]? | select(.name == "driver.longhorn.io")' >/dev/null; then
    echo "driver.longhorn.io registered on ${node}"
    csi_ready="true"
    break
  fi
  sleep "${poll_seconds}"
done
if [ "${csi_ready}" != "true" ]; then
  echo "driver.longhorn.io did not register on ${node} before timeout" >&2
  exit 1
fi

# Wait for each engine-image DaemonSet to have a ready pod on the node.
for engine_ds in ${engine_daemonsets}; do
  engine_ready="false"
  for attempt in $(seq 1 "${max_attempts}"); do
    # `all` is true for an empty list, so a pod that has not reported any
    # container status yet must be excluded explicitly (length > 0).
    if kubectl -n "${ns}" get pods -o json | jq -e \
      --arg node "${node}" \
      --arg engine_ds "${engine_ds}" \
      '.items[]
        | select(.spec.nodeName == $node)
        | select(.metadata.ownerReferences[]?.name == $engine_ds)
        | select((.status.containerStatuses // []) | (length > 0) and (map(.ready) | all))' >/dev/null; then
      echo "${engine_ds} ready on ${node}"
      engine_ready="true"
      break
    fi
    sleep "${poll_seconds}"
  done
  if [ "${engine_ready}" != "true" ]; then
    echo "${engine_ds} did not become ready on ${node} before timeout" >&2
    exit 1
  fi
done