2026-06-09 01:53:04 -03:00
|
|
|
# infrastructure/longhorn/core/longhorn-csi-toleration-ensure-job.yaml
|
|
|
|
|
apiVersion: batch/v1
kind: Job
metadata:
  # NOTE(review): the numeric suffix looks like a manual revision counter.
  # A Job's pod template cannot be edited in place, so confirm whether the
  # suffix has to be bumped whenever the script below changes.
  name: longhorn-csi-toleration-ensure-4
  namespace: longhorn-system
spec:
  # The script does its own polling and exits 1 on timeout; never retry.
  backoffLimit: 0
  # Must exceed the script's worst-case polling budget: 90 x 2s for the CSI
  # driver plus 90 x 2s per engine-image DaemonSet. 600s covers two engine
  # images with headroom, so the script's own timeout messages are printed
  # before the Job controller terminates the pod.
  activeDeadlineSeconds: 600
  ttlSecondsAfterFinished: 3600
  template:
    spec:
      serviceAccountName: longhorn-service-account
      restartPolicy: Never
      # Pin the patch pod to one node; the affinity below additionally
      # requires that node to be an arm64 worker.
      nodeSelector:
        kubernetes.io/hostname: titan-11
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/arch
                    operator: In
                    values: ["arm64"]
                  - key: node-role.kubernetes.io/worker
                    operator: Exists
      containers:
        - name: patch
          image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
          # NOTE(review): `set -o pipefail` is not accepted by every /bin/sh
          # (older dash rejects it) - confirm the image's sh supports it.
          command: ["/bin/sh", "-c"]
          args:
            - |
              set -euo pipefail

              ns="longhorn-system"
              ds="longhorn-csi-plugin"
              key="veles.bstein.dev/simulation"
              value="true"
              effect="NoSchedule"
              # Node on which the CSI driver and engine-image pods must come up.
              node="titan-23"
              # Budget for each wait: poll_attempts tries, poll_interval seconds apart.
              poll_attempts=90
              poll_interval=2

              # Add the ${key}=${value}:${effect} toleration to DaemonSet $1 in ${ns},
              # unless an identical toleration is already present.
              patch_daemonset() {
                target="$1"
                current="$(kubectl -n "${ns}" get daemonset "${target}" -o json)"

                # printf instead of echo: echo's handling of backslashes is
                # implementation-defined and can corrupt escaped JSON strings.
                if printf '%s\n' "${current}" | jq -e \
                  --arg key "${key}" \
                  --arg value "${value}" \
                  --arg effect "${effect}" \
                  '.spec.template.spec.tolerations[]? | select(.key == $key and .value == $value and .effect == $effect)' >/dev/null; then
                  echo "${target} already tolerates ${key}=${value}:${effect}"
                  return 0
                fi

                # A merge patch replaces the whole list, so send the existing
                # tolerations with the new one appended.
                patch="$(printf '%s\n' "${current}" | jq -c \
                  --arg key "${key}" \
                  --arg value "${value}" \
                  --arg effect "${effect}" \
                  '{
                    spec: {
                      template: {
                        spec: {
                          tolerations: ((.spec.template.spec.tolerations // []) + [
                            {key: $key, operator: "Equal", value: $value, effect: $effect}
                          ])
                        }
                      }
                    }
                  }')"
                kubectl -n "${ns}" patch daemonset "${target}" --type=merge -p "${patch}"
              }

              # Run "$@" up to ${poll_attempts} times, sleeping ${poll_interval}s
              # after each failure. Returns 0 on first success, 1 on timeout.
              wait_until() {
                attempt=1
                while [ "${attempt}" -le "${poll_attempts}" ]; do
                  if "$@"; then
                    return 0
                  fi
                  sleep "${poll_interval}"
                  attempt=$((attempt + 1))
                done
                return 1
              }

              # Succeeds once the CSINode object for ${node} lists driver.longhorn.io.
              csi_driver_registered() {
                kubectl get csinode "${node}" -o json \
                  | jq -e '.spec.drivers[]? | select(.name == "driver.longhorn.io")' >/dev/null
              }

              # Succeeds once a pod owned by DaemonSet $1 on ${node} reports every
              # container ready. The list must be non-empty: jq's `all` is true for
              # an empty array, which would accept a pod with no statuses yet.
              engine_ready_on_node() {
                kubectl -n "${ns}" get pods -o json | jq -e \
                  --arg node "${node}" \
                  --arg engine_ds "$1" \
                  '.items[] | select(.spec.nodeName == $node) | select(.metadata.ownerReferences[]?.name == $engine_ds) | select([.status.containerStatuses[]?.ready] | (length > 0 and all))' >/dev/null
              }

              patch_daemonset "${ds}"
              engine_daemonsets="$(kubectl -n "${ns}" get daemonset -l longhorn.io/component=engine-image -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')"
              # Unquoted on purpose: split the newline-separated names into words.
              for engine_ds in ${engine_daemonsets}; do
                patch_daemonset "${engine_ds}"
              done

              if ! wait_until csi_driver_registered; then
                echo "driver.longhorn.io did not register on ${node} before timeout" >&2
                exit 1
              fi
              echo "driver.longhorn.io registered on ${node}"

              for engine_ds in ${engine_daemonsets}; do
                if ! wait_until engine_ready_on_node "${engine_ds}"; then
                  echo "${engine_ds} did not become ready on ${node} before timeout" >&2
                  exit 1
                fi
                echo "${engine_ds} ready on ${node}"
              done
|