From 63c869bf42fe0ac8b90d3c83b3ae08dfc39504a1 Mon Sep 17 00:00:00 2001
From: jenkins
Date: Tue, 9 Jun 2026 02:06:34 -0300
Subject: [PATCH] longhorn: ensure engine image on oceanus

---
Review notes (ignored by git am/apply):
- What the job script does after this patch: patch_daemonset adds the
  key/value/effect toleration to a DaemonSet unless it is already present;
  it is applied to ${ds} and to every DaemonSet labelled
  longhorn.io/component=engine-image. The job then waits (90 x 2s) for
  driver.longhorn.io to appear on csinode titan-23, and then for a ready
  pod of each engine-image DaemonSet on titan-23.
- Engine-image readiness now requires at least one container status. jq's
  `all` is true for an empty array, so a pod that had no containerStatuses
  yet was previously reported as ready.
- Line breaks and indentation were reconstructed from a flattened copy of
  this patch; confirm context-line whitespace against the target file
  before applying.
- The post-image blob id on the index line predates the readiness change.

 .../longhorn-csi-toleration-ensure-job.yaml   | 58 ++++++++++++++-----
 1 file changed, 44 insertions(+), 14 deletions(-)

diff --git a/infrastructure/longhorn/core/longhorn-csi-toleration-ensure-job.yaml b/infrastructure/longhorn/core/longhorn-csi-toleration-ensure-job.yaml
index 44f9f875..8b084dac 100644
--- a/infrastructure/longhorn/core/longhorn-csi-toleration-ensure-job.yaml
+++ b/infrastructure/longhorn/core/longhorn-csi-toleration-ensure-job.yaml
@@ -2,7 +2,7 @@
 apiVersion: batch/v1
 kind: Job
 metadata:
-  name: longhorn-csi-toleration-ensure-3
+  name: longhorn-csi-toleration-ensure-4
   namespace: longhorn-system
 spec:
   backoffLimit: 0
@@ -38,14 +38,18 @@ spec:
               value="true"
               effect="NoSchedule"
 
-              current="$(kubectl -n "${ns}" get daemonset "${ds}" -o json)"
-              if echo "${current}" | jq -e \
-                --arg key "${key}" \
-                --arg value "${value}" \
-                --arg effect "${effect}" \
-                '.spec.template.spec.tolerations[]? | select(.key == $key and .value == $value and .effect == $effect)' >/dev/null; then
-                echo "${ds} already tolerates ${key}=${value}:${effect}"
-              else
+              patch_daemonset() {
+                target="$1"
+                current="$(kubectl -n "${ns}" get daemonset "${target}" -o json)"
+                if echo "${current}" | jq -e \
+                  --arg key "${key}" \
+                  --arg value "${value}" \
+                  --arg effect "${effect}" \
+                  '.spec.template.spec.tolerations[]? | select(.key == $key and .value == $value and .effect == $effect)' >/dev/null; then
+                  echo "${target} already tolerates ${key}=${value}:${effect}"
+                  return 0
+                fi
+
                 patch="$(echo "${current}" | jq -c \
                   --arg key "${key}" \
                   --arg value "${value}" \
@@ -61,16 +65,42 @@ spec:
                       }
                     }
                   }')"
-                kubectl -n "${ns}" patch daemonset "${ds}" --type=merge -p "${patch}"
-              fi
+                kubectl -n "${ns}" patch daemonset "${target}" --type=merge -p "${patch}"
+              }
 
+              patch_daemonset "${ds}"
+              engine_daemonsets="$(kubectl -n "${ns}" get daemonset -l longhorn.io/component=engine-image -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')"
+              for engine_ds in ${engine_daemonsets}; do
+                patch_daemonset "${engine_ds}"
+              done
+
+              csi_ready="false"
               for attempt in $(seq 1 90); do
                 if kubectl get csinode titan-23 -o json | jq -e '.spec.drivers[]? | select(.name == "driver.longhorn.io")' >/dev/null; then
                   echo "driver.longhorn.io registered on titan-23"
-                  exit 0
+                  csi_ready="true"
+                  break
                 fi
                 sleep 2
               done
 
-              echo "driver.longhorn.io did not register on titan-23 before timeout" >&2
-              exit 1
+              if [ "${csi_ready}" != "true" ]; then
+                echo "driver.longhorn.io did not register on titan-23 before timeout" >&2
+                exit 1
+              fi
+
+              for engine_ds in ${engine_daemonsets}; do
+                for attempt in $(seq 1 90); do
+                  if kubectl -n "${ns}" get pods -o json | jq -e \
+                    --arg engine_ds "${engine_ds}" \
+                    '.items[] | select(.spec.nodeName == "titan-23") | select(.metadata.ownerReferences[]?.name == $engine_ds) | select((.status.containerStatuses // []) | length > 0 and all(.ready))' >/dev/null; then
+                    echo "${engine_ds} ready on titan-23"
+                    break
+                  fi
+                  if [ "${attempt}" = "90" ]; then
+                    echo "${engine_ds} did not become ready on titan-23 before timeout" >&2
+                    exit 1
+                  fi
+                  sleep 2
+                done
+              done