From 42069b0f23e3415810427a5f9c08bef708ba7e9c Mon Sep 17 00:00:00 2001
From: jenkins
Date: Tue, 9 Jun 2026 01:53:04 -0300
Subject: [PATCH] longhorn: ensure csi tolerates oceanus

---
 .../longhorn/core/kustomization.yaml          |  1 +
 .../longhorn-csi-toleration-ensure-job.yaml   | 70 +++++++++++++++++++
 .../core/longhorn-settings-ensure-job.yaml    |  2 +-
 .../core/scripts/longhorn_settings_ensure.sh  | 19 +++++-
 4 files changed, 89 insertions(+), 3 deletions(-)
 create mode 100644 infrastructure/longhorn/core/longhorn-csi-toleration-ensure-job.yaml

diff --git a/infrastructure/longhorn/core/kustomization.yaml b/infrastructure/longhorn/core/kustomization.yaml
index c60b4090..73603b81 100644
--- a/infrastructure/longhorn/core/kustomization.yaml
+++ b/infrastructure/longhorn/core/kustomization.yaml
@@ -9,6 +9,7 @@ resources:
   - helmrelease.yaml
   - veles-recurring-jobs.yaml
   - longhorn-settings-ensure-job.yaml
+  - longhorn-csi-toleration-ensure-job.yaml
   - longhorn-disk-tags-ensure-job.yaml
 
 configMapGenerator:
diff --git a/infrastructure/longhorn/core/longhorn-csi-toleration-ensure-job.yaml b/infrastructure/longhorn/core/longhorn-csi-toleration-ensure-job.yaml
new file mode 100644
index 00000000..d69f695b
--- /dev/null
+++ b/infrastructure/longhorn/core/longhorn-csi-toleration-ensure-job.yaml
@@ -0,0 +1,70 @@
+# infrastructure/longhorn/core/longhorn-csi-toleration-ensure-job.yaml
+# One-shot Job: appends the veles.bstein.dev/simulation=true:NoSchedule toleration
+# to the longhorn-csi-plugin DaemonSet when it is missing, then waits for the rollout.
+# NOTE(review): this DaemonSet is presumably managed by longhorn-manager, which may
+# overwrite direct patches - confirm, or consider Longhorn's taint-toleration setting.
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: longhorn-csi-toleration-ensure-1
+  namespace: longhorn-system
+spec:
+  backoffLimit: 0
+  activeDeadlineSeconds: 240
+  ttlSecondsAfterFinished: 3600
+  template:
+    spec:
+      serviceAccountName: longhorn-service-account
+      restartPolicy: Never
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              - matchExpressions:
+                  - key: kubernetes.io/arch
+                    operator: In
+                    values: ["arm64"]
+                  - key: node-role.kubernetes.io/worker
+                    operator: Exists
+      containers:
+        - name: patch
+          image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
+          # The script uses `set -o pipefail`, which a plain POSIX /bin/sh may reject; run it under bash.
+          command: ["/bin/bash", "-c"]
+          args:
+            - |
+              set -euo pipefail
+
+              ns="longhorn-system"
+              ds="longhorn-csi-plugin"
+              key="veles.bstein.dev/simulation"
+              value="true"
+              effect="NoSchedule"
+
+              current="$(kubectl -n "${ns}" get daemonset "${ds}" -o json)"
+              if echo "${current}" | jq -e \
+                --arg key "${key}" \
+                --arg value "${value}" \
+                --arg effect "${effect}" \
+                '.spec.template.spec.tolerations[]? | select(.key == $key and .value == $value and .effect == $effect)' >/dev/null; then
+                echo "${ds} already tolerates ${key}=${value}:${effect}"
+              else
+                patch="$(echo "${current}" | jq -c \
+                  --arg key "${key}" \
+                  --arg value "${value}" \
+                  --arg effect "${effect}" \
+                  '{
+                    spec: {
+                      template: {
+                        spec: {
+                          tolerations: ((.spec.template.spec.tolerations // []) + [
+                            {key: $key, operator: "Equal", value: $value, effect: $effect}
+                          ])
+                        }
+                      }
+                    }
+                  }')"
+                kubectl -n "${ns}" patch daemonset "${ds}" --type=merge -p "${patch}"
+              fi
+
+              kubectl -n "${ns}" rollout status daemonset/"${ds}" --timeout=180s
diff --git a/infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml b/infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml
index 746935a2..10ef2df3 100644
--- a/infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml
+++ b/infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml
@@ -2,7 +2,7 @@
 apiVersion: batch/v1
 kind: Job
 metadata:
-  name: longhorn-settings-ensure-8
+  name: longhorn-settings-ensure-9
   namespace: longhorn-system
 spec:
   backoffLimit: 0
diff --git a/infrastructure/longhorn/core/scripts/longhorn_settings_ensure.sh b/infrastructure/longhorn/core/scripts/longhorn_settings_ensure.sh
index 3b5316eb..c02adc45 100644
--- a/infrastructure/longhorn/core/scripts/longhorn_settings_ensure.sh
+++ b/infrastructure/longhorn/core/scripts/longhorn_settings_ensure.sh
@@ -30,10 +30,25 @@ update_setting() {
   fi
 
   echo "Setting ${name} -> ${value}"
-  curl ${curl_opts} -X PUT \
+  out="$(mktemp)"
+  if curl ${curl_opts} -o "${out}" -X PUT \
     -H "Content-Type: application/json" \
     -d "{\"value\":\"${value}\"}" \
-    "${api_base}/${name}" >/dev/null
+    "${api_base}/${name}"; then
+    rm -f "${out}"
+    return 0
+  fi
+
+  current="$(curl ${curl_opts} "${api_base}/${name}" || true)"
+  if echo "${current}" | grep -Fq "\"value\":\"${value}\""; then
+    echo "Setting ${name} stored; Longhorn will apply it when current state allows."
+    rm -f "${out}"
+    return 0
+  fi
+
+  cat "${out}" >&2 || true
+  rm -f "${out}"
+  return 1
 }
 
 wait_for_api