From 9fb8dd483905b7205b129ed8a1074241244ae29c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 27 Apr 2026 16:15:13 -0300 Subject: [PATCH] stability: harden fluent-bit buffering and longhorn node-down recovery --- .../longhorn/core/scripts/longhorn_settings_ensure.sh | 1 + services/logging/fluent-bit-helmrelease.yaml | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/infrastructure/longhorn/core/scripts/longhorn_settings_ensure.sh b/infrastructure/longhorn/core/scripts/longhorn_settings_ensure.sh index fc7e45c6..a329b8cf 100644 --- a/infrastructure/longhorn/core/scripts/longhorn_settings_ensure.sh +++ b/infrastructure/longhorn/core/scripts/longhorn_settings_ensure.sh @@ -44,3 +44,4 @@ update_setting support-bundle-manager-image "registry.bstein.dev/infra/longhorn- # Keep storage-heavy nodes from getting hammered by rebuild storms and skew. update_setting replica-auto-balance "best-effort" update_setting concurrent-replica-rebuild-per-node-limit "2" +update_setting node-down-pod-deletion-policy "delete-both-statefulset-and-deployment-pod" diff --git a/services/logging/fluent-bit-helmrelease.yaml b/services/logging/fluent-bit-helmrelease.yaml index a686bb9c..34b755ff 100644 --- a/services/logging/fluent-bit-helmrelease.yaml +++ b/services/logging/fluent-bit-helmrelease.yaml @@ -44,8 +44,7 @@ spec: path: /var/log/journal - name: fluentbit-state emptyDir: - medium: Memory - sizeLimit: 64Mi + sizeLimit: 1Gi extraVolumeMounts: - name: runlogjournal mountPath: /run/log/journal