recovery(ananke): avoid unnecessary longhorn sidecar churn

This commit is contained in:
jenkins 2026-06-18 18:20:22 -03:00
parent 8c45f9509e
commit 0c2b59f7cc

View File

@ -1048,6 +1048,31 @@ remove_longhorn_manager_prepull_sidecar() {
done <<< "${indexes}"
}
# Report whether the Longhorn manager pre-pull sidecar is stuck in an image or
# container start-up failure.
#
# Fetches the pods labelled app=longhorn-manager in the longhorn-system
# namespace as JSON and inspects the status entries of the container named
# "pre-pull-share-manager-image".
#
# Returns:
#   0      at least one such container is in a waiting state whose reason
#          matches one of the listed image/runtime failure reasons.
#   non-0  no container matched. jq -e also exits non-zero when it receives
#          no usable input, so a failed kubectl query looks the same to the
#          caller as "no failures".
#
# Produces no stdout; jq's stdout and stderr are discarded.
longhorn_manager_prepull_sidecar_has_pull_failures() {
  local sidecar_name="pre-pull-share-manager-image"

  kubectl -n longhorn-system get pods -l app=longhorn-manager -o json \
    | jq -e --arg sidecar "${sidecar_name}" '
        [
          .items[].status.containerStatuses[]?
          | select(.name == $sidecar)
          # A container that is not waiting has no reason; treat it as "".
          | (.state.waiting.reason // "")
          | select(test("ImagePullBackOff|ErrImagePull|CreateContainerError|RunContainerError|InvalidImageName"))
        ]
        | length > 0' &>/dev/null
}
# Remove the Longhorn manager pre-pull sidecar only when there is a reason to.
#
# The sidecar is removed when either:
#   - harbor_endpoint_is_ready 1 reports failure, or
#   - longhorn_manager_prepull_sidecar_has_pull_failures reports success.
# The Harbor check runs first; the pull-failure check is only consulted when
# Harbor is reported ready. Otherwise the sidecar is left in place and a
# "retained" line is logged.
#
# Returns 0 after a removal attempt (the removal helper's own status is not
# propagated); otherwise returns the status of the final log call.
# NOTE(review): the meaning of the "1" argument to harbor_endpoint_is_ready is
# not visible here (presumably an attempt count or timeout) - confirm.
remove_longhorn_manager_prepull_sidecar_if_needed() {
  local removal_warning=""

  if ! harbor_endpoint_is_ready 1; then
    removal_warning="Removing Longhorn manager pre-pull sidecar because Harbor registry API is unhealthy."
  elif longhorn_manager_prepull_sidecar_has_pull_failures; then
    removal_warning="Removing Longhorn manager pre-pull sidecar because it is in image/runtime failure."
  fi

  # Nothing wrong: keep the sidecar and record why.
  if [[ -z "${removal_warning}" ]]; then
    log "longhorn-manager-prepull-sidecar=retained harbor=healthy pull_failures=false"
    return
  fi

  warn "${removal_warning}"
  remove_longhorn_manager_prepull_sidecar
  return 0
}
save_longhorn_unlock_optional_replica_snapshot() {
if [[ "${EXECUTE}" -eq 0 ]]; then
log "DRY-RUN: save optional workload snapshot to ${LONGHORN_UNLOCK_REPLICA_SNAPSHOT_FILE}"
@ -2984,7 +3009,7 @@ longhorn_unlock_flow() {
REFRESH_BOOTSTRAP_IMAGE_ALIASES=1
freeze_longhorn_deadlock_automation
ensure_longhorn_cache_first_policy
remove_longhorn_manager_prepull_sidecar
remove_longhorn_manager_prepull_sidecar_if_needed
free_longhorn_instance_manager_headroom
delete_failed_nonstorage_pods_for_headroom
repair_longhorn_manager_cache_deadlock || warn "Surgical Longhorn manager cache repair did not complete on every affected node."