recovery(ananke): handle longhorn harbor deadlock
This commit is contained in:
parent
cf5caedd56
commit
0f58aa16a9
@ -37,7 +37,7 @@ spec:
|
||||
createSecret: false
|
||||
registrySecret: longhorn-registry
|
||||
image:
|
||||
pullPolicy: Always
|
||||
pullPolicy: IfNotPresent
|
||||
longhorn:
|
||||
engine:
|
||||
repository: registry.bstein.dev/infra/longhorn-engine
|
||||
@ -80,7 +80,7 @@ spec:
|
||||
repository: registry.bstein.dev/infra/longhorn-livenessprobe
|
||||
tag: v2.16.0
|
||||
defaultSettings:
|
||||
systemManagedPodsImagePullPolicy: Always
|
||||
systemManagedPodsImagePullPolicy: if-not-present
|
||||
taintToleration: veles.bstein.dev/simulation=true:NoSchedule
|
||||
longhornManager:
|
||||
tolerations:
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# Harbor cold-start bootstrap images.
|
||||
# Harbor and Longhorn cold-start bootstrap images.
|
||||
registry.bstein.dev/infra/harbor-core:v2.14.1-arm64
|
||||
registry.bstein.dev/infra/harbor-jobservice:v2.14.1-arm64
|
||||
registry.bstein.dev/infra/harbor-portal:v2.14.1-arm64
|
||||
@ -7,3 +7,18 @@ registry.bstein.dev/infra/harbor-registryctl:v2.14.1-arm64
|
||||
registry.bstein.dev/infra/harbor-redis:v2.14.1-arm64
|
||||
registry.bstein.dev/infra/harbor-nginx:v2.14.1-arm64
|
||||
registry.bstein.dev/infra/harbor-prepare:v2.14.1-arm64
|
||||
|
||||
# Longhorn must be able to start before Harbor is fully healthy.
|
||||
registry.bstein.dev/infra/longhorn-engine:v1.8.2
|
||||
registry.bstein.dev/infra/longhorn-manager:v1.8.2
|
||||
registry.bstein.dev/infra/longhorn-ui:v1.8.2
|
||||
registry.bstein.dev/infra/longhorn-instance-manager:v1.8.2
|
||||
registry.bstein.dev/infra/longhorn-share-manager:v1.8.2
|
||||
registry.bstein.dev/infra/longhorn-backing-image-manager:v1.8.2
|
||||
registry.bstein.dev/infra/longhorn-support-bundle-kit:v0.0.56
|
||||
registry.bstein.dev/infra/longhorn-csi-attacher:v4.9.0
|
||||
registry.bstein.dev/infra/longhorn-csi-provisioner:v5.3.0
|
||||
registry.bstein.dev/infra/longhorn-csi-node-driver-registrar:v2.14.0
|
||||
registry.bstein.dev/infra/longhorn-csi-resizer:v1.13.2
|
||||
registry.bstein.dev/infra/longhorn-csi-snapshotter:v8.2.0
|
||||
registry.bstein.dev/infra/longhorn-livenessprobe:v2.16.0
|
||||
|
||||
14
scripts/bootstrap/longhorn-unlock-images.txt
Normal file
14
scripts/bootstrap/longhorn-unlock-images.txt
Normal file
@ -0,0 +1,14 @@
|
||||
# Longhorn images needed when Harbor is unhealthy during storage recovery.
|
||||
registry.bstein.dev/infra/longhorn-engine:v1.8.2
|
||||
registry.bstein.dev/infra/longhorn-manager:v1.8.2
|
||||
registry.bstein.dev/infra/longhorn-ui:v1.8.2
|
||||
registry.bstein.dev/infra/longhorn-instance-manager:v1.8.2
|
||||
registry.bstein.dev/infra/longhorn-share-manager:v1.8.2
|
||||
registry.bstein.dev/infra/longhorn-backing-image-manager:v1.8.2
|
||||
registry.bstein.dev/infra/longhorn-support-bundle-kit:v0.0.56
|
||||
registry.bstein.dev/infra/longhorn-csi-attacher:v4.9.0
|
||||
registry.bstein.dev/infra/longhorn-csi-provisioner:v5.3.0
|
||||
registry.bstein.dev/infra/longhorn-csi-node-driver-registrar:v2.14.0
|
||||
registry.bstein.dev/infra/longhorn-csi-resizer:v1.13.2
|
||||
registry.bstein.dev/infra/longhorn-csi-snapshotter:v8.2.0
|
||||
registry.bstein.dev/infra/longhorn-livenessprobe:v2.16.0
|
||||
@ -4,7 +4,9 @@ EXPECTED_FLUX_URL="ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git"
|
||||
SHUTDOWN_MODE="host-poweroff"
|
||||
STATE_SUBDIR=".local/share/ananke"
|
||||
HARBOR_BUNDLE_BASENAME="harbor-bootstrap-v2.14.1-arm64.tar.zst"
|
||||
HARBOR_TARGET_NODE=""
|
||||
BOOTSTRAP_BUNDLE_ARCH="arm64"
|
||||
RECOVERY_UNCORDON_DENYLIST="titan-18,titan-22,titan-24"
|
||||
HARBOR_TARGET_NODE="titan-11"
|
||||
HARBOR_CANARY_NODE=""
|
||||
HARBOR_HOST_LABEL_KEY="ananke.bstein.dev/harbor-bootstrap"
|
||||
HARBOR_CANARY_IMAGE="registry.bstein.dev/bstein/kubectl:1.35.0"
|
||||
@ -33,4 +35,4 @@ STARTUP_INCLUDE_INGRESS_CHECKS="1"
|
||||
STARTUP_INGRESS_ALLOWED_STATUSES="200,301,302,307,308,401,403,404"
|
||||
STARTUP_IGNORE_INGRESS_HOSTS_REGEX=""
|
||||
STARTUP_INGRESS_CHECK_TIMEOUT_SECONDS="10"
|
||||
STARTUP_SERVICE_CHECKLIST='gitea|https://scm.bstein.dev/api/healthz|200|"status":"pass"||;grafana|https://metrics.bstein.dev/api/health|200|"database":"ok"||;harbor|https://registry.bstein.dev/v2/|200,401|||'
|
||||
STARTUP_SERVICE_CHECKLIST='gitea|https://scm.bstein.dev/api/healthz|200|"status":"pass"||;grafana|https://metrics.bstein.dev/api/health|200|"database":"ok"||;harbor|https://registry.bstein.dev/v2/|401|unauthorized|<html|'
|
||||
|
||||
@ -5,6 +5,7 @@ IMAGES_FILE="scripts/bootstrap/harbor-bootstrap-images.txt"
|
||||
BUNDLE_FILE="artifacts/harbor-bootstrap-v2.14.1-arm64.tar.zst"
|
||||
DOCKER_CONFIG_PATH=""
|
||||
PLATFORM="linux/arm64"
|
||||
ZSTD_LEVEL="${ZSTD_LEVEL:-19}"
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
@ -24,9 +25,13 @@ while [[ $# -gt 0 ]]; do
|
||||
PLATFORM="${2:?missing platform}"
|
||||
shift 2
|
||||
;;
|
||||
--zstd-level)
|
||||
ZSTD_LEVEL="${2:?missing zstd compression level}"
|
||||
shift 2
|
||||
;;
|
||||
-h|--help)
|
||||
cat <<USAGE
|
||||
Usage: scripts/build_harbor_bootstrap_bundle.sh [--images-file <path>] [--bundle-file <path>] [--docker-config <path>] [--platform <linux/arm64>]
|
||||
Usage: scripts/build_harbor_bootstrap_bundle.sh [--images-file <path>] [--bundle-file <path>] [--docker-config <path>] [--platform <linux/arm64>] [--zstd-level <level>]
|
||||
USAGE
|
||||
exit 0
|
||||
;;
|
||||
@ -47,12 +52,54 @@ if [[ ${#IMAGES[@]} -eq 0 ]]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
#######################################
# Map a mirrored Longhorn image reference to the upstream image it mirrors.
#
# References under registry.bstein.dev/infra/longhorn-* are translated to
# their docker.io/longhornio or registry.k8s.io/sig-storage counterpart,
# keeping the tag. Any other reference is echoed back unchanged, which
# callers can use to detect "no upstream alias known".
#
# Arguments: $1 - image reference, e.g. registry.bstein.dev/infra/longhorn-ui:v1.8.2
# Outputs:   the upstream reference (or $1 itself) on stdout
# Returns:   0
#######################################
source_image_for_alias() {
  local image="$1"
  # Everything after the last ':' is treated as the tag.
  local tag="${image##*:}"
  local upstream

  case "${image}" in
    # Longhorn core components published under docker.io/longhornio.
    registry.bstein.dev/infra/longhorn-engine:*)
      upstream="docker.io/longhornio/longhorn-engine" ;;
    registry.bstein.dev/infra/longhorn-manager:*)
      upstream="docker.io/longhornio/longhorn-manager" ;;
    registry.bstein.dev/infra/longhorn-ui:*)
      upstream="docker.io/longhornio/longhorn-ui" ;;
    registry.bstein.dev/infra/longhorn-instance-manager:*)
      upstream="docker.io/longhornio/longhorn-instance-manager" ;;
    registry.bstein.dev/infra/longhorn-share-manager:*)
      upstream="docker.io/longhornio/longhorn-share-manager" ;;
    # These two drop the "longhorn-" prefix upstream.
    registry.bstein.dev/infra/longhorn-backing-image-manager:*)
      upstream="docker.io/longhornio/backing-image-manager" ;;
    registry.bstein.dev/infra/longhorn-support-bundle-kit:*)
      upstream="docker.io/longhornio/support-bundle-kit" ;;
    # CSI sidecars come from registry.k8s.io/sig-storage.
    registry.bstein.dev/infra/longhorn-csi-attacher:*)
      upstream="registry.k8s.io/sig-storage/csi-attacher" ;;
    registry.bstein.dev/infra/longhorn-csi-provisioner:*)
      upstream="registry.k8s.io/sig-storage/csi-provisioner" ;;
    registry.bstein.dev/infra/longhorn-csi-node-driver-registrar:*)
      upstream="registry.k8s.io/sig-storage/csi-node-driver-registrar" ;;
    registry.bstein.dev/infra/longhorn-csi-resizer:*)
      upstream="registry.k8s.io/sig-storage/csi-resizer" ;;
    registry.bstein.dev/infra/longhorn-csi-snapshotter:*)
      upstream="registry.k8s.io/sig-storage/csi-snapshotter" ;;
    registry.bstein.dev/infra/longhorn-livenessprobe:*)
      upstream="registry.k8s.io/sig-storage/livenessprobe" ;;
    *)
      # No alias known: hand the reference back untouched.
      printf '%s\n' "${image}"
      return 0
      ;;
  esac

  printf '%s:%s\n' "${upstream}" "${tag}"
}
|
||||
|
||||
#######################################
# Make sure an image is present in the local Docker image store under
# the exact name given.
#
# Resolution order:
#   1. reuse a copy that already exists locally;
#   2. pull the image itself for ${PLATFORM};
#   3. pull the upstream image named by source_image_for_alias and tag it
#      with the requested name.
#
# Globals:   PLATFORM (read) - passed to `docker pull --platform`
# Arguments: $1 - image reference to make available locally
# Outputs:   progress messages on stderr; stdout of docker pull is discarded
# Returns:   0 once the image is available locally; non-zero when the
#            direct pull failed and there is no upstream alias, or when
#            the upstream pull or the tag step failed
#######################################
pull_or_tag_image() {
  local image="$1"
  local source_image

  # NOTE(review): this cache check matches by name only and does not compare
  # the cached image's platform with ${PLATFORM} - confirm that is acceptable
  # on build hosts that may hold images for another architecture.
  if docker image inspect "${image}" >/dev/null 2>&1; then
    echo "Using cached ${image}" >&2
    return 0
  fi

  echo "Pulling ${image}" >&2
  if docker pull --platform "${PLATFORM}" "${image}" >/dev/null; then
    return 0
  fi

  # The direct pull failed; see whether a public upstream image can stand in.
  source_image="$(source_image_for_alias "${image}")" || return
  if [[ "${source_image}" == "${image}" ]]; then
    # The helper echoed the name back: no alternative source is known.
    return 1
  fi

  echo "Pulling ${source_image} for ${image}" >&2
  # Propagate a failed upstream pull explicitly so that `docker tag` is never
  # attempted on a missing or stale image, even when errexit is off or this
  # function is called from a conditional context.
  docker pull --platform "${PLATFORM}" "${source_image}" >/dev/null || return
  docker tag "${source_image}" "${image}"
}
|
||||
|
||||
# Make sure the directory that will hold the bundle exists.
mkdir -p "$(dirname "${BUNDLE_FILE}")"

# Resolve every listed image into the local Docker store. The helper is the
# only fetch path: it reuses cached images, tries the listed reference, and
# falls back to the upstream source for aliased images. A bare `docker pull`
# here would fail first for images the registry cannot serve and bypass that
# fallback.
for image in "${IMAGES[@]}"; do
  pull_or_tag_image "${image}"
done

# Compress into a temporary file and rename it into place afterwards, so an
# interrupted or failed run does not leave a truncated archive at
# ${BUNDLE_FILE}. The compression level comes from ${ZSTD_LEVEL} rather than
# a hard-coded value.
tmp_bundle="${BUNDLE_FILE}.tmp"
rm -f "${tmp_bundle}"
docker save "${IMAGES[@]}" | zstd -T0 -"${ZSTD_LEVEL}" -o "${tmp_bundle}"
mv "${tmp_bundle}" "${BUNDLE_FILE}"
echo "Wrote ${BUNDLE_FILE}" >&2
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user