jellyfin manual migration script
This commit is contained in:
parent cd52f617a0
commit 586ceb9f4e
169 scripts/k3s_version_update.fish Normal file
@@ -0,0 +1,169 @@
# Pick the correct K3s asset for a remote host (arm64 vs x86_64)
function __k3s_asset_for_host
    set -l host $argv[1]
    set -l arch (ssh atlas@titan-db "ssh atlas@$host 'uname -m'" 2>/dev/null)
    switch $arch
        case aarch64 arm64
            echo k3s-arm64
        case x86_64 amd64
            echo k3s
        case '*'
            # Default to arm64 in your environment
            echo k3s-arm64
    end
end

# Safer control-plane upgrade via jump host using a binary swap (recommended)
# usage: upgrade_server_via_jump <host> <version>
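# Note: the swap below only stops k3s, backs up /usr/local/bin/k3s, downloads the
# target release binary and restarts the service, so config.yaml and the systemd
# unit are left untouched and no flags need to be re-specified.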
function upgrade_server_via_jump
    set -l host $argv[1]
    set -l ver $argv[2]
    if test (count $argv) -lt 2
        echo "usage: upgrade_server_via_jump <host> <version>"; return 1
    end

    set -l jump titan-db
    set -l asset (__k3s_asset_for_host $host)
    # If already at target, skip
    set -l curr (kubectl get node $host -o jsonpath='{.status.nodeInfo.kubeletVersion}' 2>/dev/null)
    if test "$curr" = "$ver"
        echo "=== [$host] already at $ver; skipping"
        return 0
    end

    echo "=== [$host] preflight: check datastore-endpoint is present and DB TCP reachable"
    # 1) datastore-endpoint existence in config, env file, or unit
    set -l dsn_lines (ssh atlas@$jump "ssh atlas@$host 'sudo sh -lc \" \
        (test -f /etc/rancher/k3s/config.yaml && grep -E ^datastore-endpoint: /etc/rancher/k3s/config.yaml || true); \
        (test -f /etc/systemd/system/k3s.service.env && grep -E ^K3S_DATASTORE_ENDPOINT= /etc/systemd/system/k3s.service.env || true); \
        (test -f /etc/systemd/system/k3s.service && grep -F -- \"--datastore-endpoint=\" /etc/systemd/system/k3s.service || true) \
        \"'")

    if test -z "$dsn_lines"
        echo "ERROR: $host has no datastore-endpoint configured (config/env/unit). Aborting."; return 2
    end
    if string match -q '*datastore-endpoint: ""*' -- $dsn_lines
        echo "ERROR: $host datastore-endpoint is empty in config.yaml. Aborting."; return 2
    end
    if string match -q '*K3S_DATASTORE_ENDPOINT=""*' -- $dsn_lines
        echo "ERROR: $host K3S_DATASTORE_ENDPOINT is empty in k3s.service.env. Aborting."; return 2
    end

    # 2) DB TCP reachability from the target
    set -l dbcheck (ssh atlas@$jump "ssh atlas@$host 'command -v nc >/dev/null && nc -vz -w2 192.168.22.10 5432 >/dev/null 2>&1 && echo ok || echo skip'" 2>/dev/null)
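    # "skip" means nc is not installed on the target; the reachability check is best-effort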
    if test "$dbcheck" != "ok" -a "$dbcheck" != "skip"
        echo "ERROR: $host cannot reach 192.168.22.10:5432. Aborting."; return 3
    end

    echo "=== [$host] cordon + drain"
    kubectl cordon $host
    set -l drained 0

    # Store flags as a list (not a single quoted string)
    set -l drain_common --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=20m

    # 1) Try a normal drain (respects PDBs)
    if kubectl drain $host $drain_common
        set drained 1
    else
        echo "WARN: standard drain on $host failed (likely a PDB). Retrying with --force."
        # 2) Retry with --force (unmanaged pods etc.)
        if kubectl drain $host $drain_common --force
            set drained 1
        else
            echo "WARN: drain still blocked on $host. Falling back to --disable-eviction (bypass PDBs)."
            # 3) Last resort: bypass PDBs entirely (deletes pods instead of Evictions; PDBs don't apply)
            if kubectl drain $host $drain_common --disable-eviction --force
                set drained 1
            else
                echo "ERROR: drain failed on $host even with --disable-eviction."
                kubectl get pods -A -o wide --field-selector spec.nodeName=$host | head -n 50
                return 4
            end
        end
    end

    echo "=== [$host] binary swap to $ver ($asset)"
    set -l rc 0
    ssh atlas@$jump "ssh atlas@$host 'set -euo pipefail
        sudo systemctl stop k3s
        if test -x /usr/local/bin/k3s; then
            sudo cp /usr/local/bin/k3s /usr/local/bin/k3s.bak.\$(date -Iseconds)
        fi
        url=\"https://github.com/k3s-io/k3s/releases/download/$ver/$asset\"
        sudo curl -fL -o /usr/local/bin/k3s \"\$url\"
        sudo chmod +x /usr/local/bin/k3s
        sudo systemctl start k3s
        sleep 4
        sudo k3s --version
    '" ; set rc $status

    if test $rc -ne 0
        echo "ERROR: remote swap/start failed on $host (rc=$rc)."
        if test $drained -eq 1
            kubectl uncordon $host
        end
        return $rc
    end

    echo "=== [$host] wait for Ready and target version: $ver"
    set -l tries 0
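    # poll every 3 seconds; give up after 100 tries (roughly 5 minutes)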
    while true
        set -l v (kubectl get node $host -o jsonpath='{.status.nodeInfo.kubeletVersion}' 2>/dev/null)
        set -l r (kubectl get node $host -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null)
        echo "$host -> $v Ready=$r"
        if test "$v" = "$ver" -a "$r" = "True"
            break
        end
        if test $tries -eq 0
            # one-time nudge if the service came up slow
            ssh atlas@$jump "ssh atlas@$host 'sudo systemctl daemon-reload; sudo systemctl restart k3s'"
        end
        set tries (math $tries + 1)
        if test $tries -gt 100
            echo "ERROR: $host did not reach Ready/$ver; showing last logs:"
            ssh atlas@$jump "ssh atlas@$host 'sudo journalctl -u k3s -n 200 --no-pager | tail -n +1'"
            if test $drained -eq 1
                kubectl uncordon $host
            end
            return 5
        end
        sleep 3
    end

    echo "=== [$host] uncordon"
    kubectl uncordon $host
end

# Rolling control-plane upgrade to a target version (do NOT run in parallel)
# usage: upgrade_control_plane_to <version> [hosts...]
# If hosts omitted, defaults to: titan-0b titan-0c titan-0a
function upgrade_control_plane_to
    set -l ver $argv[1]
    if test -z "$ver"
        echo "usage: upgrade_control_plane_to <version> [titan-0b titan-0c titan-0a]"; return 1
    end
    set -l hosts $argv[2..-1]
    if test (count $hosts) -eq 0
        set hosts titan-0b titan-0c titan-0a
    end

    for n in $hosts
        # Build union of CP nodes (master ∪ control-plane)
        set -l ready_cp (begin
            kubectl get nodes -l 'node-role.kubernetes.io/control-plane' \
                -o jsonpath='{range .items[*]}{.metadata.name}{"|"}{.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}'
            kubectl get nodes -l 'node-role.kubernetes.io/master' \
                -o jsonpath='{range .items[*]}{.metadata.name}{"|"}{.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}'
        end | sort -u | grep -E '\|True$' | grep -v $n | wc -l)
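        # ready_cp = how many OTHER control-plane nodes are currently Ready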
        if test (math $ready_cp) -lt 1
            echo "ERROR: upgrading $n would drop remaining Ready control-plane count below 1. Aborting."
            return 9
        end

        upgrade_server_via_jump $n $ver; or return $status
    end

    kubectl get nodes -o wide
end
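
# Example invocation (version string is illustrative; substitute the actual target release):
#   upgrade_control_plane_to v1.32.3+k3s1
#   upgrade_control_plane_to v1.32.3+k3s1 titan-0b titan-0c
#   upgrade_server_via_jump titan-0a v1.32.3+k3s1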