jellyfin manual migration script
This commit is contained in:
parent cd52f617a0
commit 586ceb9f4e

169 scripts/k3s_version_update.fish (Normal file)
@@ -0,0 +1,169 @@
# Pick the correct K3s asset for a remote host (arm64 vs x86_64)
function __k3s_asset_for_host
    set -l host $argv[1]
    set -l arch (ssh atlas@titan-db "ssh atlas@$host 'uname -m'" 2>/dev/null)
    switch $arch
        case aarch64 arm64
            echo k3s-arm64
        case x86_64 amd64
            echo k3s
        case '*'
            # Default to arm64 in your environment
            echo k3s-arm64
    end
end
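
# Example (a sketch; the host name is an assumption, the result depends on the remote `uname -m`):
#   __k3s_asset_for_host titan-0b    # -> k3s-arm64 on an aarch64 node, k3s on x86_64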

# Safer control-plane upgrade via jump host using a binary swap (recommended)
# usage: upgrade_server_via_jump <host> <version>
function upgrade_server_via_jump
    set -l host $argv[1]
    set -l ver $argv[2]
    if test (count $argv) -lt 2
        echo "usage: upgrade_server_via_jump <host> <version>"; return 1
    end

    set -l jump titan-db
    set -l asset (__k3s_asset_for_host $host)
    # If already at target, skip
    set -l curr (kubectl get node $host -o jsonpath='{.status.nodeInfo.kubeletVersion}' 2>/dev/null)
    if test "$curr" = "$ver"
        echo "=== [$host] already at $ver; skipping"
        return 0
    end

    echo "=== [$host] preflight: check datastore-endpoint is present and DB TCP reachable"
    # 1) datastore-endpoint existence in config, env file, or unit
    set -l dsn_lines (ssh atlas@$jump "ssh atlas@$host 'sudo sh -lc \" \
        (test -f /etc/rancher/k3s/config.yaml && grep -E ^datastore-endpoint: /etc/rancher/k3s/config.yaml || true); \
        (test -f /etc/systemd/system/k3s.service.env && grep -E ^K3S_DATASTORE_ENDPOINT= /etc/systemd/system/k3s.service.env || true); \
        (test -f /etc/systemd/system/k3s.service && grep -F -- \"--datastore-endpoint=\" /etc/systemd/system/k3s.service || true) \
        \"'")

    if test -z "$dsn_lines"
        echo "ERROR: $host has no datastore-endpoint configured (config/env/unit). Aborting."; return 2
    end
    if string match -q '*datastore-endpoint: ""*' -- $dsn_lines
        echo "ERROR: $host datastore-endpoint is empty in config.yaml. Aborting."; return 2
    end
    if string match -q '*K3S_DATASTORE_ENDPOINT=""*' -- $dsn_lines
        echo "ERROR: $host K3S_DATASTORE_ENDPOINT is empty in k3s.service.env. Aborting."; return 2
    end

    # 2) DB TCP reachability from the target
    # "ok" = reachable, "fail" = nc ran but could not connect, "skip" = nc not installed
    set -l dbcheck (ssh atlas@$jump "ssh atlas@$host 'if command -v nc >/dev/null; then nc -vz -w2 192.168.22.10 5432 >/dev/null 2>&1 && echo ok || echo fail; else echo skip; fi'" 2>/dev/null)
    if test "$dbcheck" != "ok" -a "$dbcheck" != "skip"
        echo "ERROR: $host cannot reach 192.168.22.10:5432. Aborting."; return 3
    end
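
    # The same preflight can be run by hand before calling this function, e.g. (host name assumed):
    #   ssh atlas@titan-db "ssh atlas@titan-0b 'sudo grep ^datastore-endpoint: /etc/rancher/k3s/config.yaml'"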

    echo "=== [$host] cordon + drain"
    kubectl cordon $host
    set -l drained 0

    # Store flags as a list (not a single quoted string)
    set -l drain_common --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=20m

    # 1) Try a normal drain (respects PDBs)
    if kubectl drain $host $drain_common
        set drained 1
    else
        echo "WARN: standard drain on $host failed (likely a PDB). Retrying with --force."
        # 2) Retry with --force (unmanaged pods etc.)
        if kubectl drain $host $drain_common --force
            set drained 1
        else
            echo "WARN: drain still blocked on $host. Falling back to --disable-eviction (bypass PDBs)."
            # 3) Last resort: bypass PDBs entirely (deletes pods instead of Evictions; PDBs don't apply)
            if kubectl drain $host $drain_common --disable-eviction --force
                set drained 1
            else
                echo "ERROR: drain failed on $host even with --disable-eviction."
                kubectl get pods -A -o wide --field-selector spec.nodeName=$host | head -n 50
                return 4
            end
        end
    end
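
    # If drains keep getting blocked, it can help to see which PodDisruptionBudgets exist first
    # (read-only; safe to run at any time):
    #   kubectl get pdb -A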

    echo "=== [$host] binary swap to $ver ($asset)"
    set -l rc 0
    ssh atlas@$jump "ssh atlas@$host 'set -euo pipefail
        sudo systemctl stop k3s
        if test -x /usr/local/bin/k3s; then
            sudo cp /usr/local/bin/k3s /usr/local/bin/k3s.bak.\$(date -Iseconds)
        fi
        url=\"https://github.com/k3s-io/k3s/releases/download/$ver/$asset\"
        sudo curl -fL -o /usr/local/bin/k3s \"\$url\"
        sudo chmod +x /usr/local/bin/k3s
        sudo systemctl start k3s
        sleep 4
        sudo k3s --version
    '" ; set rc $status

    if test $rc -ne 0
        echo "ERROR: remote swap/start failed on $host (rc=$rc)."
        if test $drained -eq 1
            kubectl uncordon $host
        end
        return $rc
    end
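
    # If the new binary misbehaves, the backup taken above can be restored by hand. A sketch
    # (<host> and the .bak timestamp suffix are placeholders; use whatever
    # `ls /usr/local/bin/k3s.bak.*` shows on the node):
    #   ssh atlas@titan-db "ssh atlas@<host> 'sudo systemctl stop k3s; \
    #       sudo cp /usr/local/bin/k3s.bak.<timestamp> /usr/local/bin/k3s; \
    #       sudo systemctl start k3s'"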

    echo "=== [$host] wait for Ready and target version: $ver"
    set -l tries 0
    while true
        set -l v (kubectl get node $host -o jsonpath='{.status.nodeInfo.kubeletVersion}' 2>/dev/null)
        set -l r (kubectl get node $host -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null)
        echo "$host -> $v Ready=$r"
        if test "$v" = "$ver" -a "$r" = "True"
            break
        end
        if test $tries -eq 0
            # one-time nudge if the service came up slow
            ssh atlas@$jump "ssh atlas@$host 'sudo systemctl daemon-reload; sudo systemctl restart k3s'"
        end
        set tries (math $tries + 1)
        if test $tries -gt 100
            echo "ERROR: $host did not reach Ready/$ver; showing last logs:"
            ssh atlas@$jump "ssh atlas@$host 'sudo journalctl -u k3s -n 200 --no-pager | tail -n +1'"
            if test $drained -eq 1
                kubectl uncordon $host
            end
            return 5
        end
        sleep 3
    end
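
    # Progress can also be watched from another terminal while this loop runs (read-only;
    # <host> is a placeholder):
    #   kubectl get node <host> -o wide -w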

    echo "=== [$host] uncordon"
    kubectl uncordon $host
end

# Rolling control-plane upgrade to a target version (do NOT run in parallel)
# usage: upgrade_control_plane_to <version> [hosts...]
# If hosts omitted, defaults to: titan-0b titan-0c titan-0a
function upgrade_control_plane_to
    set -l ver $argv[1]
    if test -z "$ver"
        echo "usage: upgrade_control_plane_to <version> [titan-0b titan-0c titan-0a]"; return 1
    end
    set -l hosts $argv[2..-1]
    if test (count $hosts) -eq 0
        set hosts titan-0b titan-0c titan-0a
    end

    for n in $hosts
        # Build union of CP nodes (master ∪ control-plane)
        set -l ready_cp (begin
            kubectl get nodes -l 'node-role.kubernetes.io/control-plane' \
                -o jsonpath='{range .items[*]}{.metadata.name}{"|"}{.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}'
            kubectl get nodes -l 'node-role.kubernetes.io/master' \
                -o jsonpath='{range .items[*]}{.metadata.name}{"|"}{.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}'
        end | sort -u | grep -E '\|True$' | grep -v $n | wc -l)

        if test (math $ready_cp) -lt 1
            echo "ERROR: upgrading $n would drop remaining Ready control-plane count below 1. Aborting."
            return 9
        end

        upgrade_server_via_jump $n $ver; or return $status
    end

    kubectl get nodes -o wide
end
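
# Convenience sketch (not part of the upgrade flow; the helper name is mine): list current
# kubelet versions to eyeball the cluster before and after a rollout.
function __k3s_node_versions
    kubectl get nodes -o custom-columns='NAME:.metadata.name,VERSION:.status.nodeInfo.kubeletVersion'
end

# Example end-to-end usage (the version string is an assumption; substitute the actual K3s
# release tag being targeted, host defaults come from upgrade_control_plane_to):
#   source scripts/k3s_version_update.fish
#   __k3s_node_versions
#   upgrade_control_plane_to v1.30.4+k3s1             # rolls titan-0b, titan-0c, titan-0a in order
#   upgrade_server_via_jump titan-0b v1.30.4+k3s1     # or upgrade a single server by hand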