From 586ceb9f4e51e47219e119ff87819dee0e26766a Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Mon, 1 Sep 2025 18:40:02 -0500
Subject: [PATCH] jellyfin manual migration script

---
 -c                                            |   0
 ...p.fish => crypto_wallet_monero_setup.fish} |   0
 ...etup.fish => crypto_wallet_sui_setup.fish} |   0
 scripts/{hammer.fish => flux_hammer.fish}     |   0
 ...in_load.fish => jellyfin_manual_load.fish} |   0
 scripts/k3s_version_update.fish               | 169 ++++++++++++++++++
 6 files changed, 169 insertions(+)
 create mode 100644 -c
 rename scripts/{wallet_monero_setup.fish => crypto_wallet_monero_setup.fish} (100%)
 rename scripts/{wallet_sui_setup.fish => crypto_wallet_sui_setup.fish} (100%)
 rename scripts/{hammer.fish => flux_hammer.fish} (100%)
 rename scripts/{manual_jellyfin_load.fish => jellyfin_manual_load.fish} (100%)
 create mode 100644 scripts/k3s_version_update.fish

diff --git a/-c b/-c
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/wallet_monero_setup.fish b/scripts/crypto_wallet_monero_setup.fish
similarity index 100%
rename from scripts/wallet_monero_setup.fish
rename to scripts/crypto_wallet_monero_setup.fish
diff --git a/scripts/wallet_sui_setup.fish b/scripts/crypto_wallet_sui_setup.fish
similarity index 100%
rename from scripts/wallet_sui_setup.fish
rename to scripts/crypto_wallet_sui_setup.fish
diff --git a/scripts/hammer.fish b/scripts/flux_hammer.fish
similarity index 100%
rename from scripts/hammer.fish
rename to scripts/flux_hammer.fish
diff --git a/scripts/manual_jellyfin_load.fish b/scripts/jellyfin_manual_load.fish
similarity index 100%
rename from scripts/manual_jellyfin_load.fish
rename to scripts/jellyfin_manual_load.fish
diff --git a/scripts/k3s_version_update.fish b/scripts/k3s_version_update.fish
new file mode 100644
index 0000000..0a5f710
--- /dev/null
+++ b/scripts/k3s_version_update.fish
@@ -0,0 +1,169 @@
+# Pick the correct K3s asset for a remote host (arm64 vs x86_64)
+function __k3s_asset_for_host
+    set -l host $argv[1]
+    set -l arch (ssh atlas@titan-db "ssh atlas@$host 'uname -m'" 2>/dev/null)
+    switch $arch
+        case aarch64 arm64
+            echo k3s-arm64
+        case x86_64 amd64
+            echo k3s
+        case '*'
+            # Default to arm64 in your environment
+            echo k3s-arm64
+    end
+end
+
+# Safer control-plane upgrade via jump host using a binary swap (recommended)
+# usage: upgrade_server_via_jump <host> <version>
+function upgrade_server_via_jump
+    set -l host $argv[1]
+    set -l ver $argv[2]
+    if test (count $argv) -lt 2
+        echo "usage: upgrade_server_via_jump <host> <version>"; return 1
+    end
+
+    set -l jump titan-db
+    set -l asset (__k3s_asset_for_host $host)
+    # If already at target, skip
+    set -l curr (kubectl get node $host -o jsonpath='{.status.nodeInfo.kubeletVersion}' 2>/dev/null)
+    if test "$curr" = "$ver"
+        echo "=== [$host] already at $ver; skipping"
+        return 0
+    end
+
+    echo "=== [$host] preflight: check datastore-endpoint is present and DB TCP reachable"
+    # 1) datastore-endpoint existence in config, env file, or unit
+    set -l dsn_lines (ssh atlas@$jump "ssh atlas@$host 'sudo sh -lc \" \
+        (test -f /etc/rancher/k3s/config.yaml && grep -E ^datastore-endpoint: /etc/rancher/k3s/config.yaml || true); \
+        (test -f /etc/systemd/system/k3s.service.env && grep -E ^K3S_DATASTORE_ENDPOINT= /etc/systemd/system/k3s.service.env || true); \
+        (test -f /etc/systemd/system/k3s.service && grep -F -- \"--datastore-endpoint=\" /etc/systemd/system/k3s.service || true) \
+        \"'")
+
+    if test -z "$dsn_lines"
+        echo "ERROR: $host has no datastore-endpoint configured (config/env/unit). Aborting."; return 2
+    end
+    if string match -q '*datastore-endpoint: ""*' -- $dsn_lines
+        echo "ERROR: $host datastore-endpoint is empty in config.yaml. Aborting."; return 2
+    end
+    if string match -q '*K3S_DATASTORE_ENDPOINT=""*' -- $dsn_lines
+        echo "ERROR: $host K3S_DATASTORE_ENDPOINT is empty in k3s.service.env. Aborting."; return 2
+    end
+
+    # 2) DB TCP reachability from the target
+    set -l dbcheck (ssh atlas@$jump "ssh atlas@$host 'command -v nc >/dev/null && nc -vz -w2 192.168.22.10 5432 >/dev/null 2>&1 && echo ok || echo skip'" 2>/dev/null)
+    if test "$dbcheck" != "ok" -a "$dbcheck" != "skip"
+        echo "ERROR: $host cannot reach 192.168.22.10:5432. Aborting."; return 3
+    end
+
+    echo "=== [$host] cordon + drain"
+    kubectl cordon $host
+    set -l drained 0
+
+    # Store flags as a list (not a single quoted string)
+    set -l drain_common --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=20m
+
+    # 1) Try a normal drain (respects PDBs)
+    if kubectl drain $host $drain_common
+        set drained 1
+    else
+        echo "WARN: standard drain on $host failed (likely a PDB). Retrying with --force."
+        # 2) Retry with --force (unmanaged pods etc.)
+        if kubectl drain $host $drain_common --force
+            set drained 1
+        else
+            echo "WARN: drain still blocked on $host. Falling back to --disable-eviction (bypass PDBs)."
+            # 3) Last resort: bypass PDBs entirely (deletes pods instead of Evictions; PDBs don't apply)
+            if kubectl drain $host $drain_common --disable-eviction --force
+                set drained 1
+            else
+                echo "ERROR: drain failed on $host even with --disable-eviction."
+                kubectl get pods -A -o wide --field-selector spec.nodeName=$host | head -n 50
+                return 4
+            end
+        end
+    end
+
+    echo "=== [$host] binary swap to $ver ($asset)"
+    set -l rc 0
+    ssh atlas@$jump "ssh atlas@$host 'set -euo pipefail
+        sudo systemctl stop k3s
+        if test -x /usr/local/bin/k3s; then
+            sudo cp /usr/local/bin/k3s /usr/local/bin/k3s.bak.\$(date -Iseconds)
+        fi
+        url=\"https://github.com/k3s-io/k3s/releases/download/$ver/$asset\"
+        sudo curl -fL -o /usr/local/bin/k3s \"\$url\"
+        sudo chmod +x /usr/local/bin/k3s
+        sudo systemctl start k3s
+        sleep 4
+        sudo k3s --version
+    '" ; set rc $status
+
+    if test $rc -ne 0
+        echo "ERROR: remote swap/start failed on $host (rc=$rc)."
+        if test $drained -eq 1
+            kubectl uncordon $host
+        end
+        return $rc
+    end
+
+    echo "=== [$host] wait for Ready and target version: $ver"
+    set -l tries 0
+    while true
+        set -l v (kubectl get node $host -o jsonpath='{.status.nodeInfo.kubeletVersion}' 2>/dev/null)
+        set -l r (kubectl get node $host -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null)
+        echo "$host -> $v Ready=$r"
+        if test "$v" = "$ver" -a "$r" = "True"
+            break
+        end
+        if test $tries -eq 0
+            # one-time nudge if the service came up slow
+            ssh atlas@$jump "ssh atlas@$host 'sudo systemctl daemon-reload; sudo systemctl restart k3s'"
+        end
+        set tries (math $tries + 1)
+        if test $tries -gt 100
+            echo "ERROR: $host did not reach Ready/$ver; showing last logs:"
+            ssh atlas@$jump "ssh atlas@$host 'sudo journalctl -u k3s -n 200 --no-pager | tail -n +1'"
+            if test $drained -eq 1
+                kubectl uncordon $host
+            end
+            return 5
+        end
+        sleep 3
+    end
+
+    echo "=== [$host] uncordon"
+    kubectl uncordon $host
+end
+
+# Rolling control-plane upgrade to a target version (do NOT run in parallel)
+# usage: upgrade_control_plane_to <version> [hosts...]
+# If hosts omitted, defaults to: titan-0b titan-0c titan-0a
+function upgrade_control_plane_to
+    set -l ver $argv[1]
+    if test -z "$ver"
+        echo "usage: upgrade_control_plane_to <version> [titan-0b titan-0c titan-0a]"; return 1
+    end
+    set -l hosts $argv[2..-1]
+    if test (count $hosts) -eq 0
+        set hosts titan-0b titan-0c titan-0a
+    end
+
+    for n in $hosts
+        # Build union of CP nodes (master ∪ control-plane)
+        set -l ready_cp (begin
+            kubectl get nodes -l 'node-role.kubernetes.io/control-plane' \
+                -o jsonpath='{range .items[*]}{.metadata.name}{"|"}{.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}'
+            kubectl get nodes -l 'node-role.kubernetes.io/master' \
+                -o jsonpath='{range .items[*]}{.metadata.name}{"|"}{.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}'
+        end | sort -u | grep -E '\|True$' | grep -v $n | wc -l)
+
+        if test (math $ready_cp) -lt 1
+            echo "ERROR: upgrading $n would drop remaining Ready control-plane count below 1. Aborting."
+            return 9
+        end
+
+        upgrade_server_via_jump $n $ver; or return $status
+    end
+
+    kubectl get nodes -o wide
+end
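
A minimal usage sketch, not part of the patch (the release tag and host names below are only examples drawn from the script's defaults): source the new script into a fish session on a machine with kubectl configured for this cluster and SSH access to the titan-db jump host, then run the rolling upgrade with the target K3s version.

    # Illustrative only; substitute the real target release tag.
    source scripts/k3s_version_update.fish
    upgrade_control_plane_to v1.30.4+k3s1                  # default host order: titan-0b titan-0c titan-0a
    upgrade_control_plane_to v1.30.4+k3s1 titan-0c         # or name specific control-plane hosts
    upgrade_server_via_jump titan-0a v1.30.4+k3s1          # single node: preflight, drain, binary swap, uncordon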