#!/usr/bin/env fish
# Flux "hammer": disable Kustomization (KS) / HelmRelease (HR) gates, force
# reconciliation to the latest Git/Helm artifacts, and optionally delete a
# StatefulSet to unblock immutable-field changes.
# Requires: fish 3.x+, kubectl, flux >= 2.x
|
||
|
||
# -------- colors / helpers --------
# Color escape sequences used by the logging helpers (_info, _ok, _warn, _err).
# They must be global: a fish function cannot see variables that the calling
# scope declared with `set -l`, so file-local colors would expand to nothing
# inside the helpers and every message would be printed uncolored.
set -g RED (set_color red)
set -g GRN (set_color green)
set -g YEL (set_color yellow)
set -g CYA (set_color cyan)
set -g RST (set_color normal)
|
||
# Print an informational message on stdout, wrapped in $CYA ... $RST.
# All arguments are joined with single spaces into one line.
function _info
    set -l msg "$argv"
    echo "$CYA$msg$RST"
end
|
||
# Print a success message on stdout, wrapped in $GRN ... $RST.
# All arguments are joined with single spaces into one line.
function _ok
    set -l msg "$argv"
    echo "$GRN$msg$RST"
end
|
||
# Print a warning message on stdout, wrapped in $YEL ... $RST.
# All arguments are joined with single spaces into one line.
function _warn
    set -l msg "$argv"
    echo "$YEL$msg$RST"
end
|
||
# Print an error message on stderr (not stdout), wrapped in $RED ... $RST.
# All arguments are joined with single spaces into one line.
function _err
    set -l msg "$argv"
    echo "$RED$msg$RST" >&2
end
|
||
# Print a debug trace line on stdout, but only when $VERBOSE is exactly "1".
# NOTE: a fish function cannot see variables its caller declared with
# `set -l`, so VERBOSE has to be global (or exported) for this helper to
# print anything.
function _dbg
    # Guard clause: stay silent (and succeed) unless verbose mode is on.
    test "$VERBOSE" = "1"; or return 0
    echo "$argv"
end
|
||
|
||
# -------- check argparse exists --------
# Abort early when the `argparse` command is unavailable, reporting the
# output of `fish --version` so the user can see which fish is in use.
type -q argparse; or begin
    _err "fish >= 3.2 with 'argparse' builtin is required. You have: "(fish --version)
    exit 1
end
|
||
|
||
# -------- small helper --------
# Split a "namespace/name" reference into its two parts.
#
# Arguments:
#   ref         reference written as "name" or "namespace/name"
#   default_ns  namespace to report when ref has no "namespace/" prefix
# Output:
#   Always exactly two lines on stdout: the namespace, then the name, so a
#   caller can capture them with command substitution and read elements [1]
#   and [2]. An empty ref yields two empty lines.
function _split_ref -a ref default_ns
    if test -z "$ref"
        printf '\n\n'
        return
    end

    set -l ns "$default_ns"
    set -l name "$ref"
    if string match -q -- '*/*' "$ref"
        # Split only at the first slash; any further slashes stay in the name.
        set -l parts (string split -m 1 -- / "$ref")
        set ns "$parts[1]"
        set name "$parts[2]"
    end

    # Both values are quoted so that an empty namespace still occupies its own
    # line instead of vanishing and shifting the name into position [1].
    printf '%s\n%s\n' "$ns" "$name"
end
|
||
|
||
# ---- robust arg parse (short+long), then echo raw flags when --verbose -------
# Option values (empty = not given) and 0/1 switches with their defaults.
set -l KS ""; set -l HR ""; set -l REPO ""; set -l STS ""
set -l NO_KS 0; set -l NO_HR 0; set -l DELETE_STS 0; set -l RESTORE 0
# VERBOSE is global rather than local because the _dbg helper reads it, and a
# fish function cannot see variables declared with `set -l` by its caller.
set -g VERBOSE 0

# Use both short and long names; fish creates _flag_<long> variables.
argparse \
    'h/help' \
    'v/verbose' \
    'K/ks=' \
    'H/hr=' \
    'R/helmrepo=' \
    'S/sts=' \
    'D/delete-sts' \
    'r/restore' \
    'X/no-ks' \
    'Y/no-hr' -- $argv
or begin
    _err "argparse failed (bad flags/values?)"
    exit 1
end

# -h/--help: print the option summary and stop before touching the cluster.
if set -q _flag_help
    set -l me (basename (status filename))
    echo "Usage: $me [options]"
    echo "  -K, --ks [NS/]NAME        Kustomization to hammer"
    echo "  -H, --hr [NS/]NAME        HelmRelease to hammer"
    echo "  -R, --helmrepo [NS/]NAME  HelmRepository to reconcile first"
    echo "  -S, --sts NAME            StatefulSet to delete (used with --delete-sts)"
    echo "  -D, --delete-sts          delete the HelmRelease's StatefulSet"
    echo "  -r, --restore             re-apply the backed-up specs at the end"
    echo "  -X, --no-ks               skip the Kustomization steps"
    echo "  -Y, --no-hr               skip the HelmRelease steps"
    echo "  -v, --verbose             print the commands being run"
    echo "  -h, --help                show this help"
    exit 0
end

# Copy flags into our own variables (arrays safe; take first element)
set -q _flag_verbose; and set VERBOSE 1
set -q _flag_ks; and set KS "$_flag_ks[1]"
set -q _flag_hr; and set HR "$_flag_hr[1]"
set -q _flag_helmrepo; and set REPO "$_flag_helmrepo[1]"
set -q _flag_sts; and set STS "$_flag_sts[1]"
set -q _flag_delete_sts; and set DELETE_STS 1
set -q _flag_restore; and set RESTORE 1
set -q _flag_no_ks; and set NO_KS 1
set -q _flag_no_hr; and set NO_HR 1

if test $VERBOSE -eq 1
    _info "raw flags:"
    _info " KS='$KS' HR='$HR' REPO='$REPO' STS='$STS'"
    _info " DELETE_STS=$DELETE_STS RESTORE=$RESTORE NO_KS=$NO_KS NO_HR=$NO_HR"
end
|
||
|
||
# ---- resolve refs -----------------------------------------------------------
# Turn each "[namespace/]name" option into a namespace + name pair. Pairs stay
# empty when the corresponding option was not given.

# Kustomization: namespace defaults to flux-system.
set -l ks_ns ""; set -l ks_name ""
if test -n "$KS"
    set -l t (_split_ref "$KS" flux-system)
    set ks_ns $t[1]; set ks_name $t[2]
end

# HelmRelease: namespace defaults to "default".
set -l hr_ns ""; set -l hr_name ""
if test -n "$HR"
    set -l t (_split_ref "$HR" default)
    set hr_ns $t[1]; set hr_name $t[2]
    # convenience for common case (vault): a bare "vault" means vault/vault.
    # Only applied when no namespace was typed, so an explicit "default/vault"
    # is honored instead of being silently redirected to another namespace.
    if test "$hr_name" = vault; and not string match -q -- '*/*' "$HR"
        set hr_ns vault
    end
end

# HelmRepository: namespace defaults to flux-system.
set -l repo_ns ""; set -l repo_name ""
if test -n "$REPO"
    set -l t (_split_ref "$REPO" flux-system)
    set repo_ns $t[1]; set repo_name $t[2]
end

# The StatefulSet is given by bare name only.
set -l sts_name "$STS"

if test "$VERBOSE" = "1"
    _info "resolve: KS='$KS' → ks_ns='$ks_ns' ks_name='$ks_name'"
    _info "resolve: HR='$HR' → hr_ns='$hr_ns' hr_name='$hr_name'"
    _info "resolve: REPO='$REPO' → repo_ns='$repo_ns' repo_name='$repo_name'"
end
|
||
|
||
# ---- banner -----------------------------------------------------------------
# Build nice single-line target labels first (avoid odd spacing): each label
# starts at its placeholder and is replaced only when a name was resolved.
set -l ks_disp "(none)"
test -n "$ks_name"; and set ks_disp "$ks_ns/$ks_name"

set -l hr_disp "(none)"
test -n "$hr_name"; and set hr_disp "$hr_ns/$hr_name"

set -l repo_disp "(none)"
test -n "$repo_name"; and set repo_disp "$repo_ns/$repo_name"

# The StatefulSet label is prefixed with the HelmRelease namespace when known.
set -l sts_disp "(auto)"
if test -n "$sts_name"
    set sts_disp "$sts_name"
    test -n "$hr_ns"; and set sts_disp "$hr_ns/$sts_name"
end

# Report the flux CLI version and the active kube context, then the targets.
set -l FLUX_VER (flux --version 2>/dev/null | string trim)
set -l KCTX (kubectl config current-context)
_info "► Flux: $FLUX_VER"
_info "► Kube context: $KCTX"
_info "► Targets: KS=$ks_disp HR=$hr_disp REPO=$repo_disp STS=$sts_disp delete-sts=$DELETE_STS verbose=$VERBOSE"
|
||
|
||
# Hard stop if nothing was requested to hammer: at least one of the
# Kustomization / HelmRelease names must have been resolved.
if test -z "$ks_name"; and test -z "$hr_name"
    _err "No --ks or --hr specified. Nothing to hammer."
    exit 1
end
|
||
|
||
# -------- backup specs (best‑effort) --------
# Dump the current Kustomization / HelmRelease objects as YAML into a fresh
# temp directory. Failures never abort the run (best-effort), but they are
# reported instead of being followed by an unconditional success message.
set -l BACK_DIR (mktemp -d /tmp/flux-hammer.XXXXXX)
set -l backup_failed 0

if test -z "$BACK_DIR"
    # Without a directory the redirections below would target "/<file>".
    _warn "⚠ Could not create a backup directory; continuing without backups."
    set backup_failed 1
else
    if test -n "$ks_name"
        set -l ks_file "$BACK_DIR/ks-$ks_ns-$ks_name.yaml"
        _dbg "+ kubectl -n $ks_ns get kustomization $ks_name -o yaml > $ks_file"
        if not kubectl -n "$ks_ns" get kustomization "$ks_name" -o yaml >"$ks_file" 2>/dev/null
            _warn "⚠ Could not back up Kustomization $ks_ns/$ks_name"
            set backup_failed 1
        end
    end
    if test -n "$hr_name"
        set -l hr_file "$BACK_DIR/hr-$hr_ns-$hr_name.yaml"
        _dbg "+ kubectl -n $hr_ns get helmrelease $hr_name -o yaml > $hr_file"
        if not kubectl -n "$hr_ns" get helmrelease "$hr_name" -o yaml >"$hr_file" 2>/dev/null
            _warn "⚠ Could not back up HelmRelease $hr_ns/$hr_name"
            set backup_failed 1
        end
    end
end

if test $backup_failed -eq 0
    _ok "✔ Backed up current specs to $BACK_DIR"
else
    _warn "⚠ Backup incomplete; --restore may not be able to revert every change."
end
|
||
|
||
# -------- suspend (so gates don't fight us) --------
# Suspend each selected object unless its --no-* switch is set. Command output
# and failures are discarded on purpose: this step is best-effort.
if test -n "$ks_name"; and test $NO_KS -eq 0
    _dbg "+ flux suspend kustomization $ks_name -n $ks_ns"
    flux suspend kustomization $ks_name -n $ks_ns >/dev/null 2>&1
end

if test -n "$hr_name"; and test $NO_HR -eq 0
    _dbg "+ flux suspend helmrelease $hr_name -n $hr_ns"
    flux suspend helmrelease $hr_name -n $hr_ns >/dev/null 2>&1
end

_ok "✔ Suspended (best‑effort)"
|
||
|
||
# -------- KS: disable ALL gates & demand reconcile --------
# Merge-patch the Kustomization so it force-applies without waiting on health
# checks, then stamp the reconcile-request annotations with a fresh timestamp.
# Output and failures are discarded (best-effort).
if test $NO_KS -eq 0 -a -n "$ks_name"
    _info "► KS: gate‑off patch (spec.force=true, spec.wait=false, spec.healthChecks=[])"
    _dbg "+ kubectl -n $ks_ns patch kustomization $ks_name --type=merge -p '{\"spec\":{\"force\":true,\"wait\":false,\"healthChecks\":[]}}'"
    kubectl -n $ks_ns patch kustomization $ks_name --type=merge -p \
        '{"spec":{"force":true,"wait":false,"healthChecks":[]}}' >/dev/null 2>&1

    # Portable timestamp: `date --iso-8601` is GNU-only and leaves the value
    # empty on BSD/macOS, which would write empty annotations.
    set -l now (date -u +%Y-%m-%dT%H:%M:%SZ)
    _dbg "+ annotate KS requestedAt=$now forceAt=$now"
    # Both annotations carry the same token and are set in one call.
    kubectl -n $ks_ns annotate kustomization $ks_name --overwrite \
        reconcile.fluxcd.io/requestedAt="$now" \
        reconcile.fluxcd.io/forceAt="$now" >/dev/null 2>&1
end
|
||
|
||
# -------- HR: force upgrade & disable waits --------
# Merge-patch the HelmRelease so upgrades are forced, install/upgrade do not
# wait, and remediation retries are 0; then stamp the reconcile-request
# annotations with a fresh timestamp. Output and failures are discarded
# (best-effort).
if test $NO_HR -eq 0 -a -n "$hr_name"
    _info "► HR: force patch (upgrade.force, install/upgrade.disableWait, retries=0)"
    _dbg "+ kubectl -n $hr_ns patch helmrelease $hr_name ... (disableWait/force)"
    kubectl -n $hr_ns patch helmrelease $hr_name --type=merge -p \
        '{"spec":{"upgrade":{"force":true,"disableWait":true,"remediation":{"retries":0}},"install":{"disableWait":true}}}' >/dev/null 2>&1

    # Portable timestamp: `date --iso-8601` is GNU-only and leaves the value
    # empty on BSD/macOS, which would write empty annotations.
    set -l now (date -u +%Y-%m-%dT%H:%M:%SZ)
    _dbg "+ annotate HR requestedAt=$now forceAt=$now"
    # Both annotations carry the same token and are set in one call.
    kubectl -n $hr_ns annotate helmrelease $hr_name --overwrite \
        reconcile.fluxcd.io/requestedAt="$now" \
        reconcile.fluxcd.io/forceAt="$now" >/dev/null 2>&1
end
|
||
|
||
# -------- reconcile sources first --------
# Always refresh the flux-system/flux-system GitRepository; additionally
# refresh the HelmRepository when one was named. Output and failures are
# discarded.
_info "► Reconcile GitRepository flux-system/flux-system"
_dbg "+ flux reconcile source git flux-system -n flux-system --timeout=2m"
flux reconcile source git flux-system -n flux-system --timeout=2m >/dev/null 2>&1

if not test -z "$repo_name"
    _info "► Reconcile HelmRepository $repo_ns/$repo_name"
    _dbg "+ flux reconcile source helm $repo_name -n $repo_ns --timeout=2m"
    flux reconcile source helm $repo_name -n $repo_ns --timeout=2m >/dev/null 2>&1
end
|
||
|
||
# -------- resume KS and force it to pick newest artifact --------
# Read the revision currently published by the flux-system GitRepository, then
# resume + reconcile the Kustomization and poll (up to 30 times, 2s apart)
# until its lastAppliedRevision contains that revision.
set -l latest_rev (kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.status.artifact.revision}' 2>/dev/null)
if test $NO_KS -eq 0 -a -n "$ks_name"
    _info "► KS: resume + reconcile (with source)"
    _dbg "+ flux resume kustomization $ks_name -n $ks_ns"
    flux resume kustomization $ks_name -n $ks_ns >/dev/null 2>&1
    _dbg "+ flux reconcile kustomization $ks_name -n $ks_ns --with-source --timeout=5m"
    flux reconcile kustomization $ks_name -n $ks_ns --with-source --timeout=5m >/dev/null 2>&1

    if test -z "$latest_rev"
        # An empty revision used as a pattern would match anything and report
        # a false success, so there is nothing meaningful to poll for.
        _warn "⚠ Could not read the GitRepository revision; skipping the revision poll."
    else
        # Match the revision literally: it is data, not a regular expression.
        set -l rev_pattern (string escape --style=regex -- "$latest_rev")

        # Poll for newest revision (don't block on health)
        set -l okrev 0
        for i in (seq 1 30)
            set -l ksrev (kubectl -n $ks_ns get kustomization $ks_name -o jsonpath='{.status.lastAppliedRevision}' 2>/dev/null)
            _dbg " poll[$i]: latest_rev='$latest_rev' ksrev='$ksrev'"
            if test -n "$ksrev"; and string match -rq -- "$rev_pattern" "$ksrev"
                set okrev 1
                break
            end
            # extra nudge mid-way: change an annotation and reconcile again
            if test $i -eq 12 -o $i -eq 24
                set -l nonce (date +%s)
                _dbg " nudge: set annotation hammer.nonce=$nonce and reconcile again"
                kubectl -n $ks_ns patch kustomization $ks_name --type=merge -p \
                    '{"metadata":{"annotations":{"hammer.nonce":"'"$nonce"'"}}}' >/dev/null 2>&1
                flux reconcile kustomization $ks_name -n $ks_ns --with-source --timeout=5m >/dev/null 2>&1
            end
            sleep 2
        end

        if test $okrev -eq 1
            _ok "✔ KS now shows revision: $latest_rev"
        else
            _warn "⚠ KS has not reported $latest_rev; gates are OFF so the apply should still proceed."
        end
    end
end
|
||
|
||
# -------- optionally delete the StatefulSet (PVCs retained) --------
# Runs only for a selected HelmRelease with --delete-sts. The StatefulSet is
# the one named via --sts, otherwise the first hit of three lookups in the
# HelmRelease namespace: instance label, exact name, then name label.
if test $NO_HR -eq 0 -a -n "$hr_name" -a $DELETE_STS -eq 1
    set -l target_sts "$sts_name"
    if test -z "$target_sts"
        _dbg "auto‑detect STS: by instance/name/app labels"
        set target_sts (kubectl -n $hr_ns get sts -l app.kubernetes.io/instance=$hr_name -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)
        # Each fallback runs only while nothing has been found yet.
        test -n "$target_sts"
        or set target_sts (kubectl -n $hr_ns get sts $hr_name -o jsonpath='{.metadata.name}' 2>/dev/null)
        test -n "$target_sts"
        or set target_sts (kubectl -n $hr_ns get sts -l app.kubernetes.io/name=$hr_name -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)
    end

    if test -z "$target_sts"
        _warn "⚠ Could not detect a StatefulSet; use --sts <name> if needed."
    else
        # Scale to zero first, then delete; output and failures are discarded.
        _warn "⚠ Deleting StatefulSet $hr_ns/$target_sts (PVCs retained)."
        _dbg "+ kubectl -n $hr_ns scale sts $target_sts --replicas=0 --timeout=2m"
        kubectl -n $hr_ns scale sts $target_sts --replicas=0 --timeout=2m >/dev/null 2>&1
        _dbg "+ kubectl -n $hr_ns delete sts $target_sts --wait=true"
        kubectl -n $hr_ns delete sts $target_sts --wait=true >/dev/null 2>&1
    end
end
|
||
|
||
# -------- resume HR and reconcile (try --force; fallback) --------
# Resume the HelmRelease, then reconcile it with --force; when that command
# exits non-zero, retry once without --force. Output is discarded.
if test -n "$hr_name"; and test $NO_HR -eq 0
    _info "► HR: resume + reconcile (with source, try --force)"
    _dbg "+ flux resume helmrelease $hr_name -n $hr_ns"
    flux resume helmrelease $hr_name -n $hr_ns >/dev/null 2>&1

    _dbg "+ flux reconcile helmrelease $hr_name -n $hr_ns --with-source --force --timeout=10m"
    if not flux reconcile helmrelease $hr_name -n $hr_ns --with-source --force --timeout=10m >/dev/null 2>&1
        _warn "HR reconcile with --force failed (likely unsupported in this build); retrying without --force."
        _dbg "+ flux reconcile helmrelease $hr_name -n $hr_ns --with-source --timeout=10m"
        flux reconcile helmrelease $hr_name -n $hr_ns --with-source --timeout=10m >/dev/null 2>&1
    end
end
|
||
|
||
# -------- status summary --------
# Print the flux status table for each object that was hammered.
_info "► Status:"
if test $NO_KS -eq 0 -a -n "$ks_name"
    flux get kustomizations $ks_name -n $ks_ns
end
if test $NO_HR -eq 0 -a -n "$hr_name"
    # Ask flux for this release by name, like the Kustomization branch above.
    # Filtering the full listing with the name as a regular expression also
    # matched other releases whose row merely contained the name.
    flux get helmreleases $hr_name -n $hr_ns
end
|
||
|
||
# -------- optional restore (turn gates back on from backup) --------
# With --restore, re-apply the YAML files saved in $BACK_DIR. A missing/empty
# backup or a failed `kubectl apply` is reported, so "Restored" is printed
# only when every selected object was actually re-applied.
if test $RESTORE -eq 1
    _info "► Restoring original specs"
    set -l restore_failed 0

    if test -n "$ks_name"
        set -l ks_file "$BACK_DIR/ks-$ks_ns-$ks_name.yaml"
        if test -s "$ks_file"
            _dbg "+ kubectl apply -f $ks_file"
            if not kubectl apply -f "$ks_file" >/dev/null 2>&1
                _warn "⚠ Failed to restore Kustomization $ks_ns/$ks_name from $ks_file"
                set restore_failed 1
            end
        else
            _warn "⚠ No backup available for Kustomization $ks_ns/$ks_name; not restored."
            set restore_failed 1
        end
    end

    if test -n "$hr_name"
        set -l hr_file "$BACK_DIR/hr-$hr_ns-$hr_name.yaml"
        if test -s "$hr_file"
            _dbg "+ kubectl apply -f $hr_file"
            if not kubectl apply -f "$hr_file" >/dev/null 2>&1
                _warn "⚠ Failed to restore HelmRelease $hr_ns/$hr_name from $hr_file"
                set restore_failed 1
            end
        else
            _warn "⚠ No backup available for HelmRelease $hr_ns/$hr_name; not restored."
            set restore_failed 1
        end
    end

    if test $restore_failed -eq 0
        _ok "✔ Restored"
    else
        _warn "⚠ Restore incomplete; gates may still be OFF. Backup directory: $BACK_DIR"
    end
else
    _warn "⚠ Not restoring KS/HR patches; gates remain OFF until you revert them in Git or re‑patch."
end

_ok "✔ Done."
|