titan-iac/scripts/flux_hammer.fish

313 lines
12 KiB
Fish
Raw Normal View History

2025-08-19 22:15:57 -05:00
#!/usr/bin/env fish
2025-08-21 07:41:55 -05:00
# Flux "hammer": disable KS/HR gates, force reconciling to latest Git/Helm artifacts,
# and optionally delete a StatefulSet to unblock immutable changes.
# Requires: fish 3.x+, kubectl, flux >= 2.x
# -------- colors / helpers --------
# Colour escape sequences used by the logging helpers below.
# They are global (-g) on purpose: a fish function cannot see variables that
# are local to the scope it was called from, so with `set -l` every helper
# would expand $RED/$GRN/... to nothing and print uncoloured text.
set -g RED (set_color red)
set -g GRN (set_color green)
set -g YEL (set_color yellow)
set -g CYA (set_color cyan)
set -g RST (set_color normal)

# _info MSG...: print MSG in cyan on stdout.
function _info
    echo "$CYA$argv$RST"
end

# _ok MSG...: print MSG in green on stdout.
function _ok
    echo "$GRN$argv$RST"
end

# _warn MSG...: print MSG in yellow on stdout.
function _warn
    echo "$YEL$argv$RST"
end

# _err MSG...: print MSG in red on stderr.
function _err
    echo "$RED$argv$RST" >&2
end
# _dbg MSG...: print MSG (uncoloured, on stdout) only when $VERBOSE is "1".
# NOTE(review): $VERBOSE is read from inside this function, so it must be a
# global or exported variable to be visible here.
function _dbg
    if test "$VERBOSE" = "1"
        echo "$argv"
    end
end
2025-08-21 07:41:55 -05:00
# -------- check argparse exists --------
# The flag parsing further down relies on the `argparse` command; stop right
# away, reporting the running fish version, when it cannot be found.
type -q argparse
or begin
    set -l fish_ver (fish --version)
    _err "fish >= 3.2 with 'argparse' builtin is required. You have: "$fish_ver
    exit 1
end
2025-08-19 22:15:57 -05:00
2025-08-21 07:41:55 -05:00
# -------- small helper --------
# _split_ref REF DEFAULT_NS
#   Split a "[namespace/]name" reference into namespace and name.
#   Always prints exactly two lines on stdout (namespace, then name) so that a
#   caller using command substitution receives a two-element list:
#     REF = ""         -> two empty lines
#     REF = "ns/name"  -> "ns" and "name" (split at the first slash only)
#     REF = "name"     -> DEFAULT_NS and "name"
function _split_ref -a ref default_ns
    if test -z "$ref"
        # Return two empty lines so callers can always read [1] and [2].
        printf "\n\n"
        return
    end

    if string match -rq '/' -- "$ref"
        # Split only once, at the first slash; later slashes stay in the name.
        set -l parts (string split -m 1 '/' -- "$ref")
        # Each piece goes on its own line so command substitution yields two
        # separate list items.
        printf "%s\n%s\n" "$parts[1]" "$parts[2]"
    else
        # Quoted so a missing DEFAULT_NS still yields an (empty) namespace line
        # instead of shifting the name into the namespace slot.
        printf "%s\n%s\n" "$default_ns" "$ref"
    end
end
2025-08-19 22:15:57 -05:00
2025-08-21 07:41:55 -05:00
# ---- robust arg parse (short+long), then echo raw flags when --verbose -------
# Defaults for everything the flags can set.
set -l KS ""; set -l HR ""; set -l REPO ""; set -l STS ""
set -l NO_KS 0; set -l NO_HR 0; set -l DELETE_STS 0; set -l RESTORE 0
# VERBOSE is read inside the _dbg function. fish functions cannot see the
# caller's local variables, so it has to be global; as a local, --verbose
# would never produce any debug output.
set -g VERBOSE 0

# Use both short and long names; fish creates _flag_<long> variables
# (dashes in the long name become underscores).
argparse \
    'h/help' \
    'v/verbose' \
    'K/ks=' \
    'H/hr=' \
    'R/helmrepo=' \
    'S/sts=' \
    'D/delete-sts' \
    'r/restore' \
    'X/no-ks' \
    'Y/no-hr' -- $argv
or begin
    _err "argparse failed (bad flags/values?)"
    exit 1
end

# -h/--help is declared above, so act on it: print usage and stop before
# anything talks to the cluster.
if set -q _flag_help
    set -l me (status basename)
    test -n "$me"; or set me flux_hammer.fish
    echo "Usage: $me [options]"
    echo "  -K, --ks [NS/]NAME        Kustomization to hammer (NS defaults to flux-system)"
    echo "  -H, --hr [NS/]NAME        HelmRelease to hammer (NS defaults to default)"
    echo "  -R, --helmrepo [NS/]NAME  HelmRepository to reconcile first (NS defaults to flux-system)"
    echo "  -S, --sts NAME            StatefulSet to delete with --delete-sts (auto-detected if omitted)"
    echo "  -D, --delete-sts          Delete the StatefulSet before reconciling the HelmRelease"
    echo "  -r, --restore             Re-apply the backed-up KS/HR specs at the end"
    echo "  -X, --no-ks               Do not suspend/patch/reconcile the Kustomization"
    echo "  -Y, --no-hr               Do not suspend/patch/reconcile the HelmRelease"
    echo "  -v, --verbose             Print the commands as they are run"
    echo "  -h, --help                Show this help"
    exit 0
end

# Copy flags into our own variables (arrays safe; take first element)
set -q _flag_verbose; and set VERBOSE 1
set -q _flag_ks; and set KS "$_flag_ks[1]"
set -q _flag_hr; and set HR "$_flag_hr[1]"
set -q _flag_helmrepo; and set REPO "$_flag_helmrepo[1]"
set -q _flag_sts; and set STS "$_flag_sts[1]"
set -q _flag_delete_sts; and set DELETE_STS 1
set -q _flag_restore; and set RESTORE 1
set -q _flag_no_ks; and set NO_KS 1
set -q _flag_no_hr; and set NO_HR 1

if test $VERBOSE -eq 1
    _info "raw flags:"
    _info " KS='$KS' HR='$HR' REPO='$REPO' STS='$STS'"
    _info " DELETE_STS=$DELETE_STS RESTORE=$RESTORE NO_KS=$NO_KS NO_HR=$NO_HR"
end
# ---- resolve refs -----------------------------------------------------------
# Turn every [NS/]NAME flag value into a namespace/name pair. _split_ref
# prints two lines, so the command substitution yields a two-element list.

# Kustomization: namespace defaults to flux-system.
set -l ks_ns ""; set -l ks_name ""
if test -n "$KS"
    set -l t (_split_ref "$KS" flux-system)
    set ks_ns $t[1]
    set ks_name $t[2]
end

# HelmRelease: namespace defaults to "default".
set -l hr_ns ""; set -l hr_name ""
if test -n "$HR"
    set -l t (_split_ref "$HR" default)
    set hr_ns $t[1]
    set hr_name $t[2]
end

# convenience for common case (vault)
# NOTE(review): this also rewrites an explicitly given "default/vault" to the
# vault namespace - confirm that overriding an explicit namespace is intended.
if test "$hr_ns" = "default" -a "$hr_name" = "vault"
    set hr_ns vault
end

# HelmRepository: namespace defaults to flux-system.
set -l repo_ns ""; set -l repo_name ""
if test -n "$REPO"
    set -l t (_split_ref "$REPO" flux-system)
    set repo_ns $t[1]
    set repo_name $t[2]
end

# The StatefulSet is given by bare name only; it is not split.
set -l sts_name "$STS"

if test "$VERBOSE" = "1"
    _info "resolve: KS='$KS' → ks_ns='$ks_ns' ks_name='$ks_name'"
    _info "resolve: HR='$HR' → hr_ns='$hr_ns' hr_name='$hr_name'"
    _info "resolve: REPO='$REPO' → repo_ns='$repo_ns' repo_name='$repo_name'"
end
# ---- banner -----------------------------------------------------------------
# Report which flux binary and which kube context this run will act on.
set -l FLUX_VER (flux --version 2>/dev/null | string trim)
set -l KCTX (kubectl config current-context)
_info "► Flux: $FLUX_VER"
_info "► Kube context: $KCTX"

# One label per target for the single-line summary. Each label starts as its
# placeholder and is replaced only when the matching name was resolved.
set -l ks_disp "(none)"
test -n "$ks_name"; and set ks_disp "$ks_ns/$ks_name"

set -l hr_disp "(none)"
test -n "$hr_name"; and set hr_disp "$hr_ns/$hr_name"

set -l repo_disp "(none)"
test -n "$repo_name"; and set repo_disp "$repo_ns/$repo_name"

# The StatefulSet label is prefixed with the HelmRelease namespace when one
# is known.
set -l sts_disp "(auto)"
if test -n "$sts_name"
    set sts_disp "$sts_name"
    test -n "$hr_ns"; and set sts_disp "$hr_ns/$sts_name"
end

_info "► Targets: KS=$ks_disp HR=$hr_disp REPO=$repo_disp STS=$sts_disp delete-sts=$DELETE_STS verbose=$VERBOSE"

# Refuse to continue when neither a Kustomization nor a HelmRelease was named.
if test -z "$ks_name$hr_name"
    _err "No --ks or --hr specified. Nothing to hammer."
    exit 1
end
# -------- backup specs (best-effort) --------
# Dump the current KS/HR objects as YAML into a fresh temp directory. The
# restore step at the end of the script reads these files back.
set -l BACK_DIR (mktemp -d /tmp/flux-hammer.XXXXXX)
if not test -d "$BACK_DIR"
    # Without this check an empty BACK_DIR would make the redirections below
    # write to "/ks-..." and "/hr-...".
    _err "Could not create a backup directory with mktemp; aborting before anything is changed."
    exit 1
end
if test -n "$ks_name"
    _dbg "+ kubectl -n $ks_ns get kustomization $ks_name -o yaml > $BACK_DIR/ks-$ks_ns-$ks_name.yaml"
    # Still best-effort: a failed dump is reported but does not stop the run.
    kubectl -n "$ks_ns" get kustomization "$ks_name" -o yaml >"$BACK_DIR/ks-$ks_ns-$ks_name.yaml" 2>/dev/null
    or _warn "⚠ Could not back up Kustomization $ks_ns/$ks_name"
end
if test -n "$hr_name"
    _dbg "+ kubectl -n $hr_ns get helmrelease $hr_name -o yaml > $BACK_DIR/hr-$hr_ns-$hr_name.yaml"
    kubectl -n "$hr_ns" get helmrelease "$hr_name" -o yaml >"$BACK_DIR/hr-$hr_ns-$hr_name.yaml" 2>/dev/null
    or _warn "⚠ Could not back up HelmRelease $hr_ns/$hr_name"
end
_ok "✔ Backed up current specs to $BACK_DIR"
# -------- suspend (so gates don't fight us) --------
# Suspend each selected object before it is patched. Both calls are
# fire-and-forget: their output is discarded and their exit status ignored.
if test -n "$ks_name"; and test $NO_KS -eq 0
    _dbg "+ flux suspend kustomization $ks_name -n $ks_ns"
    flux suspend kustomization $ks_name -n $ks_ns >/dev/null 2>&1
end

if test -n "$hr_name"; and test $NO_HR -eq 0
    _dbg "+ flux suspend helmrelease $hr_name -n $hr_ns"
    flux suspend helmrelease $hr_name -n $hr_ns >/dev/null 2>&1
end

_ok "✔ Suspended (besteffort)"
# -------- KS: disable ALL gates & demand reconcile --------
# Merge-patch the Kustomization so that spec.force=true, spec.wait=false and
# spec.healthChecks is empty, then stamp it with reconcile annotations.
# All kubectl output is discarded; failures do not stop the script.
if test $NO_KS -eq 0 -a -n "$ks_name"
    _info "► KS: gateoff patch (spec.force=true, spec.wait=false, spec.healthChecks=[])"
    _dbg "+ kubectl -n $ks_ns patch kustomization $ks_name --type=merge -p '{\"spec\":{\"force\":true,\"wait\":false,\"healthChecks\":[]}}'"
    kubectl -n $ks_ns patch kustomization $ks_name --type=merge -p \
        '{"spec":{"force":true,"wait":false,"healthChecks":[]}}' >/dev/null 2>&1

    # Timestamp used as the annotation value. `date --iso-8601` is a GNU-only
    # option (on BSD/macOS it fails and leaves the value empty), so build the
    # ISO-8601 string from a plain format instead.
    set -l now (date -u "+%Y-%m-%dT%H:%M:%SZ")
    _dbg "+ annotate KS requestedAt=$now forceAt=$now"
    kubectl -n $ks_ns annotate kustomization $ks_name reconcile.fluxcd.io/requestedAt="$now" --overwrite >/dev/null 2>&1
    kubectl -n $ks_ns annotate kustomization $ks_name reconcile.fluxcd.io/forceAt="$now" --overwrite >/dev/null 2>&1
end
# -------- HR: force upgrade & disable waits --------
# Merge-patch the HelmRelease: upgrade.force=true, disableWait=true for both
# install and upgrade, and upgrade.remediation.retries=0. Then stamp it with
# reconcile annotations. All kubectl output is discarded; failures do not stop
# the script.
if test $NO_HR -eq 0 -a -n "$hr_name"
    _info "► HR: force patch (upgrade.force, install/upgrade.disableWait, retries=0)"
    _dbg "+ kubectl -n $hr_ns patch helmrelease $hr_name ... (disableWait/force)"
    kubectl -n $hr_ns patch helmrelease $hr_name --type=merge -p \
        '{"spec":{"upgrade":{"force":true,"disableWait":true,"remediation":{"retries":0}},"install":{"disableWait":true}}}' >/dev/null 2>&1

    # Timestamp used as the annotation value. `date --iso-8601` is a GNU-only
    # option (on BSD/macOS it fails and leaves the value empty), so build the
    # ISO-8601 string from a plain format instead.
    # requestedAt and forceAt deliberately receive the same value.
    set -l now (date -u "+%Y-%m-%dT%H:%M:%SZ")
    _dbg "+ annotate HR requestedAt=$now forceAt=$now"
    kubectl -n $hr_ns annotate helmrelease $hr_name reconcile.fluxcd.io/requestedAt="$now" --overwrite >/dev/null 2>&1
    kubectl -n $hr_ns annotate helmrelease $hr_name reconcile.fluxcd.io/forceAt="$now" --overwrite >/dev/null 2>&1
end
# -------- reconcile sources first --------
# The flux-system/flux-system GitRepository is always reconciled; the
# HelmRepository only when one was named on the command line. Output and
# exit status of both calls are ignored.
_info "► Reconcile GitRepository flux-system/flux-system"
_dbg "+ flux reconcile source git flux-system -n flux-system --timeout=2m"
flux reconcile source git flux-system -n flux-system --timeout=2m >/dev/null 2>&1

string length -q -- "$repo_name"
and begin
    _info "► Reconcile HelmRepository $repo_ns/$repo_name"
    _dbg "+ flux reconcile source helm $repo_name -n $repo_ns --timeout=2m"
    flux reconcile source helm $repo_name -n $repo_ns --timeout=2m >/dev/null 2>&1
end
# -------- resume KS and force it to pick newest artifact --------
# Revision currently published by the flux-system GitRepository. The
# Kustomization is polled below until its lastAppliedRevision contains it.
set -l latest_rev (kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.status.artifact.revision}' 2>/dev/null)
if test $NO_KS -eq 0 -a -n "$ks_name"
    _info "► KS: resume + reconcile (with source)"
    _dbg "+ flux resume kustomization $ks_name -n $ks_ns"
    flux resume kustomization $ks_name -n $ks_ns >/dev/null 2>&1
    _dbg "+ flux reconcile kustomization $ks_name -n $ks_ns --with-source --timeout=5m"
    flux reconcile kustomization $ks_name -n $ks_ns --with-source --timeout=5m >/dev/null 2>&1

    if test -z "$latest_rev"
        # An empty revision used as a pattern matches everything, which would
        # report success for whatever the Kustomization happens to show.
        _warn "⚠ Could not read the GitRepository revision; skipping the KS revision poll."
    else
        # The revision is data, not a pattern: escape it so characters such
        # as '.' or '+' are matched literally (still a substring match).
        set -l rev_pattern (string escape --style=regex -- "$latest_rev")

        # Poll for newest revision (don't block on health):
        # up to 30 rounds, sleeping 2 seconds after each.
        set -l okrev 0
        for i in (seq 1 30)
            set -l ksrev (kubectl -n $ks_ns get kustomization $ks_name -o jsonpath='{.status.lastAppliedRevision}' 2>/dev/null)
            _dbg " poll[$i]: latest_rev='$latest_rev' ksrev='$ksrev'"
            if test -n "$ksrev"
                if string match -rq -- "$rev_pattern" "$ksrev"
                    set okrev 1; break
                end
            end
            # extra nudge on rounds 12 and 24: change an annotation so the
            # object differs, then ask for another reconcile
            if test $i -eq 12 -o $i -eq 24
                set -l nonce (date +%s)
                _dbg " nudge: set annotation hammer.nonce=$nonce and reconcile again"
                kubectl -n $ks_ns patch kustomization $ks_name --type=merge -p \
                    '{"metadata":{"annotations":{"hammer.nonce":"'"$nonce"'"}}}' >/dev/null 2>&1
                flux reconcile kustomization $ks_name -n $ks_ns --with-source --timeout=5m >/dev/null 2>&1
            end
            sleep 2
        end
        if test $okrev -eq 1
            _ok "✔ KS now shows revision: $latest_rev"
        else
            _warn "⚠ KS has not reported $latest_rev; gates are OFF so the apply should still proceed."
        end
    end
end
# -------- optionally delete the StatefulSet (PVCs retained) --------
# Runs only with --delete-sts, and only when a HelmRelease is being hammered.
if test $DELETE_STS -eq 1; and test $NO_HR -eq 0 -a -n "$hr_name"
    # Start from the name given with --sts; detect one only when it is empty.
    set -l sts_to_del "$sts_name"
    if test -z "$sts_to_del"
        _dbg "autodetect STS: by instance/name/app labels"
        # Three lookups in the HelmRelease namespace, first hit wins:
        #   1. label app.kubernetes.io/instance=<hr name>
        #   2. a StatefulSet named exactly like the HelmRelease
        #   3. label app.kubernetes.io/name=<hr name>
        set sts_to_del (kubectl -n $hr_ns get sts -l app.kubernetes.io/instance=$hr_name -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)
        test -n "$sts_to_del"
        or set sts_to_del (kubectl -n $hr_ns get sts $hr_name -o jsonpath='{.metadata.name}' 2>/dev/null)
        test -n "$sts_to_del"
        or set sts_to_del (kubectl -n $hr_ns get sts -l app.kubernetes.io/name=$hr_name -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)
    end

    if test -z "$sts_to_del"
        _warn "⚠ Could not detect a StatefulSet; use --sts <name> if needed."
    else
        _warn "⚠ Deleting StatefulSet $hr_ns/$sts_to_del (PVCs retained)."
        # Scale to zero first, then delete the object and wait for removal.
        _dbg "+ kubectl -n $hr_ns scale sts $sts_to_del --replicas=0 --timeout=2m"
        kubectl -n $hr_ns scale sts $sts_to_del --replicas=0 --timeout=2m >/dev/null 2>&1
        _dbg "+ kubectl -n $hr_ns delete sts $sts_to_del --wait=true"
        kubectl -n $hr_ns delete sts $sts_to_del --wait=true >/dev/null 2>&1
    end
end
# -------- resume HR and reconcile (try --force; fallback) --------
# Resume the HelmRelease, then reconcile it with --force. If that command
# exits non-zero, run the same reconcile once more without --force.
if test $NO_HR -eq 0 -a -n "$hr_name"
    _info "► HR: resume + reconcile (with source, try --force)"
    _dbg "+ flux resume helmrelease $hr_name -n $hr_ns"
    flux resume helmrelease $hr_name -n $hr_ns >/dev/null 2>&1

    _dbg "+ flux reconcile helmrelease $hr_name -n $hr_ns --with-source --force --timeout=10m"
    if not flux reconcile helmrelease $hr_name -n $hr_ns --with-source --force --timeout=10m >/dev/null 2>&1
        _warn "HR reconcile with --force failed (likely unsupported in this build); retrying without --force."
        _dbg "+ flux reconcile helmrelease $hr_name -n $hr_ns --with-source --timeout=10m"
        flux reconcile helmrelease $hr_name -n $hr_ns --with-source --timeout=10m >/dev/null 2>&1
    end
end
# -------- status summary --------
# Print the flux status table for each object that was hammered.
_info "► Status:"
if test $NO_KS -eq 0 -a -n "$ks_name"
    flux get kustomizations $ks_name -n $ks_ns
end
if test $NO_HR -eq 0 -a -n "$hr_name"
    # Ask for the release by name, like the Kustomization above. Filtering
    # the whole listing with an unescaped, unanchored regex also printed
    # every other release whose row contained the name.
    flux get helmreleases $hr_name -n $hr_ns
end
# -------- optional restore (turn gates back on from backup) --------
# With --restore, re-apply the YAML dumps taken by the backup step. Success
# is only reported when every requested object was actually re-applied.
# NOTE(review): the backups are raw `kubectl get -o yaml` dumps; confirm
# against a live cluster that `kubectl apply` accepts them and really reverts
# the patched fields.
if test $RESTORE -eq 1
    _info "► Restoring original specs"
    set -l restore_failed 0

    if test -n "$ks_name"
        if test -s "$BACK_DIR/ks-$ks_ns-$ks_name.yaml"
            _dbg "+ kubectl apply -f $BACK_DIR/ks-$ks_ns-$ks_name.yaml"
            kubectl apply -f "$BACK_DIR/ks-$ks_ns-$ks_name.yaml" >/dev/null 2>&1
            or begin
                set restore_failed 1
                _warn "⚠ Failed to re-apply $BACK_DIR/ks-$ks_ns-$ks_name.yaml"
            end
        else
            # Missing or empty file: the backup step could not dump the object.
            set restore_failed 1
            _warn "⚠ No backup found for Kustomization $ks_ns/$ks_name; nothing to restore."
        end
    end

    if test -n "$hr_name"
        if test -s "$BACK_DIR/hr-$hr_ns-$hr_name.yaml"
            _dbg "+ kubectl apply -f $BACK_DIR/hr-$hr_ns-$hr_name.yaml"
            kubectl apply -f "$BACK_DIR/hr-$hr_ns-$hr_name.yaml" >/dev/null 2>&1
            or begin
                set restore_failed 1
                _warn "⚠ Failed to re-apply $BACK_DIR/hr-$hr_ns-$hr_name.yaml"
            end
        else
            set restore_failed 1
            _warn "⚠ No backup found for HelmRelease $hr_ns/$hr_name; nothing to restore."
        end
    end

    if test $restore_failed -eq 0
        _ok "✔ Restored"
    else
        _warn "⚠ Restore incomplete; gates may still be OFF. Backups are in $BACK_DIR"
    end
else
    _warn "⚠ Not restoring KS/HR patches; gates remain OFF until you revert them in Git or repatch."
end
_ok "✔ Done."