titan-iac/scripts/flux_hammer.fish

313 lines
12 KiB
Fish
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env fish
# Flux "hammer": disable KS/HR gates, force reconciling to latest Git/Helm artifacts,
# and optionally delete a StatefulSet to unblock immutable changes.
# Requires: fish 3.x+, kubectl, flux >= 2.x
# -------- colors / helpers --------
# Color escape sequences used by the logging helpers below.
# They are global (set -g) on purpose: a fish function cannot see variables
# that are local to the script scope that calls it, so with `set -l` the
# helpers would expand these to nothing and print uncolored text.
set -g RED (set_color red)
set -g GRN (set_color green)
set -g YEL (set_color yellow)
set -g CYA (set_color cyan)
set -g RST (set_color normal)

# Print an informational message (cyan) on stdout.
function _info
    echo "$CYA$argv$RST"
end

# Print a success message (green) on stdout.
function _ok
    echo "$GRN$argv$RST"
end

# Print a warning message (yellow) on stdout.
function _warn
    echo "$YEL$argv$RST"
end

# Print an error message (red) on stderr.
function _err
    echo "$RED$argv$RST" >&2
end

# Print a plain debug message on stdout, only when $VERBOSE is "1".
# $VERBOSE is looked up from inside this function, so it has to be a global
# or exported variable to be visible here.
function _dbg
    if test "$VERBOSE" = "1"
        echo "$argv"
    end
end
# -------- check argparse exists --------
# The flag parsing further down relies on `argparse`; stop right away with an
# error on stderr when this fish does not provide it.
type -q argparse
or begin
    _err "fish >= 3.2 with 'argparse' builtin is required. You have: "(fish --version)
    exit 1
end
# -------- small helper --------
# Split a "[namespace/]name" reference into its two parts.
#
# Arguments:
#   ref         the reference; split once at the FIRST slash when it has one
#   default_ns  namespace to report when ref contains no slash
# Output (stdout): always exactly two lines, namespace then name, so that a
#   command substitution yields a two-element list. An empty ref produces
#   two empty lines.
function _split_ref -a ref default_ns
    if test -z "$ref"
        # Two empty lines so callers can always read [1] and [2]
        printf "\n\n"
        return
    end
    if string match -q -- '*/*' "$ref"
        # Split only once, at the first slash; anything after it is the name
        set -l parts (string split -m 1 / -- "$ref")
        # Quote each piece: an empty piece ("ns/" or "/name") must still take
        # its own %s slot instead of letting the other piece shift into it.
        printf "%s\n%s\n" "$parts[1]" "$parts[2]"
    else
        # Quoted for the same reason: an empty or omitted default_ns must
        # not move the name into the namespace line.
        printf "%s\n%s\n" "$default_ns" "$ref"
    end
end
# ---- robust arg parse (short+long), then echo raw flags when --verbose -------
# Defaults for every option; the parsed flags are copied into these below.
set -l KS ""
set -l HR ""
set -l REPO ""
set -l STS ""
# VERBOSE is global on purpose: the _dbg helper tests it from inside a
# function, and fish functions cannot see script-local variables. As a
# script-local, --verbose would never enable any debug output.
set -g VERBOSE 0
set -l NO_KS 0
set -l NO_HR 0
set -l DELETE_STS 0
set -l RESTORE 0

# Use both short and long names; fish creates _flag_<long> variables.
argparse \
    'h/help' \
    'v/verbose' \
    'K/ks=' \
    'H/hr=' \
    'R/helmrepo=' \
    'S/sts=' \
    'D/delete-sts' \
    'r/restore' \
    'X/no-ks' \
    'Y/no-hr' -- $argv
or begin
    _err "argparse failed (bad flags/values?)"
    exit 1
end

# -h/--help is accepted by the spec above, so honor it: print usage and stop
# before anything touches the cluster.
if set -q _flag_help
    printf '%s\n' \
        "Usage: "(status basename)" [OPTIONS]" \
        "" \
        "  -K, --ks [NS/]NAME        Kustomization to hammer (namespace defaults to flux-system)" \
        "  -H, --hr [NS/]NAME        HelmRelease to hammer (namespace defaults to default)" \
        "  -R, --helmrepo [NS/]NAME  HelmRepository to reconcile first (namespace defaults to flux-system)" \
        "  -S, --sts NAME            StatefulSet to delete with --delete-sts (auto-detected if omitted)" \
        "  -D, --delete-sts          Delete the HelmRelease's StatefulSet before reconciling it" \
        "  -r, --restore             Re-apply the backed-up KS/HR specs at the end" \
        "  -X, --no-ks               Skip the Kustomization suspend/patch/reconcile steps" \
        "  -Y, --no-hr               Skip the HelmRelease suspend/patch/reconcile steps" \
        "  -v, --verbose             Print debug output" \
        "  -h, --help                Show this help and exit"
    exit 0
end

# Copy flags into our own variables (arrays safe; take first element)
set -q _flag_verbose; and set VERBOSE 1
set -q _flag_ks; and set KS "$_flag_ks[1]"
set -q _flag_hr; and set HR "$_flag_hr[1]"
set -q _flag_helmrepo; and set REPO "$_flag_helmrepo[1]"
set -q _flag_sts; and set STS "$_flag_sts[1]"
set -q _flag_delete_sts; and set DELETE_STS 1
set -q _flag_restore; and set RESTORE 1
set -q _flag_no_ks; and set NO_KS 1
set -q _flag_no_hr; and set NO_HR 1

# With --verbose, echo what was parsed before acting on it.
if test $VERBOSE -eq 1
    _info "raw flags:"
    _info " KS='$KS' HR='$HR' REPO='$REPO' STS='$STS'"
    _info " DELETE_STS=$DELETE_STS RESTORE=$RESTORE NO_KS=$NO_KS NO_HR=$NO_HR"
end
# ---- resolve refs -----------------------------------------------------------
# Turn each "[namespace/]name" flag value into a namespace and a name.
# Values left empty on the command line keep empty namespace/name.

# Kustomization: namespace falls back to flux-system
set -l ks_ns ""
set -l ks_name ""
if string length -q -- "$KS"
    set -l pair (_split_ref "$KS" flux-system)
    set ks_ns $pair[1]
    set ks_name $pair[2]
end

# HelmRelease: namespace falls back to default
set -l hr_ns ""
set -l hr_name ""
if string length -q -- "$HR"
    set -l pair (_split_ref "$HR" default)
    set hr_ns $pair[1]
    set hr_name $pair[2]
end

# convenience for common case (vault): default/vault is looked up in the
# vault namespace instead
if test "$hr_ns" = "default"; and test "$hr_name" = "vault"
    set hr_ns vault
end

# HelmRepository: namespace falls back to flux-system
set -l repo_ns ""
set -l repo_name ""
if string length -q -- "$REPO"
    set -l pair (_split_ref "$REPO" flux-system)
    set repo_ns $pair[1]
    set repo_name $pair[2]
end

# The StatefulSet flag is a bare name; it is used as given.
set -l sts_name "$STS"

if test "$VERBOSE" = "1"
    _info "resolve: KS='$KS' → ks_ns='$ks_ns' ks_name='$ks_name'"
    _info "resolve: HR='$HR' → hr_ns='$hr_ns' hr_name='$hr_name'"
    _info "resolve: REPO='$REPO' → repo_ns='$repo_ns' repo_name='$repo_name'"
end
# ---- banner -----------------------------------------------------------------
# Show which flux build and kube context are about to be used.
set -l FLUX_VER (flux --version 2>/dev/null | string trim)
set -l KCTX (kubectl config current-context)
_info "► Flux: $FLUX_VER"
_info "► Kube context: $KCTX"

# Build nice single-line targets string (avoid odd spacing): every target
# starts from a placeholder and is overwritten only when it was requested.
set -l ks_disp "(none)"
test -n "$ks_name"; and set ks_disp "$ks_ns/$ks_name"

set -l hr_disp "(none)"
test -n "$hr_name"; and set hr_disp "$hr_ns/$hr_name"

set -l repo_disp "(none)"
test -n "$repo_name"; and set repo_disp "$repo_ns/$repo_name"

# The StatefulSet is shown under the HelmRelease namespace when one is known.
set -l sts_disp "(auto)"
if test -n "$sts_name"
    set sts_disp "$sts_name"
    test -n "$hr_ns"; and set sts_disp "$hr_ns/$sts_name"
end

_info "► Targets: KS=$ks_disp HR=$hr_disp REPO=$repo_disp STS=$sts_disp delete-sts=$DELETE_STS verbose=$VERBOSE"
# Hard stop if nothing was requested to hammer: both names are empty exactly
# when their concatenation is empty.
if test -z "$ks_name$hr_name"
    _err "No --ks or --hr specified. Nothing to hammer."
    exit 1
end
# -------- backup specs (best-effort) --------
# Dump the current Kustomization / HelmRelease objects into a fresh temp
# directory so that --restore can re-apply them later. This stays
# best-effort: failures are reported as warnings and the run continues.
set -l BACK_DIR (mktemp -d /tmp/flux-hammer.XXXXXX)
if test -z "$BACK_DIR"; or not test -d "$BACK_DIR"
    # Without a directory the redirects below would target "/ks-...yaml";
    # skip the dumps instead of writing outside a temp dir.
    set BACK_DIR ""
    _warn "⚠ Could not create a backup directory; continuing without backups."
else
    set -l backup_failed 0
    if test -n "$ks_name"
        _dbg "+ kubectl -n $ks_ns get kustomization $ks_name -o yaml > $BACK_DIR/ks-$ks_ns-$ks_name.yaml"
        kubectl -n "$ks_ns" get kustomization "$ks_name" -o yaml >"$BACK_DIR/ks-$ks_ns-$ks_name.yaml" 2>/dev/null
        or set backup_failed 1
    end
    if test -n "$hr_name"
        _dbg "+ kubectl -n $hr_ns get helmrelease $hr_name -o yaml > $BACK_DIR/hr-$hr_ns-$hr_name.yaml"
        kubectl -n "$hr_ns" get helmrelease "$hr_name" -o yaml >"$BACK_DIR/hr-$hr_ns-$hr_name.yaml" 2>/dev/null
        or set backup_failed 1
    end
    # Only claim success when every requested dump actually succeeded.
    if test $backup_failed -eq 0
        _ok "✔ Backed up current specs to $BACK_DIR"
    else
        _warn "⚠ Backup incomplete (kubectl get failed); partial files are in $BACK_DIR"
    end
end
# -------- suspend (so gates don't fight us) --------
# Suspend each requested object unless its --no-ks / --no-hr switch is set.
# Output and failures of flux are discarded, hence "best effort".
if test -n "$ks_name"; and test $NO_KS -eq 0
    _dbg "+ flux suspend kustomization $ks_name -n $ks_ns"
    flux suspend kustomization $ks_name -n $ks_ns >/dev/null 2>&1
end

if test -n "$hr_name"; and test $NO_HR -eq 0
    _dbg "+ flux suspend helmrelease $hr_name -n $hr_ns"
    flux suspend helmrelease $hr_name -n $hr_ns >/dev/null 2>&1
end

_ok "✔ Suspended (besteffort)"
# -------- KS: disable ALL gates & demand reconcile --------
# Merge-patch the Kustomization so that it force-applies, does not wait and
# has no health checks, then stamp the reconcile annotations with the
# current time. kubectl output and failures are discarded.
if test $NO_KS -eq 0 -a -n "$ks_name"
    _info "► KS: gateoff patch (spec.force=true, spec.wait=false, spec.healthChecks=[])"
    _dbg "+ kubectl -n $ks_ns patch kustomization $ks_name --type=merge -p '{\"spec\":{\"force\":true,\"wait\":false,\"healthChecks\":[]}}'"
    kubectl -n $ks_ns patch kustomization $ks_name --type=merge -p \
        '{"spec":{"force":true,"wait":false,"healthChecks":[]}}' >/dev/null 2>&1
    # Portable UTC timestamp: `date --iso-8601` is a GNU extension and
    # yields an empty value on BSD/macOS date.
    set -l now (date -u "+%Y-%m-%dT%H:%M:%SZ")
    _dbg "+ annotate KS requestedAt=$now forceAt=$now"
    kubectl -n $ks_ns annotate kustomization $ks_name reconcile.fluxcd.io/requestedAt="$now" --overwrite >/dev/null 2>&1
    kubectl -n $ks_ns annotate kustomization $ks_name reconcile.fluxcd.io/forceAt="$now" --overwrite >/dev/null 2>&1
end
# -------- HR: force upgrade & disable waits --------
# Merge-patch the HelmRelease so that upgrades are forced, install/upgrade
# do not wait and upgrade remediation does not retry, then stamp the
# reconcile annotations with the current time. kubectl output and failures
# are discarded.
if test $NO_HR -eq 0 -a -n "$hr_name"
    _info "► HR: force patch (upgrade.force, install/upgrade.disableWait, retries=0)"
    _dbg "+ kubectl -n $hr_ns patch helmrelease $hr_name ... (disableWait/force)"
    kubectl -n $hr_ns patch helmrelease $hr_name --type=merge -p \
        '{"spec":{"upgrade":{"force":true,"disableWait":true,"remediation":{"retries":0}},"install":{"disableWait":true}}}' >/dev/null 2>&1
    # Portable UTC timestamp: `date --iso-8601` is a GNU extension and
    # yields an empty value on BSD/macOS date. The same value goes into both
    # annotations.
    set -l now (date -u "+%Y-%m-%dT%H:%M:%SZ")
    _dbg "+ annotate HR requestedAt=$now forceAt=$now"
    kubectl -n $hr_ns annotate helmrelease $hr_name reconcile.fluxcd.io/requestedAt="$now" --overwrite >/dev/null 2>&1
    kubectl -n $hr_ns annotate helmrelease $hr_name reconcile.fluxcd.io/forceAt="$now" --overwrite >/dev/null 2>&1
end
# -------- reconcile sources first --------
# The GitRepository flux-system/flux-system is always refreshed; flux output
# and failures are discarded.
_info "► Reconcile GitRepository flux-system/flux-system"
_dbg "+ flux reconcile source git flux-system -n flux-system --timeout=2m"
flux reconcile source git flux-system -n flux-system --timeout=2m >/dev/null 2>&1

# The HelmRepository is refreshed only when one was named on the command line.
test -n "$repo_name"
and begin
    _info "► Reconcile HelmRepository $repo_ns/$repo_name"
    _dbg "+ flux reconcile source helm $repo_name -n $repo_ns --timeout=2m"
    flux reconcile source helm $repo_name -n $repo_ns --timeout=2m >/dev/null 2>&1
end
# -------- resume KS and force it to pick newest artifact --------
# Revision currently published by the GitRepository flux-system/flux-system;
# empty when kubectl could not read it.
set -l latest_rev (kubectl -n flux-system get gitrepository flux-system -o jsonpath='{.status.artifact.revision}' 2>/dev/null)
if test $NO_KS -eq 0 -a -n "$ks_name"
    _info "► KS: resume + reconcile (with source)"
    _dbg "+ flux resume kustomization $ks_name -n $ks_ns"
    flux resume kustomization $ks_name -n $ks_ns >/dev/null 2>&1
    _dbg "+ flux reconcile kustomization $ks_name -n $ks_ns --with-source --timeout=5m"
    flux reconcile kustomization $ks_name -n $ks_ns --with-source --timeout=5m >/dev/null 2>&1
    if test -z "$latest_rev"
        # An empty pattern would match any applied revision and report a
        # bogus success, so there is nothing meaningful to poll for.
        _warn "⚠ Could not read the latest GitRepository revision; skipping the KS revision check."
    else
        # Poll for newest revision (don't block on health): up to 30 rounds,
        # 2 seconds apart.
        set -l okrev 0
        # Match the revision literally; it is data, not a regular expression.
        set -l rev_pattern (string escape --style=regex -- "$latest_rev")
        for i in (seq 1 30)
            set -l ksrev (kubectl -n $ks_ns get kustomization $ks_name -o jsonpath='{.status.lastAppliedRevision}' 2>/dev/null)
            _dbg " poll[$i]: latest_rev='$latest_rev' ksrev='$ksrev'"
            if test -n "$ksrev"
                if string match -rq -- "$rev_pattern" "$ksrev"
                    set okrev 1
                    break
                end
            end
            # extra nudge midway: change an annotation and reconcile again
            if test $i -eq 12 -o $i -eq 24
                set -l nonce (date +%s)
                _dbg " nudge: set annotation hammer.nonce=$nonce and reconcile again"
                kubectl -n $ks_ns patch kustomization $ks_name --type=merge -p \
                    '{"metadata":{"annotations":{"hammer.nonce":"'"$nonce"'"}}}' >/dev/null 2>&1
                flux reconcile kustomization $ks_name -n $ks_ns --with-source --timeout=5m >/dev/null 2>&1
            end
            sleep 2
        end
        if test $okrev -eq 1
            _ok "✔ KS now shows revision: $latest_rev"
        else
            _warn "⚠ KS has not reported $latest_rev; gates are OFF so the apply should still proceed."
        end
    end
end
# -------- optionally delete the StatefulSet (PVCs retained) --------
# Runs only with --delete-sts, a HelmRelease target and without --no-hr.
# The StatefulSet is looked up in the HelmRelease namespace.
# NOTE(review): "PVCs retained" is the script's own claim; this block only
# scales and deletes the StatefulSet and never inspects its PVC retention
# policy - confirm for your workloads.
if test $DELETE_STS -eq 1; and test $NO_HR -eq 0; and test -n "$hr_name"
    # An explicit --sts name wins; otherwise try three lookups in order and
    # keep the first one that returns a name.
    set -l victim "$sts_name"
    if test -z "$victim"
        _dbg "autodetect STS: by instance/name/app labels"
        # 1) label app.kubernetes.io/instance=<release>
        set victim (kubectl -n $hr_ns get sts -l app.kubernetes.io/instance=$hr_name -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)
        # 2) a StatefulSet named exactly like the release
        test -z "$victim"
        and set victim (kubectl -n $hr_ns get sts $hr_name -o jsonpath='{.metadata.name}' 2>/dev/null)
        # 3) label app.kubernetes.io/name=<release>
        test -z "$victim"
        and set victim (kubectl -n $hr_ns get sts -l app.kubernetes.io/name=$hr_name -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)
    end

    if test -z "$victim"
        _warn "⚠ Could not detect a StatefulSet; use --sts <name> if needed."
    else
        _warn "⚠ Deleting StatefulSet $hr_ns/$victim (PVCs retained)."
        # Scale down first, then delete and wait for the object to be gone;
        # kubectl output and failures are discarded.
        _dbg "+ kubectl -n $hr_ns scale sts $victim --replicas=0 --timeout=2m"
        kubectl -n $hr_ns scale sts $victim --replicas=0 --timeout=2m >/dev/null 2>&1
        _dbg "+ kubectl -n $hr_ns delete sts $victim --wait=true"
        kubectl -n $hr_ns delete sts $victim --wait=true >/dev/null 2>&1
    end
end
# -------- resume HR and reconcile (try --force; fallback) --------
# Resume the HelmRelease, then reconcile it with --force; if that attempt
# exits non-zero for any reason, run the same reconcile once more without
# --force. flux output is discarded throughout.
if test -n "$hr_name"; and test $NO_HR -eq 0
    _info "► HR: resume + reconcile (with source, try --force)"
    _dbg "+ flux resume helmrelease $hr_name -n $hr_ns"
    flux resume helmrelease $hr_name -n $hr_ns >/dev/null 2>&1

    _dbg "+ flux reconcile helmrelease $hr_name -n $hr_ns --with-source --force --timeout=10m"
    if not flux reconcile helmrelease $hr_name -n $hr_ns --with-source --force --timeout=10m >/dev/null 2>&1
        _warn "HR reconcile with --force failed (likely unsupported in this build); retrying without --force."
        _dbg "+ flux reconcile helmrelease $hr_name -n $hr_ns --with-source --timeout=10m"
        flux reconcile helmrelease $hr_name -n $hr_ns --with-source --timeout=10m >/dev/null 2>&1
    end
end
# -------- status summary --------
# Print the flux status of every object that was hammered.
_info "► Status:"
if test $NO_KS -eq 0 -a -n "$ks_name"
    flux get kustomizations $ks_name -n $ks_ns
end
if test $NO_HR -eq 0 -a -n "$hr_name"
    # Keep the header row plus every row mentioning the release name.
    # --entire is required: without it `string match -r` prints only the
    # matched substring ("NAME" / the release name), not the status row.
    # The name is escaped so it is matched literally.
    set -l hr_pattern (string escape --style=regex -- "$hr_name")
    flux get helmreleases -n $hr_ns | string match --entire -r -- "NAME|$hr_pattern"
end
# -------- optional restore (turn gates back on from backup) --------
# With --restore, re-apply the spec dumps taken into $BACK_DIR before the
# patches. Success is reported only when every requested object had a
# non-empty backup AND kubectl apply accepted it.
# NOTE(review): the backups are raw `kubectl get -o yaml` dumps and still
# carry server-populated metadata; confirm that `kubectl apply` accepts them
# unchanged on your cluster.
if test $RESTORE -eq 1
    _info "► Restoring original specs"
    set -l restore_failed 0
    if test -n "$ks_name"
        set -l ks_backup "$BACK_DIR/ks-$ks_ns-$ks_name.yaml"
        if test -s "$ks_backup"
            _dbg "+ kubectl apply -f $ks_backup"
            kubectl apply -f "$ks_backup" >/dev/null 2>&1
            or begin
                _warn "⚠ kubectl apply failed for $ks_backup; Kustomization $ks_ns/$ks_name was NOT restored."
                set restore_failed 1
            end
        else
            _warn "⚠ No usable backup for Kustomization $ks_ns/$ks_name; nothing restored."
            set restore_failed 1
        end
    end
    if test -n "$hr_name"
        set -l hr_backup "$BACK_DIR/hr-$hr_ns-$hr_name.yaml"
        if test -s "$hr_backup"
            _dbg "+ kubectl apply -f $hr_backup"
            kubectl apply -f "$hr_backup" >/dev/null 2>&1
            or begin
                _warn "⚠ kubectl apply failed for $hr_backup; HelmRelease $hr_ns/$hr_name was NOT restored."
                set restore_failed 1
            end
        else
            _warn "⚠ No usable backup for HelmRelease $hr_ns/$hr_name; nothing restored."
            set restore_failed 1
        end
    end
    if test $restore_failed -eq 0
        _ok "✔ Restored"
    else
        _warn "⚠ Restore incomplete; gates may remain OFF until you revert them in Git or repatch."
    end
else
    _warn "⚠ Not restoring KS/HR patches; gates remain OFF until you revert them in Git or repatch."
end
_ok "✔ Done."