ananke/internal/cluster/orchestrator_shutdown.go
2026-06-19 15:43:44 -03:00

83 lines
2.5 KiB
Go

package cluster
import (
"context"
"fmt"
"strings"
"time"
"scm.bstein.dev/bstein/ananke/internal/state"
)
// Shutdown drives the cluster shutdown flow: optional etcd snapshot, Flux
// suspension, app scale-down, optional worker drain, then stopping workers and
// control planes.
// Signature: (o *Orchestrator) Shutdown(ctx context.Context, opts ShutdownOptions) (err error).
// Why: the requested mode is validated before the intent file or the cluster is
// touched, so an unusable mode cannot leave the cluster half shut down.
//
// The state lock at o.cfg.State.LockPath is held for the whole call, and the
// run record is handed to o.finalizeRecord on return together with the final
// error. Unless the runner is in dry-run mode, the intent file is set to
// state.IntentShuttingDown before work starts and rewritten on exit:
// state.IntentShutdownComplete when err is nil, state.IntentShuttingDown
// otherwise.
//
// The snapshot, Flux, scale-down and drain steps run through o.bestEffort;
// their errors are not returned by Shutdown. Errors that are returned come
// from lock acquisition, node inventory validation, mode normalization, the
// initial intent write, and worker discovery.
func (o *Orchestrator) Shutdown(ctx context.Context, opts ShutdownOptions) (err error) {
	unlock, err := state.AcquireLock(o.cfg.State.LockPath)
	if err != nil {
		return err
	}
	defer unlock()

	if invErr := o.validateNodeInventory(); invErr != nil {
		return invErr
	}

	// Read the clock once so the record ID and StartedAt describe the same instant.
	startedAt := time.Now()
	record := state.RunRecord{
		ID:        fmt.Sprintf("shutdown-%d", startedAt.UnixNano()),
		Action:    "shutdown",
		Reason:    opts.Reason,
		DryRun:    o.runner.DryRun,
		StartedAt: startedAt.UTC(),
	}
	// finalizeRecord receives a pointer to the named result so it observes
	// whatever error the function ultimately returns.
	defer o.finalizeRecord(&record, &err)

	// Reject an invalid mode up front: failing here costs nothing, whereas
	// failing after Flux is suspended and workers are drained would strand the
	// cluster mid-shutdown with the intent stuck at "shutting down".
	shutdownMode := strings.TrimSpace(opts.Mode)
	effectiveMode, modeErr := normalizeShutdownMode(shutdownMode)
	if modeErr != nil {
		return modeErr
	}

	if !o.runner.DryRun {
		if writeErr := state.MustWriteIntent(o.cfg.State.IntentPath, state.IntentShuttingDown, opts.Reason, "shutdown"); writeErr != nil {
			return fmt.Errorf("set shutdown intent: %w", writeErr)
		}
		// Runs before finalizeRecord and unlock (defers are LIFO), so the final
		// intent is written while the state lock is still held.
		defer func() {
			final := state.IntentShuttingDown
			if err == nil {
				final = state.IntentShutdownComplete
			}
			if writeErr := state.MustWriteIntent(o.cfg.State.IntentPath, final, opts.Reason, "shutdown"); writeErr != nil {
				o.log.Printf("warning: write shutdown completion intent failed: %v", writeErr)
			}
		}()
	}

	workers, err := o.effectiveWorkers(ctx)
	if err != nil {
		return err
	}
	o.log.Printf("shutdown control-planes=%s workers=%s", strings.Join(o.cfg.ControlPlanes, ","), strings.Join(workers, ","))
	o.reportFluxSource(ctx, "")

	// Either the per-call option or the configured default can skip a step.
	skipEtcd := opts.SkipEtcdSnapshot || o.cfg.Shutdown.SkipEtcdSnapshot
	if !skipEtcd {
		o.bestEffort("etcd snapshot", func() error {
			// Guard the index so an empty control-plane list surfaces as a
			// best-effort failure instead of a panic.
			if len(o.cfg.ControlPlanes) == 0 {
				return fmt.Errorf("no control planes configured")
			}
			return o.takeEtcdSnapshot(ctx, o.cfg.ControlPlanes[0])
		})
	}

	o.bestEffort("suspend flux", func() error { return o.patchFluxSuspendAll(ctx, true) })
	o.bestEffort("scale down apps", func() error { return o.scaleDownApps(ctx) })

	skipDrain := opts.SkipDrain || o.cfg.Shutdown.SkipDrain
	if !skipDrain {
		o.bestEffort("drain workers", func() error { return o.drainWorkers(ctx, workers) })
	}

	o.log.Printf("shutdown execution mode=%s (requested=%q)", effectiveMode, shutdownMode)
	// Workers are stopped before control planes.
	o.stopWorkers(ctx, workers)
	o.stopControlPlanes(ctx, o.cfg.ControlPlanes)
	o.log.Printf("shutdown flow complete")
	return nil
}