package cluster import ( "context" "fmt" "strings" "time" "scm.bstein.dev/bstein/ananke/internal/state" ) // Shutdown runs one orchestration or CLI step. // Signature: (o *Orchestrator) Shutdown(ctx context.Context, opts ShutdownOptions) (err error). // Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve. func (o *Orchestrator) Shutdown(ctx context.Context, opts ShutdownOptions) (err error) { unlock, err := state.AcquireLock(o.cfg.State.LockPath) if err != nil { return err } defer unlock() if invErr := o.validateNodeInventory(); invErr != nil { return invErr } record := state.RunRecord{ ID: fmt.Sprintf("shutdown-%d", time.Now().UnixNano()), Action: "shutdown", Reason: opts.Reason, DryRun: o.runner.DryRun, StartedAt: time.Now().UTC(), } defer o.finalizeRecord(&record, &err) if !o.runner.DryRun { if writeErr := state.MustWriteIntent(o.cfg.State.IntentPath, state.IntentShuttingDown, opts.Reason, "shutdown"); writeErr != nil { return fmt.Errorf("set shutdown intent: %w", writeErr) } defer func() { final := state.IntentShuttingDown if err == nil { final = state.IntentShutdownComplete } if writeErr := state.MustWriteIntent(o.cfg.State.IntentPath, final, opts.Reason, "shutdown"); writeErr != nil { o.log.Printf("warning: write shutdown completion intent failed: %v", writeErr) } }() } workers, err := o.effectiveWorkers(ctx) if err != nil { return err } o.log.Printf("shutdown control-planes=%s workers=%s", strings.Join(o.cfg.ControlPlanes, ","), strings.Join(workers, ",")) o.reportFluxSource(ctx, "") skipEtcd := opts.SkipEtcdSnapshot || o.cfg.Shutdown.SkipEtcdSnapshot if !skipEtcd { o.bestEffort("etcd snapshot", func() error { return o.takeEtcdSnapshot(ctx, o.cfg.ControlPlanes[0]) }) } o.bestEffort("suspend flux", func() error { return o.patchFluxSuspendAll(ctx, true) }) o.bestEffort("scale down apps", func() error { return o.scaleDownApps(ctx) }) skipDrain := opts.SkipDrain || o.cfg.Shutdown.SkipDrain if !skipDrain { o.bestEffort("drain workers", func() error { return o.drainWorkers(ctx, workers) }) } shutdownMode := strings.TrimSpace(opts.Mode) effectiveMode, modeErr := normalizeShutdownMode(shutdownMode) if modeErr != nil { return modeErr } o.log.Printf("shutdown execution mode=%s (requested=%q)", effectiveMode, shutdownMode) o.stopWorkers(ctx, workers) o.stopControlPlanes(ctx, o.cfg.ControlPlanes) o.log.Printf("shutdown flow complete") return nil }