From 1f54cd3d46b085b0f90dc923a5532e92dac3a577 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 7 Apr 2026 20:58:41 -0300 Subject: [PATCH] shutdown: default to cluster-only and require explicit poweroff --- README.md | 3 ++- cmd/ananke/main.go | 4 ++-- configs/ananke.example.yaml | 7 +++---- configs/ananke.tethys.yaml | 4 ++-- configs/ananke.titan-db.yaml | 7 +++---- internal/cluster/orchestrator.go | 7 +++++-- internal/config/config.go | 4 ++-- internal/service/daemon.go | 2 +- 8 files changed, 20 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 4ab8923..e873965 100644 --- a/README.md +++ b/README.md @@ -61,9 +61,10 @@ sudo systemctl start ananke-update.service ## Shutdown modes (explicit) `ananke shutdown` now supports explicit mode selection: +- when `--mode` is omitted, the default is `cluster-only` (host poweroff is not performed) - `--mode config`: use config default (`shutdown.poweroff_enabled`) - `--mode cluster-only`: stop cluster services only (no host poweroff) -- `--mode poweroff`: include host poweroff path +- `--mode poweroff`: include host poweroff path (explicit only) This removes ambiguity during drills. 
diff --git a/cmd/ananke/main.go b/cmd/ananke/main.go index fc079a3..e5430e3 100644 --- a/cmd/ananke/main.go +++ b/cmd/ananke/main.go @@ -146,7 +146,7 @@ func runShutdown(logger *log.Logger, args []string) error { execute := fs.Bool("execute", false, "Actually execute changes (default dry-run)") skipEtcd := fs.Bool("skip-etcd-snapshot", false, "Skip etcd snapshot") skipDrain := fs.Bool("skip-drain", false, "Skip worker drain") - mode := fs.String("mode", "config", "Shutdown mode: config|cluster-only|poweroff") + mode := fs.String("mode", "cluster-only", "Shutdown mode: config|cluster-only|poweroff") reason := fs.String("reason", "manual-shutdown", "Shutdown reason for run history") _ = fs.Parse(args) @@ -426,7 +426,7 @@ Commands: Examples: ananke startup --config /etc/ananke/ananke.yaml --execute --force-flux-branch main - ananke shutdown --config /etc/ananke/ananke.yaml --execute --reason "manual-maintenance" + ananke shutdown --config /etc/ananke/ananke.yaml --execute --mode cluster-only --reason "manual-maintenance" ananke etcd-restore --config /etc/ananke/ananke.yaml --execute ananke daemon --config /etc/ananke/ananke.yaml ananke status --config /etc/ananke/ananke.yaml diff --git a/configs/ananke.example.yaml b/configs/ananke.example.yaml index fbfe2e5..c708745 100644 --- a/configs/ananke.example.yaml +++ b/configs/ananke.example.yaml @@ -130,11 +130,10 @@ shutdown: drain_parallelism: 6 scale_parallelism: 8 ssh_parallelism: 8 - poweroff_enabled: true + poweroff_enabled: false poweroff_delay_seconds: 25 - poweroff_local_host: true - extra_poweroff_hosts: - - titan-db + poweroff_local_host: false + extra_poweroff_hosts: [] ups: enabled: true provider: nut diff --git a/configs/ananke.tethys.yaml b/configs/ananke.tethys.yaml index d9cd66d..adb1863 100644 --- a/configs/ananke.tethys.yaml +++ b/configs/ananke.tethys.yaml @@ -196,9 +196,9 @@ shutdown: drain_parallelism: 6 scale_parallelism: 8 ssh_parallelism: 8 - poweroff_enabled: true + poweroff_enabled: false 
poweroff_delay_seconds: 25 - poweroff_local_host: true + poweroff_local_host: false extra_poweroff_hosts: [] ups: enabled: true diff --git a/configs/ananke.titan-db.yaml b/configs/ananke.titan-db.yaml index 160df6e..12a2e60 100644 --- a/configs/ananke.titan-db.yaml +++ b/configs/ananke.titan-db.yaml @@ -196,11 +196,10 @@ shutdown: drain_parallelism: 6 scale_parallelism: 8 ssh_parallelism: 8 - poweroff_enabled: true + poweroff_enabled: false poweroff_delay_seconds: 25 - poweroff_local_host: true - extra_poweroff_hosts: - - titan-db + poweroff_local_host: false + extra_poweroff_hosts: [] ups: enabled: true provider: nut diff --git a/internal/cluster/orchestrator.go b/internal/cluster/orchestrator.go index eda3103..c50633c 100644 --- a/internal/cluster/orchestrator.go +++ b/internal/cluster/orchestrator.go @@ -469,8 +469,11 @@ func (o *Orchestrator) Shutdown(ctx context.Context, opts ShutdownOptions) (err shutdownMode := strings.TrimSpace(opts.Mode) poweroffEnabled := o.cfg.Shutdown.PoweroffEnabled switch shutdownMode { - case "", "config": - // honor configured behavior + case "": + // Safe default for internal triggers and older callers. + poweroffEnabled = false + case "config": + // Honor configured behavior only when explicitly requested. 
case "cluster-only": poweroffEnabled = false case "poweroff": diff --git a/internal/config/config.go b/internal/config/config.go index fafd817..a347d6f 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -538,9 +538,9 @@ func defaults() Config { DrainParallelism: 6, ScaleParallelism: 8, SSHParallelism: 8, - PoweroffEnabled: true, + PoweroffEnabled: false, PoweroffDelaySeconds: 25, - PoweroffLocalHost: true, + PoweroffLocalHost: false, }, UPS: UPS{ Enabled: true, diff --git a/internal/service/daemon.go b/internal/service/daemon.go index 1362599..e224061 100644 --- a/internal/service/daemon.go +++ b/internal/service/daemon.go @@ -198,7 +198,7 @@ func (d *Daemon) forwardShutdown(ctx context.Context, reason string) error { runCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - remoteCmd := fmt.Sprintf("sudo /usr/local/bin/ananke shutdown --config %q --execute --reason %q", d.cfg.Coordination.ForwardShutdownConfig, reason) + remoteCmd := fmt.Sprintf("sudo /usr/local/bin/ananke shutdown --config %q --execute --mode cluster-only --reason %q", d.cfg.Coordination.ForwardShutdownConfig, reason) if d.cfg.Shutdown.EmergencySkipEtcd { remoteCmd += " --skip-etcd-snapshot" }