shutdown: default to cluster-only and require explicit poweroff

This commit is contained in:
Brad Stein 2026-04-07 20:58:41 -03:00
parent 22c581b24d
commit 1f54cd3d46
8 changed files with 20 additions and 18 deletions

View File

@ -61,9 +61,10 @@ sudo systemctl start ananke-update.service
## Shutdown modes (explicit)
`ananke shutdown` now supports explicit mode selection:
- default (no `--mode` given): `cluster-only`; host poweroff is not performed
- `--mode config`: use config default (`shutdown.poweroff_enabled`)
- `--mode cluster-only`: stop cluster services only (no host poweroff)
- `--mode poweroff`: include host poweroff path (explicit only)
This removes ambiguity during drills.

View File

@ -146,7 +146,7 @@ func runShutdown(logger *log.Logger, args []string) error {
execute := fs.Bool("execute", false, "Actually execute changes (default dry-run)") execute := fs.Bool("execute", false, "Actually execute changes (default dry-run)")
skipEtcd := fs.Bool("skip-etcd-snapshot", false, "Skip etcd snapshot") skipEtcd := fs.Bool("skip-etcd-snapshot", false, "Skip etcd snapshot")
skipDrain := fs.Bool("skip-drain", false, "Skip worker drain") skipDrain := fs.Bool("skip-drain", false, "Skip worker drain")
mode := fs.String("mode", "config", "Shutdown mode: config|cluster-only|poweroff") mode := fs.String("mode", "cluster-only", "Shutdown mode: config|cluster-only|poweroff")
reason := fs.String("reason", "manual-shutdown", "Shutdown reason for run history") reason := fs.String("reason", "manual-shutdown", "Shutdown reason for run history")
_ = fs.Parse(args) _ = fs.Parse(args)
@ -426,7 +426,7 @@ Commands:
Examples: Examples:
ananke startup --config /etc/ananke/ananke.yaml --execute --force-flux-branch main ananke startup --config /etc/ananke/ananke.yaml --execute --force-flux-branch main
ananke shutdown --config /etc/ananke/ananke.yaml --execute --reason "manual-maintenance" ananke shutdown --config /etc/ananke/ananke.yaml --execute --mode cluster-only --reason "manual-maintenance"
ananke etcd-restore --config /etc/ananke/ananke.yaml --execute ananke etcd-restore --config /etc/ananke/ananke.yaml --execute
ananke daemon --config /etc/ananke/ananke.yaml ananke daemon --config /etc/ananke/ananke.yaml
ananke status --config /etc/ananke/ananke.yaml ananke status --config /etc/ananke/ananke.yaml

View File

@ -130,11 +130,10 @@ shutdown:
drain_parallelism: 6 drain_parallelism: 6
scale_parallelism: 8 scale_parallelism: 8
ssh_parallelism: 8 ssh_parallelism: 8
poweroff_enabled: true poweroff_enabled: false
poweroff_delay_seconds: 25 poweroff_delay_seconds: 25
poweroff_local_host: true poweroff_local_host: false
extra_poweroff_hosts: extra_poweroff_hosts: []
- titan-db
ups: ups:
enabled: true enabled: true
provider: nut provider: nut

View File

@ -196,9 +196,9 @@ shutdown:
drain_parallelism: 6 drain_parallelism: 6
scale_parallelism: 8 scale_parallelism: 8
ssh_parallelism: 8 ssh_parallelism: 8
poweroff_enabled: true poweroff_enabled: false
poweroff_delay_seconds: 25 poweroff_delay_seconds: 25
poweroff_local_host: true poweroff_local_host: false
extra_poweroff_hosts: [] extra_poweroff_hosts: []
ups: ups:
enabled: true enabled: true

View File

@ -196,11 +196,10 @@ shutdown:
drain_parallelism: 6 drain_parallelism: 6
scale_parallelism: 8 scale_parallelism: 8
ssh_parallelism: 8 ssh_parallelism: 8
poweroff_enabled: true poweroff_enabled: false
poweroff_delay_seconds: 25 poweroff_delay_seconds: 25
poweroff_local_host: true poweroff_local_host: false
extra_poweroff_hosts: extra_poweroff_hosts: []
- titan-db
ups: ups:
enabled: true enabled: true
provider: nut provider: nut

View File

@ -469,8 +469,11 @@ func (o *Orchestrator) Shutdown(ctx context.Context, opts ShutdownOptions) (err
shutdownMode := strings.TrimSpace(opts.Mode) shutdownMode := strings.TrimSpace(opts.Mode)
poweroffEnabled := o.cfg.Shutdown.PoweroffEnabled poweroffEnabled := o.cfg.Shutdown.PoweroffEnabled
switch shutdownMode { switch shutdownMode {
case "", "config": case "":
// honor configured behavior // Safe default for internal triggers and older callers.
poweroffEnabled = false
case "config":
// Honor configured behavior only when explicitly requested.
case "cluster-only": case "cluster-only":
poweroffEnabled = false poweroffEnabled = false
case "poweroff": case "poweroff":

View File

@ -538,9 +538,9 @@ func defaults() Config {
DrainParallelism: 6, DrainParallelism: 6,
ScaleParallelism: 8, ScaleParallelism: 8,
SSHParallelism: 8, SSHParallelism: 8,
PoweroffEnabled: true, PoweroffEnabled: false,
PoweroffDelaySeconds: 25, PoweroffDelaySeconds: 25,
PoweroffLocalHost: true, PoweroffLocalHost: false,
}, },
UPS: UPS{ UPS: UPS{
Enabled: true, Enabled: true,

View File

@ -198,7 +198,7 @@ func (d *Daemon) forwardShutdown(ctx context.Context, reason string) error {
runCtx, cancel := context.WithTimeout(ctx, timeout) runCtx, cancel := context.WithTimeout(ctx, timeout)
defer cancel() defer cancel()
remoteCmd := fmt.Sprintf("sudo /usr/local/bin/ananke shutdown --config %q --execute --reason %q", d.cfg.Coordination.ForwardShutdownConfig, reason) remoteCmd := fmt.Sprintf("sudo /usr/local/bin/ananke shutdown --config %q --execute --mode cluster-only --reason %q", d.cfg.Coordination.ForwardShutdownConfig, reason)
if d.cfg.Shutdown.EmergencySkipEtcd { if d.cfg.Shutdown.EmergencySkipEtcd {
remoteCmd += " --skip-etcd-snapshot" remoteCmd += " --skip-etcd-snapshot"
} }