shutdown: default to cluster-only and require explicit poweroff

This commit is contained in:
Brad Stein 2026-04-07 20:58:41 -03:00
parent 22c581b24d
commit 1f54cd3d46
8 changed files with 20 additions and 18 deletions

View File

@ -61,9 +61,10 @@ sudo systemctl start ananke-update.service
## Shutdown modes (explicit)
`ananke shutdown` now supports explicit mode selection:
- default behavior is `cluster-only` (host poweroff is not performed)
- `--mode config`: use the configured default (`shutdown.poweroff_enabled`)
- `--mode cluster-only`: stop cluster services only (no host poweroff)
- `--mode poweroff`: include host poweroff path
- `--mode poweroff`: include host poweroff path (explicit only)
Selecting the mode explicitly removes ambiguity during drills.

View File

@ -146,7 +146,7 @@ func runShutdown(logger *log.Logger, args []string) error {
execute := fs.Bool("execute", false, "Actually execute changes (default dry-run)")
skipEtcd := fs.Bool("skip-etcd-snapshot", false, "Skip etcd snapshot")
skipDrain := fs.Bool("skip-drain", false, "Skip worker drain")
mode := fs.String("mode", "config", "Shutdown mode: config|cluster-only|poweroff")
mode := fs.String("mode", "cluster-only", "Shutdown mode: config|cluster-only|poweroff")
reason := fs.String("reason", "manual-shutdown", "Shutdown reason for run history")
_ = fs.Parse(args)
@ -426,7 +426,7 @@ Commands:
Examples:
ananke startup --config /etc/ananke/ananke.yaml --execute --force-flux-branch main
ananke shutdown --config /etc/ananke/ananke.yaml --execute --reason "manual-maintenance"
ananke shutdown --config /etc/ananke/ananke.yaml --execute --mode cluster-only --reason "manual-maintenance"
ananke etcd-restore --config /etc/ananke/ananke.yaml --execute
ananke daemon --config /etc/ananke/ananke.yaml
ananke status --config /etc/ananke/ananke.yaml

View File

@ -130,11 +130,10 @@ shutdown:
drain_parallelism: 6
scale_parallelism: 8
ssh_parallelism: 8
poweroff_enabled: true
poweroff_enabled: false
poweroff_delay_seconds: 25
poweroff_local_host: true
extra_poweroff_hosts:
- titan-db
poweroff_local_host: false
extra_poweroff_hosts: []
ups:
enabled: true
provider: nut

View File

@ -196,9 +196,9 @@ shutdown:
drain_parallelism: 6
scale_parallelism: 8
ssh_parallelism: 8
poweroff_enabled: true
poweroff_enabled: false
poweroff_delay_seconds: 25
poweroff_local_host: true
poweroff_local_host: false
extra_poweroff_hosts: []
ups:
enabled: true

View File

@ -196,11 +196,10 @@ shutdown:
drain_parallelism: 6
scale_parallelism: 8
ssh_parallelism: 8
poweroff_enabled: true
poweroff_enabled: false
poweroff_delay_seconds: 25
poweroff_local_host: true
extra_poweroff_hosts:
- titan-db
poweroff_local_host: false
extra_poweroff_hosts: []
ups:
enabled: true
provider: nut

View File

@ -469,8 +469,11 @@ func (o *Orchestrator) Shutdown(ctx context.Context, opts ShutdownOptions) (err
shutdownMode := strings.TrimSpace(opts.Mode)
poweroffEnabled := o.cfg.Shutdown.PoweroffEnabled
switch shutdownMode {
case "", "config":
// honor configured behavior
case "":
// Safe default for internal triggers and older callers.
poweroffEnabled = false
case "config":
// Honor configured behavior only when explicitly requested.
case "cluster-only":
poweroffEnabled = false
case "poweroff":

View File

@ -538,9 +538,9 @@ func defaults() Config {
DrainParallelism: 6,
ScaleParallelism: 8,
SSHParallelism: 8,
PoweroffEnabled: true,
PoweroffEnabled: false,
PoweroffDelaySeconds: 25,
PoweroffLocalHost: true,
PoweroffLocalHost: false,
},
UPS: UPS{
Enabled: true,

View File

@ -198,7 +198,7 @@ func (d *Daemon) forwardShutdown(ctx context.Context, reason string) error {
runCtx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
remoteCmd := fmt.Sprintf("sudo /usr/local/bin/ananke shutdown --config %q --execute --reason %q", d.cfg.Coordination.ForwardShutdownConfig, reason)
remoteCmd := fmt.Sprintf("sudo /usr/local/bin/ananke shutdown --config %q --execute --mode cluster-only --reason %q", d.cfg.Coordination.ForwardShutdownConfig, reason)
if d.cfg.Shutdown.EmergencySkipEtcd {
remoteCmd += " --skip-etcd-snapshot"
}