startup: add off-site break-glass unseal fallback
This commit is contained in:
parent
d2526edf0e
commit
437a6b62cd
@ -31,6 +31,7 @@ Key startup guards:
|
||||
- Startup can block until external probes pass (`startup.require_post_start_probes` + `startup.post_start_probes`).
|
||||
- Startup refreshes and can use a cached bootstrap manifest set under `/var/lib/hecate/bootstrap-cache` when local fallback paths fail.
|
||||
- Vault unseal now falls back to a local cached key file (`startup.vault_unseal_key_file`) if `vault-init` cannot be read yet.
|
||||
- Optional off-site break-glass retrieval can be configured with `startup.vault_unseal_breakglass_command` (for example, an SSH `cat` command to a remote key escrow host).
|
||||
|
||||
## Manual install on titan-db
|
||||
|
||||
@ -93,6 +94,10 @@ Recommended:
|
||||
|
||||
See `configs/hecate.example.yaml`.
|
||||
|
||||
Break-glass unseal fallback knobs:
|
||||
- `startup.vault_unseal_breakglass_command`: optional shell command (run via `sh -lc`) that prints the unseal key to stdout; surrounding whitespace is trimmed, and empty output is treated as a failure.
|
||||
- `startup.vault_unseal_breakglass_timeout_seconds`: timeout for the command, in seconds (default `15`; values `<= 0` fall back to the default).
|
||||
|
||||
UPS auto-shutdown trigger uses:
|
||||
- runtime threshold = `runtime_safety_factor * estimated_shutdown_budget`
|
||||
- default safety factor `1.25`
|
||||
|
||||
@ -70,6 +70,8 @@ startup:
|
||||
- https://scm.bstein.dev/user/login
|
||||
- https://metrics.bstein.dev/login
|
||||
vault_unseal_key_file: /var/lib/hecate/vault-unseal.key
|
||||
vault_unseal_breakglass_command: ""
|
||||
vault_unseal_breakglass_timeout_seconds: 15
|
||||
shutdown:
|
||||
default_budget_seconds: 1380
|
||||
history_min_samples: 3
|
||||
|
||||
@ -136,6 +136,8 @@ startup:
|
||||
- https://scm.bstein.dev/user/login
|
||||
- https://metrics.bstein.dev/login
|
||||
vault_unseal_key_file: /var/lib/hecate/vault-unseal.key
|
||||
vault_unseal_breakglass_command: "ssh -o BatchMode=yes -o StrictHostKeyChecking=accept-new -i /home/tethys/.ssh/id_ed25519 -p 1122 brad@99.183.132.163 'cat ~/.hecate-breakglass/vault-unseal.key'"
|
||||
vault_unseal_breakglass_timeout_seconds: 15
|
||||
shutdown:
|
||||
default_budget_seconds: 1380
|
||||
history_min_samples: 3
|
||||
|
||||
@ -136,6 +136,8 @@ startup:
|
||||
- https://scm.bstein.dev/user/login
|
||||
- https://metrics.bstein.dev/login
|
||||
vault_unseal_key_file: /var/lib/hecate/vault-unseal.key
|
||||
vault_unseal_breakglass_command: "ssh -o BatchMode=yes -o StrictHostKeyChecking=accept-new -i /home/atlas/.ssh/id_ed25519 -p 1122 brad@99.183.132.163 'cat ~/.hecate-breakglass/vault-unseal.key'"
|
||||
vault_unseal_breakglass_timeout_seconds: 15
|
||||
shutdown:
|
||||
default_budget_seconds: 1380
|
||||
history_min_samples: 3
|
||||
|
||||
@ -2086,7 +2086,33 @@ func (o *Orchestrator) vaultUnsealKey(ctx context.Context) (string, error) {
|
||||
o.log.Printf("warning: using cached vault unseal key from %s", o.cfg.Startup.VaultUnsealKeyFile)
|
||||
return fallbackKey, nil
|
||||
}
|
||||
return "", fmt.Errorf("%v; fallback %v", err, fileErr)
|
||||
breakglassKey, breakglassErr := o.readVaultUnsealKeyBreakglass(ctx)
|
||||
if breakglassErr == nil {
|
||||
o.log.Printf("warning: using break-glass vault unseal key command fallback")
|
||||
o.bestEffort("cache vault unseal key locally", func() error { return o.writeVaultUnsealKeyFile(breakglassKey) })
|
||||
return breakglassKey, nil
|
||||
}
|
||||
return "", fmt.Errorf("%v; fallback %v; break-glass %v", err, fileErr, breakglassErr)
|
||||
}
|
||||
|
||||
func (o *Orchestrator) readVaultUnsealKeyBreakglass(ctx context.Context) (string, error) {
|
||||
cmd := strings.TrimSpace(o.cfg.Startup.VaultUnsealBreakglassCommand)
|
||||
if cmd == "" {
|
||||
return "", fmt.Errorf("break-glass command not configured")
|
||||
}
|
||||
timeout := time.Duration(o.cfg.Startup.VaultUnsealBreakglassTimeout) * time.Second
|
||||
if timeout <= 0 {
|
||||
timeout = 15 * time.Second
|
||||
}
|
||||
out, err := o.runSensitive(ctx, timeout, "sh", "-lc", cmd)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("run break-glass command: %w", err)
|
||||
}
|
||||
key := strings.TrimSpace(out)
|
||||
if key == "" {
|
||||
return "", fmt.Errorf("break-glass command returned empty output")
|
||||
}
|
||||
return key, nil
|
||||
}
|
||||
|
||||
func (o *Orchestrator) writeVaultUnsealKeyFile(key string) error {
|
||||
|
||||
@ -34,26 +34,28 @@ type Config struct {
|
||||
}
|
||||
|
||||
// Startup groups the startup-phase settings; each field is populated from the
// YAML key named in its `yaml` tag.
//
// NOTE(review): in this view the field list appears twice because the diff
// shows the pre-change and post-change versions back to back; the second list
// adds the two break-glass fields at the end.
type Startup struct {
|
||||
APIWaitSeconds int `yaml:"api_wait_seconds"`
|
||||
APIPollSeconds int `yaml:"api_poll_seconds"`
|
||||
RequireTimeSync bool `yaml:"require_time_sync"`
|
||||
TimeSyncWaitSeconds int `yaml:"time_sync_wait_seconds"`
|
||||
TimeSyncPollSeconds int `yaml:"time_sync_poll_seconds"`
|
||||
TimeSyncMode string `yaml:"time_sync_mode"`
|
||||
TimeSyncQuorum int `yaml:"time_sync_quorum"`
|
||||
ReconcileAccessOnBoot bool `yaml:"reconcile_access_on_boot"`
|
||||
AutoEtcdRestoreOnAPIFailure bool `yaml:"auto_etcd_restore_on_api_failure"`
|
||||
EtcdRestoreControlPlane string `yaml:"etcd_restore_control_plane"`
|
||||
RequireStorageReady bool `yaml:"require_storage_ready"`
|
||||
StorageReadyWaitSeconds int `yaml:"storage_ready_wait_seconds"`
|
||||
StorageReadyPollSeconds int `yaml:"storage_ready_poll_seconds"`
|
||||
StorageMinReadyNodes int `yaml:"storage_min_ready_nodes"`
|
||||
StorageCriticalPVCs []string `yaml:"storage_critical_pvcs"`
|
||||
RequirePostStartProbes bool `yaml:"require_post_start_probes"`
|
||||
PostStartProbeWaitSeconds int `yaml:"post_start_probe_wait_seconds"`
|
||||
PostStartProbePollSeconds int `yaml:"post_start_probe_poll_seconds"`
|
||||
PostStartProbes []string `yaml:"post_start_probes"`
|
||||
VaultUnsealKeyFile string `yaml:"vault_unseal_key_file"`
|
||||
APIWaitSeconds int `yaml:"api_wait_seconds"`
|
||||
APIPollSeconds int `yaml:"api_poll_seconds"`
|
||||
RequireTimeSync bool `yaml:"require_time_sync"`
|
||||
TimeSyncWaitSeconds int `yaml:"time_sync_wait_seconds"`
|
||||
TimeSyncPollSeconds int `yaml:"time_sync_poll_seconds"`
|
||||
TimeSyncMode string `yaml:"time_sync_mode"`
|
||||
TimeSyncQuorum int `yaml:"time_sync_quorum"`
|
||||
ReconcileAccessOnBoot bool `yaml:"reconcile_access_on_boot"`
|
||||
AutoEtcdRestoreOnAPIFailure bool `yaml:"auto_etcd_restore_on_api_failure"`
|
||||
EtcdRestoreControlPlane string `yaml:"etcd_restore_control_plane"`
|
||||
RequireStorageReady bool `yaml:"require_storage_ready"`
|
||||
StorageReadyWaitSeconds int `yaml:"storage_ready_wait_seconds"`
|
||||
StorageReadyPollSeconds int `yaml:"storage_ready_poll_seconds"`
|
||||
StorageMinReadyNodes int `yaml:"storage_min_ready_nodes"`
|
||||
StorageCriticalPVCs []string `yaml:"storage_critical_pvcs"`
|
||||
RequirePostStartProbes bool `yaml:"require_post_start_probes"`
|
||||
PostStartProbeWaitSeconds int `yaml:"post_start_probe_wait_seconds"`
|
||||
PostStartProbePollSeconds int `yaml:"post_start_probe_poll_seconds"`
|
||||
PostStartProbes []string `yaml:"post_start_probes"`
|
||||
VaultUnsealKeyFile string `yaml:"vault_unseal_key_file"`
|
||||
// VaultUnsealBreakglassCommand is the optional shell command that
// readVaultUnsealKeyBreakglass runs via `sh -lc`; its trimmed output is used
// as the unseal key. An empty (or whitespace-only) value means the
// break-glass fallback is not configured.
VaultUnsealBreakglassCommand string `yaml:"vault_unseal_breakglass_command"`
|
||||
// VaultUnsealBreakglassTimeout is the time limit for the break-glass command,
// in seconds. applyDefaults replaces values <= 0 with 15.
VaultUnsealBreakglassTimeout int `yaml:"vault_unseal_breakglass_timeout_seconds"`
|
||||
}
|
||||
|
||||
type Shutdown struct {
|
||||
@ -323,7 +325,8 @@ func defaults() Config {
|
||||
"https://scm.bstein.dev/user/login",
|
||||
"https://metrics.bstein.dev/login",
|
||||
},
|
||||
VaultUnsealKeyFile: "/var/lib/hecate/vault-unseal.key",
|
||||
VaultUnsealKeyFile: "/var/lib/hecate/vault-unseal.key",
|
||||
VaultUnsealBreakglassTimeout: 15,
|
||||
},
|
||||
Shutdown: Shutdown{
|
||||
DefaultBudgetSeconds: 1380,
|
||||
@ -435,6 +438,9 @@ func (c *Config) applyDefaults() {
|
||||
if strings.TrimSpace(c.Startup.VaultUnsealKeyFile) == "" {
|
||||
c.Startup.VaultUnsealKeyFile = "/var/lib/hecate/vault-unseal.key"
|
||||
}
|
||||
if c.Startup.VaultUnsealBreakglassTimeout <= 0 {
|
||||
c.Startup.VaultUnsealBreakglassTimeout = 15
|
||||
}
|
||||
if c.SSHPort <= 0 {
|
||||
c.SSHPort = 2277
|
||||
}
|
||||
|
||||
@ -98,6 +98,9 @@ state:
|
||||
if cfg.Startup.VaultUnsealKeyFile == "" {
|
||||
t.Fatalf("expected startup vault unseal key file default to be set")
|
||||
}
|
||||
if cfg.Startup.VaultUnsealBreakglassTimeout <= 0 {
|
||||
t.Fatalf("expected startup break-glass timeout default > 0, got %d", cfg.Startup.VaultUnsealBreakglassTimeout)
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateRejectsInvalidTimeSyncMode(t *testing.T) {
|
||||
|
||||
@ -268,6 +268,13 @@ migrate_hecate_config() {
|
||||
changed=1
|
||||
fi
|
||||
fi
|
||||
if ! grep -Eq '^ vault_unseal_breakglass_timeout_seconds:[[:space:]]*[0-9]+' "${CONF_DIR}/hecate.yaml"; then
|
||||
if grep -Eq '^ vault_unseal_key_file:[[:space:]]*/var/lib/hecate/vault-unseal.key' "${CONF_DIR}/hecate.yaml"; then
|
||||
sed -Ei '/^ vault_unseal_key_file:[[:space:]]*\/var\/lib\/hecate\/vault-unseal.key$/a\ vault_unseal_breakglass_command: ""\n vault_unseal_breakglass_timeout_seconds: 15' "${CONF_DIR}/hecate.yaml"
|
||||
echo "[install] added startup break-glass fallback defaults"
|
||||
changed=1
|
||||
fi
|
||||
fi
|
||||
|
||||
local role
|
||||
role="$(read_hecate_role)"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user