startup: add off-site break-glass unseal fallback
This commit is contained in:
parent
d2526edf0e
commit
437a6b62cd
@ -31,6 +31,7 @@ Key startup guards:
|
|||||||
- Startup can block until external probes pass (`startup.require_post_start_probes` + `startup.post_start_probes`).
|
- Startup can block until external probes pass (`startup.require_post_start_probes` + `startup.post_start_probes`).
|
||||||
- Startup refreshes and can use a cached bootstrap manifest set under `/var/lib/hecate/bootstrap-cache` when local fallback paths fail.
|
- Startup refreshes and can use a cached bootstrap manifest set under `/var/lib/hecate/bootstrap-cache` when local fallback paths fail.
|
||||||
- Vault unseal now falls back to a local cached key file (`startup.vault_unseal_key_file`) if `vault-init` cannot be read yet.
|
- Vault unseal now falls back to a local cached key file (`startup.vault_unseal_key_file`) if `vault-init` cannot be read yet.
|
||||||
|
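- Optional off-site break-glass retrieval can be configured with `startup.vault_unseal_breakglass_command` (for example, an SSH `cat` command to a remote key escrow host). It is attempted only after both `vault-init` and the local cached key file have failed; a key obtained this way is then cached locally on a best-effort basis.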
|
||||||
|
|
||||||
## Manual install on titan-db
|
## Manual install on titan-db
|
||||||
|
|
||||||
@ -93,6 +94,10 @@ Recommended:
|
|||||||
|
|
||||||
See `configs/hecate.example.yaml`.
|
See `configs/hecate.example.yaml`.
|
||||||
|
|
||||||
|
Break-glass unseal fallback knobs:
|
||||||
|
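- `startup.vault_unseal_breakglass_command`: optional shell command that prints the unseal key to stdout. It is run via `sh -lc`; leading and trailing whitespace in the output is trimmed, and empty output is treated as a failure. Leave it empty (`""`) to disable the break-glass fallback.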
|
||||||
|
- `startup.vault_unseal_breakglass_timeout_seconds`: timeout for the command, in seconds (default `15`; values `<= 0` fall back to the default).
|
||||||
|
|
||||||
UPS auto-shutdown trigger uses:
|
UPS auto-shutdown trigger uses:
|
||||||
- runtime threshold = `runtime_safety_factor * estimated_shutdown_budget`
|
- runtime threshold = `runtime_safety_factor * estimated_shutdown_budget`
|
||||||
- default safety factor `1.25`
|
- default safety factor `1.25`
|
||||||
|
|||||||
@ -70,6 +70,8 @@ startup:
|
|||||||
- https://scm.bstein.dev/user/login
|
- https://scm.bstein.dev/user/login
|
||||||
- https://metrics.bstein.dev/login
|
- https://metrics.bstein.dev/login
|
||||||
vault_unseal_key_file: /var/lib/hecate/vault-unseal.key
|
vault_unseal_key_file: /var/lib/hecate/vault-unseal.key
|
||||||
|
vault_unseal_breakglass_command: ""
|
||||||
|
vault_unseal_breakglass_timeout_seconds: 15
|
||||||
shutdown:
|
shutdown:
|
||||||
default_budget_seconds: 1380
|
default_budget_seconds: 1380
|
||||||
history_min_samples: 3
|
history_min_samples: 3
|
||||||
|
|||||||
@ -136,6 +136,8 @@ startup:
|
|||||||
- https://scm.bstein.dev/user/login
|
- https://scm.bstein.dev/user/login
|
||||||
- https://metrics.bstein.dev/login
|
- https://metrics.bstein.dev/login
|
||||||
vault_unseal_key_file: /var/lib/hecate/vault-unseal.key
|
vault_unseal_key_file: /var/lib/hecate/vault-unseal.key
|
||||||
|
vault_unseal_breakglass_command: "ssh -o BatchMode=yes -o StrictHostKeyChecking=accept-new -i /home/tethys/.ssh/id_ed25519 -p 1122 brad@99.183.132.163 'cat ~/.hecate-breakglass/vault-unseal.key'"
|
||||||
|
vault_unseal_breakglass_timeout_seconds: 15
|
||||||
shutdown:
|
shutdown:
|
||||||
default_budget_seconds: 1380
|
default_budget_seconds: 1380
|
||||||
history_min_samples: 3
|
history_min_samples: 3
|
||||||
|
|||||||
@ -136,6 +136,8 @@ startup:
|
|||||||
- https://scm.bstein.dev/user/login
|
- https://scm.bstein.dev/user/login
|
||||||
- https://metrics.bstein.dev/login
|
- https://metrics.bstein.dev/login
|
||||||
vault_unseal_key_file: /var/lib/hecate/vault-unseal.key
|
vault_unseal_key_file: /var/lib/hecate/vault-unseal.key
|
||||||
|
vault_unseal_breakglass_command: "ssh -o BatchMode=yes -o StrictHostKeyChecking=accept-new -i /home/atlas/.ssh/id_ed25519 -p 1122 brad@99.183.132.163 'cat ~/.hecate-breakglass/vault-unseal.key'"
|
||||||
|
vault_unseal_breakglass_timeout_seconds: 15
|
||||||
shutdown:
|
shutdown:
|
||||||
default_budget_seconds: 1380
|
default_budget_seconds: 1380
|
||||||
history_min_samples: 3
|
history_min_samples: 3
|
||||||
|
|||||||
@ -2086,7 +2086,33 @@ func (o *Orchestrator) vaultUnsealKey(ctx context.Context) (string, error) {
|
|||||||
o.log.Printf("warning: using cached vault unseal key from %s", o.cfg.Startup.VaultUnsealKeyFile)
|
o.log.Printf("warning: using cached vault unseal key from %s", o.cfg.Startup.VaultUnsealKeyFile)
|
||||||
return fallbackKey, nil
|
return fallbackKey, nil
|
||||||
}
|
}
|
||||||
return "", fmt.Errorf("%v; fallback %v", err, fileErr)
|
breakglassKey, breakglassErr := o.readVaultUnsealKeyBreakglass(ctx)
|
||||||
|
if breakglassErr == nil {
|
||||||
|
o.log.Printf("warning: using break-glass vault unseal key command fallback")
|
||||||
|
o.bestEffort("cache vault unseal key locally", func() error { return o.writeVaultUnsealKeyFile(breakglassKey) })
|
||||||
|
return breakglassKey, nil
|
||||||
|
}
|
||||||
|
return "", fmt.Errorf("%v; fallback %v; break-glass %v", err, fileErr, breakglassErr)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *Orchestrator) readVaultUnsealKeyBreakglass(ctx context.Context) (string, error) {
|
||||||
|
cmd := strings.TrimSpace(o.cfg.Startup.VaultUnsealBreakglassCommand)
|
||||||
|
if cmd == "" {
|
||||||
|
return "", fmt.Errorf("break-glass command not configured")
|
||||||
|
}
|
||||||
|
timeout := time.Duration(o.cfg.Startup.VaultUnsealBreakglassTimeout) * time.Second
|
||||||
|
if timeout <= 0 {
|
||||||
|
timeout = 15 * time.Second
|
||||||
|
}
|
||||||
|
out, err := o.runSensitive(ctx, timeout, "sh", "-lc", cmd)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("run break-glass command: %w", err)
|
||||||
|
}
|
||||||
|
key := strings.TrimSpace(out)
|
||||||
|
if key == "" {
|
||||||
|
return "", fmt.Errorf("break-glass command returned empty output")
|
||||||
|
}
|
||||||
|
return key, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *Orchestrator) writeVaultUnsealKeyFile(key string) error {
|
func (o *Orchestrator) writeVaultUnsealKeyFile(key string) error {
|
||||||
|
|||||||
@ -34,26 +34,28 @@ type Config struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type Startup struct {
|
type Startup struct {
|
||||||
APIWaitSeconds int `yaml:"api_wait_seconds"`
|
APIWaitSeconds int `yaml:"api_wait_seconds"`
|
||||||
APIPollSeconds int `yaml:"api_poll_seconds"`
|
APIPollSeconds int `yaml:"api_poll_seconds"`
|
||||||
RequireTimeSync bool `yaml:"require_time_sync"`
|
RequireTimeSync bool `yaml:"require_time_sync"`
|
||||||
TimeSyncWaitSeconds int `yaml:"time_sync_wait_seconds"`
|
TimeSyncWaitSeconds int `yaml:"time_sync_wait_seconds"`
|
||||||
TimeSyncPollSeconds int `yaml:"time_sync_poll_seconds"`
|
TimeSyncPollSeconds int `yaml:"time_sync_poll_seconds"`
|
||||||
TimeSyncMode string `yaml:"time_sync_mode"`
|
TimeSyncMode string `yaml:"time_sync_mode"`
|
||||||
TimeSyncQuorum int `yaml:"time_sync_quorum"`
|
TimeSyncQuorum int `yaml:"time_sync_quorum"`
|
||||||
ReconcileAccessOnBoot bool `yaml:"reconcile_access_on_boot"`
|
ReconcileAccessOnBoot bool `yaml:"reconcile_access_on_boot"`
|
||||||
AutoEtcdRestoreOnAPIFailure bool `yaml:"auto_etcd_restore_on_api_failure"`
|
AutoEtcdRestoreOnAPIFailure bool `yaml:"auto_etcd_restore_on_api_failure"`
|
||||||
EtcdRestoreControlPlane string `yaml:"etcd_restore_control_plane"`
|
EtcdRestoreControlPlane string `yaml:"etcd_restore_control_plane"`
|
||||||
RequireStorageReady bool `yaml:"require_storage_ready"`
|
RequireStorageReady bool `yaml:"require_storage_ready"`
|
||||||
StorageReadyWaitSeconds int `yaml:"storage_ready_wait_seconds"`
|
StorageReadyWaitSeconds int `yaml:"storage_ready_wait_seconds"`
|
||||||
StorageReadyPollSeconds int `yaml:"storage_ready_poll_seconds"`
|
StorageReadyPollSeconds int `yaml:"storage_ready_poll_seconds"`
|
||||||
StorageMinReadyNodes int `yaml:"storage_min_ready_nodes"`
|
StorageMinReadyNodes int `yaml:"storage_min_ready_nodes"`
|
||||||
StorageCriticalPVCs []string `yaml:"storage_critical_pvcs"`
|
StorageCriticalPVCs []string `yaml:"storage_critical_pvcs"`
|
||||||
RequirePostStartProbes bool `yaml:"require_post_start_probes"`
|
RequirePostStartProbes bool `yaml:"require_post_start_probes"`
|
||||||
PostStartProbeWaitSeconds int `yaml:"post_start_probe_wait_seconds"`
|
PostStartProbeWaitSeconds int `yaml:"post_start_probe_wait_seconds"`
|
||||||
PostStartProbePollSeconds int `yaml:"post_start_probe_poll_seconds"`
|
PostStartProbePollSeconds int `yaml:"post_start_probe_poll_seconds"`
|
||||||
PostStartProbes []string `yaml:"post_start_probes"`
|
PostStartProbes []string `yaml:"post_start_probes"`
|
||||||
VaultUnsealKeyFile string `yaml:"vault_unseal_key_file"`
|
VaultUnsealKeyFile string `yaml:"vault_unseal_key_file"`
|
||||||
|
VaultUnsealBreakglassCommand string `yaml:"vault_unseal_breakglass_command"`
|
||||||
|
VaultUnsealBreakglassTimeout int `yaml:"vault_unseal_breakglass_timeout_seconds"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type Shutdown struct {
|
type Shutdown struct {
|
||||||
@ -323,7 +325,8 @@ func defaults() Config {
|
|||||||
"https://scm.bstein.dev/user/login",
|
"https://scm.bstein.dev/user/login",
|
||||||
"https://metrics.bstein.dev/login",
|
"https://metrics.bstein.dev/login",
|
||||||
},
|
},
|
||||||
VaultUnsealKeyFile: "/var/lib/hecate/vault-unseal.key",
|
VaultUnsealKeyFile: "/var/lib/hecate/vault-unseal.key",
|
||||||
|
VaultUnsealBreakglassTimeout: 15,
|
||||||
},
|
},
|
||||||
Shutdown: Shutdown{
|
Shutdown: Shutdown{
|
||||||
DefaultBudgetSeconds: 1380,
|
DefaultBudgetSeconds: 1380,
|
||||||
@ -435,6 +438,9 @@ func (c *Config) applyDefaults() {
|
|||||||
if strings.TrimSpace(c.Startup.VaultUnsealKeyFile) == "" {
|
if strings.TrimSpace(c.Startup.VaultUnsealKeyFile) == "" {
|
||||||
c.Startup.VaultUnsealKeyFile = "/var/lib/hecate/vault-unseal.key"
|
c.Startup.VaultUnsealKeyFile = "/var/lib/hecate/vault-unseal.key"
|
||||||
}
|
}
|
||||||
|
if c.Startup.VaultUnsealBreakglassTimeout <= 0 {
|
||||||
|
c.Startup.VaultUnsealBreakglassTimeout = 15
|
||||||
|
}
|
||||||
if c.SSHPort <= 0 {
|
if c.SSHPort <= 0 {
|
||||||
c.SSHPort = 2277
|
c.SSHPort = 2277
|
||||||
}
|
}
|
||||||
|
|||||||
@ -98,6 +98,9 @@ state:
|
|||||||
if cfg.Startup.VaultUnsealKeyFile == "" {
|
if cfg.Startup.VaultUnsealKeyFile == "" {
|
||||||
t.Fatalf("expected startup vault unseal key file default to be set")
|
t.Fatalf("expected startup vault unseal key file default to be set")
|
||||||
}
|
}
|
||||||
|
if cfg.Startup.VaultUnsealBreakglassTimeout <= 0 {
|
||||||
|
t.Fatalf("expected startup break-glass timeout default > 0, got %d", cfg.Startup.VaultUnsealBreakglassTimeout)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestValidateRejectsInvalidTimeSyncMode(t *testing.T) {
|
func TestValidateRejectsInvalidTimeSyncMode(t *testing.T) {
|
||||||
|
|||||||
@ -268,6 +268,13 @@ migrate_hecate_config() {
|
|||||||
changed=1
|
changed=1
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
if ! grep -Eq '^ vault_unseal_breakglass_timeout_seconds:[[:space:]]*[0-9]+' "${CONF_DIR}/hecate.yaml"; then
|
||||||
|
if grep -Eq '^ vault_unseal_key_file:[[:space:]]*/var/lib/hecate/vault-unseal.key' "${CONF_DIR}/hecate.yaml"; then
|
||||||
|
sed -Ei '/^ vault_unseal_key_file:[[:space:]]*\/var\/lib\/hecate\/vault-unseal.key$/a\ vault_unseal_breakglass_command: ""\n vault_unseal_breakglass_timeout_seconds: 15' "${CONF_DIR}/hecate.yaml"
|
||||||
|
echo "[install] added startup break-glass fallback defaults"
|
||||||
|
changed=1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
local role
|
local role
|
||||||
role="$(read_hecate_role)"
|
role="$(read_hecate_role)"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user