startup: unblock on harbor during recovery and add controlled-cycle drill
This commit is contained in:
parent
11a2f66e41
commit
c8c3304797
@ -99,6 +99,7 @@ See `configs/hecate.example.yaml`.
|
||||
Break-glass unseal fallback knobs:
|
||||
- `startup.vault_unseal_breakglass_command`: optional shell command that prints the unseal key to stdout.
|
||||
- `startup.vault_unseal_breakglass_timeout_seconds`: timeout for the command (default `15`).
|
||||
- `startup.shutdown_cooldown_seconds`: cooldown window after shutdown completion before startup proceeds (default `45`).
|
||||
|
||||
UPS auto-shutdown trigger uses:
|
||||
- runtime threshold = `runtime_safety_factor * estimated_shutdown_budget`
|
||||
@ -119,6 +120,7 @@ Power metrics:
|
||||
|
||||
- Default behavior for `startup` and `shutdown` is dry-run unless `--execute` is set.
|
||||
- Hecate tracks intent in `/var/lib/hecate/intent.json` (`normal`, `startup_in_progress`, `shutting_down`, `shutdown_complete`) to avoid startup/shutdown fighting each other.
|
||||
- Startup now waits out the recent-shutdown cooldown window instead of failing immediately when shutdown completed moments ago.
|
||||
- In multi-instance setups, set `coordination.peer_hosts` on each host (for example `titan-db` <-> `titan-24`) so startup guards account for remote intent too.
|
||||
- `hecate-bootstrap.service` is enabled to run at host boot and perform staged startup automatically.
|
||||
- `HECATE_ENABLE_BOOTSTRAP=1` forces bootstrap on, `HECATE_ENABLE_BOOTSTRAP=0` forces it off, and `auto` enables by default.
|
||||
@ -144,5 +146,6 @@ Hecate includes scripted disruptive drills that intentionally break critical ser
|
||||
- `scripts/hecate-drills.sh run flux-gitea-deadlock --execute`
|
||||
- `scripts/hecate-drills.sh run foundation-recovery --execute`
|
||||
- `scripts/hecate-drills.sh run reconciliation-resume --execute`
|
||||
- `scripts/hecate-drills.sh run controlled-cycle --execute` (uses `HECATE_DRILL_SHUTDOWN_CONFIG`, defaults to `/tmp/hecate-drill-no-poweroff.yaml`)
|
||||
|
||||
These drills are intentionally **not** part of regular `go test ./...`.
|
||||
|
||||
@ -520,7 +520,7 @@ func coordinatorAllowsPeerFallbackStartup(ctx context.Context, cfg config.Config
|
||||
if remoteIntent.UpdatedAt.IsZero() {
|
||||
return false, "coordinator reported shutdown_complete with unknown age", nil
|
||||
}
|
||||
if intentAge <= 45*time.Second {
|
||||
if intentAge <= startupShutdownCooldown(cfg) {
|
||||
return false, fmt.Sprintf("coordinator recently completed shutdown (%s ago)", intentAge.Round(time.Second)), nil
|
||||
}
|
||||
return true, "coordinator shutdown_complete is old enough", nil
|
||||
@ -621,3 +621,11 @@ func maxInt(a, b int) int {
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func startupShutdownCooldown(cfg config.Config) time.Duration {
|
||||
seconds := cfg.Startup.ShutdownCooldownSeconds
|
||||
if seconds <= 0 {
|
||||
seconds = 45
|
||||
}
|
||||
return time.Duration(seconds) * time.Second
|
||||
}
|
||||
|
||||
@ -45,6 +45,7 @@ excluded_namespaces:
|
||||
startup:
|
||||
api_wait_seconds: 1200
|
||||
api_poll_seconds: 2
|
||||
shutdown_cooldown_seconds: 45
|
||||
require_time_sync: true
|
||||
time_sync_wait_seconds: 240
|
||||
time_sync_poll_seconds: 5
|
||||
|
||||
@ -111,6 +111,7 @@ excluded_namespaces:
|
||||
startup:
|
||||
api_wait_seconds: 1200
|
||||
api_poll_seconds: 2
|
||||
shutdown_cooldown_seconds: 45
|
||||
require_time_sync: true
|
||||
time_sync_wait_seconds: 240
|
||||
time_sync_poll_seconds: 5
|
||||
|
||||
@ -111,6 +111,7 @@ excluded_namespaces:
|
||||
startup:
|
||||
api_wait_seconds: 1200
|
||||
api_poll_seconds: 2
|
||||
shutdown_cooldown_seconds: 45
|
||||
require_time_sync: true
|
||||
time_sync_wait_seconds: 240
|
||||
time_sync_poll_seconds: 5
|
||||
|
||||
@ -77,8 +77,6 @@ var criticalStartupWorkloads = []startupWorkload{
|
||||
{Namespace: "vault", Kind: "statefulset", Name: "vault"},
|
||||
{Namespace: "postgres", Kind: "statefulset", Name: "postgres"},
|
||||
{Namespace: "gitea", Kind: "deployment", Name: "gitea"},
|
||||
{Namespace: "harbor", Kind: "statefulset", Name: "harbor-redis"},
|
||||
{Namespace: "harbor", Kind: "deployment", Name: "harbor-registry"},
|
||||
}
|
||||
|
||||
var ErrEtcdRestoreNotApplicable = errors.New("etcd restore not applicable")
|
||||
@ -135,9 +133,33 @@ func (o *Orchestrator) Startup(ctx context.Context, opts StartupOptions) (err er
|
||||
}
|
||||
currentIntent = state.Intent{State: state.IntentNormal}
|
||||
}
|
||||
if currentIntent.State == state.IntentShutdownComplete && intentFresh(currentIntent, 45*time.Second) {
|
||||
cooldown := o.startupShutdownCooldown()
|
||||
if currentIntent.State == state.IntentShutdownComplete && intentFresh(currentIntent, cooldown) {
|
||||
elapsed := intentAge(currentIntent)
|
||||
remaining := cooldown - elapsed
|
||||
if remaining < time.Second {
|
||||
remaining = time.Second
|
||||
}
|
||||
o.log.Printf("startup cooldown active: last shutdown completed %s ago; waiting %s", elapsed.Round(time.Second), remaining.Round(time.Second))
|
||||
timer := time.NewTimer(remaining)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
timer.Stop()
|
||||
return fmt.Errorf("startup canceled while waiting for shutdown cooldown: %w", ctx.Err())
|
||||
case <-timer.C:
|
||||
}
|
||||
refreshed, readErr := state.ReadIntent(o.cfg.State.IntentPath)
|
||||
if readErr != nil {
|
||||
return fmt.Errorf("re-read startup intent after cooldown wait: %w", readErr)
|
||||
}
|
||||
currentIntent = refreshed
|
||||
if currentIntent.State == state.IntentShuttingDown && intentFresh(currentIntent, o.startupGuardAge()) {
|
||||
return fmt.Errorf("startup blocked: shutdown intent became active during cooldown wait (%s)", currentIntent.Reason)
|
||||
}
|
||||
if currentIntent.State == state.IntentShutdownComplete && intentFresh(currentIntent, cooldown) {
|
||||
return fmt.Errorf("startup blocked: shutdown completed too recently (%s ago)", intentAge(currentIntent).Round(time.Second))
|
||||
}
|
||||
}
|
||||
if err := o.guardPeerStartupIntents(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
@ -789,6 +811,12 @@ func (o *Orchestrator) readScaledWorkloadSnapshot() (*workloadScaleSnapshot, err
|
||||
return &snapshot, nil
|
||||
}
|
||||
|
||||
// drainFailure records one failed node drain so the caller can aggregate
// errors and attach per-node diagnostics to the final error message.
type drainFailure struct {
	node    string // name of the worker node whose drain failed
	err     error  // underlying drain error (already wrapped with the node name)
	details string // blocking-pod summary from drainNodeDiagnostics; may be empty
}
|
||||
|
||||
func (o *Orchestrator) drainWorkers(ctx context.Context, workers []string) error {
|
||||
total := len(workers)
|
||||
if total == 0 {
|
||||
@ -805,7 +833,7 @@ func (o *Orchestrator) drainWorkers(ctx context.Context, workers []string) error
|
||||
o.log.Printf("drain workers total=%d parallelism=%d", total, parallelism)
|
||||
sem := make(chan struct{}, parallelism)
|
||||
var wg sync.WaitGroup
|
||||
errCh := make(chan error, total)
|
||||
errCh := make(chan drainFailure, total)
|
||||
|
||||
for idx, node := range workers {
|
||||
idx := idx
|
||||
@ -821,7 +849,12 @@ func (o *Orchestrator) drainWorkers(ctx context.Context, workers []string) error
|
||||
o.log.Printf("warning: cordon %s failed: %v", node, err)
|
||||
}
|
||||
if _, err := o.kubectl(ctx, 3*time.Minute, "drain", node, "--ignore-daemonsets", "--delete-emptydir-data", "--grace-period=30", "--timeout=180s"); err != nil {
|
||||
errCh <- fmt.Errorf("drain %s failed: %w", node, err)
|
||||
details := o.drainNodeDiagnostics(ctx, node)
|
||||
errCh <- drainFailure{
|
||||
node: node,
|
||||
err: fmt.Errorf("drain %s failed: %w", node, err),
|
||||
details: details,
|
||||
}
|
||||
return
|
||||
}
|
||||
}()
|
||||
@ -832,10 +865,18 @@ func (o *Orchestrator) drainWorkers(ctx context.Context, workers []string) error
|
||||
if len(errCh) == 0 {
|
||||
return nil
|
||||
}
|
||||
count := len(errCh)
|
||||
failures := make([]drainFailure, 0, len(errCh))
|
||||
for failure := range errCh {
|
||||
failures = append(failures, failure)
|
||||
}
|
||||
count := len(failures)
|
||||
samples := []string{}
|
||||
for err := range errCh {
|
||||
samples = append(samples, err.Error())
|
||||
for _, failure := range failures {
|
||||
msg := failure.err.Error()
|
||||
if strings.TrimSpace(failure.details) != "" {
|
||||
msg = fmt.Sprintf("%s (details: %s)", msg, failure.details)
|
||||
}
|
||||
samples = append(samples, msg)
|
||||
if len(samples) >= 4 {
|
||||
break
|
||||
}
|
||||
@ -843,6 +884,52 @@ func (o *Orchestrator) drainWorkers(ctx context.Context, workers []string) error
|
||||
return fmt.Errorf("drain workers had %d errors (first: %s)", count, strings.Join(samples, " | "))
|
||||
}
|
||||
|
||||
func (o *Orchestrator) drainNodeDiagnostics(ctx context.Context, node string) string {
|
||||
out, err := o.kubectl(
|
||||
ctx,
|
||||
20*time.Second,
|
||||
"get",
|
||||
"pods",
|
||||
"-A",
|
||||
"--field-selector", "spec.nodeName="+node,
|
||||
"-o",
|
||||
"custom-columns=NS:.metadata.namespace,NAME:.metadata.name,PHASE:.status.phase,OWNER:.metadata.ownerReferences[0].kind",
|
||||
"--no-headers",
|
||||
)
|
||||
if err != nil {
|
||||
if strings.TrimSpace(out) == "" {
|
||||
return fmt.Sprintf("diagnostics unavailable: %v", err)
|
||||
}
|
||||
return fmt.Sprintf("diagnostics unavailable: %v (%s)", err, strings.Join(lines(out), "; "))
|
||||
}
|
||||
|
||||
blockers := make([]string, 0, 6)
|
||||
for _, line := range lines(out) {
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) < 4 {
|
||||
continue
|
||||
}
|
||||
namespace := fields[0]
|
||||
name := fields[1]
|
||||
phase := fields[2]
|
||||
owner := fields[3]
|
||||
if strings.EqualFold(owner, "DaemonSet") {
|
||||
continue
|
||||
}
|
||||
if strings.EqualFold(phase, "Succeeded") || strings.EqualFold(phase, "Failed") {
|
||||
continue
|
||||
}
|
||||
blockers = append(blockers, fmt.Sprintf("%s/%s(phase=%s owner=%s)", namespace, name, phase, owner))
|
||||
if len(blockers) >= 6 {
|
||||
break
|
||||
}
|
||||
}
|
||||
if len(blockers) == 0 {
|
||||
return "no non-daemonset blocking pods found on node"
|
||||
}
|
||||
return strings.Join(blockers, ", ")
|
||||
}
|
||||
|
||||
func (o *Orchestrator) uncordonWorkers(ctx context.Context, workers []string) error {
|
||||
for _, node := range workers {
|
||||
if _, err := o.kubectl(ctx, 20*time.Second, "uncordon", node); err != nil {
|
||||
@ -969,6 +1056,14 @@ func (o *Orchestrator) startupGuardAge() time.Duration {
|
||||
return time.Duration(seconds) * time.Second
|
||||
}
|
||||
|
||||
func (o *Orchestrator) startupShutdownCooldown() time.Duration {
|
||||
seconds := o.cfg.Startup.ShutdownCooldownSeconds
|
||||
if seconds <= 0 {
|
||||
seconds = 45
|
||||
}
|
||||
return time.Duration(seconds) * time.Second
|
||||
}
|
||||
|
||||
func (o *Orchestrator) coordinationPeers() []string {
|
||||
seen := map[string]struct{}{}
|
||||
out := make([]string, 0, len(o.cfg.Coordination.PeerHosts)+1)
|
||||
@ -1018,7 +1113,7 @@ func (o *Orchestrator) guardPeerStartupIntents(ctx context.Context) error {
|
||||
}
|
||||
o.log.Printf("warning: peer %s startup intent appears stale; allowing startup", peer)
|
||||
case state.IntentShutdownComplete:
|
||||
if intentFresh(intent, 45*time.Second) {
|
||||
if intentFresh(intent, o.startupShutdownCooldown()) {
|
||||
return fmt.Errorf("startup blocked: peer %s completed shutdown too recently (age=%s)", peer, intentAge(intent).Round(time.Second))
|
||||
}
|
||||
default:
|
||||
|
||||
@ -36,6 +36,7 @@ type Config struct {
|
||||
type Startup struct {
|
||||
APIWaitSeconds int `yaml:"api_wait_seconds"`
|
||||
APIPollSeconds int `yaml:"api_poll_seconds"`
|
||||
ShutdownCooldownSeconds int `yaml:"shutdown_cooldown_seconds"`
|
||||
RequireTimeSync bool `yaml:"require_time_sync"`
|
||||
TimeSyncWaitSeconds int `yaml:"time_sync_wait_seconds"`
|
||||
TimeSyncPollSeconds int `yaml:"time_sync_poll_seconds"`
|
||||
@ -172,6 +173,9 @@ func (c Config) Validate() error {
|
||||
if c.Startup.APIPollSeconds <= 0 {
|
||||
return fmt.Errorf("config.startup.api_poll_seconds must be > 0")
|
||||
}
|
||||
if c.Startup.ShutdownCooldownSeconds <= 0 {
|
||||
return fmt.Errorf("config.startup.shutdown_cooldown_seconds must be > 0")
|
||||
}
|
||||
if c.Startup.TimeSyncWaitSeconds <= 0 {
|
||||
return fmt.Errorf("config.startup.time_sync_wait_seconds must be > 0")
|
||||
}
|
||||
@ -305,6 +309,7 @@ func defaults() Config {
|
||||
Startup: Startup{
|
||||
APIWaitSeconds: 1200,
|
||||
APIPollSeconds: 2,
|
||||
ShutdownCooldownSeconds: 45,
|
||||
RequireTimeSync: true,
|
||||
TimeSyncWaitSeconds: 240,
|
||||
TimeSyncPollSeconds: 5,
|
||||
@ -394,6 +399,9 @@ func (c *Config) applyDefaults() {
|
||||
if c.Startup.APIPollSeconds <= 0 {
|
||||
c.Startup.APIPollSeconds = 2
|
||||
}
|
||||
if c.Startup.ShutdownCooldownSeconds <= 0 {
|
||||
c.Startup.ShutdownCooldownSeconds = 45
|
||||
}
|
||||
if c.Startup.TimeSyncWaitSeconds <= 0 {
|
||||
c.Startup.TimeSyncWaitSeconds = 240
|
||||
}
|
||||
|
||||
@ -106,11 +106,22 @@ state:
|
||||
if cfg.Startup.VaultUnsealKeyFile == "" {
|
||||
t.Fatalf("expected startup vault unseal key file default to be set")
|
||||
}
|
||||
if cfg.Startup.ShutdownCooldownSeconds <= 0 {
|
||||
t.Fatalf("expected startup shutdown cooldown default > 0, got %d", cfg.Startup.ShutdownCooldownSeconds)
|
||||
}
|
||||
if cfg.Startup.VaultUnsealBreakglassTimeout <= 0 {
|
||||
t.Fatalf("expected startup break-glass timeout default > 0, got %d", cfg.Startup.VaultUnsealBreakglassTimeout)
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateRejectsInvalidStartupShutdownCooldown(t *testing.T) {
|
||||
cfg := defaults()
|
||||
cfg.Startup.ShutdownCooldownSeconds = 0
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected validation error for invalid startup shutdown_cooldown_seconds")
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateRejectsInvalidTimeSyncMode(t *testing.T) {
|
||||
cfg := defaults()
|
||||
cfg.Startup.TimeSyncMode = "invalid"
|
||||
|
||||
@ -8,6 +8,10 @@ HECATE_CONFIG="${HECATE_CONFIG:-/etc/hecate/hecate.yaml}"
|
||||
HECATE_COORDINATOR_RELAY="${HECATE_COORDINATOR_RELAY:-}"
|
||||
LOG_DIR="${HECATE_DRILL_LOG_DIR:-/tmp/hecate-drills}"
|
||||
STARTUP_TIMEOUT_SECONDS="${HECATE_DRILL_STARTUP_TIMEOUT_SECONDS:-1800}"
|
||||
SHUTDOWN_TIMEOUT_SECONDS="${HECATE_DRILL_SHUTDOWN_TIMEOUT_SECONDS:-1800}"
|
||||
SHUTDOWN_CONFIG="${HECATE_DRILL_SHUTDOWN_CONFIG:-/tmp/hecate-drill-no-poweroff.yaml}"
|
||||
STARTUP_RETRY_DELAY_SECONDS="${HECATE_DRILL_STARTUP_RETRY_DELAY_SECONDS:-10}"
|
||||
STARTUP_RETRY_MAX="${HECATE_DRILL_STARTUP_RETRY_MAX:-12}"
|
||||
EXECUTE=0
|
||||
|
||||
usage() {
|
||||
@ -21,6 +25,7 @@ Drills:
|
||||
foundation-recovery Simulate vault/postgres/gitea outage and require layered restore.
|
||||
reconciliation-resume Simulate global Flux suspend + source-controller down and require resume.
|
||||
startup-intent-guard Assert startup is blocked when shutdown intent is active.
|
||||
controlled-cycle Run full shutdown->startup recovery cycle (uses no-poweroff config).
|
||||
|
||||
Notes:
|
||||
- Drills are intentionally disruptive and are not part of regular `make test`.
|
||||
@ -74,6 +79,25 @@ wait_ready() {
|
||||
"${KUBECTL}" -n "$ns" rollout status "${kind}/${name}" --timeout="${timeout}" >/dev/null
|
||||
}
|
||||
|
||||
# Wait for the keycloak workload in the sso namespace to become ready.
# A deployment is preferred; a statefulset is accepted as fallback. In
# plan mode (EXECUTE=0) only the intent is logged.
wait_ready_keycloak() {
  local timeout="$1"
  local kind

  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "plan: wait for sso keycloak rollout (${timeout}) [deployment preferred, fallback to statefulset]"
    return 0
  fi

  # Check kinds in preference order and wait on the first one that exists.
  for kind in deployment statefulset; do
    if "${KUBECTL}" -n sso get "${kind}" keycloak >/dev/null 2>&1; then
      wait_ready sso "${kind}" keycloak "${timeout}"
      return 0
    fi
  done

  die "keycloak workload not found in sso namespace (expected deployment/keycloak or statefulset/keycloak)"
}
|
||||
|
||||
run_hecate_startup() {
|
||||
local reason="$1"
|
||||
local cmd=(sudo "${HECATE_BIN}" startup --config "${HECATE_CONFIG}" --execute --force-flux-branch main --reason "${reason}")
|
||||
@ -93,6 +117,48 @@ run_hecate_startup() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Run a controlled shutdown on the coordinator over ssh using the drill
# (no-poweroff) config. Honors dry-run planning, the optional relay hop,
# and is bounded by SHUTDOWN_TIMEOUT_SECONDS.
run_hecate_shutdown() {
  local reason="$1"
  local cmd=(sudo "${HECATE_BIN}" shutdown --config "${SHUTDOWN_CONFIG}" --execute --reason "${reason}")

  if [[ -n "${HECATE_COORDINATOR_RELAY}" ]]; then
    if [[ "${EXECUTE}" -eq 0 ]]; then
      log "plan: ssh ${HECATE_COORDINATOR_HOST} ${HECATE_COORDINATOR_RELAY} '${cmd[*]}'"
      return 0
    fi
    # Relay is intentionally unquoted so it may expand to multiple words.
    # shellcheck disable=SC2086
    timeout "${SHUTDOWN_TIMEOUT_SECONDS}" ssh "${HECATE_COORDINATOR_HOST}" ${HECATE_COORDINATOR_RELAY} "${cmd[@]}"
    return
  fi

  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "plan: ssh ${HECATE_COORDINATOR_HOST} '${cmd[*]}'"
    return 0
  fi
  timeout "${SHUTDOWN_TIMEOUT_SECONDS}" ssh "${HECATE_COORDINATOR_HOST}" "${cmd[@]}"
}
|
||||
|
||||
# Run `hecate startup` on the coordinator, retrying up to
# STARTUP_RETRY_MAX times with STARTUP_RETRY_DELAY_SECONDS between
# attempts. Dies when all attempts fail. In plan mode only logs intent.
run_hecate_startup_with_retry() {
  local reason="$1"
  # Single-quote the config path and reason so the remote shell does not
  # word-split them (the non-retry path builds a properly quoted array;
  # this string form previously left both values unquoted).
  local startup_cmd="sudo ${HECATE_BIN} startup --config '${HECATE_CONFIG}' --execute --force-flux-branch main --reason '${reason}'"

  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "plan: startup retry loop with max=${STARTUP_RETRY_MAX} delay=${STARTUP_RETRY_DELAY_SECONDS}s"
    return 0
  fi

  local attempt
  for attempt in $(seq 1 "${STARTUP_RETRY_MAX}"); do
    log "startup attempt ${attempt}/${STARTUP_RETRY_MAX}"
    if run_coordinator_bash "${startup_cmd}"; then
      return 0
    fi
    # Don't sleep after the final failed attempt.
    if [[ "${attempt}" -lt "${STARTUP_RETRY_MAX}" ]]; then
      log "startup attempt ${attempt} failed; retrying in ${STARTUP_RETRY_DELAY_SECONDS}s"
      sleep "${STARTUP_RETRY_DELAY_SECONDS}"
    fi
  done
  die "startup failed after ${STARTUP_RETRY_MAX} attempts"
}
|
||||
|
||||
run_coordinator_bash() {
|
||||
local script="$1"
|
||||
if [[ -n "${HECATE_COORDINATOR_RELAY}" ]]; then
|
||||
@ -329,6 +395,35 @@ fi
|
||||
log "pass: startup-intent-guard"
|
||||
}
|
||||
|
||||
# Full controlled shutdown -> startup drill using the no-poweroff config,
# then verify the critical stack is ready again. Order of readiness checks
# matches the recovery layering (flux -> vault/postgres/gitea -> sso -> metis).
run_drill_controlled_cycle() {
  CURRENT_RESOURCES=()
  ROLLBACK_FLUX_SUSPEND=0

  if [[ "${EXECUTE}" -eq 0 ]]; then
    log "plan: verify shutdown drill config exists on coordinator (${SHUTDOWN_CONFIG})"
  else
    run_coordinator_bash "[ -s '${SHUTDOWN_CONFIG}' ]" || die "shutdown drill config missing on coordinator: ${SHUTDOWN_CONFIG}"
  fi

  log "running controlled shutdown cycle (poweroff disabled config)"
  run_hecate_shutdown "drill-controlled-cycle-shutdown"

  log "running startup recovery cycle"
  run_hecate_startup_with_retry "drill-controlled-cycle-startup"

  log "verifying critical stack readiness after cycle"
  local spec
  for spec in \
    "flux-system deployment source-controller 240s" \
    "flux-system deployment kustomize-controller 240s" \
    "flux-system deployment helm-controller 240s" \
    "flux-system deployment notification-controller 240s" \
    "vault statefulset vault 420s" \
    "postgres statefulset postgres 420s" \
    "gitea deployment gitea 300s"; do
    # Intentional word-splitting: each spec is "ns kind name timeout".
    # shellcheck disable=SC2086
    wait_ready ${spec}
  done
  wait_ready_keycloak 420s
  wait_ready maintenance deployment metis 300s
  log "pass: controlled-cycle"
}
|
||||
|
||||
main() {
|
||||
need_cmd "${KUBECTL}"
|
||||
need_cmd ssh
|
||||
@ -375,6 +470,9 @@ main() {
|
||||
startup-intent-guard)
|
||||
run_drill_startup_intent_guard
|
||||
;;
|
||||
controlled-cycle)
|
||||
run_drill_controlled_cycle
|
||||
;;
|
||||
*)
|
||||
die "unknown drill: ${drill}"
|
||||
;;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user