hecate: wait for flux source recovery before local bootstrap fallback

This commit is contained in:
Brad Stein 2026-04-04 06:11:46 -03:00
parent 014039eea2
commit 6b88fb305f

View File

@ -118,7 +118,16 @@ func (o *Orchestrator) Startup(ctx context.Context, opts StartupOptions) (err er
} }
if !opts.SkipLocalBootstrap && needsLocalBootstrap { if !opts.SkipLocalBootstrap && needsLocalBootstrap {
o.log.Printf("startup bootstrap required: %s", strings.Join(bootstrapReasons, "; ")) if ready, err := o.waitForFluxSourceReady(ctx, 2*time.Minute); err != nil {
o.log.Printf("warning: flux source readiness wait failed before local bootstrap: %v", err)
} else if ready {
o.log.Printf("flux source became ready after targeted recovery; skipping local bootstrap")
needsLocalBootstrap = false
}
}
if !opts.SkipLocalBootstrap && needsLocalBootstrap {
o.log.Printf("startup bootstrap required after wait: %s", strings.Join(bootstrapReasons, "; "))
if err := o.bootstrapLocal(ctx); err != nil { if err := o.bootstrapLocal(ctx); err != nil {
return err return err
} }
@ -388,16 +397,46 @@ func (o *Orchestrator) reportFluxSource(ctx context.Context, forceBranch string)
} }
func (o *Orchestrator) bootstrapLocal(ctx context.Context) error { func (o *Orchestrator) bootstrapLocal(ctx context.Context) error {
failures := 0
for _, rel := range o.cfg.LocalBootstrapPaths { for _, rel := range o.cfg.LocalBootstrapPaths {
full := filepath.Join(o.cfg.IACRepoPath, rel) full := filepath.Join(o.cfg.IACRepoPath, rel)
o.log.Printf("local bootstrap apply -k %s", full) o.log.Printf("local bootstrap apply -k %s", full)
if _, err := o.kubectl(ctx, 2*time.Minute, "apply", "-k", full); err != nil { if _, err := o.kubectl(ctx, 2*time.Minute, "apply", "-k", full); err != nil {
return fmt.Errorf("local bootstrap apply failed at %s: %w", full, err) failures++
o.log.Printf("warning: local bootstrap apply failed at %s: %v", full, err)
continue
} }
} }
if failures == len(o.cfg.LocalBootstrapPaths) {
return fmt.Errorf("local bootstrap apply failed for every configured path (%d total)", failures)
}
return nil return nil
} }
// waitForFluxSourceReady polls o.fluxSourceReady until it reports ready, the
// given window elapses, or ctx is cancelled.
//
// It returns:
//   - (true, nil) as soon as a poll reports ready, or immediately when
//     o.runner.DryRun is set (no polling happens in that case);
//   - (false, nil) when the window elapses without a ready poll;
//   - (false, err) when a poll returns an error or ctx is done while waiting.
//
// At least one poll is always performed, even for a non-positive window.
func (o *Orchestrator) waitForFluxSourceReady(ctx context.Context, window time.Duration) (bool, error) {
	if o.runner.DryRun {
		return true, nil
	}
	const pollInterval = 5 * time.Second
	deadline := time.Now().Add(window)
	for {
		ready, err := o.fluxSourceReady(ctx)
		if err != nil {
			return false, err
		}
		if ready {
			return true, nil
		}
		remaining := time.Until(deadline)
		if remaining <= 0 {
			return false, nil
		}
		// Never sleep past the deadline: cap the pause at the time remaining so
		// the overall wait honours the requested window, with one final poll
		// right at the deadline.
		pause := pollInterval
		if remaining < pause {
			pause = remaining
		}
		timer := time.NewTimer(pause)
		select {
		case <-ctx.Done():
			// Release the timer promptly instead of leaving it pending.
			timer.Stop()
			return false, ctx.Err()
		case <-timer.C:
		}
	}
}
func (o *Orchestrator) resumeFluxAndReconcile(ctx context.Context) error { func (o *Orchestrator) resumeFluxAndReconcile(ctx context.Context) error {
if err := o.patchFluxSuspendAll(ctx, false); err != nil { if err := o.patchFluxSuspendAll(ctx, false); err != nil {
return err return err