startup: uncordon early and include harbor prereqs

This commit is contained in:
Brad Stein 2026-04-04 14:44:14 -03:00
parent 7ce729d810
commit ac2fbf89cb
2 changed files with 19 additions and 5 deletions

View File

@ -49,6 +49,8 @@ var criticalStartupWorkloads = []startupWorkload{
{Namespace: "flux-system", Kind: "deployment", Name: "kustomize-controller"},
{Namespace: "flux-system", Kind: "deployment", Name: "helm-controller"},
{Namespace: "flux-system", Kind: "deployment", Name: "notification-controller"},
{Namespace: "harbor", Kind: "statefulset", Name: "harbor-redis"},
{Namespace: "harbor", Kind: "deployment", Name: "harbor-registry"},
{Namespace: "vault", Kind: "statefulset", Name: "vault"},
{Namespace: "postgres", Kind: "statefulset", Name: "postgres"},
{Namespace: "gitea", Kind: "deployment", Name: "gitea"},
@ -113,6 +115,7 @@ func (o *Orchestrator) Startup(ctx context.Context, opts StartupOptions) (err er
}
o.log.Printf("startup workers=%s", strings.Join(workers, ","))
o.startWorkers(ctx, workers)
o.bestEffort("uncordon workers", func() error { return o.uncordonWorkers(ctx, workers) })
if opts.ForceFluxBranch != "" {
patch := fmt.Sprintf(`{"spec":{"ref":{"branch":"%s"}}}`, opts.ForceFluxBranch)
@ -350,7 +353,9 @@ func (o *Orchestrator) scaleDownApps(ctx context.Context) error {
}
func (o *Orchestrator) drainWorkers(ctx context.Context, workers []string) error {
for _, node := range workers {
total := len(workers)
for idx, node := range workers {
o.log.Printf("drain worker %d/%d: %s", idx+1, total, node)
if _, err := o.kubectl(ctx, 20*time.Second, "cordon", node); err != nil {
o.log.Printf("warning: cordon %s failed: %v", node, err)
}
@ -361,6 +366,15 @@ func (o *Orchestrator) drainWorkers(ctx context.Context, workers []string) error
return nil
}
// uncordonWorkers issues "kubectl uncordon" for each node in workers, giving
// every call a 20-second timeout. A failing node is reported through the
// orchestrator's logger as a warning and the remaining nodes are still
// attempted. The returned error is always nil.
func (o *Orchestrator) uncordonWorkers(ctx context.Context, workers []string) error {
	for i := range workers {
		name := workers[i]
		_, err := o.kubectl(ctx, 20*time.Second, "uncordon", name)
		if err == nil {
			continue
		}
		// Log and keep going so the rest of the list is still processed.
		o.log.Printf("warning: uncordon %s failed: %v", name, err)
	}
	return nil
}
func (o *Orchestrator) stopWorkers(ctx context.Context, workers []string) {
for _, n := range workers {
if !o.sshManaged(n) {

View File

@ -192,7 +192,7 @@ func defaults() Config {
APIPollSeconds: 2,
},
Shutdown: Shutdown{
DefaultBudgetSeconds: 300,
DefaultBudgetSeconds: 1380,
PoweroffEnabled: true,
PoweroffDelaySeconds: 25,
PoweroffLocalHost: true,
@ -201,7 +201,7 @@ func defaults() Config {
Enabled: true,
Provider: "nut",
PollSeconds: 5,
RuntimeSafetyFactor: 1.10,
RuntimeSafetyFactor: 1.25,
DebounceCount: 3,
TelemetryTimeoutSeconds: 90,
},
@ -245,7 +245,7 @@ func (c *Config) applyDefaults() {
c.SSHPort = 2277
}
if c.Shutdown.DefaultBudgetSeconds <= 0 {
c.Shutdown.DefaultBudgetSeconds = 300
c.Shutdown.DefaultBudgetSeconds = 1380
}
if c.Shutdown.PoweroffDelaySeconds <= 0 {
c.Shutdown.PoweroffDelaySeconds = 25
@ -254,7 +254,7 @@ func (c *Config) applyDefaults() {
c.UPS.PollSeconds = 5
}
if c.UPS.RuntimeSafetyFactor <= 0 {
c.UPS.RuntimeSafetyFactor = 1.10
c.UPS.RuntimeSafetyFactor = 1.25
}
if c.UPS.DebounceCount <= 0 {
c.UPS.DebounceCount = 3