recovery: skip runtime-wedged workers during startup
This commit is contained in:
parent
e22a9150e9
commit
b3076a23a9
@ -215,6 +215,27 @@ func (o *Orchestrator) Startup(ctx context.Context, opts StartupOptions) (err er
|
|||||||
}
|
}
|
||||||
o.log.Printf("startup workers=%s", strings.Join(workers, ","))
|
o.log.Printf("startup workers=%s", strings.Join(workers, ","))
|
||||||
o.setStartupPhase("worker-start", "starting and uncordoning worker nodes")
|
o.setStartupPhase("worker-start", "starting and uncordoning worker nodes")
|
||||||
|
runtimeWedgedNodes := []string{}
|
||||||
|
o.bestEffort("quarantine container-runtime-wedged nodes", func() error {
|
||||||
|
nodes, err := o.quarantineContainerRuntimeWedgeNodesFromCluster(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
runtimeWedgedNodes = nodes
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if len(runtimeWedgedNodes) > 0 {
|
||||||
|
ignored := makeStringSet(o.cfg.Startup.IgnoreUnavailableNodes)
|
||||||
|
for _, node := range runtimeWedgedNodes {
|
||||||
|
if _, ok := ignored[node]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
o.cfg.Startup.IgnoreUnavailableNodes = append(o.cfg.Startup.IgnoreUnavailableNodes, node)
|
||||||
|
ignored[node] = struct{}{}
|
||||||
|
}
|
||||||
|
workers = filterIgnoredNodes(workers, o.cfg.Startup.IgnoreUnavailableNodes)
|
||||||
|
o.log.Printf("startup workers after container-runtime quarantine=%s", strings.Join(workers, ","))
|
||||||
|
}
|
||||||
if o.cfg.Startup.ReconcileAccessOnBoot {
|
if o.cfg.Startup.ReconcileAccessOnBoot {
|
||||||
o.bestEffort("reconcile worker access", func() error { return o.reconcileNodeAccess(ctx, workers) })
|
o.bestEffort("reconcile worker access", func() error { return o.reconcileNodeAccess(ctx, workers) })
|
||||||
}
|
}
|
||||||
|
|||||||
@ -264,6 +264,33 @@ func (o *Orchestrator) recycleStuckControllerPods(ctx context.Context) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// quarantineContainerRuntimeWedgeNodesFromCluster runs one orchestration or CLI step.
|
||||||
|
// Signature: (o *Orchestrator) quarantineContainerRuntimeWedgeNodesFromCluster(ctx context.Context) ([]string, error).
|
||||||
|
// Why: worker startup needs this scan before SSH-heavy steps so a Ready but
|
||||||
|
// container-runtime-wedged node cannot stall the whole recovery run.
|
||||||
|
func (o *Orchestrator) quarantineContainerRuntimeWedgeNodesFromCluster(ctx context.Context) ([]string, error) {
|
||||||
|
out, err := o.kubectl(ctx, 30*time.Second, "get", "pods", "-A", "-o", "json")
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("query pods for container runtime wedge scan: %w", err)
|
||||||
|
}
|
||||||
|
var list podList
|
||||||
|
if err := json.Unmarshal([]byte(out), &list); err != nil {
|
||||||
|
return nil, fmt.Errorf("decode pods for container runtime wedge scan: %w", err)
|
||||||
|
}
|
||||||
|
grace := time.Duration(o.cfg.Startup.StuckPodGraceSeconds) * time.Second
|
||||||
|
if grace <= 0 {
|
||||||
|
grace = 180 * time.Second
|
||||||
|
}
|
||||||
|
reasons, err := o.containerRuntimeWedgePodReasons(ctx, list, grace)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
ignoredNamespaces := makeStringSet(o.cfg.Startup.IgnoreWorkloadNamespaces)
|
||||||
|
ignoredNodes := makeStringSet(o.cfg.Startup.IgnoreUnavailableNodes)
|
||||||
|
ignoreRules := parseWorkloadIgnoreRules(o.cfg.Startup.IgnoreWorkloads)
|
||||||
|
return o.quarantineContainerRuntimeWedgeNodes(ctx, list, reasons, grace, ignoredNamespaces, ignoredNodes, ignoreRules), nil
|
||||||
|
}
|
||||||
|
|
||||||
// containerRuntimeWedgePodReasons runs one orchestration or CLI step.
|
// containerRuntimeWedgePodReasons runs one orchestration or CLI step.
|
||||||
// Signature: (o *Orchestrator) containerRuntimeWedgePodReasons(ctx context.Context, pods podList, grace time.Duration) (map[string]string, error).
|
// Signature: (o *Orchestrator) containerRuntimeWedgePodReasons(ctx context.Context, pods podList, grace time.Duration) (map[string]string, error).
|
||||||
// Why: after a power event, a node-local container runtime can reserve names and
|
// Why: after a power event, a node-local container runtime can reserve names and
|
||||||
@ -342,13 +369,13 @@ func (o *Orchestrator) containerRuntimeWedgePodReasons(ctx context.Context, pods
|
|||||||
}
|
}
|
||||||
|
|
||||||
// quarantineContainerRuntimeWedgeNodes runs one orchestration or CLI step.
|
// quarantineContainerRuntimeWedgeNodes runs one orchestration or CLI step.
|
||||||
// Signature: (o *Orchestrator) quarantineContainerRuntimeWedgeNodes(ctx context.Context, pods podList, reasons map[string]string, grace time.Duration, ignoredNamespaces map[string]struct{}, ignoredNodes map[string]struct{}, ignoreRules []workloadIgnoreRule).
|
// Signature: (o *Orchestrator) quarantineContainerRuntimeWedgeNodes(ctx context.Context, pods podList, reasons map[string]string, grace time.Duration, ignoredNamespaces map[string]struct{}, ignoredNodes map[string]struct{}, ignoreRules []workloadIgnoreRule) []string.
|
||||||
// Why: cordoning a proven-bad start node is scheduler-only; it prevents fresh
|
// Why: cordoning a proven-bad start node is scheduler-only; it prevents fresh
|
||||||
// non-storage pods from being trapped while leaving running workloads and
|
// non-storage pods from being trapped while leaving running workloads and
|
||||||
// Longhorn data-plane state alone.
|
// Longhorn data-plane state alone.
|
||||||
func (o *Orchestrator) quarantineContainerRuntimeWedgeNodes(ctx context.Context, pods podList, reasons map[string]string, grace time.Duration, ignoredNamespaces map[string]struct{}, ignoredNodes map[string]struct{}, ignoreRules []workloadIgnoreRule) {
|
func (o *Orchestrator) quarantineContainerRuntimeWedgeNodes(ctx context.Context, pods podList, reasons map[string]string, grace time.Duration, ignoredNamespaces map[string]struct{}, ignoredNodes map[string]struct{}, ignoreRules []workloadIgnoreRule) []string {
|
||||||
if len(reasons) == 0 {
|
if len(reasons) == 0 {
|
||||||
return
|
return nil
|
||||||
}
|
}
|
||||||
const minRuntimeWedgePodsPerNode = 2
|
const minRuntimeWedgePodsPerNode = 2
|
||||||
byNode := map[string][]string{}
|
byNode := map[string][]string{}
|
||||||
@ -398,10 +425,18 @@ func (o *Orchestrator) quarantineContainerRuntimeWedgeNodes(ctx context.Context,
|
|||||||
quarantined = append(quarantined, fmt.Sprintf("%s pods=%d", node, len(keys)))
|
quarantined = append(quarantined, fmt.Sprintf("%s pods=%d", node, len(keys)))
|
||||||
}
|
}
|
||||||
if len(quarantined) == 0 {
|
if len(quarantined) == 0 {
|
||||||
return
|
return nil
|
||||||
}
|
}
|
||||||
sort.Strings(quarantined)
|
sort.Strings(quarantined)
|
||||||
o.noteStartupAutoHeal(fmt.Sprintf("cordoned container-runtime-wedged node(s): %s", joinLimited(quarantined, 8)))
|
o.noteStartupAutoHeal(fmt.Sprintf("cordoned container-runtime-wedged node(s): %s", joinLimited(quarantined, 8)))
|
||||||
|
nodes := make([]string, 0, len(quarantined))
|
||||||
|
for _, item := range quarantined {
|
||||||
|
fields := strings.Fields(item)
|
||||||
|
if len(fields) > 0 {
|
||||||
|
nodes = append(nodes, fields[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nodes
|
||||||
}
|
}
|
||||||
|
|
||||||
// staleControllerPodReasons runs one orchestration or CLI step.
|
// staleControllerPodReasons runs one orchestration or CLI step.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user