diff --git a/internal/cluster/orchestrator_unit_additional_test.go b/internal/cluster/orchestrator_unit_additional_test.go
index 1aafade..f03a562 100644
--- a/internal/cluster/orchestrator_unit_additional_test.go
+++ b/internal/cluster/orchestrator_unit_additional_test.go
@@ -357,6 +357,71 @@ func TestRecycleStuckControllerPodsHandlesStalePodsOnReadyNodes(t *testing.T) {
 	}
 }
 
+// TestRecycleStuckControllerPodsCordonsContainerRuntimeWedgeNode checks that only the node with repeated runtime start failures is cordoned while every wedged pod is recycled.
+// Signature: TestRecycleStuckControllerPodsCordonsContainerRuntimeWedgeNode(t *testing.T).
+// Why: a Ready node with a wedged container runtime can trap replacement pods
+// indefinitely; startup should cordon that scheduler target without draining it
+// or touching Longhorn data-plane objects.
+func TestRecycleStuckControllerPodsCordonsContainerRuntimeWedgeNode(t *testing.T) {
+	old := time.Now().Add(-10 * time.Minute).UTC().Format(time.RFC3339) // pod age well past the 180s grace configured below
+	lastSeen := time.Now().UTC().Format(time.RFC3339)                   // events are observed after the pods were created
+	pods := `{"items":[` + // three Pending pods on titan-18 and one on titan-19, all waiting with CreateContainerError
+		`{"metadata":{"namespace":"logging","name":"oauth2-proxy-bad","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"oauth2-proxy"}]},"spec":{"nodeName":"titan-18","volumes":[{"name":"scratch"}]},"status":{"phase":"Pending","containerStatuses":[{"name":"oauth2-proxy","state":{"waiting":{"reason":"CreateContainerError"}}}]}},` +
+		`{"metadata":{"namespace":"monitoring","name":"suite-probe-bad","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"Job","name":"suite-probe"}]},"spec":{"nodeName":"titan-18","volumes":[{"name":"scratch"}]},"status":{"phase":"Pending","containerStatuses":[{"name":"probe","state":{"waiting":{"reason":"CreateContainerError"}}}]}},` +
+		`{"metadata":{"namespace":"sso","name":"secret-ensure-bad","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"Job","name":"secret-ensure"}]},"spec":{"nodeName":"titan-18","volumes":[{"name":"scratch"}]},"status":{"phase":"Pending","initContainerStatuses":[{"name":"init","state":{"waiting":{"reason":"CreateContainerError"}}}]}},` +
+		`{"metadata":{"namespace":"finance","name":"single-node-bad","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"single"}]},"spec":{"nodeName":"titan-19","volumes":[{"name":"scratch"}]},"status":{"phase":"Pending","containerStatuses":[{"name":"app","state":{"waiting":{"reason":"CreateContainerError"}}}]}}]}`
+	events := `{"items":[` + // one Warning/Failed event per pod carrying a runtime-wedge message
+		`{"metadata":{"namespace":"logging","creationTimestamp":"` + lastSeen + `"},"involvedObject":{"kind":"Pod","namespace":"logging","name":"oauth2-proxy-bad"},"type":"Warning","reason":"Failed","message":"spec.containers{oauth2-proxy}: Error: failed to reserve container name oauth2-proxy_logging","lastTimestamp":"` + lastSeen + `"},` +
+		`{"metadata":{"namespace":"monitoring","creationTimestamp":"` + lastSeen + `"},"involvedObject":{"kind":"Pod","namespace":"monitoring","name":"suite-probe-bad"},"type":"Warning","reason":"Failed","message":"spec.containers{probe}: Error: context deadline exceeded","lastTimestamp":"` + lastSeen + `"},` +
+		`{"metadata":{"namespace":"sso","creationTimestamp":"` + lastSeen + `"},"involvedObject":{"kind":"Pod","namespace":"sso","name":"secret-ensure-bad"},"type":"Warning","reason":"Failed","message":"spec.initContainers{init}: Error: failed to reserve container name init_sso","lastTimestamp":"` + lastSeen + `"},` +
+		`{"metadata":{"namespace":"finance","creationTimestamp":"` + lastSeen + `"},"involvedObject":{"kind":"Pod","namespace":"finance","name":"single-node-bad"},"type":"Warning","reason":"Failed","message":"spec.containers{app}: Error: failed to reserve container name app_finance","lastTimestamp":"` + lastSeen + `"}]}`
+
+	cordoned := []string{} // node names captured from matched cordon commands
+	deleted := []string{}  // pod names captured from matched delete commands
+	orch := buildOrchestratorWithStubs(t, config.Config{
+		Startup: config.Startup{StuckPodGraceSeconds: 180},
+	}, []commandStub{
+		{match: matchContains("kubectl", "get", "pods", "-A", "-o", "json"), out: pods},
+		{match: matchContains("kubectl", "-n", "longhorn-system", "get", "nodes.longhorn.io"), out: ""},
+		{match: matchContains("kubectl", "get", "events", "-A", "-o", "json"), out: events},
+		{match: matchContains("kubectl", "get", "nodes", "-o", "json"), out: `{"items":[{"metadata":{"name":"titan-18"},"status":{"conditions":[{"type":"Ready","status":"True"}]}},{"metadata":{"name":"titan-19"},"status":{"conditions":[{"type":"Ready","status":"True"}]}}]}`},
+		{
+			match: func(name string, args []string) bool {
+				if !matchContains("kubectl", "cordon")(name, args) {
+					return false
+				}
+				cordoned = append(cordoned, args[len(args)-1]) // the last argument is recorded as the cordoned node
+				return true
+			},
+		},
+		{
+			match: func(name string, args []string) bool {
+				if !matchContains("kubectl", "delete", "pod", "--wait=false")(name, args) {
+					return false
+				}
+				joined := strings.Join(args, " ")
+				if strings.Contains(joined, "--force") {
+					t.Fatalf("container-runtime wedge recycle must not force-delete fresh pods")
+				}
+				if len(args) >= 5 { // assumes the pod name is the fifth argument of the delete command — TODO confirm against the orchestrator's argument layout
+					deleted = append(deleted, args[4])
+				}
+				return true
+			},
+		},
+	})
+
+	if err := orch.recycleStuckControllerPods(context.Background()); err != nil {
+		t.Fatalf("recycleStuckControllerPods failed: %v", err)
+	}
+	if strings.Join(cordoned, ",") != "titan-18" { // titan-19 hosts a single wedged pod and must stay schedulable
+		t.Fatalf("expected only titan-18 to be cordoned, got %#v", cordoned)
+	}
+	if strings.Join(deleted, ",") != "oauth2-proxy-bad,suite-probe-bad,secret-ensure-bad,single-node-bad" { // all four pods are recycled, including the one on titan-19
+		t.Fatalf("expected runtime-wedged pods to be recycled, got %#v", deleted)
+	}
+}
+
 // TestEffectiveWorkersFiltersIgnoredUnavailableNodes runs one orchestration or CLI step.
 // Signature: TestEffectiveWorkersFiltersIgnoredUnavailableNodes(t *testing.T).
 // Why: ignored unavailable nodes should be excluded before startup tries SSH,
diff --git a/internal/cluster/orchestrator_workload_convergence.go b/internal/cluster/orchestrator_workload_convergence.go
index d6943f8..3d87b47 100644
--- a/internal/cluster/orchestrator_workload_convergence.go
+++ b/internal/cluster/orchestrator_workload_convergence.go
@@ -188,6 +188,13 @@ func (o *Orchestrator) recycleStuckControllerPods(ctx context.Context) error {
 	} else {
 		stalePhaseReasons = reasons
 	}
+	containerRuntimeWedgeReasons := map[string]string{}
+	if reasons, scanErr := o.containerRuntimeWedgePodReasons(ctx, list, grace); scanErr != nil {
+		o.log.Printf("warning: container runtime wedge scan failed: %v", scanErr)
+	} else {
+		containerRuntimeWedgeReasons = reasons
+		o.quarantineContainerRuntimeWedgeNodes(ctx, list, reasons, grace, ignoredNamespaces, ignoredNodes, ignoreRules)
+	}
 	recycled := []string{}
 	for _, pod := range list.Items {
 		ns := strings.TrimSpace(pod.Metadata.Namespace)
@@ -224,6 +231,9 @@ func (o *Orchestrator) recycleStuckControllerPods(ctx context.Context) error {
 		if reason == "" {
 			reason = stalePhaseReasons[ns+"/"+name]
 		}
+		if runtimeReason := containerRuntimeWedgeReasons[ns+"/"+name]; runtimeReason != "" {
+			reason = runtimeReason
+		}
 		if reason == "" && staleControllerPodForceDeleteSafe(pod, grace) {
 			reason = "StaleDeletingControllerPod"
 		}
@@ -254,6 +264,146 @@ func (o *Orchestrator) recycleStuckControllerPods(ctx context.Context) error {
 	return nil
 }
 
+// containerRuntimeWedgePodReasons maps "namespace/name" of each runtime-wedged pod to "ContainerRuntimeWedge:<node>"; it errors only when events cannot be queried or decoded.
+// Signature: (o *Orchestrator) containerRuntimeWedgePodReasons(ctx context.Context, pods podList, grace time.Duration) (map[string]string, error).
+// Why: after a power event, a node-local container runtime can reserve names and
+// fail every new container start while Kubernetes still reports the node Ready.
+// Detecting the runtime symptom lets startup move work elsewhere without
+// restarting the node or touching storage objects.
+func (o *Orchestrator) containerRuntimeWedgePodReasons(ctx context.Context, pods podList, grace time.Duration) (map[string]string, error) {
+	runtimeReasons := map[string]struct{}{
+		"CreateContainerError": {},
+		"RunContainerError":    {},
+	}
+	podsByKey := map[string]podResource{} // candidates: scheduled, Pending, controller-owned, past grace, waiting on a runtime reason
+	for _, pod := range pods.Items {
+		ns := strings.TrimSpace(pod.Metadata.Namespace)
+		name := strings.TrimSpace(pod.Metadata.Name)
+		node := strings.TrimSpace(pod.Spec.NodeName)
+		if ns == "" || name == "" || node == "" {
+			continue
+		}
+		if !strings.EqualFold(strings.TrimSpace(pod.Status.Phase), "Pending") {
+			continue
+		}
+		if !podControllerOwned(pod) {
+			continue
+		}
+		if !pod.Metadata.CreationTimestamp.IsZero() && time.Since(pod.Metadata.CreationTimestamp) < grace {
+			continue
+		}
+		if stuckContainerReason(pod, runtimeReasons) == "" {
+			continue
+		}
+		podsByKey[ns+"/"+name] = pod
+	}
+	if len(podsByKey) == 0 { // nothing to correlate: skip the cluster-wide events query entirely
+		return map[string]string{}, nil
+	}
+
+	eventsOut, err := o.kubectl(ctx, 30*time.Second, "get", "events", "-A", "-o", "json")
+	if err != nil {
+		return nil, fmt.Errorf("query events for container runtime wedge scan: %w", err)
+	}
+	var events eventList
+	if err := json.Unmarshal([]byte(eventsOut), &events); err != nil {
+		return nil, fmt.Errorf("decode events for container runtime wedge scan: %w", err)
+	}
+
+	reasons := map[string]string{}
+	for _, event := range events.Items {
+		if !strings.EqualFold(strings.TrimSpace(event.Type), "Warning") {
+			continue
+		}
+		if strings.TrimSpace(event.Reason) != "Failed" {
+			continue
+		}
+		if !strings.EqualFold(strings.TrimSpace(event.InvolvedObject.Kind), "Pod") {
+			continue
+		}
+		key := strings.TrimSpace(event.InvolvedObject.Namespace) + "/" + strings.TrimSpace(event.InvolvedObject.Name)
+		pod, ok := podsByKey[key]
+		if !ok {
+			continue
+		}
+		lastSeen := eventLastObservedAt(event)
+		if !lastSeen.IsZero() && !pod.Metadata.CreationTimestamp.IsZero() && lastSeen.Before(pod.Metadata.CreationTimestamp) {
+			continue // the event was last observed before this pod was created, so it is not evidence about it
+		}
+		message := strings.ToLower(strings.TrimSpace(event.Message)) // lower-cased so the substring checks below are case-insensitive
+		if !strings.Contains(message, "failed to reserve container name") &&
+			!strings.Contains(message, " is reserved for ") &&
+			!strings.Contains(message, "context deadline exceeded") {
+			continue
+		}
+		reasons[key] = "ContainerRuntimeWedge:" + strings.TrimSpace(pod.Spec.NodeName)
+	}
+	return reasons, nil
+}
+
+// quarantineContainerRuntimeWedgeNodes cordons every node that hosts at least two eligible runtime-wedged pods and records an auto-heal note; cordon failures are logged, not returned.
+// Signature: (o *Orchestrator) quarantineContainerRuntimeWedgeNodes(ctx context.Context, pods podList, reasons map[string]string, grace time.Duration, ignoredNamespaces map[string]struct{}, ignoredNodes map[string]struct{}, ignoreRules []workloadIgnoreRule).
+// Why: cordoning a proven-bad start node is scheduler-only; it prevents fresh
+// non-storage pods from being trapped while leaving running workloads and
+// Longhorn data-plane state alone.
+func (o *Orchestrator) quarantineContainerRuntimeWedgeNodes(ctx context.Context, pods podList, reasons map[string]string, grace time.Duration, ignoredNamespaces map[string]struct{}, ignoredNodes map[string]struct{}, ignoreRules []workloadIgnoreRule) {
+	if len(reasons) == 0 {
+		return
+	}
+	const minRuntimeWedgePodsPerNode = 2 // a node is cordoned only when at least this many eligible pods are wedged on it
+	byNode := map[string][]string{}      // node name -> "namespace/name" keys of eligible wedged pods
+	for _, pod := range pods.Items {
+		ns := strings.TrimSpace(pod.Metadata.Namespace)
+		name := strings.TrimSpace(pod.Metadata.Name)
+		node := strings.TrimSpace(pod.Spec.NodeName)
+		if ns == "" || name == "" || node == "" {
+			continue
+		}
+		key := ns + "/" + name
+		if reasons[key] == "" { // only pods present in reasons count toward the threshold
+			continue
+		}
+		if _, ok := ignoredNamespaces[ns]; ok {
+			continue
+		}
+		if workloadIgnored(ignoreRules, ns, "", name) {
+			continue
+		}
+		if podTargetsIgnoredNode(pod, ignoredNodes) {
+			continue
+		}
+		if !podControllerOwned(pod) {
+			continue
+		}
+		if !pod.Metadata.CreationTimestamp.IsZero() && time.Since(pod.Metadata.CreationTimestamp) < grace {
+			continue
+		}
+		if podUsesPersistentVolumeClaim(pod) { // PVC-backed pods are excluded from the per-node count
+			continue
+		}
+		byNode[node] = append(byNode[node], key)
+	}
+
+	quarantined := []string{}
+	for node, keys := range byNode { // map iteration order is unspecified, so cordon order can differ between runs
+		if len(keys) < minRuntimeWedgePodsPerNode {
+			continue
+		}
+		sort.Strings(keys) // stable pod order in the log line below
+		if _, err := o.kubectl(ctx, 30*time.Second, "cordon", node); err != nil {
+			o.log.Printf("warning: cordon container-runtime-wedged node %s failed: %v", node, err)
+			continue // best effort: keep trying the remaining nodes
+		}
+		o.log.Printf("warning: cordoned node %s after repeated container runtime start failures: %s", node, joinLimited(keys, 8))
+		quarantined = append(quarantined, fmt.Sprintf("%s pods=%d", node, len(keys)))
+	}
+	if len(quarantined) == 0 {
+		return
+	}
+	sort.Strings(quarantined) // sorted so the auto-heal summary does not depend on map iteration order
+	o.noteStartupAutoHeal(fmt.Sprintf("cordoned container-runtime-wedged node(s): %s", joinLimited(quarantined, 8)))
+}
+
 // staleControllerPodReasons runs one orchestration or CLI step.
 // Signature: (o *Orchestrator) staleControllerPodReasons(ctx context.Context, pods podList, grace time.Duration) (map[string]string, error).
 // Why: after node or kubelet recovery, controller-owned pods can stay in
@@ -573,7 +723,7 @@ func (o *Orchestrator) longhornUnreadyNodes(ctx context.Context) (map[string]str
 func podControllerOwned(p podResource) bool {
 	for _, owner := range p.Metadata.OwnerReferences {
 		switch strings.TrimSpace(owner.Kind) {
-		case "ReplicaSet", "StatefulSet", "DaemonSet":
+		case "ReplicaSet", "StatefulSet", "DaemonSet", "Job":
 			return true
 		}
 	}