hecate: retry ssh with known_hosts repair on silent 255
This commit is contained in:
parent
75ad091898
commit
b5f27a79e0
@ -553,7 +553,7 @@ func runSSHWithRecovery(ctx context.Context, logger *log.Logger, cfg config.Conf
|
||||
if err == nil {
|
||||
return out, nil
|
||||
}
|
||||
if !sshutil.IsHostKeyError(out, err) {
|
||||
if !sshutil.ShouldAttemptKnownHostsRepair(out, err) {
|
||||
return out, err
|
||||
}
|
||||
|
||||
|
||||
@ -1296,8 +1296,8 @@ func (o *Orchestrator) ssh(ctx context.Context, node string, command string) (st
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
if sshutil.IsHostKeyError(out, err) {
|
||||
o.log.Printf("warning: ssh host-key mismatch detected for %s via %s path; repairing known_hosts and retrying once", node, attemptNames[i])
|
||||
if sshutil.ShouldAttemptKnownHostsRepair(out, err) {
|
||||
o.log.Printf("warning: ssh failure on %s via %s path may be host-key related; repairing known_hosts and retrying once", node, attemptNames[i])
|
||||
sshutil.RepairKnownHosts(ctx, o.log, knownHostsFiles, repairHosts, o.cfg.SSHPort)
|
||||
retryOut, retryErr := o.run(ctx, 45*time.Second, "ssh", args...)
|
||||
if retryErr == nil {
|
||||
|
||||
@ -256,7 +256,7 @@ func (d *Daemon) forwardShutdown(ctx context.Context, reason string) error {
|
||||
}
|
||||
|
||||
out, err := try()
|
||||
if err != nil && sshutil.IsHostKeyError(out, err) {
|
||||
if err != nil && sshutil.ShouldAttemptKnownHostsRepair(out, err) {
|
||||
repairHosts := []string{d.cfg.Coordination.ForwardShutdownHost, host}
|
||||
if d.cfg.SSHJumpHost != "" {
|
||||
repairHosts = append(repairHosts, d.cfg.SSHJumpHost)
|
||||
|
||||
@ -34,6 +34,21 @@ func IsHostKeyError(output string, err error) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func ShouldAttemptKnownHostsRepair(output string, err error) bool {
|
||||
if IsHostKeyError(output, err) {
|
||||
return true
|
||||
}
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
// Some SSH invocations (especially under strict non-interactive configs)
|
||||
// return exit 255 without forwarding the host-key mismatch text.
|
||||
if strings.Contains(strings.ToLower(err.Error()), "exit status 255") && strings.TrimSpace(output) == "" {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func KnownHostsFiles(sshConfigFile, sshIdentityFile string) []string {
|
||||
seen := map[string]struct{}{}
|
||||
add := func(path string) {
|
||||
|
||||
@ -20,6 +20,12 @@ func TestIsHostKeyErrorIgnoresGenericFailures(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestShouldAttemptKnownHostsRepairOnSilent255(t *testing.T) {
|
||||
if !ShouldAttemptKnownHostsRepair("", errors.New("ssh ...: exit status 255")) {
|
||||
t.Fatalf("expected silent exit status 255 to trigger known_hosts repair")
|
||||
}
|
||||
}
|
||||
|
||||
func TestKnownHostsFilesIncludesDerivedPaths(t *testing.T) {
|
||||
configFile := "/home/atlas/.ssh/config"
|
||||
identityFile := "/home/tethys/.ssh/id_ed25519"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user