2026-04-04 22:24:56 -03:00
|
|
|
package sshutil
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"context"
|
|
|
|
|
"fmt"
|
|
|
|
|
"log"
|
|
|
|
|
"os"
|
|
|
|
|
"os/exec"
|
|
|
|
|
"path/filepath"
|
|
|
|
|
"strings"
|
2026-04-04 22:44:19 -03:00
|
|
|
"syscall"
|
2026-04-04 22:24:56 -03:00
|
|
|
"time"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// hostKeyErrorMarkers holds the substrings that identify an SSH host-key
// verification failure. IsHostKeyError lowercases the text it inspects before
// searching, so every entry here must be lowercase as well.
var hostKeyErrorMarkers = []string{
	"remote host identification has changed",
	"host key verification failed",
	// Trailing space kept on purpose; matches lines such as "Offending ECDSA key in ...".
	"offending ",
	"possible dns spoofing detected",
}
|
|
|
|
|
|
2026-04-09 01:38:06 -03:00
|
|
|
// IsHostKeyError runs one orchestration or CLI step.
|
|
|
|
|
// Signature: IsHostKeyError(output string, err error) bool.
|
|
|
|
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
2026-04-04 22:24:56 -03:00
|
|
|
func IsHostKeyError(output string, err error) bool {
|
|
|
|
|
if err == nil {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
combined := strings.ToLower(strings.TrimSpace(output + "\n" + err.Error()))
|
|
|
|
|
if combined == "" {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
for _, marker := range hostKeyErrorMarkers {
|
|
|
|
|
if strings.Contains(combined, marker) {
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-09 01:38:06 -03:00
|
|
|
// ShouldAttemptKnownHostsRepair runs one orchestration or CLI step.
|
|
|
|
|
// Signature: ShouldAttemptKnownHostsRepair(output string, err error) bool.
|
|
|
|
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
2026-04-04 22:40:39 -03:00
|
|
|
func ShouldAttemptKnownHostsRepair(output string, err error) bool {
|
|
|
|
|
if IsHostKeyError(output, err) {
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
if err == nil {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
// Some SSH invocations (especially under strict non-interactive configs)
|
|
|
|
|
// return exit 255 without forwarding the host-key mismatch text.
|
|
|
|
|
if strings.Contains(strings.ToLower(err.Error()), "exit status 255") && strings.TrimSpace(output) == "" {
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-09 01:38:06 -03:00
|
|
|
// KnownHostsFiles returns the candidate known_hosts paths that may hold a
// stale host key.
//
// Candidates are collected in this order, with blanks and duplicates dropped:
//  1. the fixed locations hard-coded for this environment,
//  2. <home>/.ssh/known_hosts for the current user, when os.UserHomeDir succeeds,
//  3. a known_hosts file next to sshConfigFile, when that argument is non-blank,
//  4. a known_hosts file next to sshIdentityFile, when that argument is non-blank.
//
// The returned slice keeps that first-seen order, so repeated calls with the
// same inputs produce the same result. The paths are not checked for existence.
func KnownHostsFiles(sshConfigFile, sshIdentityFile string) []string {
	const expected = 6 // three fixed paths + home + config dir + identity dir
	seen := make(map[string]struct{}, expected)
	paths := make([]string, 0, expected)

	// add records a path once; the slice (not the map) defines the output order.
	add := func(path string) {
		p := strings.TrimSpace(path)
		if p == "" {
			return
		}
		if _, dup := seen[p]; dup {
			return
		}
		seen[p] = struct{}{}
		paths = append(paths, p)
	}

	// Common locations for this environment.
	add("/root/.ssh/known_hosts")
	add("/home/atlas/.ssh/known_hosts")
	add("/home/tethys/.ssh/known_hosts")

	if home, err := os.UserHomeDir(); err == nil && strings.TrimSpace(home) != "" {
		add(filepath.Join(home, ".ssh", "known_hosts"))
	}

	// A known_hosts file may sit beside the SSH config or the identity key.
	for _, ref := range []string{sshConfigFile, sshIdentityFile} {
		if ref = strings.TrimSpace(ref); ref != "" {
			add(filepath.Join(filepath.Dir(ref), "known_hosts"))
		}
	}

	return paths
}
|
|
|
|
|
|
2026-04-09 01:38:06 -03:00
|
|
|
// RepairKnownHosts runs one orchestration or CLI step.
|
|
|
|
|
// Signature: RepairKnownHosts(ctx context.Context, logger *log.Logger, knownHostsFiles []string, hosts []string, port int).
|
|
|
|
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
2026-04-04 22:24:56 -03:00
|
|
|
func RepairKnownHosts(ctx context.Context, logger *log.Logger, knownHostsFiles []string, hosts []string, port int) {
|
|
|
|
|
if _, err := exec.LookPath("ssh-keygen"); err != nil {
|
|
|
|
|
logf(logger, "warning: cannot repair known_hosts (ssh-keygen missing): %v", err)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
dedupHosts := make([]string, 0, len(hosts))
|
|
|
|
|
hostSet := map[string]struct{}{}
|
|
|
|
|
for _, h := range hosts {
|
|
|
|
|
host := strings.TrimSpace(h)
|
|
|
|
|
if host == "" {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if _, ok := hostSet[host]; ok {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
hostSet[host] = struct{}{}
|
|
|
|
|
dedupHosts = append(dedupHosts, host)
|
|
|
|
|
}
|
|
|
|
|
if len(dedupHosts) == 0 {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fileSet := map[string]struct{}{}
|
|
|
|
|
for _, f := range knownHostsFiles {
|
|
|
|
|
file := strings.TrimSpace(f)
|
|
|
|
|
if file == "" {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if _, ok := fileSet[file]; ok {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
fileSet[file] = struct{}{}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for file := range fileSet {
|
|
|
|
|
if stat, err := os.Stat(file); err != nil || stat.IsDir() {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
for _, host := range dedupHosts {
|
|
|
|
|
removeKnownHostEntry(ctx, logger, file, host)
|
|
|
|
|
if port > 0 {
|
|
|
|
|
removeKnownHostEntry(ctx, logger, file, fmt.Sprintf("[%s]:%d", host, port))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-09 01:38:06 -03:00
|
|
|
// removeKnownHostEntry runs one orchestration or CLI step.
|
|
|
|
|
// Signature: removeKnownHostEntry(ctx context.Context, logger *log.Logger, file string, entry string).
|
|
|
|
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
2026-04-04 22:24:56 -03:00
|
|
|
func removeKnownHostEntry(ctx context.Context, logger *log.Logger, file string, entry string) {
|
2026-04-04 22:44:19 -03:00
|
|
|
uid, gid, mode := captureOwnership(file)
|
|
|
|
|
|
2026-04-04 22:24:56 -03:00
|
|
|
runCtx, cancel := context.WithTimeout(ctx, 8*time.Second)
|
|
|
|
|
defer cancel()
|
|
|
|
|
|
|
|
|
|
cmd := exec.CommandContext(runCtx, "ssh-keygen", "-R", entry, "-f", file)
|
|
|
|
|
out, err := cmd.CombinedOutput()
|
2026-04-04 22:44:19 -03:00
|
|
|
restoreOwnership(file, file+".old", uid, gid, mode)
|
2026-04-04 22:24:56 -03:00
|
|
|
if err == nil {
|
|
|
|
|
logf(logger, "known_hosts repaired: removed %s from %s", entry, file)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
trimmed := strings.ToLower(strings.TrimSpace(string(out)))
|
|
|
|
|
// ssh-keygen exits non-zero when entry is absent; this is fine.
|
|
|
|
|
if strings.Contains(trimmed, "not found in") {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
logf(logger, "warning: known_hosts cleanup failed for %s in %s: %v: %s", entry, file, err, strings.TrimSpace(string(out)))
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-09 01:38:06 -03:00
|
|
|
// captureOwnership returns the owner uid, group gid and permission bits of path.
//
// If path cannot be stat'ed it returns (-1, -1, 0). If the platform's stat
// data is not a *syscall.Stat_t, the ids are -1 but the permission bits are
// still reported.
func captureOwnership(path string) (int, int, os.FileMode) {
	fi, statErr := os.Stat(path)
	if statErr != nil {
		return -1, -1, 0
	}

	perm := fi.Mode().Perm()
	if raw, isStatT := fi.Sys().(*syscall.Stat_t); isStatT {
		return int(raw.Uid), int(raw.Gid), perm
	}
	return -1, -1, perm
}
|
|
|
|
|
|
2026-04-09 01:38:06 -03:00
|
|
|
// restoreOwnership reapplies uid, gid and mode to path and to backupPath.
//
// Nothing happens when uid or gid is negative. Blank paths and paths that
// cannot be stat'ed are skipped. Permission bits are only reapplied when mode
// is non-zero. Chown and Chmod failures are ignored on purpose: this is a
// best-effort cleanup.
func restoreOwnership(path string, backupPath string, uid int, gid int, mode os.FileMode) {
	if uid < 0 || gid < 0 {
		return
	}

	targets := [...]string{path, backupPath}
	for i := range targets {
		target := targets[i]
		if target == "" {
			continue
		}
		if _, statErr := os.Stat(target); statErr == nil {
			_ = os.Chown(target, uid, gid) // best-effort
			if mode != 0 {
				_ = os.Chmod(target, mode) // best-effort
			}
		}
	}
}
|
|
|
|
|
|
2026-04-09 01:38:06 -03:00
|
|
|
// logf writes a formatted message through logger.Printf and silently does
// nothing when logger is nil, so callers may pass an optional logger.
func logf(logger *log.Logger, format string, args ...any) {
	if logger == nil {
		return
	}
	logger.Printf(format, args...)
}
|