package state import ( "encoding/json" "errors" "fmt" "math" "os" "path/filepath" "sort" "strconv" "strings" "sync" "syscall" "time" ) type RunRecord struct { ID string `json:"id"` Action string `json:"action"` Reason string `json:"reason,omitempty"` StartedAt time.Time `json:"started_at"` EndedAt time.Time `json:"ended_at"` DurationSeconds int `json:"duration_seconds"` Success bool `json:"success"` Error string `json:"error,omitempty"` } type Store struct { path string mu sync.Mutex } func New(path string) *Store { return &Store{path: path} } func EnsureDir(dir string) error { if dir == "" { return fmt.Errorf("state dir must not be empty") } return os.MkdirAll(dir, 0o750) } func AcquireLock(path string) (func(), error) { if err := os.MkdirAll(filepath.Dir(path), 0o750); err != nil { return nil, err } create := func() (func(), error) { f, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600) if err != nil { return nil, err } _, _ = f.WriteString(fmt.Sprintf("pid=%d started=%s\n", os.Getpid(), time.Now().Format(time.RFC3339))) _ = f.Close() return func() { _ = os.Remove(path) }, nil } unlock, err := create() if err == nil { return unlock, nil } if !errors.Is(err, os.ErrExist) { return nil, fmt.Errorf("acquire lock %s: %w", path, err) } stale, staleErr := staleLock(path) if staleErr != nil { return nil, fmt.Errorf("acquire lock %s: existing lock check failed: %w", path, staleErr) } if !stale { return nil, fmt.Errorf("acquire lock %s: lock is held by active process", path) } if rmErr := os.Remove(path); rmErr != nil { return nil, fmt.Errorf("acquire lock %s: remove stale lock: %w", path, rmErr) } unlock, err = create() if err != nil { return nil, fmt.Errorf("acquire lock %s: recreate after stale lock removal: %w", path, err) } return unlock, nil } func staleLock(path string) (bool, error) { b, err := os.ReadFile(path) if err != nil { if os.IsNotExist(err) { return true, nil } return false, err } lines := strings.Split(string(b), "\n") var pid int for _, line := range lines { line = strings.TrimSpace(line) if strings.HasPrefix(line, "pid=") { v := strings.TrimPrefix(line, "pid=") parsed, parseErr := strconv.Atoi(v) if parseErr != nil { return true, nil } pid = parsed break } } if pid <= 0 { return true, nil } if err := syscall.Kill(pid, 0); err != nil { if errors.Is(err, syscall.ESRCH) { return true, nil } } return false, nil } func (s *Store) Append(record RunRecord) error { s.mu.Lock() defer s.mu.Unlock() records, err := s.loadUnlocked() if err != nil { return err } records = append(records, record) if len(records) > 200 { records = records[len(records)-200:] } if err := os.MkdirAll(filepath.Dir(s.path), 0o750); err != nil { return err } b, err := json.MarshalIndent(records, "", " ") if err != nil { return err } return os.WriteFile(s.path, b, 0o640) } func (s *Store) Load() ([]RunRecord, error) { s.mu.Lock() defer s.mu.Unlock() return s.loadUnlocked() } func (s *Store) loadUnlocked() ([]RunRecord, error) { b, err := os.ReadFile(s.path) if err != nil { if os.IsNotExist(err) { return nil, nil } return nil, err } if len(b) == 0 { return nil, nil } var records []RunRecord if err := json.Unmarshal(b, &records); err != nil { if healErr := quarantineCorruptFile(s.path, b, []byte("[]\n"), 0o640); healErr != nil { return nil, fmt.Errorf("decode run history: %w (auto-heal failed: %v)", err, healErr) } return nil, nil } return records, nil } func (s *Store) ShutdownP95(defaultSeconds int) int { records, err := s.Load() if err != nil { return defaultSeconds } var d []int for _, r := range records { if r.Action == "shutdown" && r.Success && r.DurationSeconds > 0 { d = append(d, r.DurationSeconds) } } if len(d) == 0 { return defaultSeconds } sort.Ints(d) idx := int(math.Ceil(0.95*float64(len(d)))) - 1 if idx < 0 { idx = 0 } if idx >= len(d) { idx = len(d) - 1 } if d[idx] <= 0 { return defaultSeconds } return d[idx] }