From 4b5e4f9e31e254b830f40c0699ec0abefd56e8f1 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 13 Apr 2026 13:55:17 -0300 Subject: [PATCH] backup: add configurable keep-last retention --- internal/api/types.go | 6 +++ internal/k8s/jobs.go | 45 +++++++++++++++++++-- internal/server/server.go | 72 +++++++++++++++++++++++++++++++++- internal/server/server_test.go | 49 ++++++++++++++++++++++- web/src/App.tsx | 35 ++++++++++++++--- 5 files changed, 197 insertions(+), 10 deletions(-) diff --git a/internal/api/types.go b/internal/api/types.go index 2aa9234..4b921be 100644 --- a/internal/api/types.go +++ b/internal/api/types.go @@ -7,6 +7,7 @@ type BackupRequest struct { Snapshot bool `json:"snapshot"` DryRun bool `json:"dry_run"` Dedupe *bool `json:"dedupe,omitempty"` + KeepLast *int `json:"keep_last,omitempty"` } type BackupResponse struct { @@ -19,6 +20,7 @@ type BackupResponse struct { RequestedBy string `json:"requested_by,omitempty"` DryRun bool `json:"dry_run"` Dedupe bool `json:"dedupe"` + KeepLast int `json:"keep_last"` } type RestoreTestRequest struct { @@ -112,6 +114,7 @@ type BackupPolicy struct { IntervalHours float64 `json:"interval_hours"` Enabled bool `json:"enabled"` Dedupe bool `json:"dedupe"` + KeepLast int `json:"keep_last"` CreatedAt string `json:"created_at,omitempty"` UpdatedAt string `json:"updated_at,omitempty"` } @@ -122,6 +125,7 @@ type BackupPolicyUpsertRequest struct { IntervalHours float64 `json:"interval_hours"` Enabled *bool `json:"enabled,omitempty"` Dedupe *bool `json:"dedupe,omitempty"` + KeepLast *int `json:"keep_last,omitempty"` } type BackupPolicyListResponse struct { @@ -132,6 +136,7 @@ type NamespaceBackupRequest struct { Namespace string `json:"namespace"` DryRun bool `json:"dry_run"` Dedupe *bool `json:"dedupe,omitempty"` + KeepLast *int `json:"keep_last,omitempty"` } type NamespaceBackupResult struct { @@ -149,6 +154,7 @@ type NamespaceBackupResponse struct { Driver string `json:"driver"` DryRun bool `json:"dry_run"` Dedupe 
bool `json:"dedupe"` + KeepLast int `json:"keep_last"` Total int `json:"total"` Succeeded int `json:"succeeded"` Failed int `json:"failed"` diff --git a/internal/k8s/jobs.go b/internal/k8s/jobs.go index e03925d..d63b78a 100644 --- a/internal/k8s/jobs.go +++ b/internal/k8s/jobs.go @@ -24,6 +24,7 @@ const ( labelPVC = "soteria.bstein.dev/pvc" annotationResticRepository = "soteria.bstein.dev/restic-repository" annotationDedupeEnabled = "soteria.bstein.dev/dedupe-enabled" + annotationKeepLast = "soteria.bstein.dev/keep-last" ) type BackupJobSummary struct { @@ -32,6 +33,7 @@ type BackupJobSummary struct { PVC string Repository string DedupeEnabled bool + KeepLast int CreatedAt time.Time CompletionTime time.Time State string @@ -116,6 +118,7 @@ func summarizeBackupJob(job batchv1.Job, pvc string) BackupJobSummary { Namespace: job.Namespace, PVC: pvc, DedupeEnabled: true, + KeepLast: 0, CreatedAt: job.CreationTimestamp.Time, State: "Pending", } @@ -123,6 +126,7 @@ func summarizeBackupJob(job batchv1.Job, pvc string) BackupJobSummary { summary.Repository = raw } summary.DedupeEnabled = parseBoolWithDefault(job.Annotations[annotationDedupeEnabled], true) + summary.KeepLast = parseIntWithDefault(job.Annotations[annotationKeepLast], 0) if job.Status.CompletionTime != nil { summary.CompletionTime = job.Status.CompletionTime.Time } @@ -168,6 +172,7 @@ func (c *Client) CreateBackupJob(ctx context.Context, cfg *config.Config, req ap jobName := jobName("backup", req.PVC) secretName := fmt.Sprintf("soteria-%s-restic", jobName) dedupeEnabled := dedupeEnabled(req.Dedupe) + keepLast := keepLastWithDefault(req.KeepLast) repository := resticRepositoryForBackup(cfg.ResticRepository, req.Namespace, req.PVC, dedupeEnabled) if req.DryRun { @@ -184,7 +189,7 @@ func (c *Client) CreateBackupJob(ctx context.Context, cfg *config.Config, req ap return "", "", err } - job := buildBackupJob(cfg, req, jobName, secretName, repository, dedupeEnabled) + job := buildBackupJob(cfg, req, jobName, 
secretName, repository, dedupeEnabled, keepLast) if nodeName, err := c.resolvePVCMountedNode(ctx, req.Namespace, req.PVC); err == nil && nodeName != "" { job.Spec.Template.Spec.NodeName = nodeName } @@ -272,7 +277,7 @@ func (c *Client) CreateRestoreJob(ctx context.Context, cfg *config.Config, req a return jobName, secretName, nil } -func buildBackupJob(cfg *config.Config, req api.BackupRequest, jobName, secretName, repository string, dedupeEnabled bool) *batchv1.Job { +func buildBackupJob(cfg *config.Config, req api.BackupRequest, jobName, secretName, repository string, dedupeEnabled bool, keepLast int) *batchv1.Job { labels := map[string]string{ labelAppName: "soteria", labelComponent: "backup", @@ -282,6 +287,7 @@ func buildBackupJob(cfg *config.Config, req api.BackupRequest, jobName, secretNa annotations := map[string]string{ annotationResticRepository: repository, annotationDedupeEnabled: strconv.FormatBool(dedupeEnabled), + annotationKeepLast: strconv.Itoa(keepLast), } command := backupCommand(cfg, req) @@ -431,8 +437,10 @@ func backupCommand(cfg *config.Config, req api.BackupRequest) string { if !dedupeEnabled(req.Dedupe) { mode = "off" } + keepLast := keepLastWithDefault(req.KeepLast) args := []string{ "restic", "backup", "/data", + "--host", "soteria", "--tag", "soteria", "--tag", fmt.Sprintf("pvc=%s", req.PVC), "--tag", fmt.Sprintf("dedupe=%s", mode), @@ -447,7 +455,17 @@ func backupCommand(cfg *config.Config, req api.BackupRequest) string { args = append(args, cfg.ResticBackupArgs...) 
cmd := strings.Join(args, " ") - if len(cfg.ResticForgetArgs) > 0 { + if keepLast > 0 { + forget := strings.Join([]string{ + "restic", "forget", + "--host", "soteria", + "--group-by", "host,tags", + "--tag", fmt.Sprintf("pvc=%s", req.PVC), + "--keep-last", strconv.Itoa(keepLast), + "--prune", + }, " ") + cmd = fmt.Sprintf("%s && %s", cmd, forget) + } else if len(cfg.ResticForgetArgs) > 0 { forget := strings.Join(append([]string{"restic", "forget"}, cfg.ResticForgetArgs...), " ") cmd = fmt.Sprintf("%s && %s", cmd, forget) } @@ -573,6 +591,16 @@ func dedupeEnabled(raw *bool) bool { return *raw } +func keepLastWithDefault(raw *int) int { + if raw == nil { + return 0 + } + if *raw < 0 { + return 0 + } + return *raw +} + func resticRepositoryForBackup(base, namespace, pvc string, dedupe bool) string { if dedupe { return strings.TrimSpace(base) @@ -626,6 +654,17 @@ func parseBoolWithDefault(raw string, fallback bool) bool { } } +func parseIntWithDefault(raw string, fallback int) int { + parsed, err := strconv.Atoi(strings.TrimSpace(raw)) + if err != nil { + return fallback + } + if parsed < 0 { + return fallback + } + return parsed +} + func int32Ptr(val int32) *int32 { return &val } diff --git a/internal/server/server.go b/internal/server/server.go index 8758088..d79235a 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -82,6 +82,7 @@ const ( policySecretKey = "policies.json" defaultPolicyHours = 24.0 maxPolicyIntervalHrs = 24 * 365 + maxPolicyKeepLast = 1000 maxUsageSampleJobs = 20 resticSelectorPrefix = "restic-latest:" ) @@ -331,6 +332,11 @@ func (s *Server) handleBackup(w http.ResponseWriter, r *http.Request) { writeError(w, http.StatusBadRequest, "namespace and pvc are required") return } + if err := validateKeepLast(req.KeepLast); err != nil { + s.metrics.RecordBackupRequest(s.cfg.BackupDriver, "validation_error") + writeError(w, http.StatusBadRequest, err.Error()) + return + } req.Namespace = strings.TrimSpace(req.Namespace) req.PVC = 
strings.TrimSpace(req.PVC) requester := currentRequester(r.Context()) @@ -435,6 +441,11 @@ func (s *Server) handleNamespaceBackup(w http.ResponseWriter, r *http.Request) { writeError(w, http.StatusBadRequest, "namespace is required") return } + if err := validateKeepLast(req.KeepLast); err != nil { + s.metrics.RecordNamespaceBackupRequest(s.cfg.BackupDriver, "validation_error") + writeError(w, http.StatusBadRequest, err.Error()) + return + } if err := validateKubernetesName("namespace", req.Namespace); err != nil { s.metrics.RecordNamespaceBackupRequest(s.cfg.BackupDriver, "validation_error") writeError(w, http.StatusBadRequest, err.Error()) @@ -450,12 +461,14 @@ func (s *Server) handleNamespaceBackup(w http.ResponseWriter, r *http.Request) { requester := currentRequester(r.Context()) resolvedDedupe := dedupeDefault(req.Dedupe) + resolvedKeepLast := keepLastDefault(req.KeepLast) response := api.NamespaceBackupResponse{ Namespace: req.Namespace, RequestedBy: requester, Driver: s.cfg.BackupDriver, DryRun: req.DryRun, Dedupe: resolvedDedupe, + KeepLast: resolvedKeepLast, Results: make([]api.NamespaceBackupResult, 0, len(pvcs)), } @@ -465,6 +478,7 @@ func (s *Server) handleNamespaceBackup(w http.ResponseWriter, r *http.Request) { PVC: pvc.Name, DryRun: req.DryRun, Dedupe: boolPtr(resolvedDedupe), + KeepLast: intPtr(resolvedKeepLast), } result, status, execErr := s.executeBackup(r.Context(), backupReq, requester) s.metrics.RecordBackupRequest(s.cfg.BackupDriver, status) @@ -637,7 +651,9 @@ func (s *Server) executeBackup(ctx context.Context, req api.BackupRequest, reque return api.BackupResponse{}, "validation_error", fmt.Errorf("namespace and pvc are required") } resolvedDedupe := dedupeDefault(req.Dedupe) + resolvedKeepLast := keepLastDefault(req.KeepLast) req.Dedupe = boolPtr(resolvedDedupe) + req.KeepLast = intPtr(resolvedKeepLast) switch s.cfg.BackupDriver { case "longhorn": @@ -655,6 +671,7 @@ func (s *Server) executeBackup(ctx context.Context, req 
api.BackupRequest, reque RequestedBy: requester, DryRun: req.DryRun, Dedupe: resolvedDedupe, + KeepLast: resolvedKeepLast, } if req.DryRun { return response, "dry_run", nil @@ -689,6 +706,7 @@ func (s *Server) executeBackup(ctx context.Context, req api.BackupRequest, reque RequestedBy: requester, DryRun: req.DryRun, Dedupe: resolvedDedupe, + KeepLast: resolvedKeepLast, }, result, nil default: return api.BackupResponse{}, "unsupported_driver", fmt.Errorf("unsupported backup driver") @@ -1250,6 +1268,7 @@ func (s *Server) runPolicyCycle(ctx context.Context) { type effectivePolicy struct { IntervalHours float64 Dedupe bool + KeepLast int } effectivePolicies := map[string]effectivePolicy{} for _, policy := range policies { @@ -1265,10 +1284,11 @@ func (s *Server) runPolicyCycle(ctx context.Context) { for _, pvc := range matches { key := pvc.Namespace + "/" + pvc.PVC current, exists := effectivePolicies[key] - if !exists || policy.IntervalHours < current.IntervalHours { + if !exists || policy.IntervalHours < current.IntervalHours || (policy.IntervalHours == current.IntervalHours && keepLastStricter(policy.KeepLast, current.KeepLast)) { effectivePolicies[key] = effectivePolicy{ IntervalHours: policy.IntervalHours, Dedupe: policy.Dedupe, + KeepLast: policy.KeepLast, } } } @@ -1289,6 +1309,7 @@ func (s *Server) runPolicyCycle(ctx context.Context) { PVC: pvc.PVC, DryRun: false, Dedupe: boolPtr(effective.Dedupe), + KeepLast: intPtr(effective.KeepLast), }, "policy-scheduler") s.metrics.RecordBackupRequest(s.cfg.BackupDriver, result) if err != nil { @@ -1333,6 +1354,7 @@ func (s *Server) loadPolicies(ctx context.Context) error { IntervalHours float64 `json:"interval_hours"` Enabled bool `json:"enabled"` Dedupe *bool `json:"dedupe,omitempty"` + KeepLast *int `json:"keep_last,omitempty"` CreatedAt string `json:"created_at,omitempty"` UpdatedAt string `json:"updated_at,omitempty"` } `json:"policies"` @@ -1357,6 +1379,7 @@ func (s *Server) loadPolicies(ctx context.Context) error { 
if policy.Dedupe != nil { dedupe = *policy.Dedupe } + keepLast := keepLastDefault(policy.KeepLast) id := policyKey(namespace, pvc) createdAt := policy.CreatedAt if createdAt == "" { @@ -1373,6 +1396,7 @@ func (s *Server) loadPolicies(ctx context.Context) error { IntervalHours: interval, Enabled: policy.Enabled, Dedupe: dedupe, + KeepLast: keepLast, CreatedAt: createdAt, UpdatedAt: updatedAt, } @@ -1457,6 +1481,10 @@ func (s *Server) upsertPolicy(ctx context.Context, req api.BackupPolicyUpsertReq enabled = *req.Enabled } dedupe := dedupeDefault(req.Dedupe) + if err := validateKeepLast(req.KeepLast); err != nil { + return api.BackupPolicy{}, err + } + keepLast := keepLastDefault(req.KeepLast) id := policyKey(namespace, pvc) now := time.Now().UTC().Format(time.RFC3339) @@ -1474,6 +1502,7 @@ func (s *Server) upsertPolicy(ctx context.Context, req api.BackupPolicyUpsertReq IntervalHours: interval, Enabled: enabled, Dedupe: dedupe, + KeepLast: keepLast, CreatedAt: createdAt, UpdatedAt: now, } @@ -1917,3 +1946,44 @@ func boolPtr(value bool) *bool { ptr := value return &ptr } + +func keepLastDefault(value *int) int { + if value == nil { + return 0 + } + if *value < 0 { + return 0 + } + return *value +} + +func intPtr(value int) *int { + ptr := value + return &ptr +} + +func validateKeepLast(value *int) error { + if value == nil { + return nil + } + if *value < 0 { + return fmt.Errorf("keep_last must be >= 0") + } + if *value > maxPolicyKeepLast { + return fmt.Errorf("keep_last must be <= %d", maxPolicyKeepLast) + } + return nil +} + +func keepLastStricter(candidate, current int) bool { + switch { + case candidate > 0 && current == 0: + return true + case candidate == 0: + return false + case current == 0: + return true + default: + return candidate < current + } +} diff --git a/internal/server/server_test.go b/internal/server/server_test.go index facec44..08fe57d 100644 --- a/internal/server/server_test.go +++ b/internal/server/server_test.go @@ -337,6 +337,9 @@ func 
TestResticBackupDefaultsDedupeEnabled(t *testing.T) { if kube.lastBackupReq.Dedupe == nil || !*kube.lastBackupReq.Dedupe { t.Fatalf("expected dedupe default true, got %#v", kube.lastBackupReq.Dedupe) } + if kube.lastBackupReq.KeepLast == nil || *kube.lastBackupReq.KeepLast != 0 { + t.Fatalf("expected keep_last default 0, got %#v", kube.lastBackupReq.KeepLast) + } var payload api.BackupResponse if err := json.Unmarshal(res.Body.Bytes(), &payload); err != nil { @@ -345,6 +348,9 @@ func TestResticBackupDefaultsDedupeEnabled(t *testing.T) { if !payload.Dedupe { t.Fatalf("expected response dedupe=true, got %#v", payload) } + if payload.KeepLast != 0 { + t.Fatalf("expected response keep_last=0, got %#v", payload) + } } func TestResticInventoryUsesCompletedBackupJobs(t *testing.T) { @@ -672,6 +678,9 @@ func TestPoliciesCRUD(t *testing.T) { if !created.Dedupe { t.Fatalf("expected policy dedupe default true, got %#v", created) } + if created.KeepLast != 0 { + t.Fatalf("expected policy keep_last default 0, got %#v", created) + } listReq := httptest.NewRequest(http.MethodGet, "/v1/policies", nil) listRes := httptest.NewRecorder() @@ -723,6 +732,9 @@ func TestLoadPoliciesDefaultsDedupeEnabledWhenMissing(t *testing.T) { if !policies[0].Dedupe { t.Fatalf("expected dedupe to default true for legacy policy, got %#v", policies[0]) } + if policies[0].KeepLast != 0 { + t.Fatalf("expected keep_last to default 0 for legacy policy, got %#v", policies[0]) + } } func TestNamespaceBackupDryRun(t *testing.T) { @@ -760,6 +772,9 @@ func TestNamespaceBackupDryRun(t *testing.T) { if payload.Total != 2 || payload.Succeeded != 2 || payload.Failed != 0 { t.Fatalf("unexpected namespace backup payload: %#v", payload) } + if payload.KeepLast != 0 { + t.Fatalf("expected namespace backup keep_last default 0, got %#v", payload) + } } func TestNamespaceBackupUsesDedupeFlag(t *testing.T) { @@ -780,7 +795,7 @@ func TestNamespaceBackupUsesDedupeFlag(t *testing.T) { } srv.handler = 
http.HandlerFunc(srv.route) - body := `{"namespace":"apps","dry_run":false,"dedupe":false}` + body := `{"namespace":"apps","dry_run":false,"dedupe":false,"keep_last":1}` req := httptest.NewRequest(http.MethodPost, "/v1/backup/namespace", strings.NewReader(body)) req.Header.Set("Content-Type", "application/json") res := httptest.NewRecorder() @@ -797,9 +812,41 @@ func TestNamespaceBackupUsesDedupeFlag(t *testing.T) { if payload.Dedupe { t.Fatalf("expected response dedupe false, got %#v", payload) } + if payload.KeepLast != 1 { + t.Fatalf("expected keep_last=1, got %#v", payload) + } if kube.lastBackupReq.Dedupe == nil || *kube.lastBackupReq.Dedupe { t.Fatalf("expected backup request dedupe false, got %#v", kube.lastBackupReq.Dedupe) } + if kube.lastBackupReq.KeepLast == nil || *kube.lastBackupReq.KeepLast != 1 { + t.Fatalf("expected backup request keep_last=1, got %#v", kube.lastBackupReq.KeepLast) + } +} + +func TestBackupRejectsNegativeKeepLast(t *testing.T) { + srv := &Server{ + cfg: &config.Config{ + AuthRequired: false, + BackupDriver: "restic", + }, + client: &fakeKubeClient{}, + longhorn: &fakeLonghornClient{}, + metrics: newTelemetry(), + } + srv.handler = http.HandlerFunc(srv.route) + + body := `{"namespace":"apps","pvc":"data","keep_last":-1}` + req := httptest.NewRequest(http.MethodPost, "/v1/backup", strings.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + res := httptest.NewRecorder() + srv.Handler().ServeHTTP(res, req) + + if res.Code != http.StatusBadRequest { + t.Fatalf("expected 400 for invalid keep_last, got %d: %s", res.Code, res.Body.String()) + } + if !strings.Contains(res.Body.String(), "keep_last must be") { + t.Fatalf("expected keep_last validation message, got %s", res.Body.String()) + } } func TestNamespaceRestoreDryRun(t *testing.T) { diff --git a/web/src/App.tsx b/web/src/App.tsx index 3d7ee5a..4b5b2fe 100644 --- a/web/src/App.tsx +++ b/web/src/App.tsx @@ -68,6 +68,7 @@ interface BackupPolicy { interval_hours: 
number; enabled: boolean; dedupe?: boolean; + keep_last?: number; created_at?: string; updated_at?: string; } @@ -270,7 +271,9 @@ function App() { const [policyIntervalHours, setPolicyIntervalHours] = useState(24); const [policyEnabled, setPolicyEnabled] = useState(true); const [policyDedupe, setPolicyDedupe] = useState(true); + const [policyKeepLast, setPolicyKeepLast] = useState(0); const [manualDedupe, setManualDedupe] = useState(true); + const [manualKeepLast, setManualKeepLast] = useState(0); const [lastAction, setLastAction] = useState('No action yet.'); const [busy, setBusy] = useState(false); @@ -404,7 +407,7 @@ function App() { const payload = await fetchJSON('/v1/backup', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ namespace, pvc, dry_run: false, dedupe: manualDedupe }) + body: JSON.stringify({ namespace, pvc, dry_run: false, dedupe: manualDedupe, keep_last: manualKeepLast }) }); writeAction(payload); await Promise.all([loadInventory(), loadB2Usage()]); @@ -421,7 +424,7 @@ function App() { const payload = await fetchJSON('/v1/backup/namespace', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ namespace, dry_run: false, dedupe: manualDedupe }) + body: JSON.stringify({ namespace, dry_run: false, dedupe: manualDedupe, keep_last: manualKeepLast }) }); writeAction(payload); await Promise.all([loadInventory(), loadB2Usage()]); @@ -534,7 +537,8 @@ function App() { pvc: policyPVC, interval_hours: policyIntervalHours, enabled: policyEnabled, - dedupe: policyDedupe + dedupe: policyDedupe, + keep_last: policyKeepLast }) }); writeAction(payload); @@ -599,6 +603,16 @@ function App() { setManualDedupe(event.target.checked)} /> Dedupe unchanged blocks (default) +

This setting applies to both `Backup now` and `Backup namespace` actions.

{inventoryError &&

{inventoryError}

} {!inventory && !inventoryError &&

Loading inventory...

} @@ -820,7 +834,7 @@ function App() {

Backup Policies

-

Policy backups create new restic snapshots. With dedupe on, unchanged blocks are reused in the shared repository. With dedupe off, Soteria isolates each PVC to its own repository path.

+

Policy backups create new restic snapshots. `Keep last` controls version retention per PVC: 0 (the default) keeps every snapshot, while 1 means only the newest copy remains after each run. With dedupe on, unchanged blocks are reused in the shared repository. With dedupe off, Soteria isolates each PVC to its own repository path.

+
@@ -861,7 +885,7 @@ function App() { {policy.namespace}/{policy.pvc || '*'} {policy.enabled ? 'Enabled' : 'Disabled'} -

Every {policy.interval_hours}h | Dedupe: {policy.dedupe === false ? 'off' : 'on'} | Updated {formatTimestamp(policy.updated_at || policy.created_at)}

+

Every {policy.interval_hours}h | Dedupe: {policy.dedupe === false ? 'off' : 'on'} | Keep last: {policy.keep_last ?? 0} | Updated {formatTimestamp(policy.updated_at || policy.created_at)}