diff --git a/internal/server/restore_handlers_test.go b/internal/server/restore_handlers_test.go new file mode 100644 index 0000000..354910d --- /dev/null +++ b/internal/server/restore_handlers_test.go @@ -0,0 +1,541 @@ +package server + +import ( + "context" + "errors" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "scm.bstein.dev/bstein/soteria/internal/api" + "scm.bstein.dev/bstein/soteria/internal/config" + "scm.bstein.dev/bstein/soteria/internal/k8s" + "scm.bstein.dev/bstein/soteria/internal/longhorn" + + corev1 "k8s.io/api/core/v1" +) + +type restoreTestKubeClient struct { + *fakeKubeClient + listPVCsErr error + targetExistsErr error + resolveErr error + createRestoreErr error +} + +func (k *restoreTestKubeClient) ListBoundPVCs(ctx context.Context) ([]k8s.PVCSummary, error) { + if k.listPVCsErr != nil { + return nil, k.listPVCsErr + } + return k.fakeKubeClient.ListBoundPVCs(ctx) +} + +func (k *restoreTestKubeClient) PersistentVolumeClaimExists(ctx context.Context, namespace, pvcName string) (bool, error) { + if k.targetExistsErr != nil { + return false, k.targetExistsErr + } + return k.fakeKubeClient.PersistentVolumeClaimExists(ctx, namespace, pvcName) +} + +func (k *restoreTestKubeClient) ResolvePVCVolume(ctx context.Context, namespace, pvcName string) (string, *corev1.PersistentVolumeClaim, *corev1.PersistentVolume, error) { + if k.resolveErr != nil { + return "", nil, nil, k.resolveErr + } + return k.fakeKubeClient.ResolvePVCVolume(ctx, namespace, pvcName) +} + +func (k *restoreTestKubeClient) CreateRestoreJob(ctx context.Context, cfg *config.Config, req api.RestoreTestRequest) (string, string, error) { + if k.createRestoreErr != nil { + return "", "", k.createRestoreErr + } + return k.fakeKubeClient.CreateRestoreJob(ctx, cfg, req) +} + +type restoreTestLonghornClient struct { + *fakeLonghornClient + findBackup *longhorn.Backup + findErr error + getVolume *longhorn.Volume + getVolumeErr error + createVolErr error + createPVCErr error + deleteVolErr error + deletedVolume string +} + +func (l *restoreTestLonghornClient) FindBackup(ctx context.Context, volumeName, snapshot string) (*longhorn.Backup, error) { + if l.findErr != nil { + return nil, l.findErr + } + if l.findBackup != nil { + return l.findBackup, nil + } + return l.fakeLonghornClient.FindBackup(ctx, volumeName, snapshot) +} + +func (l *restoreTestLonghornClient) GetVolume(ctx context.Context, volume string) (*longhorn.Volume, error) { + if l.getVolumeErr != nil { + return nil, l.getVolumeErr + } + if l.getVolume != nil { + return l.getVolume, nil + } + return l.fakeLonghornClient.GetVolume(ctx, volume) +} + +func (l *restoreTestLonghornClient) CreateVolumeFromBackup(ctx context.Context, name, size string, replicas int, backupURL string) (*longhorn.Volume, error) { + if l.createVolErr != nil { + return nil, l.createVolErr + } + return l.fakeLonghornClient.CreateVolumeFromBackup(ctx, name, size, replicas, backupURL) +} + +func (l *restoreTestLonghornClient) CreatePVC(ctx context.Context, volumeName, namespace, pvcName string) error { + if l.createPVCErr != nil { + return l.createPVCErr + } + return l.fakeLonghornClient.CreatePVC(ctx, volumeName, namespace, pvcName) +} + +func (l *restoreTestLonghornClient) DeleteVolume(ctx context.Context, volumeName string) error { + l.deletedVolume = volumeName + if l.deleteVolErr != nil { + return l.deleteVolErr + } + return l.fakeLonghornClient.DeleteVolume(ctx, volumeName) +} + +func newRestoreTestServer(cfg *config.Config, client kubeClient, longhornClient longhornClient) *Server { + srv := &Server{ + cfg: cfg, + client: client, + longhorn: longhornClient, + metrics: newTelemetry(), + policies: map[string]api.BackupPolicy{}, + } + srv.handler = http.HandlerFunc(srv.route) + return srv +} + +func TestHandleRestoreRejectsInvalidRequests(t *testing.T) { + srv := newRestoreTestServer( + &config.Config{AuthRequired: false, BackupDriver: "longhorn"}, + &restoreTestKubeClient{fakeKubeClient: &fakeKubeClient{}}, + &restoreTestLonghornClient{fakeLonghornClient: &fakeLonghornClient{}}, + ) + + testCases := []struct { + name string + method string + body string + status int + want string + }{ + {name: "method", method: http.MethodGet, body: "", status: http.StatusMethodNotAllowed, want: "method not allowed"}, + {name: "invalid json", method: http.MethodPost, body: `{"namespace":`, status: http.StatusBadRequest, want: "invalid JSON"}, + {name: "missing namespace", method: http.MethodPost, body: `{"pvc":"data","target_pvc":"restore-data"}`, status: http.StatusBadRequest, want: "namespace is required"}, + {name: "missing pvc", method: http.MethodPost, body: `{"namespace":"apps","target_pvc":"restore-data"}`, status: http.StatusBadRequest, want: "pvc is required"}, + {name: "missing target pvc", method: http.MethodPost, body: `{"namespace":"apps","pvc":"data"}`, status: http.StatusBadRequest, want: "target_pvc is required"}, + } + + for _, tc := range testCases { + req := httptest.NewRequest(tc.method, "/v1/restores", strings.NewReader(tc.body)) + if tc.method == http.MethodPost { + req.Header.Set("Content-Type", "application/json") + } + res := httptest.NewRecorder() + srv.Handler().ServeHTTP(res, req) + if res.Code != tc.status || !strings.Contains(res.Body.String(), tc.want) { + t.Fatalf("%s: expected %d/%q, got %d %s", tc.name, tc.status, tc.want, res.Code, res.Body.String()) + } + } +} + +func TestHandleRestoreCoversAdditionalValidationBranches(t *testing.T) { + srv := newRestoreTestServer( + &config.Config{AuthRequired: false, BackupDriver: "longhorn"}, + &restoreTestKubeClient{fakeKubeClient: &fakeKubeClient{}}, + &restoreTestLonghornClient{fakeLonghornClient: &fakeLonghornClient{}}, + ) + + testCases := []struct { + name string + body string + want string + code int + }{ + { + name: "invalid namespace label", + body: `{"namespace":"Bad_Ns","pvc":"data","target_pvc":"restore-data"}`, + want: "namespace must be a valid Kubernetes DNS-1123 label", + code: http.StatusBadRequest, + }, + { + name: "invalid pvc label", + body: `{"namespace":"apps","pvc":"Bad_PVC","target_pvc":"restore-data"}`, + want: "pvc must be a valid Kubernetes DNS-1123 label", + code: http.StatusBadRequest, + }, + { + name: "invalid target pvc label", + body: `{"namespace":"apps","pvc":"data","target_pvc":"Bad_Target"}`, + want: "target_pvc must be a valid Kubernetes DNS-1123 label", + code: http.StatusBadRequest, + }, + { + name: "default target namespace still conflicts", + body: `{"namespace":"apps","pvc":"data","target_pvc":"data"}`, + want: "target namespace/pvc must differ from source", + code: http.StatusConflict, + }, + } + + for _, tc := range testCases { + req := httptest.NewRequest(http.MethodPost, "/v1/restores", strings.NewReader(tc.body)) + req.Header.Set("Content-Type", "application/json") + res := httptest.NewRecorder() + srv.Handler().ServeHTTP(res, req) + if res.Code != tc.code || !strings.Contains(res.Body.String(), tc.want) { + t.Fatalf("%s: expected %d/%q, got %d %s", tc.name, tc.code, tc.want, res.Code, res.Body.String()) + } + } +} + +func TestHandleNamespaceRestoreRejectsInvalidRequestsAndBackendErrors(t *testing.T) { + t.Run("bad requests", func(t *testing.T) { + srv := newRestoreTestServer( + &config.Config{AuthRequired: false, BackupDriver: "longhorn"}, + &restoreTestKubeClient{fakeKubeClient: &fakeKubeClient{}}, + &restoreTestLonghornClient{fakeLonghornClient: &fakeLonghornClient{}}, + ) + + testCases := []struct { + name string + method string + body string + status int + want string + }{ + {name: "method", method: http.MethodGet, body: "", status: http.StatusMethodNotAllowed, want: "method not allowed"}, + {name: "invalid json", method: http.MethodPost, body: `{"namespace":`, status: http.StatusBadRequest, want: "invalid JSON"}, + {name: "missing namespace", method: http.MethodPost, body: `{}`, status: http.StatusBadRequest, want: "namespace is required"}, + {name: "invalid namespace", method: http.MethodPost, body: `{"namespace":"Bad_Ns"}`, status: http.StatusBadRequest, want: "namespace must be a valid Kubernetes DNS-1123 label"}, + {name: "invalid target namespace", method: http.MethodPost, body: `{"namespace":"apps","target_namespace":"Bad_Ns"}`, status: http.StatusBadRequest, want: "target_namespace must be a valid Kubernetes DNS-1123 label"}, + } + + for _, tc := range testCases { + req := httptest.NewRequest(tc.method, "/v1/restores/namespace", strings.NewReader(tc.body)) + if tc.method == http.MethodPost { + req.Header.Set("Content-Type", "application/json") + } + res := httptest.NewRecorder() + srv.Handler().ServeHTTP(res, req) + if res.Code != tc.status || !strings.Contains(res.Body.String(), tc.want) { + t.Fatalf("%s: expected %d/%q, got %d %s", tc.name, tc.status, tc.want, res.Code, res.Body.String()) + } + } + }) + + t.Run("backend error", func(t *testing.T) { + srv := newRestoreTestServer( + &config.Config{AuthRequired: false, BackupDriver: "longhorn"}, + &restoreTestKubeClient{ + fakeKubeClient: &fakeKubeClient{}, + listPVCsErr: errors.New("list pvcs exploded"), + }, + &restoreTestLonghornClient{fakeLonghornClient: &fakeLonghornClient{}}, + ) + + req := httptest.NewRequest(http.MethodPost, "/v1/restores/namespace", strings.NewReader(`{"namespace":"apps"}`)) + req.Header.Set("Content-Type", "application/json") + res := httptest.NewRecorder() + srv.Handler().ServeHTTP(res, req) + + if res.Code != http.StatusBadGateway || !strings.Contains(res.Body.String(), "list pvcs exploded") { + t.Fatalf("expected namespace restore backend error, got %d %s", res.Code, res.Body.String()) + } + }) +} + +func TestHandleNamespaceRestoreDefaultsTargetNamespaceForEmptyResults(t *testing.T) { + srv := newRestoreTestServer( + &config.Config{AuthRequired: false, BackupDriver: "longhorn"}, + &restoreTestKubeClient{ + fakeKubeClient: &fakeKubeClient{ + pvcs: []k8s.PVCSummary{ + {Namespace: "other", Name: "cache", VolumeName: "pv-other-cache", Phase: "Bound"}, + }, + }, + }, + &restoreTestLonghornClient{fakeLonghornClient: &fakeLonghornClient{}}, + ) + + req := httptest.NewRequest(http.MethodPost, "/v1/restores/namespace", strings.NewReader(`{"namespace":"apps","dry_run":false}`)) + req.Header.Set("Content-Type", "application/json") + res := httptest.NewRecorder() + srv.Handler().ServeHTTP(res, req) + + if res.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", res.Code, res.Body.String()) + } + if !strings.Contains(res.Body.String(), `"target_namespace":"apps"`) || !strings.Contains(res.Body.String(), `"total":0`) { + t.Fatalf("expected default target namespace and empty results, got %s", res.Body.String()) + } +} + +func TestHandleNamespaceRestoreAggregatesFailures(t *testing.T) { + srv := newRestoreTestServer( + &config.Config{AuthRequired: false, BackupDriver: "longhorn"}, + &restoreTestKubeClient{ + fakeKubeClient: &fakeKubeClient{ + pvcs: []k8s.PVCSummary{ + {Namespace: "apps", Name: "cache", VolumeName: "pv-apps-cache", Phase: "Bound"}, + }, + targetExists: true, + }, + }, + &restoreTestLonghornClient{fakeLonghornClient: &fakeLonghornClient{}}, + ) + + req := httptest.NewRequest(http.MethodPost, "/v1/restores/namespace", strings.NewReader(`{"namespace":"apps","target_namespace":"restore"}`)) + req.Header.Set("Content-Type", "application/json") + res := httptest.NewRecorder() + srv.Handler().ServeHTTP(res, req) + + if res.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", res.Code, res.Body.String()) + } + if !strings.Contains(res.Body.String(), `"failed":1`) || !strings.Contains(res.Body.String(), `"status":"conflict"`) { + t.Fatalf("expected aggregated failure response, got %s", res.Body.String()) + } +} + +func TestExecuteRestoreLonghornErrorAndCleanupPaths(t *testing.T) { + cfg := &config.Config{AuthRequired: false, BackupDriver: "longhorn"} + + t.Run("target exists lookup error", func(t *testing.T) { + srv := newRestoreTestServer( + cfg, + &restoreTestKubeClient{ + fakeKubeClient: &fakeKubeClient{}, + targetExistsErr: errors.New("target lookup exploded"), + }, + &restoreTestLonghornClient{fakeLonghornClient: &fakeLonghornClient{}}, + ) + _, result, err := srv.executeRestore(context.Background(), api.RestoreTestRequest{ + Namespace: "apps", PVC: "data", TargetNamespace: "restore", TargetPVC: "restore-data", + }, "brad") + if result != "validation_error" || err == nil || !strings.Contains(err.Error(), "target lookup exploded") { + t.Fatalf("expected target exists validation error, got result=%q err=%v", result, err) + } + }) + + t.Run("resolve pvc error", func(t *testing.T) { + srv := newRestoreTestServer( + cfg, + &restoreTestKubeClient{ + fakeKubeClient: &fakeKubeClient{}, + resolveErr: errors.New("resolve pvc exploded"), + }, + &restoreTestLonghornClient{fakeLonghornClient: &fakeLonghornClient{}}, + ) + _, result, err := srv.executeRestore(context.Background(), api.RestoreTestRequest{ + Namespace: "apps", PVC: "data", TargetNamespace: "restore", TargetPVC: "restore-data", + }, "brad") + if result != "validation_error" || err == nil || !strings.Contains(err.Error(), "resolve pvc exploded") { + t.Fatalf("expected resolve pvc validation error, got result=%q err=%v", result, err) + } + }) + + t.Run("find backup error", func(t *testing.T) { + srv := newRestoreTestServer( + cfg, + &restoreTestKubeClient{fakeKubeClient: &fakeKubeClient{}}, + &restoreTestLonghornClient{ + fakeLonghornClient: &fakeLonghornClient{}, + findErr: errors.New("find backup exploded"), + }, + ) + _, result, err := srv.executeRestore(context.Background(), api.RestoreTestRequest{ + Namespace: "apps", PVC: "data", TargetNamespace: "restore", TargetPVC: "restore-data", + }, "brad") + if result != "validation_error" || err == nil || !strings.Contains(err.Error(), "find backup exploded") { + t.Fatalf("expected find backup validation error, got result=%q err=%v", result, err) + } + }) + + t.Run("backup url required", func(t *testing.T) { + srv := newRestoreTestServer( + cfg, + &restoreTestKubeClient{fakeKubeClient: &fakeKubeClient{}}, + &restoreTestLonghornClient{ + fakeLonghornClient: &fakeLonghornClient{}, + findBackup: &longhorn.Backup{Name: "backup-a", URL: ""}, + }, + ) + _, result, err := srv.executeRestore(context.Background(), api.RestoreTestRequest{ + Namespace: "apps", PVC: "data", TargetNamespace: "restore", TargetPVC: "restore-data", + }, "brad") + if result != "validation_error" || err == nil || !strings.Contains(err.Error(), "backup_url is required") { + t.Fatalf("expected backup_url validation error, got result=%q err=%v", result, err) + } + }) + + t.Run("get volume backend error", func(t *testing.T) { + srv := newRestoreTestServer( + cfg, + &restoreTestKubeClient{fakeKubeClient: &fakeKubeClient{}}, + &restoreTestLonghornClient{ + fakeLonghornClient: &fakeLonghornClient{}, + findBackup: &longhorn.Backup{Name: "backup-a", URL: "s3://bucket/backup-a"}, + getVolumeErr: errors.New("get volume exploded"), + }, + ) + _, result, err := srv.executeRestore(context.Background(), api.RestoreTestRequest{ + Namespace: "apps", PVC: "data", TargetNamespace: "restore", TargetPVC: "restore-data", + }, "brad") + if result != "backend_error" || err == nil || !strings.Contains(err.Error(), "get volume exploded") { + t.Fatalf("expected get volume backend error, got result=%q err=%v", result, err) + } + }) + + t.Run("create volume backend error", func(t *testing.T) { + srv := newRestoreTestServer( + cfg, + &restoreTestKubeClient{fakeKubeClient: &fakeKubeClient{}}, + &restoreTestLonghornClient{ + fakeLonghornClient: &fakeLonghornClient{}, + findBackup: &longhorn.Backup{Name: "backup-a", URL: "s3://bucket/backup-a"}, + getVolume: &longhorn.Volume{Name: "pv-apps-data", Size: "4096", NumberOfReplicas: 0}, + createVolErr: errors.New("create volume exploded"), + }, + ) + _, result, err := srv.executeRestore(context.Background(), api.RestoreTestRequest{ + Namespace: "apps", PVC: "data", TargetNamespace: "restore", TargetPVC: "restore-data", + }, "brad") + if result != "backend_error" || err == nil || !strings.Contains(err.Error(), "create volume exploded") { + t.Fatalf("expected create volume backend error, got result=%q err=%v", result, err) + } + }) + + t.Run("create pvc cleanup success", func(t *testing.T) { + longhornClient := &restoreTestLonghornClient{ + fakeLonghornClient: &fakeLonghornClient{}, + findBackup: &longhorn.Backup{Name: "backup-a", URL: "s3://bucket/backup-a"}, + getVolume: &longhorn.Volume{Name: "pv-apps-data", Size: "4096", NumberOfReplicas: 1}, + createPVCErr: errors.New("create pvc exploded"), + } + srv := newRestoreTestServer( + cfg, + &restoreTestKubeClient{fakeKubeClient: &fakeKubeClient{}}, + longhornClient, + ) + _, result, err := srv.executeRestore(context.Background(), api.RestoreTestRequest{ + Namespace: "apps", PVC: "data", TargetNamespace: "restore", TargetPVC: "restore-data", + }, "brad") + if result != "backend_error" || err == nil || !strings.Contains(err.Error(), "create restore pvc") { + t.Fatalf("expected create pvc backend error, got result=%q err=%v", result, err) + } + if longhornClient.deletedVolume == "" { + t.Fatalf("expected failed PVC create to trigger volume cleanup") + } + }) + + t.Run("create pvc cleanup failure", func(t *testing.T) { + srv := newRestoreTestServer( + cfg, + &restoreTestKubeClient{fakeKubeClient: &fakeKubeClient{}}, + &restoreTestLonghornClient{ + fakeLonghornClient: &fakeLonghornClient{}, + findBackup: &longhorn.Backup{Name: "backup-a", URL: "s3://bucket/backup-a"}, + getVolume: &longhorn.Volume{Name: "pv-apps-data", Size: "4096", NumberOfReplicas: 1}, + createPVCErr: errors.New("create pvc exploded"), + deleteVolErr: errors.New("cleanup exploded"), + }, + ) + _, result, err := srv.executeRestore(context.Background(), api.RestoreTestRequest{ + Namespace: "apps", PVC: "data", TargetNamespace: "restore", TargetPVC: "restore-data", + }, "brad") + if result != "backend_error" || err == nil || !strings.Contains(err.Error(), "cleanup failed") { + t.Fatalf("expected cleanup failure to be surfaced, got result=%q err=%v", result, err) + } + }) +} + +func TestExecuteRestoreResticAndHelperFunctions(t *testing.T) { + t.Run("restic create restore error", func(t *testing.T) { + srv := newRestoreTestServer( + &config.Config{AuthRequired: false, BackupDriver: "restic"}, + &restoreTestKubeClient{ + fakeKubeClient: &fakeKubeClient{}, + createRestoreErr: errors.New("create restore job exploded"), + }, + &restoreTestLonghornClient{fakeLonghornClient: &fakeLonghornClient{}}, + ) + _, result, err := srv.executeRestore(context.Background(), api.RestoreTestRequest{ + Namespace: "apps", PVC: "data", TargetNamespace: "restore", TargetPVC: "restore-data", + }, "brad") + if result != "backend_error" || err == nil || !strings.Contains(err.Error(), "create restore job exploded") { + t.Fatalf("expected restic backend error, got result=%q err=%v", result, err) + } + }) + + t.Run("unsupported driver", func(t *testing.T) { + srv := newRestoreTestServer( + &config.Config{AuthRequired: false, BackupDriver: "mystery"}, + &restoreTestKubeClient{fakeKubeClient: &fakeKubeClient{}}, + &restoreTestLonghornClient{fakeLonghornClient: &fakeLonghornClient{}}, + ) + _, result, err := srv.executeRestore(context.Background(), api.RestoreTestRequest{ + Namespace: "apps", PVC: "data", TargetNamespace: "restore", TargetPVC: "restore-data", + }, "brad") + if result != "unsupported_driver" || err == nil || !strings.Contains(err.Error(), "unsupported backup driver") { + t.Fatalf("expected unsupported driver error, got result=%q err=%v", result, err) + } + }) + + t.Run("restic dry run success", func(t *testing.T) { + srv := newRestoreTestServer( + &config.Config{AuthRequired: false, BackupDriver: "restic"}, + &restoreTestKubeClient{fakeKubeClient: &fakeKubeClient{}}, + &restoreTestLonghornClient{fakeLonghornClient: &fakeLonghornClient{}}, + ) + response, result, err := srv.executeRestore(context.Background(), api.RestoreTestRequest{ + Namespace: "apps", PVC: "data", TargetNamespace: "restore", TargetPVC: "restore-data", DryRun: true, + }, "brad") + if err != nil || result != "dry_run" || response.JobName != "restore-job" || response.Secret != "restore-secret" { + t.Fatalf("expected restic dry-run success, got response=%#v result=%q err=%v", response, result, err) + } + }) + + t.Run("restore status code", func(t *testing.T) { + testCases := map[string]int{ + "validation_error": http.StatusBadRequest, + "unsupported_driver": http.StatusBadRequest, + "conflict": http.StatusConflict, + "backend_error": http.StatusBadGateway, + "anything-else": http.StatusInternalServerError, + } + for result, want := range testCases { + if got := restoreStatusCode(result); got != want { + t.Fatalf("result=%q: expected %d, got %d", result, want, got) + } + } + }) + + t.Run("target pvc name", func(t *testing.T) { + if got := targetPVCName("", "Data_PVC"); got != "restore-data-pvc" { + t.Fatalf("expected default restore prefix, got %q", got) + } + if got := targetPVCName("drill", "cache"); got != "drill-cache" { + t.Fatalf("expected explicit prefix, got %q", got) + } + if got := targetPVCName("drill-", "cache"); got != "drill-cache" { + t.Fatalf("expected suffix-preserving prefix, got %q", got) + } + if got := targetPVCName(strings.Repeat("very-long-prefix-", 8), strings.Repeat("data-", 8)); len(got) > 63 { + t.Fatalf("expected truncated target pvc name <=63 chars, got %d %q", len(got), got) + } + }) +}