diff --git a/internal/server/b2_refresh_test.go b/internal/server/b2_refresh_test.go
index c2dda95..56783f7 100644
--- a/internal/server/b2_refresh_test.go
+++ b/internal/server/b2_refresh_test.go
@@ -2,10 +2,14 @@ package server
 
 import (
 	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
 	"strings"
 	"testing"
 	"time"
 
+	"scm.bstein.dev/bstein/soteria/internal/api"
 	"scm.bstein.dev/bstein/soteria/internal/config"
 )
 
@@ -62,3 +66,52 @@ func TestRefreshB2UsageRecordsScanErrorsAfterCredentialsResolve(t *testing.T) {
 		t.Fatalf("expected failure metrics after scan error, got success=%f duration=%f", srv.metrics.b2ScanSuccess, srv.metrics.b2ScanDurationSeconds)
 	}
 }
+
+func TestHandleB2UsageRefreshesWhenForcedAndQueryBoolRecognizesTruthyValues(t *testing.T) {
+	for _, raw := range []string{"1", "true", "yes", "y", "on", "TRUE"} {
+		if !queryBool(raw) {
+			t.Fatalf("expected %q to be treated as truthy", raw)
+		}
+	}
+	if queryBool("nope") {
+		t.Fatalf("expected invalid truthy value to be false")
+	}
+
+	srv := &Server{
+		cfg: &config.Config{
+			B2Enabled:         true,
+			B2Endpoint:        "https://",
+			B2AccessKeyID:     "atlas-key",
+			B2SecretAccessKey: "atlas-secret",
+			B2ScanTimeout:     time.Second,
+		},
+		client:  &fakeKubeClient{},
+		metrics: newTelemetry(),
+	}
+	srv.handler = http.HandlerFunc(srv.route)
+	srv.setB2Usage(api.B2UsageResponse{
+		Enabled:   true,
+		Available: true,
+		ScannedAt: time.Now().UTC().Add(-1 * time.Hour).Format(time.RFC3339),
+		Endpoint:  "cached-endpoint",
+	})
+
+	req := httptest.NewRequest(http.MethodGet, "/v1/b2?refresh=yes", nil)
+	res := httptest.NewRecorder()
+	srv.Handler().ServeHTTP(res, req)
+
+	if res.Code != http.StatusOK {
+		t.Fatalf("expected forced refresh request to succeed, got %d: %s", res.Code, res.Body.String())
+	}
+
+	var payload api.B2UsageResponse
+	if err := json.Unmarshal(res.Body.Bytes(), &payload); err != nil {
+		t.Fatalf("decode forced refresh payload: %v", err)
+	}
+	if payload.Error == "" || !strings.Contains(payload.Error, "S3 endpoint host is empty") {
+		t.Fatalf("expected refreshed B2 failure payload, got %#v", payload)
+	}
+	if payload.Endpoint != "https://" {
+		t.Fatalf("expected refreshed endpoint to replace cached snapshot, got %#v", payload)
+	}
+}
diff --git a/internal/server/policy_cycle_test.go b/internal/server/policy_cycle_test.go
new file mode 100644
index 0000000..870accd
--- /dev/null
+++ b/internal/server/policy_cycle_test.go
@@ -0,0 +1,156 @@
+package server
+
+import (
+	"context"
+	"errors"
+	"testing"
+	"time"
+
+	"scm.bstein.dev/bstein/soteria/internal/api"
+	"scm.bstein.dev/bstein/soteria/internal/config"
+	"scm.bstein.dev/bstein/soteria/internal/k8s"
+)
+
+type policyCycleTestKubeClient struct {
+	*inventoryTestKubeClient
+	createBackupErrForPVC map[string]error
+	backupRequests        []api.BackupRequest
+}
+
+func (k *policyCycleTestKubeClient) CreateBackupJob(ctx context.Context, cfg *config.Config, req api.BackupRequest) (string, string, error) {
+	k.backupRequests = append(k.backupRequests, req)
+	if err := k.createBackupErrForPVC[req.PVC]; err != nil {
+		return "", "", err
+	}
+	return k.inventoryTestKubeClient.fakeKubeClient.CreateBackupJob(ctx, cfg, req)
+}
+
+func metricCount(samples map[string]metricSample, labels map[string]string) float64 {
+	sample, ok := samples[metricKey(labels)]
+	if !ok {
+		return 0
+	}
+	return sample.value
+}
+
+func findBackupRequestByPVC(items []api.BackupRequest, pvc string) (api.BackupRequest, bool) {
+	for _, item := range items {
+		if item.PVC == pvc {
+			return item, true
+		}
+	}
+	return api.BackupRequest{}, false
+}
+
+func TestRunPolicyCycleCoversInventoryErrorAndConcurrentGuard(t *testing.T) {
+	client := &policyCycleTestKubeClient{
+		inventoryTestKubeClient: &inventoryTestKubeClient{
+			fakeKubeClient: &fakeKubeClient{},
+			listPVCsErr:    errors.New("inventory exploded"),
+		},
+	}
+	srv := &Server{
+		cfg: &config.Config{
+			BackupDriver:       "restic",
+			BackupMaxAge:       24 * time.Hour,
+			PolicyEvalInterval: time.Minute,
+		},
+		client:   client,
+		longhorn: &fakeLonghornClient{},
+		metrics:  newTelemetry(),
+		policies: map[string]api.BackupPolicy{
+			"apps__all": {ID: "apps__all", Namespace: "apps", IntervalHours: 6, Enabled: true, Dedupe: true},
+		},
+	}
+
+	srv.runPolicyCycle(context.Background())
+	if got := metricCount(srv.metrics.policyBackups, map[string]string{"result": "inventory_error"}); got != 1 {
+		t.Fatalf("expected inventory_error policy metric, got %f", got)
+	}
+	if srv.running {
+		t.Fatalf("expected policy runner lock to be released after inventory failure")
+	}
+
+	srv.running = true
+	srv.runPolicyCycle(context.Background())
+	if got := metricCount(srv.metrics.policyBackups, map[string]string{"result": "inventory_error"}); got != 1 {
+		t.Fatalf("expected concurrent guard to skip second run, got %f", got)
+	}
+}
+
+func TestRunPolicyCycleCoversEffectivePoliciesAndResultTracking(t *testing.T) {
+	now := time.Now().UTC()
+	recent := now.Add(-30 * time.Minute)
+	client := &policyCycleTestKubeClient{
+		inventoryTestKubeClient: &inventoryTestKubeClient{
+			fakeKubeClient: &fakeKubeClient{
+				pvcs: []k8s.PVCSummary{
+					{Namespace: "apps", Name: "data", VolumeName: "vol-data", Phase: "Bound"},
+					{Namespace: "apps", Name: "busy", VolumeName: "vol-busy", Phase: "Bound"},
+					{Namespace: "apps", Name: "recent", VolumeName: "vol-recent", Phase: "Bound"},
+					{Namespace: "apps", Name: "attempted", VolumeName: "vol-attempted", Phase: "Bound"},
+					{Namespace: "apps", Name: "fail", VolumeName: "vol-fail", Phase: "Bound"},
+				},
+				backupJobs: map[string][]k8s.BackupJobSummary{
+					"apps/busy": {
+						{Name: "job-busy", Namespace: "apps", PVC: "busy", State: "Running", CreatedAt: recent},
+					},
+					"apps/recent": {
+						{Name: "job-recent", Namespace: "apps", PVC: "recent", State: "Completed", CreatedAt: recent, CompletionTime: recent, KeepLast: 1},
+					},
+					"apps/attempted": {
+						{Name: "job-attempted", Namespace: "apps", PVC: "attempted", State: "Failed", CreatedAt: recent, KeepLast: 1},
+					},
+				},
+			},
+		},
+		createBackupErrForPVC: map[string]error{
+			"fail": errors.New("create backup job exploded"),
+		},
+	}
+	srv := &Server{
+		cfg: &config.Config{
+			BackupDriver:     "restic",
+			BackupMaxAge:     24 * time.Hour,
+			ResticRepository: "s3:https://repo/root",
+		},
+		client:   client,
+		longhorn: &fakeLonghornClient{},
+		metrics:  newTelemetry(),
+		policies: map[string]api.BackupPolicy{
+			"apps__all":      {ID: "apps__all", Namespace: "apps", IntervalHours: 6, Enabled: true, Dedupe: true, KeepLast: 1},
+			"apps__data":     {ID: "apps__data", Namespace: "apps", PVC: "data", IntervalHours: 1, Enabled: true, Dedupe: false, KeepLast: 2},
+			"apps__disabled": {ID: "apps__disabled", Namespace: "apps", PVC: "disabled", IntervalHours: 1, Enabled: false, Dedupe: false, KeepLast: 5},
+		},
+	}
+
+	srv.runPolicyCycle(context.Background())
+
+	if len(client.backupRequests) != 2 {
+		t.Fatalf("expected two executed backup requests, got %#v", client.backupRequests)
+	}
+	dataReq, ok := findBackupRequestByPVC(client.backupRequests, "data")
+	if !ok || dataReq.Dedupe == nil || *dataReq.Dedupe != false || dataReq.KeepLast == nil || *dataReq.KeepLast != 2 {
+		t.Fatalf("expected stricter PVC policy to win for data, got %#v", client.backupRequests)
+	}
+
+	if got := metricCount(srv.metrics.policyBackups, map[string]string{"result": "success"}); got != 1 {
+		t.Fatalf("expected one successful policy backup, got %f", got)
+	}
+	if got := metricCount(srv.metrics.policyBackups, map[string]string{"result": "backend_error"}); got != 1 {
+		t.Fatalf("expected one backend error policy backup, got %f", got)
+	}
+	if got := metricCount(srv.metrics.policyBackups, map[string]string{"result": "in_progress"}); got != 1 {
+		t.Fatalf("expected one in-progress policy backup, got %f", got)
+	}
+	if got := metricCount(srv.metrics.policyBackups, map[string]string{"result": "not_due"}); got != 2 {
+		t.Fatalf("expected two not-due policy backups, got %f", got)
+	}
+
+	if got := metricCount(srv.metrics.backupRequests, map[string]string{"driver": "restic", "result": "success"}); got != 1 {
+		t.Fatalf("expected one successful executed backup request, got %f", got)
+	}
+	if got := metricCount(srv.metrics.backupRequests, map[string]string{"driver": "restic", "result": "backend_error"}); got != 1 {
+		t.Fatalf("expected one failed executed backup request, got %f", got)
+	}
+}
diff --git a/internal/server/server_start_test.go b/internal/server/server_start_test.go
new file mode 100644
index 0000000..087e471
--- /dev/null
+++ b/internal/server/server_start_test.go
@@ -0,0 +1,125 @@
+package server
+
+import (
+	"context"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+
+	"scm.bstein.dev/bstein/soteria/internal/api"
+	"scm.bstein.dev/bstein/soteria/internal/config"
+	"scm.bstein.dev/bstein/soteria/internal/k8s"
+)
+
+type startTestKubeClient struct {
+	*fakeKubeClient
+	mu           sync.Mutex
+	loadCalls    int
+	listPVCCalls int
+}
+
+func (k *startTestKubeClient) LoadSecretData(ctx context.Context, namespace, secretName, key string) ([]byte, error) {
+	k.mu.Lock()
+	k.loadCalls++
+	k.mu.Unlock()
+	return k.fakeKubeClient.LoadSecretData(ctx, namespace, secretName, key)
+}
+
+func (k *startTestKubeClient) ListBoundPVCs(ctx context.Context) ([]k8s.PVCSummary, error) {
+	k.mu.Lock()
+	k.listPVCCalls++
+	k.mu.Unlock()
+	return k.fakeKubeClient.ListBoundPVCs(ctx)
+}
+
+func (k *startTestKubeClient) counts() (int, int) {
+	k.mu.Lock()
+	defer k.mu.Unlock()
+	return k.loadCalls, k.listPVCCalls
+}
+
+func newStartTestServer(cfg *config.Config, client kubeClient) *Server {
+	return &Server{
+		cfg:        cfg,
+		client:     client,
+		longhorn:   &fakeLonghornClient{},
+		metrics:    newTelemetry(),
+		ui:         newUIRenderer(),
+		policies:   map[string]api.BackupPolicy{},
+		jobUsage:   map[string]resticJobUsageCacheEntry{},
+		usageStore: map[string]resticPersistedUsageEntry{},
+	}
+}
+
+func TestStartRunsInitialLoadAndTickerLoopWithoutB2(t *testing.T) {
+	client := &startTestKubeClient{
+		fakeKubeClient: &fakeKubeClient{
+			pvcs: []k8s.PVCSummary{{Namespace: "apps", Name: "data", VolumeName: "vol-data", Phase: "Bound"}},
+		},
+	}
+	srv := newStartTestServer(&config.Config{
+		Namespace:              "atlas",
+		PolicySecretName:       "soteria-policies",
+		UsageSecretName:        "soteria-usage",
+		BackupDriver:           "longhorn",
+		BackupMaxAge:           24 * time.Hour,
+		MetricsRefreshInterval: 10 * time.Millisecond,
+		PolicyEvalInterval:     10 * time.Millisecond,
+		B2Enabled:              false,
+	}, client)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	srv.Start(ctx)
+	time.Sleep(35 * time.Millisecond)
+	cancel()
+	time.Sleep(20 * time.Millisecond)
+
+	loadCalls, listPVCCalls := client.counts()
+	if loadCalls < 2 {
+		t.Fatalf("expected initial policy/usage secret loads, got %d", loadCalls)
+	}
+	if listPVCCalls < 2 {
+		t.Fatalf("expected inventory refresh to run initially and on ticker, got %d", listPVCCalls)
+	}
+	if srv.metrics.inventoryRefreshTime == 0 {
+		t.Fatalf("expected inventory metrics to be recorded after start loop")
+	}
+}
+
+func TestStartRunsB2TickerAndStoresRefreshFailures(t *testing.T) {
+	client := &startTestKubeClient{
+		fakeKubeClient: &fakeKubeClient{
+			pvcs: []k8s.PVCSummary{{Namespace: "apps", Name: "data", VolumeName: "vol-data", Phase: "Bound"}},
+		},
+	}
+	srv := newStartTestServer(&config.Config{
+		Namespace:              "atlas",
+		PolicySecretName:       "soteria-policies",
+		UsageSecretName:        "soteria-usage",
+		BackupDriver:           "longhorn",
+		BackupMaxAge:           24 * time.Hour,
+		MetricsRefreshInterval: 10 * time.Millisecond,
+		PolicyEvalInterval:     10 * time.Millisecond,
+		B2Enabled:              true,
+		B2Endpoint:             "https://",
+		B2AccessKeyID:          "atlas-key",
+		B2SecretAccessKey:      "atlas-secret",
+		B2ScanInterval:         10 * time.Millisecond,
+		B2ScanTimeout:          10 * time.Millisecond,
+	}, client)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	srv.Start(ctx)
+	time.Sleep(35 * time.Millisecond)
+	cancel()
+	time.Sleep(20 * time.Millisecond)
+
+	usage := srv.getB2Usage()
+	if !usage.Enabled || usage.Error == "" || !strings.Contains(usage.Error, "S3 endpoint host is empty") {
+		t.Fatalf("expected B2 ticker refresh failure snapshot, got %#v", usage)
+	}
+	if srv.metrics.b2ScanTimestamp == 0 {
+		t.Fatalf("expected B2 scan metrics to be recorded during start loop")
+	}
+}