package server

import (
	"context"
	"errors"
	"testing"
	"time"

	"scm.bstein.dev/bstein/soteria/internal/api"
	"scm.bstein.dev/bstein/soteria/internal/config"
	"scm.bstein.dev/bstein/soteria/internal/k8s"
	"scm.bstein.dev/bstein/soteria/internal/longhorn"
)

// inventoryTestKubeClient wraps fakeKubeClient so tests can inject failures
// into PVC listing (globally) and backup-job listing (per namespace).
type inventoryTestKubeClient struct {
	*fakeKubeClient
	listPVCsErr       error
	listBackupJobsErr map[string]error
}

// ListBoundPVCs returns the injected error when configured, otherwise
// delegates to the embedded fake.
func (k *inventoryTestKubeClient) ListBoundPVCs(ctx context.Context) ([]k8s.PVCSummary, error) {
	if k.listPVCsErr != nil {
		return nil, k.listPVCsErr
	}
	return k.fakeKubeClient.ListBoundPVCs(ctx)
}

// ListBackupJobs returns a per-namespace injected error when configured,
// otherwise delegates to the embedded fake.
func (k *inventoryTestKubeClient) ListBackupJobs(ctx context.Context, namespace string) ([]k8s.BackupJobSummary, error) {
	if err := k.listBackupJobsErr[namespace]; err != nil {
		return nil, err
	}
	return k.fakeKubeClient.ListBackupJobs(ctx, namespace)
}

// inventoryTestLonghornClient wraps fakeLonghornClient with per-volume canned
// backup lists and per-volume injected errors.
type inventoryTestLonghornClient struct {
	*fakeLonghornClient
	listBackupsByVolume map[string][]longhorn.Backup
	listBackupsErr      map[string]error
}

// ListBackups checks injected errors first, then canned per-volume results,
// and finally falls back to the embedded fake.
func (l *inventoryTestLonghornClient) ListBackups(ctx context.Context, volumeName string) ([]longhorn.Backup, error) {
	if err := l.listBackupsErr[volumeName]; err != nil {
		return nil, err
	}
	if backups, ok := l.listBackupsByVolume[volumeName]; ok {
		return backups, nil
	}
	return l.fakeLonghornClient.ListBackups(ctx, volumeName)
}

// newInventoryTestServer assembles a Server around the supplied fakes with a
// fresh telemetry instance.
func newInventoryTestServer(cfg *config.Config, client kubeClient, longhornClient longhornClient) *Server {
	return &Server{
		cfg:      cfg,
		client:   client,
		longhorn: longhornClient,
		metrics:  newTelemetry(),
	}
}

func TestBuildInventoryLonghornSortsNamespacesAndCalculatesHealth(t *testing.T) {
	now := time.Now().UTC()
	recent := now.Add(-2 * time.Hour) // within the 24h freshness window
	old := now.Add(-48 * time.Hour)   // outside the window

	client := &inventoryTestKubeClient{
		fakeKubeClient: &fakeKubeClient{
			pvcs: []k8s.PVCSummary{
				{Namespace: "zeta", Name: "cache", VolumeName: "vol-cache", Phase: "Bound", StorageClass: "fast", Capacity: "10Gi", AccessModes: []string{"ReadWriteOnce"}},
				{Namespace: "apps", Name: "data", VolumeName: "vol-data", Phase: "Bound", StorageClass: "fast", Capacity: "20Gi", AccessModes: []string{"ReadWriteOnce"}},
			},
		},
	}
	longhornClient := &inventoryTestLonghornClient{
		fakeLonghornClient: &fakeLonghornClient{},
		listBackupsByVolume: map[string][]longhorn.Backup{
			"vol-data": {
				{Name: "backup-new", Created: recent.Format(time.RFC3339), State: "Completed", Size: "2147483648"},
				{Name: "backup-old", Created: old.Format(time.RFC3339), State: "Completed", Size: "1073741824"},
			},
			"vol-cache": {},
		},
	}
	srv := newInventoryTestServer(&config.Config{
		BackupDriver: "longhorn",
		BackupMaxAge: 24 * time.Hour,
	}, client, longhornClient)

	inventory, err := srv.buildInventory(context.Background())
	if err != nil {
		t.Fatalf("build longhorn inventory: %v", err)
	}

	// Namespaces come back alphabetically regardless of PVC listing order.
	if len(inventory.Namespaces) != 2 {
		t.Fatalf("expected two namespaces, got %#v", inventory.Namespaces)
	}
	if inventory.Namespaces[0].Name != "apps" || inventory.Namespaces[1].Name != "zeta" {
		t.Fatalf("expected sorted namespaces, got %#v", inventory.Namespaces)
	}

	// The apps PVC has a recent completed backup, so it is healthy/fresh with
	// both backups counted and their byte sizes aggregated.
	appsPVC := inventory.Namespaces[0].PVCs[0]
	if !appsPVC.Healthy || appsPVC.HealthReason != "fresh" {
		t.Fatalf("expected healthy fresh apps pvc, got %#v", appsPVC)
	}
	if appsPVC.BackupCount != 2 || appsPVC.CompletedBackups != 2 {
		t.Fatalf("expected completed backup counts, got %#v", appsPVC)
	}
	if appsPVC.LastBackupSizeBytes != 2147483648 || appsPVC.TotalBackupSizeBytes != 3221225472 {
		t.Fatalf("expected longhorn byte totals, got %#v", appsPVC)
	}
	if appsPVC.LastBackupAt == "" || appsPVC.LastBackupAgeHours <= 0 {
		t.Fatalf("expected latest backup timestamp, got %#v", appsPVC)
	}

	// The zeta PVC has no backups at all.
	cachePVC := inventory.Namespaces[1].PVCs[0]
	if cachePVC.Healthy || cachePVC.HealthReason != "missing" {
		t.Fatalf("expected missing backup health, got %#v", cachePVC)
	}
}

func TestEnrichPVCInventoryCoversLonghornAndResticBranches(t *testing.T) {
	now := time.Now().UTC()
	recent := now.Add(-1 * time.Hour)
	old := now.Add(-72 * time.Hour)

	t.Run("longhorn branches", func(t *testing.T) {
		srv := newInventoryTestServer(&config.Config{
			BackupDriver: "longhorn",
			BackupMaxAge: 24 * time.Hour,
		}, &inventoryTestKubeClient{fakeKubeClient: &fakeKubeClient{}}, &inventoryTestLonghornClient{
			fakeLonghornClient: &fakeLonghornClient{},
			listBackupsErr: map[string]error{
				"vol-error": errors.New("list backups exploded"),
			},
			listBackupsByVolume: map[string][]longhorn.Backup{
				"vol-no-completed": {
					{Name: "backup-pending", Created: recent.Format(time.RFC3339), State: "InProgress"},
				},
				"vol-bad-time": {
					{Name: "backup-complete", Created: "not-a-time", State: "Completed", Size: "123"},
				},
				"vol-stale": {
					{Name: "backup-old", Created: old.Format(time.RFC3339), State: "Completed", Size: "10"},
				},
			},
		})

		// One case per longhorn unhealthy branch: lookup failure, no
		// completed backup, unparseable timestamp, and stale backup.
		cases := []struct {
			entry apiPVCInventory
			want  string
			ok    bool
			err   string
		}{
			{entry: apiPVCInventory{Namespace: "apps", PVC: "err", Volume: "vol-error"}, want: "lookup_failed", ok: false, err: "list backups exploded"},
			{entry: apiPVCInventory{Namespace: "apps", PVC: "none", Volume: "vol-no-completed"}, want: "no_completed", ok: false},
			{entry: apiPVCInventory{Namespace: "apps", PVC: "bad", Volume: "vol-bad-time"}, want: "unknown_timestamp", ok: false},
			{entry: apiPVCInventory{Namespace: "apps", PVC: "stale", Volume: "vol-stale"}, want: "stale", ok: false},
		}
		for _, tc := range cases {
			entry := tc.entry.toAPI()
			srv.enrichPVCInventory(context.Background(), &entry, nil, nil)
			if entry.HealthReason != tc.want || entry.Healthy != tc.ok {
				t.Fatalf("%s/%s: expected %q healthy=%v, got %#v", entry.Namespace, entry.PVC, tc.want, tc.ok, entry)
			}
			if tc.err != "" && entry.Error != tc.err {
				t.Fatalf("%s/%s: expected error %q, got %#v", entry.Namespace, entry.PVC, tc.err, entry)
			}
		}
	})

	t.Run("restic branches", func(t *testing.T) {
		client := &inventoryTestKubeClient{
			fakeKubeClient: &fakeKubeClient{
				jobLogs: map[string]string{
					"apps/job-new": `{"message_type":"summary","data_added":2048}`,
					"apps/job-old": `{"message_type":"summary","data_added":1024}`,
				},
			},
		}
		srv := newInventoryTestServer(&config.Config{
			Namespace:       "atlas",
			UsageSecretName: "restic-usage",
			BackupDriver:    "restic",
			BackupMaxAge:    24 * time.Hour,
		}, client, &inventoryTestLonghornClient{fakeLonghornClient: &fakeLonghornClient{}})

		resticJobsByPVC := map[string][]k8s.BackupJobSummary{
			"apps/data": {
				{Name: "job-old", Namespace: "apps", PVC: "data", State: "Completed", CreatedAt: old, CompletionTime: old, KeepLast: 1},
				{Name: "job-new", Namespace: "apps", PVC: "data", State: "Completed", CreatedAt: recent, CompletionTime: recent, KeepLast: 1},
			},
			"apps/running": {
				{Name: "job-running", Namespace: "apps", PVC: "running", State: "Running", CreatedAt: recent},
			},
			"apps/bad-time": {
				{Name: "job-bad", Namespace: "apps", PVC: "bad-time", State: "Completed"},
			},
		}
		// Enrichment expects jobs pre-sorted newest-first, as production does.
		for key := range resticJobsByPVC {
			sortBackupJobsNewestFirst(resticJobsByPVC[key])
		}

		// Fresh: a recent completed job; KeepLast=1 means only the newest
		// job's size counts toward the retained total.
		entry := apiPVCInventory{Namespace: "apps", PVC: "data", Volume: "vol-data"}.toAPI()
		srv.enrichPVCInventory(context.Background(), &entry, resticJobsByPVC, nil)
		if !entry.Healthy || entry.HealthReason != "fresh" {
			t.Fatalf("expected fresh restic pvc, got %#v", entry)
		}
		if entry.LastJobName != "job-new" || entry.BackupCount != 2 || entry.CompletedBackups != 2 {
			t.Fatalf("expected sorted restic jobs, got %#v", entry)
		}
		if entry.LastBackupSizeBytes != 2048 || entry.TotalBackupSizeBytes != 2048 {
			t.Fatalf("expected retained size accounting, got %#v", entry)
		}

		// In progress: only a running job exists.
		running := apiPVCInventory{Namespace: "apps", PVC: "running", Volume: "vol-running"}.toAPI()
		srv.enrichPVCInventory(context.Background(), &running, resticJobsByPVC, nil)
		if running.Healthy || running.HealthReason != "in_progress" || running.ActiveBackups != 1 {
			t.Fatalf("expected in-progress restic pvc, got %#v", running)
		}

		// Missing: no jobs at all for this PVC.
		missing := apiPVCInventory{Namespace: "apps", PVC: "missing", Volume: "vol-missing"}.toAPI()
		srv.enrichPVCInventory(context.Background(), &missing, resticJobsByPVC, nil)
		if missing.Healthy || missing.HealthReason != "missing" {
			t.Fatalf("expected missing restic pvc, got %#v", missing)
		}

		// Unknown timestamp: completed job with zero-valued times.
		badTime := apiPVCInventory{Namespace: "apps", PVC: "bad-time", Volume: "vol-bad-time"}.toAPI()
		srv.enrichPVCInventory(context.Background(), &badTime, resticJobsByPVC, nil)
		if badTime.Healthy || badTime.HealthReason != "unknown_timestamp" {
			t.Fatalf("expected unknown timestamp restic pvc, got %#v", badTime)
		}

		// Lookup failed: the namespace-level job listing errored.
		lookupFailed := apiPVCInventory{Namespace: "ops", PVC: "data", Volume: "vol-ops"}.toAPI()
		srv.enrichPVCInventory(context.Background(), &lookupFailed, resticJobsByPVC, map[string]error{"ops": errors.New("list jobs exploded")})
		if lookupFailed.Healthy || lookupFailed.HealthReason != "lookup_failed" || lookupFailed.Error != "list jobs exploded" {
			t.Fatalf("expected lookup failure restic pvc, got %#v", lookupFailed)
		}
	})
}

func TestRefreshTelemetryRecordsSuccessAndFailure(t *testing.T) {
	// Success path: one healthy PVC should populate the per-PVC metrics and
	// the refresh timestamp without bumping the failure counter.
	successClient := &inventoryTestKubeClient{
		fakeKubeClient: &fakeKubeClient{
			pvcs: []k8s.PVCSummary{
				{Namespace: "apps", Name: "data", VolumeName: "vol-data", Phase: "Bound"},
			},
		},
	}
	successLonghorn := &inventoryTestLonghornClient{
		fakeLonghornClient: &fakeLonghornClient{},
		listBackupsByVolume: map[string][]longhorn.Backup{
			"vol-data": {
				{Name: "backup-new", Created: time.Now().UTC().Add(-1 * time.Hour).Format(time.RFC3339), State: "Completed", Size: "512"},
			},
		},
	}
	srv := newInventoryTestServer(&config.Config{
		BackupDriver: "longhorn",
		BackupMaxAge: 24 * time.Hour,
	}, successClient, successLonghorn)
	srv.refreshTelemetry(context.Background())
	if srv.metrics.inventoryRefreshFailure != 0 {
		t.Fatalf("expected successful refresh, got failure count %f", srv.metrics.inventoryRefreshFailure)
	}
	if srv.metrics.inventoryRefreshTime == 0 {
		t.Fatalf("expected inventory refresh timestamp to be recorded")
	}
	if len(srv.metrics.pvcBackupHealth) != 1 || len(srv.metrics.pvcBackupCount) != 1 {
		t.Fatalf("expected pvc metrics to be recorded, got health=%d count=%d", len(srv.metrics.pvcBackupHealth), len(srv.metrics.pvcBackupCount))
	}

	// Failure path: a PVC listing error should increment the failure counter.
	failingSrv := newInventoryTestServer(&config.Config{
		BackupDriver: "longhorn",
		BackupMaxAge: 24 * time.Hour,
	}, &inventoryTestKubeClient{
		fakeKubeClient: &fakeKubeClient{},
		listPVCsErr:    errors.New("inventory exploded"),
	}, &inventoryTestLonghornClient{fakeLonghornClient: &fakeLonghornClient{}})
	failingSrv.refreshTelemetry(context.Background())
	if failingSrv.metrics.inventoryRefreshFailure != 1 {
		t.Fatalf("expected failed refresh metric, got %f", failingSrv.metrics.inventoryRefreshFailure)
	}
}

func TestSortBackupJobsNewestFirstUsesCompletionCreatedAndNameTiebreakers(t *testing.T) {
	now := time.Now().UTC()
	// job-a and job-b share a completion time, so the name breaks the tie;
	// job-c has no completion time and is ordered by creation time.
	items := []k8s.BackupJobSummary{
		{
			Name:           "job-a",
			Namespace:      "apps",
			PVC:            "data",
			CreatedAt:      now.Add(-3 * time.Hour),
			CompletionTime: now.Add(-2 * time.Hour),
		},
		{
			Name:      "job-c",
			Namespace: "apps",
			PVC:       "data",
			CreatedAt: now.Add(-1 * time.Hour),
		},
		{
			Name:           "job-b",
			Namespace:      "apps",
			PVC:            "data",
			CreatedAt:      now.Add(-4 * time.Hour),
			CompletionTime: now.Add(-2 * time.Hour),
		},
	}
	sortBackupJobsNewestFirst(items)
	got := []string{items[0].Name, items[1].Name, items[2].Name}
	want := []string{"job-c", "job-b", "job-a"}
	for i := range want {
		if got[i] != want[i] {
			t.Fatalf("expected sorted names %v, got %v", want, got)
		}
	}
}

// apiPVCInventory is a compact literal form for building api.PVCInventory
// values in table-driven tests.
type apiPVCInventory struct {
	Namespace string
	PVC       string
	Volume    string
}

// toAPI converts the shorthand into a real api.PVCInventory.
func (p apiPVCInventory) toAPI() api.PVCInventory {
	return api.PVCInventory{
		Namespace: p.Namespace,
		PVC:       p.PVC,
		Volume:    p.Volume,
	}
}