// File: soteria/internal/server/inventory_builder_test.go (Go, 335 lines, 12 KiB)

package server
import (
"context"
"errors"
"testing"
"time"
"scm.bstein.dev/bstein/soteria/internal/api"
"scm.bstein.dev/bstein/soteria/internal/config"
"scm.bstein.dev/bstein/soteria/internal/k8s"
"scm.bstein.dev/bstein/soteria/internal/longhorn"
)
// inventoryTestKubeClient layers optional error injection on top of the
// embedded fakeKubeClient so inventory tests can force list calls to fail.
type inventoryTestKubeClient struct {
	*fakeKubeClient

	// listPVCsErr, when non-nil, is returned by ListBoundPVCs instead of
	// delegating to the embedded fake.
	listPVCsErr error

	// listBackupJobsErr maps a namespace to the error ListBackupJobs returns
	// for it; namespaces with no entry (or a nil entry) fall through to the
	// embedded fake.
	listBackupJobsErr map[string]error
}
// ListBoundPVCs returns the injected listPVCsErr when one is configured and
// otherwise defers to the embedded fakeKubeClient.
func (k *inventoryTestKubeClient) ListBoundPVCs(ctx context.Context) ([]k8s.PVCSummary, error) {
	if injected := k.listPVCsErr; injected != nil {
		return nil, injected
	}
	return k.fakeKubeClient.ListBoundPVCs(ctx)
}
// ListBackupJobs returns the error registered for namespace in
// listBackupJobsErr, if any, and otherwise defers to the embedded
// fakeKubeClient. A nil map behaves like an empty one.
func (k *inventoryTestKubeClient) ListBackupJobs(ctx context.Context, namespace string) ([]k8s.BackupJobSummary, error) {
	injected, found := k.listBackupJobsErr[namespace]
	if found && injected != nil {
		return nil, injected
	}
	return k.fakeKubeClient.ListBackupJobs(ctx, namespace)
}
// inventoryTestLonghornClient extends the embedded fakeLonghornClient with
// per-volume canned backup lists and per-volume injected errors.
type inventoryTestLonghornClient struct {
	*fakeLonghornClient

	// listBackupsByVolume holds the backups ListBackups returns for a volume
	// name; volumes without an entry fall through to the embedded fake.
	listBackupsByVolume map[string][]longhorn.Backup

	// listBackupsErr holds the error ListBackups returns for a volume name.
	// It is consulted before listBackupsByVolume.
	listBackupsErr map[string]error
}
// ListBackups resolves the backups for volumeName in three steps: an injected
// error from listBackupsErr wins, then a canned list from listBackupsByVolume
// (including an explicitly empty one), and finally the embedded
// fakeLonghornClient.
func (l *inventoryTestLonghornClient) ListBackups(ctx context.Context, volumeName string) ([]longhorn.Backup, error) {
	if injected := l.listBackupsErr[volumeName]; injected != nil {
		return nil, injected
	}

	canned, found := l.listBackupsByVolume[volumeName]
	if !found {
		return l.fakeLonghornClient.ListBackups(ctx, volumeName)
	}
	return canned, nil
}
// newInventoryTestServer assembles a Server from the supplied config and
// client doubles and gives it a fresh telemetry instance from newTelemetry.
func newInventoryTestServer(cfg *config.Config, client kubeClient, longhornClient longhornClient) *Server {
	srv := new(Server)
	srv.cfg = cfg
	srv.client = client
	srv.longhorn = longhornClient
	srv.metrics = newTelemetry()
	return srv
}
// TestBuildInventoryLonghornSortsNamespacesAndCalculatesHealth builds an
// inventory through the longhorn driver for two PVCs that are listed out of
// alphabetical order. It checks that namespaces come back sorted, that backup
// counts and byte totals are aggregated for the PVC with backups, and that the
// PVC with an empty backup list is reported as "missing".
func TestBuildInventoryLonghornSortsNamespacesAndCalculatesHealth(t *testing.T) {
	now := time.Now().UTC()
	// recent falls inside the 24h BackupMaxAge configured below; old does not.
	recent := now.Add(-2 * time.Hour)
	old := now.Add(-48 * time.Hour)

	client := &inventoryTestKubeClient{
		fakeKubeClient: &fakeKubeClient{
			// "zeta" is deliberately listed before "apps" to exercise sorting.
			pvcs: []k8s.PVCSummary{
				{Namespace: "zeta", Name: "cache", VolumeName: "vol-cache", Phase: "Bound", StorageClass: "fast", Capacity: "10Gi", AccessModes: []string{"ReadWriteOnce"}},
				{Namespace: "apps", Name: "data", VolumeName: "vol-data", Phase: "Bound", StorageClass: "fast", Capacity: "20Gi", AccessModes: []string{"ReadWriteOnce"}},
			},
		},
	}
	longhornClient := &inventoryTestLonghornClient{
		fakeLonghornClient: &fakeLonghornClient{},
		listBackupsByVolume: map[string][]longhorn.Backup{
			"vol-data": {
				{Name: "backup-new", Created: recent.Format(time.RFC3339), State: "Completed", Size: "2147483648"},
				{Name: "backup-old", Created: old.Format(time.RFC3339), State: "Completed", Size: "1073741824"},
			},
			// An explicit empty list: the volume is known but has no backups.
			"vol-cache": {},
		},
	}
	srv := newInventoryTestServer(&config.Config{
		BackupDriver: "longhorn",
		BackupMaxAge: 24 * time.Hour,
	}, client, longhornClient)

	inventory, err := srv.buildInventory(context.Background())
	if err != nil {
		t.Fatalf("build longhorn inventory: %v", err)
	}
	if len(inventory.Namespaces) != 2 {
		t.Fatalf("expected two namespaces, got %#v", inventory.Namespaces)
	}
	if inventory.Namespaces[0].Name != "apps" || inventory.Namespaces[1].Name != "zeta" {
		t.Fatalf("expected sorted namespaces, got %#v", inventory.Namespaces)
	}
	// Guard the PVCs[0] lookups below so an empty namespace produces a
	// readable failure instead of an index-out-of-range panic.
	for _, ns := range inventory.Namespaces {
		if len(ns.PVCs) == 0 {
			t.Fatalf("expected pvcs in namespace %q, got %#v", ns.Name, inventory.Namespaces)
		}
	}

	appsPVC := inventory.Namespaces[0].PVCs[0]
	if !appsPVC.Healthy || appsPVC.HealthReason != "fresh" {
		t.Fatalf("expected healthy fresh apps pvc, got %#v", appsPVC)
	}
	if appsPVC.BackupCount != 2 || appsPVC.CompletedBackups != 2 {
		t.Fatalf("expected completed backup counts, got %#v", appsPVC)
	}
	// 2147483648 is the newest backup's size; 3221225472 is the sum of both.
	if appsPVC.LastBackupSizeBytes != 2147483648 || appsPVC.TotalBackupSizeBytes != 3221225472 {
		t.Fatalf("expected longhorn byte totals, got %#v", appsPVC)
	}
	if appsPVC.LastBackupAt == "" || appsPVC.LastBackupAgeHours <= 0 {
		t.Fatalf("expected latest backup timestamp, got %#v", appsPVC)
	}

	cachePVC := inventory.Namespaces[1].PVCs[0]
	if cachePVC.Healthy || cachePVC.HealthReason != "missing" {
		t.Fatalf("expected missing backup health, got %#v", cachePVC)
	}
}
// TestEnrichPVCInventoryCoversLonghornAndResticBranches drives
// enrichPVCInventory directly, once with the longhorn driver and once with the
// restic driver, and checks the HealthReason / Healthy / Error values each
// fixture is expected to produce.
func TestEnrichPVCInventoryCoversLonghornAndResticBranches(t *testing.T) {
	now := time.Now().UTC()
	// recent is inside the 24h BackupMaxAge used by both sub-tests; old is not.
	recent := now.Add(-1 * time.Hour)
	old := now.Add(-72 * time.Hour)

	t.Run("longhorn branches", func(t *testing.T) {
		srv := newInventoryTestServer(&config.Config{
			BackupDriver: "longhorn",
			BackupMaxAge: 24 * time.Hour,
		}, &inventoryTestKubeClient{fakeKubeClient: &fakeKubeClient{}}, &inventoryTestLonghornClient{
			fakeLonghornClient: &fakeLonghornClient{},
			listBackupsErr: map[string]error{
				"vol-error": errors.New("list backups exploded"),
			},
			listBackupsByVolume: map[string][]longhorn.Backup{
				"vol-no-completed": {
					{Name: "backup-pending", Created: recent.Format(time.RFC3339), State: "InProgress"},
				},
				"vol-bad-time": {
					{Name: "backup-complete", Created: "not-a-time", State: "Completed", Size: "123"},
				},
				"vol-stale": {
					{Name: "backup-old", Created: old.Format(time.RFC3339), State: "Completed", Size: "10"},
				},
			},
		})

		testCases := []struct {
			entry apiPVCInventory
			want  string // expected HealthReason
			ok    bool   // expected Healthy
			err   string // expected Error; only checked when non-empty
		}{
			{entry: apiPVCInventory{Namespace: "apps", PVC: "err", Volume: "vol-error"}, want: "lookup_failed", ok: false, err: "list backups exploded"},
			{entry: apiPVCInventory{Namespace: "apps", PVC: "none", Volume: "vol-no-completed"}, want: "no_completed", ok: false},
			{entry: apiPVCInventory{Namespace: "apps", PVC: "bad", Volume: "vol-bad-time"}, want: "unknown_timestamp", ok: false},
			{entry: apiPVCInventory{Namespace: "apps", PVC: "stale", Volume: "vol-stale"}, want: "stale", ok: false},
		}
		for _, tc := range testCases {
			// Each row runs as its own subtest (named after the PVC) so a
			// failure in one branch does not hide the results of the others.
			t.Run(tc.entry.PVC, func(t *testing.T) {
				entry := tc.entry.toAPI()
				srv.enrichPVCInventory(context.Background(), &entry, nil, nil)
				if entry.HealthReason != tc.want || entry.Healthy != tc.ok {
					t.Fatalf("%s/%s: expected %q healthy=%v, got %#v", entry.Namespace, entry.PVC, tc.want, tc.ok, entry)
				}
				if tc.err != "" && entry.Error != tc.err {
					t.Fatalf("%s/%s: expected error %q, got %#v", entry.Namespace, entry.PVC, tc.err, entry)
				}
			})
		}
	})

	t.Run("restic branches", func(t *testing.T) {
		client := &inventoryTestKubeClient{
			fakeKubeClient: &fakeKubeClient{
				// Canned job logs keyed by "namespace/job-name".
				jobLogs: map[string]string{
					"apps/job-new": `{"message_type":"summary","data_added":2048}`,
					"apps/job-old": `{"message_type":"summary","data_added":1024}`,
				},
			},
		}
		srv := newInventoryTestServer(&config.Config{
			Namespace:       "atlas",
			UsageSecretName: "restic-usage",
			BackupDriver:    "restic",
			BackupMaxAge:    24 * time.Hour,
		}, client, &inventoryTestLonghornClient{fakeLonghornClient: &fakeLonghornClient{}})

		// Jobs keyed by "namespace/pvc". "apps/data" lists the old job first
		// so the sort below has something to reorder.
		resticJobsByPVC := map[string][]k8s.BackupJobSummary{
			"apps/data": {
				{Name: "job-old", Namespace: "apps", PVC: "data", State: "Completed", CreatedAt: old, CompletionTime: old, KeepLast: 1},
				{Name: "job-new", Namespace: "apps", PVC: "data", State: "Completed", CreatedAt: recent, CompletionTime: recent, KeepLast: 1},
			},
			"apps/running": {
				{Name: "job-running", Namespace: "apps", PVC: "running", State: "Running", CreatedAt: recent},
			},
			// Completed but with zero-valued timestamps.
			"apps/bad-time": {
				{Name: "job-bad", Namespace: "apps", PVC: "bad-time", State: "Completed"},
			},
		}
		// The slices are sorted in place; ranging over the values avoids a
		// second map lookup per key.
		for _, jobs := range resticJobsByPVC {
			sortBackupJobsNewestFirst(jobs)
		}

		entry := apiPVCInventory{Namespace: "apps", PVC: "data", Volume: "vol-data"}.toAPI()
		srv.enrichPVCInventory(context.Background(), &entry, resticJobsByPVC, nil)
		if !entry.Healthy || entry.HealthReason != "fresh" {
			t.Fatalf("expected fresh restic pvc, got %#v", entry)
		}
		if entry.LastJobName != "job-new" || entry.BackupCount != 2 || entry.CompletedBackups != 2 {
			t.Fatalf("expected sorted restic jobs, got %#v", entry)
		}
		// Both jobs set KeepLast: 1 and the expected total equals only the
		// newest job's 2048 bytes. NOTE(review): presumably only retained
		// snapshots are summed; confirm against enrichPVCInventory.
		if entry.LastBackupSizeBytes != 2048 || entry.TotalBackupSizeBytes != 2048 {
			t.Fatalf("expected retained size accounting, got %#v", entry)
		}

		running := apiPVCInventory{Namespace: "apps", PVC: "running", Volume: "vol-running"}.toAPI()
		srv.enrichPVCInventory(context.Background(), &running, resticJobsByPVC, nil)
		if running.Healthy || running.HealthReason != "in_progress" || running.ActiveBackups != 1 {
			t.Fatalf("expected in-progress restic pvc, got %#v", running)
		}

		// "apps/missing" has no entry in resticJobsByPVC.
		missing := apiPVCInventory{Namespace: "apps", PVC: "missing", Volume: "vol-missing"}.toAPI()
		srv.enrichPVCInventory(context.Background(), &missing, resticJobsByPVC, nil)
		if missing.Healthy || missing.HealthReason != "missing" {
			t.Fatalf("expected missing restic pvc, got %#v", missing)
		}

		badTime := apiPVCInventory{Namespace: "apps", PVC: "bad-time", Volume: "vol-bad-time"}.toAPI()
		srv.enrichPVCInventory(context.Background(), &badTime, resticJobsByPVC, nil)
		if badTime.Healthy || badTime.HealthReason != "unknown_timestamp" {
			t.Fatalf("expected unknown timestamp restic pvc, got %#v", badTime)
		}

		// The final argument carries a per-namespace error for "ops".
		lookupFailed := apiPVCInventory{Namespace: "ops", PVC: "data", Volume: "vol-ops"}.toAPI()
		srv.enrichPVCInventory(context.Background(), &lookupFailed, resticJobsByPVC, map[string]error{"ops": errors.New("list jobs exploded")})
		if lookupFailed.Healthy || lookupFailed.HealthReason != "lookup_failed" || lookupFailed.Error != "list jobs exploded" {
			t.Fatalf("expected lookup failure restic pvc, got %#v", lookupFailed)
		}
	})
}
func TestRefreshTelemetryRecordsSuccessAndFailure(t *testing.T) {
successClient := &inventoryTestKubeClient{
fakeKubeClient: &fakeKubeClient{
pvcs: []k8s.PVCSummary{
{Namespace: "apps", Name: "data", VolumeName: "vol-data", Phase: "Bound"},
},
},
}
successLonghorn := &inventoryTestLonghornClient{
fakeLonghornClient: &fakeLonghornClient{},
listBackupsByVolume: map[string][]longhorn.Backup{
"vol-data": {
{Name: "backup-new", Created: time.Now().UTC().Add(-1 * time.Hour).Format(time.RFC3339), State: "Completed", Size: "512"},
},
},
}
srv := newInventoryTestServer(&config.Config{
BackupDriver: "longhorn",
BackupMaxAge: 24 * time.Hour,
}, successClient, successLonghorn)
srv.refreshTelemetry(context.Background())
if srv.metrics.inventoryRefreshFailure != 0 {
t.Fatalf("expected successful refresh, got failure count %f", srv.metrics.inventoryRefreshFailure)
}
if srv.metrics.inventoryRefreshTime == 0 {
t.Fatalf("expected inventory refresh timestamp to be recorded")
}
if len(srv.metrics.pvcBackupHealth) != 1 || len(srv.metrics.pvcBackupCount) != 1 {
t.Fatalf("expected pvc metrics to be recorded, got health=%d count=%d", len(srv.metrics.pvcBackupHealth), len(srv.metrics.pvcBackupCount))
}
failingSrv := newInventoryTestServer(&config.Config{
BackupDriver: "longhorn",
BackupMaxAge: 24 * time.Hour,
}, &inventoryTestKubeClient{
fakeKubeClient: &fakeKubeClient{},
listPVCsErr: errors.New("inventory exploded"),
}, &inventoryTestLonghornClient{fakeLonghornClient: &fakeLonghornClient{}})
failingSrv.refreshTelemetry(context.Background())
if failingSrv.metrics.inventoryRefreshFailure != 1 {
t.Fatalf("expected failed refresh metric, got %f", failingSrv.metrics.inventoryRefreshFailure)
}
}
// TestSortBackupJobsNewestFirstUsesCompletionCreatedAndNameTiebreakers sorts
// three jobs with sortBackupJobsNewestFirst and pins the resulting order.
// job-a and job-b share a CompletionTime but differ in CreatedAt and name;
// job-c has no CompletionTime and the most recent CreatedAt. The expected
// order is job-c, job-b, job-a.
func TestSortBackupJobsNewestFirstUsesCompletionCreatedAndNameTiebreakers(t *testing.T) {
	now := time.Now().UTC()
	// hoursAgo returns the instant h whole hours before now.
	hoursAgo := func(h int) time.Time {
		return now.Add(-time.Duration(h) * time.Hour)
	}

	jobs := []k8s.BackupJobSummary{
		{Name: "job-a", Namespace: "apps", PVC: "data", CreatedAt: hoursAgo(3), CompletionTime: hoursAgo(2)},
		{Name: "job-c", Namespace: "apps", PVC: "data", CreatedAt: hoursAgo(1)},
		{Name: "job-b", Namespace: "apps", PVC: "data", CreatedAt: hoursAgo(4), CompletionTime: hoursAgo(2)},
	}
	sortBackupJobsNewestFirst(jobs)

	want := []string{"job-c", "job-b", "job-a"}
	got := make([]string, len(jobs))
	for i := range jobs {
		got[i] = jobs[i].Name
	}
	for i, name := range want {
		if got[i] != name {
			t.Fatalf("expected sorted names %v, got %v", want, got)
		}
	}
}
// apiPVCInventory is a compact test fixture carrying the namespace, PVC name
// and volume name of a PVC inventory entry.
type apiPVCInventory struct {
	Namespace, PVC, Volume string
}
// toAPI copies the fixture's Namespace, PVC and Volume into a fresh
// api.PVCInventory, leaving every other field at its zero value.
func (p apiPVCInventory) toAPI() api.PVCInventory {
	var out api.PVCInventory
	out.Namespace = p.Namespace
	out.PVC = p.PVC
	out.Volume = p.Volume
	return out
}