From c5f922c6b718472f960b27c69bf28182bf18178f Mon Sep 17 00:00:00 2001
From: codex
Date: Mon, 20 Apr 2026 17:41:28 -0300
Subject: [PATCH] test(soteria): raise telemetry and utility coverage

---
Note: hand-edited after export to restore line structure and add doc
comments; the `index` blob ids below were not regenerated.

 internal/server/metrics_test.go          | 263 ++++++++++++++++++++
 internal/server/server_utilities_test.go | 299 +++++++++++++++++++++++
 2 files changed, 562 insertions(+)
 create mode 100644 internal/server/metrics_test.go
 create mode 100644 internal/server/server_utilities_test.go

diff --git a/internal/server/metrics_test.go b/internal/server/metrics_test.go
new file mode 100644
index 0000000..ab6f924
--- /dev/null
+++ b/internal/server/metrics_test.go
@@ -0,0 +1,263 @@
+package server
+
+import (
+	"testing"
+	"time"
+
+	"scm.bstein.dev/bstein/soteria/internal/api"
+)
+
+// TestMetricValuesEmptyAndSetMetricClonesLabels checks that metricValues is
+// empty for an empty map and that setMetric stores a copy of the labels.
+func TestMetricValuesEmptyAndSetMetricClonesLabels(t *testing.T) {
+	if values := metricValues(map[string]metricSample{}); len(values) != 0 {
+		t.Fatalf("expected no metric values for empty source, got %#v", values)
+	}
+
+	target := map[string]metricSample{}
+	labels := map[string]string{
+		"namespace": "apps",
+		"pvc":       "data",
+	}
+
+	setMetric(target, labels, 42)
+	labels["namespace"] = "mutated"
+
+	values := metricValues(target)
+	if len(values) != 1 {
+		t.Fatalf("expected one metric sample, got %#v", values)
+	}
+	if values[0].labels["namespace"] != "apps" {
+		t.Fatalf("expected cloned labels to remain unchanged, got %#v", values[0].labels)
+	}
+	if values[0].value != 42 {
+		t.Fatalf("expected metric value 42, got %v", values[0].value)
+	}
+}
+
+// TestTelemetryRecordInventoryFailureIncrementsCounter checks that two
+// RecordInventoryFailure calls leave inventoryRefreshFailure at 2.
+func TestTelemetryRecordInventoryFailureIncrementsCounter(t *testing.T) {
+	telemetry := newTelemetry()
+
+	telemetry.RecordInventoryFailure()
+	telemetry.RecordInventoryFailure()
+
+	if telemetry.inventoryRefreshFailure != 2 {
+		t.Fatalf("expected inventory refresh failure count 2, got %v", telemetry.inventoryRefreshFailure)
+	}
+}
+
+// TestTelemetryRecordInventoryPopulatesAndResetsMetrics checks the per-PVC
+// metrics recorded for one healthy and one unhealthy PVC, and that a
+// subsequent empty inventory clears them.
+func TestTelemetryRecordInventoryPopulatesAndResetsMetrics(t *testing.T) {
+	telemetry := newTelemetry()
+	recordedAt := time.Date(2026, 4, 20, 13, 14, 15, 0, time.UTC)
+
+	telemetry.RecordInventory(api.InventoryResponse{
+		Namespaces: []api.NamespaceInventory{
+			{
+				Name: "apps",
+				PVCs: []api.PVCInventory{
+					{
+						Namespace:            "apps",
+						PVC:                  "data",
+						Volume:               "pv-apps-data",
+						Driver:               "restic",
+						LastBackupAt:         recordedAt.Format(time.RFC3339),
+						LastBackupAgeHours:   2.5,
+						BackupCount:          3,
+						CompletedBackups:     2,
+						LastBackupSizeBytes:  512,
+						TotalBackupSizeBytes: 2048,
+						Healthy:              true,
+					},
+					{
+						Namespace:            "apps",
+						PVC:                  "cache",
+						Volume:               "pv-apps-cache",
+						Driver:               "restic",
+						LastBackupAt:         "not-a-time",
+						LastBackupAgeHours:   99,
+						BackupCount:          1,
+						CompletedBackups:     0,
+						LastBackupSizeBytes:  0,
+						TotalBackupSizeBytes: 0,
+						Healthy:              false,
+						HealthReason:         "stale",
+					},
+				},
+			},
+		},
+	})
+
+	dataKey := metricKey(map[string]string{
+		"namespace": "apps",
+		"pvc":       "data",
+		"volume":    "pv-apps-data",
+		"driver":    "restic",
+	})
+	cacheKey := metricKey(map[string]string{
+		"namespace": "apps",
+		"pvc":       "cache",
+		"volume":    "pv-apps-cache",
+		"driver":    "restic",
+	})
+	// The "data" PVC sets no HealthReason; the assertions below expect it under reason "unknown".
+	dataReasonKey := metricKey(map[string]string{
+		"namespace": "apps",
+		"pvc":       "data",
+		"volume":    "pv-apps-data",
+		"driver":    "restic",
+		"reason":    "unknown",
+	})
+	cacheReasonKey := metricKey(map[string]string{
+		"namespace": "apps",
+		"pvc":       "cache",
+		"volume":    "pv-apps-cache",
+		"driver":    "restic",
+		"reason":    "stale",
+	})
+
+	if telemetry.inventoryRefreshTime <= 0 {
+		t.Fatalf("expected inventory refresh time to be recorded, got %v", telemetry.inventoryRefreshTime)
+	}
+	if got := telemetry.pvcBackupCount[dataKey].value; got != 3 {
+		t.Fatalf("expected backup count 3 for data pvc, got %v", got)
+	}
+	if got := telemetry.pvcBackupCompletedCount[dataKey].value; got != 2 {
+		t.Fatalf("expected completed backup count 2, got %v", got)
+	}
+	if got := telemetry.pvcBackupLastSizeBytes[dataKey].value; got != 512 {
+		t.Fatalf("expected last backup size 512, got %v", got)
+	}
+	if got := telemetry.pvcBackupTotalSizeBytes[dataKey].value; got != 2048 {
+		t.Fatalf("expected total backup size 2048, got %v", got)
+	}
+	if got := telemetry.pvcBackupHealth[dataKey].value; got != 1 {
+		t.Fatalf("expected healthy pvc to emit 1, got %v", got)
+	}
+	if got := telemetry.pvcBackupHealth[cacheKey].value; got != 0 {
+		t.Fatalf("expected unhealthy pvc to emit 0, got %v", got)
+	}
+	if got := telemetry.pvcBackupHealthReason[dataReasonKey].value; got != 1 {
+		t.Fatalf("expected unknown health reason marker, got %v", got)
+	}
+	if got := telemetry.pvcBackupHealthReason[cacheReasonKey].value; got != 1 {
+		t.Fatalf("expected explicit stale health reason marker, got %v", got)
+	}
+	if got := telemetry.pvcBackupAgeHours[dataKey].value; got != 2.5 {
+		t.Fatalf("expected backup age 2.5 hours, got %v", got)
+	}
+	if got := telemetry.pvcBackupAgeHours[cacheKey].value; got != 99 {
+		t.Fatalf("expected invalid-time pvc to still expose age, got %v", got)
+	}
+	if got := telemetry.pvcBackupLastSuccess[dataKey].value; got != float64(recordedAt.Unix()) {
+		t.Fatalf("expected last success timestamp %d, got %v", recordedAt.Unix(), got)
+	}
+	if _, ok := telemetry.pvcBackupLastSuccess[cacheKey]; ok {
+		t.Fatalf("expected invalid last_backup_at to skip success timestamp")
+	}
+
+	telemetry.RecordInventory(api.InventoryResponse{})
+
+	if len(telemetry.pvcBackupCount) != 0 || len(telemetry.pvcBackupHealthReason) != 0 {
+		t.Fatalf("expected inventory metrics to reset on empty refresh, got counts=%d reasons=%d",
+			len(telemetry.pvcBackupCount), len(telemetry.pvcBackupHealthReason))
+	}
+}
+
+// TestTelemetryRecordB2UsageTracksBucketsAndFallbackTimestamp checks account
+// and per-bucket B2 metrics, then that an unavailable scan with an invalid
+// ScannedAt falls back to the current time and drops the bucket metrics.
+func TestTelemetryRecordB2UsageTracksBucketsAndFallbackTimestamp(t *testing.T) {
+	telemetry := newTelemetry()
+	scannedAt := time.Date(2026, 4, 20, 16, 30, 0, 0, time.UTC)
+	lastModifiedAt := scannedAt.Add(-45 * time.Minute)
+
+	telemetry.RecordB2Usage(api.B2UsageResponse{
+		Available:        true,
+		ScannedAt:        scannedAt.Format(time.RFC3339),
+		ScanDurationMS:   2750,
+		TotalObjects:     99,
+		TotalBytes:       123456,
+		RecentObjects24h: 7,
+		RecentBytes24h:   890,
+		Buckets: []api.B2BucketUsage{
+			{
+				Name:             "atlas-backups",
+				ObjectCount:      44,
+				TotalBytes:       1000,
+				RecentObjects24h: 3,
+				RecentBytes24h:   250,
+				LastModifiedAt:   lastModifiedAt.Format(time.RFC3339),
+			},
+			{
+				Name:             "atlas-logs",
+				ObjectCount:      55,
+				TotalBytes:       2000,
+				RecentObjects24h: 4,
+				RecentBytes24h:   640,
+				LastModifiedAt:   "invalid",
+			},
+		},
+	})
+
+	backupKey := metricKey(map[string]string{"bucket": "atlas-backups"})
+	logsKey := metricKey(map[string]string{"bucket": "atlas-logs"})
+
+	if telemetry.b2ScanSuccess != 1 {
+		t.Fatalf("expected successful scan marker, got %v", telemetry.b2ScanSuccess)
+	}
+	if telemetry.b2ScanTimestamp != float64(scannedAt.Unix()) {
+		t.Fatalf("expected scan timestamp %d, got %v", scannedAt.Unix(), telemetry.b2ScanTimestamp)
+	}
+	if telemetry.b2ScanDurationSeconds != 2.75 {
+		t.Fatalf("expected scan duration 2.75s, got %v", telemetry.b2ScanDurationSeconds)
+	}
+	if telemetry.b2AccountObjects != 99 || telemetry.b2AccountBytes != 123456 {
+		t.Fatalf("unexpected account usage totals: objects=%v bytes=%v", telemetry.b2AccountObjects, telemetry.b2AccountBytes)
+	}
+	if telemetry.b2AccountRecentObjects != 7 || telemetry.b2AccountRecentBytes != 890 {
+		t.Fatalf("unexpected recent account usage: objects=%v bytes=%v", telemetry.b2AccountRecentObjects, telemetry.b2AccountRecentBytes)
+	}
+	if got := telemetry.b2BucketObjects[backupKey].value; got != 44 {
+		t.Fatalf("expected atlas-backups object count 44, got %v", got)
+	}
+	if got := telemetry.b2BucketBytes[logsKey].value; got != 2000 {
+		t.Fatalf("expected atlas-logs bytes 2000, got %v", got)
+	}
+	if got := telemetry.b2BucketRecentBytes[backupKey].value; got != 250 {
+		t.Fatalf("expected atlas-backups recent bytes 250, got %v", got)
+	}
+	if got := telemetry.b2BucketLastModified[backupKey].value; got != float64(lastModifiedAt.Unix()) {
+		t.Fatalf("expected atlas-backups last modified timestamp %d, got %v", lastModifiedAt.Unix(), got)
+	}
+	if _, ok := telemetry.b2BucketLastModified[logsKey]; ok {
+		t.Fatalf("expected invalid bucket last modified timestamp to be ignored")
+	}
+
+	// Bracket the call with wall-clock reads so the fallback timestamp can be range-checked.
+	before := time.Now().Unix()
+	telemetry.RecordB2Usage(api.B2UsageResponse{
+		Available:      false,
+		ScannedAt:      "definitely-invalid",
+		ScanDurationMS: 500,
+	})
+	after := time.Now().Unix()
+
+	if telemetry.b2ScanSuccess != 0 {
+		t.Fatalf("expected failed scan marker after unavailable refresh, got %v", telemetry.b2ScanSuccess)
+	}
+	if telemetry.b2ScanDurationSeconds != 0.5 {
+		t.Fatalf("expected scan duration 0.5s, got %v", telemetry.b2ScanDurationSeconds)
+	}
+	if telemetry.b2ScanTimestamp < float64(before) || telemetry.b2ScanTimestamp > float64(after) {
+		t.Fatalf("expected invalid scanned_at to fall back to now, got %v not in [%d,%d]",
+			telemetry.b2ScanTimestamp, before, after)
+	}
+	if len(telemetry.b2BucketObjects) != 0 || len(telemetry.b2BucketLastModified) != 0 {
+		t.Fatalf("expected bucket metrics to reset when no buckets are provided")
+	}
+}
diff --git a/internal/server/server_utilities_test.go b/internal/server/server_utilities_test.go
new file mode 100644
index 0000000..9f717b5
--- /dev/null
+++ b/internal/server/server_utilities_test.go
@@ -0,0 +1,299 @@
+package server
+
+import (
+	"math"
+	"regexp"
+	"strings"
+	"testing"
+	"time"
+
+	"scm.bstein.dev/bstein/soteria/internal/k8s"
+	"scm.bstein.dev/bstein/soteria/internal/longhorn"
+)
+
+// TestBuildBackupRecordsSortsAndMarksLatestCompleted checks newest-first
+// ordering with unparseable timestamps last, and that only the newest
+// Completed backup is flagged Latest.
+func TestBuildBackupRecordsSortsAndMarksLatestCompleted(t *testing.T) {
+	records := buildBackupRecords([]longhorn.Backup{
+		{
+			Name:         "backup-b",
+			SnapshotName: "snap-b",
+			Created:      "not-a-time",
+			State:        "Completed",
+			URL:          "s3://bucket/backup-b",
+			Size:         "2Gi",
+		},
+		{
+			Name:         "backup-a",
+			SnapshotName: "snap-a",
+			Created:      "2026-04-20T10:00:00Z",
+			State:        "Completed",
+			URL:          "s3://bucket/backup-a",
+			Size:         "1Gi",
+		},
+		{
+			Name:         "backup-c",
+			SnapshotName: "snap-c",
+			Created:      "2026-04-20T11:00:00Z",
+			State:        "Failed",
+			URL:          "s3://bucket/backup-c",
+			Size:         "3Gi",
+		},
+	})
+
+	if len(records) != 3 {
+		t.Fatalf("expected three backup records, got %#v", records)
+	}
+	if records[0].Name != "backup-c" || records[1].Name != "backup-a" || records[2].Name != "backup-b" {
+		t.Fatalf("expected records sorted newest-first with invalid timestamps last, got %#v", records)
+	}
+	if !records[1].Latest {
+		t.Fatalf("expected latest completed backup to be marked latest, got %#v", records[1])
+	}
+	if records[0].Latest || records[2].Latest {
+		t.Fatalf("expected only the latest completed backup to be marked latest, got %#v", records)
+	}
+}
+
+// TestBuildBackupRecordsWithoutCompletedBackupsLeavesLatestUnset checks that
+// no record is flagged Latest when none is in the Completed state.
+func TestBuildBackupRecordsWithoutCompletedBackupsLeavesLatestUnset(t *testing.T) {
+	records := buildBackupRecords([]longhorn.Backup{
+		{Name: "backup-b", Created: "2026-04-20T10:00:00Z", State: "Failed"},
+		{Name: "backup-a", Created: "2026-04-20T09:00:00Z", State: "Pending"},
+	})
+
+	if len(records) != 2 {
+		t.Fatalf("expected two backup records, got %#v", records)
+	}
+	if records[0].Latest || records[1].Latest {
+		t.Fatalf("expected no latest marker when no completed backups exist, got %#v", records)
+	}
+}
+
+// TestBuildBackupRecordsFallsBackToNameOrderForInvalidTimes checks that records
+// with unparseable timestamps come back in descending name order, with Latest
+// on "backup-a".
+func TestBuildBackupRecordsFallsBackToNameOrderForInvalidTimes(t *testing.T) {
+	records := buildBackupRecords([]longhorn.Backup{
+		{Name: "backup-a", Created: "invalid-a", State: "Completed"},
+		{Name: "backup-z", Created: "invalid-z", State: "Completed"},
+	})
+
+	if len(records) != 2 {
+		t.Fatalf("expected two backup records, got %#v", records)
+	}
+	if records[0].Name != "backup-z" || records[1].Name != "backup-a" {
+		t.Fatalf("expected invalid timestamps to sort by name fallback, got %#v", records)
+	}
+	if records[0].Latest || !records[1].Latest {
+		t.Fatalf("expected first invalid completed backup encountered pre-sort to retain latest marker, got %#v", records)
+	}
+}
+
+// TestDecodeResticSelectorCoversValidAndInvalidValues checks the empty, "latest",
+// encoded round-trip, and malformed selector cases of decodeResticSelector.
+func TestDecodeResticSelectorCoversValidAndInvalidValues(t *testing.T) {
+	if repository, snapshot, ok := decodeResticSelector(""); ok || repository != "" || snapshot != "" {
+		t.Fatalf("expected empty selector to be invalid, got repo=%q snapshot=%q ok=%v", repository, snapshot, ok)
+	}
+
+	if repository, snapshot, ok := decodeResticSelector("latest"); !ok || repository != "" || snapshot != "latest" {
+		t.Fatalf("expected latest selector to decode, got repo=%q snapshot=%q ok=%v", repository, snapshot, ok)
+	}
+
+	encoded := encodeResticSelector(" s3://bucket/repository ")
+	if repository, snapshot, ok := decodeResticSelector(encoded); !ok || repository != "s3://bucket/repository" || snapshot != "latest" {
+		t.Fatalf("expected encoded selector to round-trip, got repo=%q snapshot=%q ok=%v", repository, snapshot, ok)
+	}
+
+	invalidInputs := []string{
+		"plain-text",
+		resticSelectorPrefix,
+		resticSelectorPrefix + "###",
+		resticSelectorPrefix + "ICAg", // "ICAg" is base64 for three spaces; presumably rejected as a blank repository (confirm against decodeResticSelector)
+	}
+	for _, input := range invalidInputs {
+		if repository, snapshot, ok := decodeResticSelector(input); ok || repository != "" || snapshot != "" {
+			t.Fatalf("expected invalid selector %q to fail decode, got repo=%q snapshot=%q ok=%v", input, repository, snapshot, ok)
+		}
+	}
+}
+
+// TestBackupJobTimestampPrefersCompletionTime checks that backupJobTimestamp
+// returns the completion time when set and the creation time otherwise.
+func TestBackupJobTimestampPrefersCompletionTime(t *testing.T) {
+	createdAt := time.Date(2026, 4, 20, 9, 0, 0, 0, time.UTC)
+	completedAt := createdAt.Add(15 * time.Minute)
+
+	if got := backupJobTimestamp(k8sBackupJobSummary(createdAt, completedAt)); !got.Equal(completedAt) {
+		t.Fatalf("expected completion timestamp to win, got %s", got)
+	}
+	if got := backupJobTimestamp(k8sBackupJobSummary(createdAt, time.Time{})); !got.Equal(createdAt) {
+		t.Fatalf("expected created timestamp fallback, got %s", got)
+	}
+}
+
+// TestBackupNameTruncatesToKubernetesSafeLength checks that a long volume name
+// yields a name of at most 63 characters that keeps its prefix and timestamp suffix.
+func TestBackupNameTruncatesToKubernetesSafeLength(t *testing.T) {
+	name := backupName("backup", strings.Repeat("very-long-volume-name-", 6))
+
+	if len(name) > 63 {
+		t.Fatalf("expected backup name <= 63 chars, got %d: %q", len(name), name)
+	}
+	if !strings.HasPrefix(name, "soteria-backup-") {
+		t.Fatalf("expected sanitized backup name prefix, got %q", name)
+	}
+	if matched, err := regexp.MatchString(`^[a-z0-9-]+-\d{8}-\d{6}$`, name); err != nil || !matched {
+		t.Fatalf("expected backup name to end with timestamp, got %q (matched=%v err=%v)", name, matched, err)
+	}
+}
+
+// TestBackupNameKeepsShortNamesReadable checks that a short volume name is
+// sanitized to lowercase, dash-separated form within the 63-character limit.
+func TestBackupNameKeepsShortNamesReadable(t *testing.T) {
+	name := backupName("restore", "My.Volume_Name")
+
+	if len(name) > 63 {
+		t.Fatalf("expected short backup name <= 63 chars, got %d: %q", len(name), name)
+	}
+	if !strings.HasPrefix(name, "soteria-restore-my-volume-name-") {
+		t.Fatalf("expected readable sanitized prefix, got %q", name)
+	}
+}
+
+// TestBackupJobProgressPctCoversKnownStates checks the percentage returned by
+// backupJobProgressPct for each listed state, including an unrecognized one.
+func TestBackupJobProgressPctCoversKnownStates(t *testing.T) {
+	testCases := []struct {
+		state    string
+		expected int
+	}{
+		{state: "pending", expected: 20},
+		{state: "running", expected: 70},
+		{state: "completed", expected: 100},
+		{state: "failed", expected: 100},
+		{state: "unknown", expected: 0},
+	}
+
+	for _, tc := range testCases {
+		if got := backupJobProgressPct(tc.state); got != tc.expected {
+			t.Fatalf("state=%q: expected %d, got %d", tc.state, tc.expected, got)
+		}
+	}
+}
+
+// TestParseSizeBytesHandlesNumericQuantityAndInvalidForms checks parseSizeBytes
+// on blank, integer, float, negative, quantity-suffixed, and invalid input.
+func TestParseSizeBytesHandlesNumericQuantityAndInvalidForms(t *testing.T) {
+	testCases := []struct {
+		name  string
+		raw   string
+		value int64
+	}{
+		{name: "blank", raw: "", value: 0},
+		{name: "int", raw: "123", value: 123},
+		{name: "float", raw: "42.9", value: 42},
+		{name: "negative-float", raw: "-1.5", value: 0},
+		{name: "quantity", raw: "2Gi", value: 2147483648},
+		{name: "invalid", raw: "definitely-not-a-size", value: 0},
+	}
+
+	for _, tc := range testCases {
+		if got := parseSizeBytes(tc.raw); got != tc.value {
+			t.Fatalf("%s: expected %d, got %d", tc.name, tc.value, got)
+		}
+	}
+}
+
+// TestFormatBytesIECCoversEdgeCasesAndUnits checks formatBytesIEC on zero,
+// NaN, infinity, a plain byte count, and a KiB value.
+func TestFormatBytesIECCoversEdgeCasesAndUnits(t *testing.T) {
+	testCases := []struct {
+		name  string
+		value float64
+		want  string
+	}{
+		{name: "zero", value: 0, want: "0 B"},
+		{name: "nan", value: nanValue(), want: "0 B"},
+		{name: "inf", value: infValue(), want: "0 B"},
+		{name: "bytes", value: 12, want: "12 B"},
+		{name: "kib", value: 1536, want: "1.50 KiB"},
+	}
+
+	for _, tc := range testCases {
+		if got := formatBytesIEC(tc.value); got != tc.want {
+			t.Fatalf("%s: expected %q, got %q", tc.name, tc.want, got)
+		}
+	}
+}
+
+// TestKeepLastDefaultAndValidateKeepLast checks keepLastDefault clamping and
+// the bounds enforced by validateKeepLast.
+func TestKeepLastDefaultAndValidateKeepLast(t *testing.T) {
+	if got := keepLastDefault(nil); got != 0 {
+		t.Fatalf("expected nil keep_last default to 0, got %d", got)
+	}
+	if got := keepLastDefault(intPtr(-3)); got != 0 {
+		t.Fatalf("expected negative keep_last default to clamp to 0, got %d", got)
+	}
+	if got := keepLastDefault(intPtr(7)); got != 7 {
+		t.Fatalf("expected positive keep_last to pass through, got %d", got)
+	}
+
+	if err := validateKeepLast(nil); err != nil {
+		t.Fatalf("expected nil keep_last to validate, got %v", err)
+	}
+	if err := validateKeepLast(intPtr(-1)); err == nil || !strings.Contains(err.Error(), ">= 0") {
+		t.Fatalf("expected negative keep_last validation error, got %v", err)
+	}
+	if err := validateKeepLast(intPtr(maxPolicyKeepLast + 1)); err == nil || !strings.Contains(err.Error(), "<=") {
+		t.Fatalf("expected too-large keep_last validation error, got %v", err)
+	}
+	if err := validateKeepLast(intPtr(maxPolicyKeepLast)); err != nil {
+		t.Fatalf("expected max valid keep_last to pass, got %v", err)
+	}
+}
+
+// TestKeepLastStricterTruthTable checks keepLastStricter across the listed
+// candidate/current combinations.
+func TestKeepLastStricterTruthTable(t *testing.T) {
+	testCases := []struct {
+		candidate int
+		current   int
+		expected  bool
+	}{
+		{candidate: 5, current: 0, expected: true},
+		{candidate: -1, current: 0, expected: true},
+		{candidate: 0, current: 5, expected: false},
+		{candidate: 0, current: 0, expected: false},
+		{candidate: 5, current: 10, expected: true},
+		{candidate: 10, current: 5, expected: false},
+	}
+
+	for _, tc := range testCases {
+		if got := keepLastStricter(tc.candidate, tc.current); got != tc.expected {
+			t.Fatalf("candidate=%d current=%d: expected %v, got %v", tc.candidate, tc.current, tc.expected, got)
+		}
+	}
+}
+
+// nanValue returns a NaN float64 for table-driven cases.
+func nanValue() float64 {
+	return math.NaN()
+}
+
+// infValue returns positive infinity for table-driven cases.
+func infValue() float64 {
+	return math.Inf(1)
+}
+
+// k8sBackupJobSummary builds a summary carrying only the two timestamps.
+func k8sBackupJobSummary(createdAt, completedAt time.Time) k8s.BackupJobSummary {
+	return k8s.BackupJobSummary{
+		CreatedAt:      createdAt,
+		CompletionTime: completedAt,
+	}
+}