test(soteria): raise telemetry and utility coverage

2026-04-20 17:41:28 -03:00 · 2026-04-20 17:41:28 -03:00 · c5f922c6b7
commit c5f922c6b7
parent 0cda32777f
2 changed files with 521 additions and 0 deletions
--- a/internal/server/metrics_test.go
+++ b/internal/server/metrics_test.go
@ -0,0 +1,251 @@
+package server
+
+import (
+	"testing"
+	"time"
+
+	"scm.bstein.dev/bstein/soteria/internal/api"
+)
+
+// TestMetricValuesEmptyAndSetMetricClonesLabels checks two things: metricValues
+// yields nothing for an empty sample map, and setMetric keeps its own copy of
+// the label map so a later mutation by the caller is not visible in the sample.
+func TestMetricValuesEmptyAndSetMetricClonesLabels(t *testing.T) {
+	empty := metricValues(map[string]metricSample{})
+	if len(empty) != 0 {
+		t.Fatalf("expected no metric values for empty source, got %#v", empty)
+	}
+
+	samples := map[string]metricSample{}
+	input := map[string]string{"namespace": "apps", "pvc": "data"}
+	setMetric(samples, input, 42)
+
+	// Change the caller-side map after the sample was stored; the stored labels
+	// are expected to keep the original value.
+	input["namespace"] = "mutated"
+
+	got := metricValues(samples)
+	if len(got) != 1 {
+		t.Fatalf("expected one metric sample, got %#v", got)
+	}
+	only := got[0]
+	if only.labels["namespace"] != "apps" {
+		t.Fatalf("expected cloned labels to remain unchanged, got %#v", only.labels)
+	}
+	if only.value != 42 {
+		t.Fatalf("expected metric value 42, got %v", only.value)
+	}
+}
+
+// TestTelemetryRecordInventoryFailureIncrementsCounter calls
+// RecordInventoryFailure twice on a fresh telemetry value and expects the
+// inventoryRefreshFailure counter to read 2.
+func TestTelemetryRecordInventoryFailureIncrementsCounter(t *testing.T) {
+	tel := newTelemetry()
+	for i := 0; i < 2; i++ {
+		tel.RecordInventoryFailure()
+	}
+
+	if got := tel.inventoryRefreshFailure; got != 2 {
+		t.Fatalf("expected inventory refresh failure count 2, got %v", got)
+	}
+}
+
+// TestTelemetryRecordInventoryPopulatesAndResetsMetrics records an inventory
+// with two PVCs in the "apps" namespace and checks the per-PVC sample maps on
+// the telemetry value, then records an empty inventory and checks that the
+// sample maps are emptied again.
+func TestTelemetryRecordInventoryPopulatesAndResetsMetrics(t *testing.T) {
+	telemetry := newTelemetry()
+	recordedAt := time.Date(2026, 4, 20, 13, 14, 15, 0, time.UTC)
+
+	telemetry.RecordInventory(api.InventoryResponse{
+		Namespaces: []api.NamespaceInventory{
+			{
+				Name: "apps",
+				PVCs: []api.PVCInventory{
+					// Healthy PVC with a parseable RFC 3339 backup time and no
+					// explicit HealthReason.
+					{
+						Namespace:            "apps",
+						PVC:                  "data",
+						Volume:               "pv-apps-data",
+						Driver:               "restic",
+						LastBackupAt:         recordedAt.Format(time.RFC3339),
+						LastBackupAgeHours:   2.5,
+						BackupCount:          3,
+						CompletedBackups:     2,
+						LastBackupSizeBytes:  512,
+						TotalBackupSizeBytes: 2048,
+						Healthy:              true,
+					},
+					// Unhealthy PVC whose LastBackupAt is deliberately not a
+					// timestamp; the assertions below expect its age to be
+					// kept but no last-success sample to be stored.
+					{
+						Namespace:            "apps",
+						PVC:                  "cache",
+						Volume:               "pv-apps-cache",
+						Driver:               "restic",
+						LastBackupAt:         "not-a-time",
+						LastBackupAgeHours:   99,
+						BackupCount:          1,
+						CompletedBackups:     0,
+						LastBackupSizeBytes:  0,
+						TotalBackupSizeBytes: 0,
+						Healthy:              false,
+						HealthReason:         "stale",
+					},
+				},
+			},
+		},
+	})
+
+	// Keys under which the test expects each PVC's samples to be stored.
+	dataKey := metricKey(map[string]string{
+		"namespace": "apps",
+		"pvc":       "data",
+		"volume":    "pv-apps-data",
+		"driver":    "restic",
+	})
+	cacheKey := metricKey(map[string]string{
+		"namespace": "apps",
+		"pvc":       "cache",
+		"volume":    "pv-apps-cache",
+		"driver":    "restic",
+	})
+	// The health-reason samples carry an extra "reason" label. The data PVC
+	// set no HealthReason, and the test expects it to appear as "unknown".
+	dataReasonKey := metricKey(map[string]string{
+		"namespace": "apps",
+		"pvc":       "data",
+		"volume":    "pv-apps-data",
+		"driver":    "restic",
+		"reason":    "unknown",
+	})
+	cacheReasonKey := metricKey(map[string]string{
+		"namespace": "apps",
+		"pvc":       "cache",
+		"volume":    "pv-apps-cache",
+		"driver":    "restic",
+		"reason":    "stale",
+	})
+
+	if telemetry.inventoryRefreshTime <= 0 {
+		t.Fatalf("expected inventory refresh time to be recorded, got %v", telemetry.inventoryRefreshTime)
+	}
+	if got := telemetry.pvcBackupCount[dataKey].value; got != 3 {
+		t.Fatalf("expected backup count 3 for data pvc, got %v", got)
+	}
+	if got := telemetry.pvcBackupCompletedCount[dataKey].value; got != 2 {
+		t.Fatalf("expected completed backup count 2, got %v", got)
+	}
+	if got := telemetry.pvcBackupLastSizeBytes[dataKey].value; got != 512 {
+		t.Fatalf("expected last backup size 512, got %v", got)
+	}
+	if got := telemetry.pvcBackupTotalSizeBytes[dataKey].value; got != 2048 {
+		t.Fatalf("expected total backup size 2048, got %v", got)
+	}
+	if got := telemetry.pvcBackupHealth[dataKey].value; got != 1 {
+		t.Fatalf("expected healthy pvc to emit 1, got %v", got)
+	}
+	if got := telemetry.pvcBackupHealth[cacheKey].value; got != 0 {
+		t.Fatalf("expected unhealthy pvc to emit 0, got %v", got)
+	}
+	if got := telemetry.pvcBackupHealthReason[dataReasonKey].value; got != 1 {
+		t.Fatalf("expected unknown health reason marker, got %v", got)
+	}
+	if got := telemetry.pvcBackupHealthReason[cacheReasonKey].value; got != 1 {
+		t.Fatalf("expected explicit stale health reason marker, got %v", got)
+	}
+	if got := telemetry.pvcBackupAgeHours[dataKey].value; got != 2.5 {
+		t.Fatalf("expected backup age 2.5 hours, got %v", got)
+	}
+	if got := telemetry.pvcBackupAgeHours[cacheKey].value; got != 99 {
+		t.Fatalf("expected invalid-time pvc to still expose age, got %v", got)
+	}
+	if got := telemetry.pvcBackupLastSuccess[dataKey].value; got != float64(recordedAt.Unix()) {
+		t.Fatalf("expected last success timestamp %d, got %v", recordedAt.Unix(), got)
+	}
+	if _, ok := telemetry.pvcBackupLastSuccess[cacheKey]; ok {
+		t.Fatalf("expected invalid last_backup_at to skip success timestamp")
+	}
+
+	// A second refresh with no namespaces must leave no samples from the
+	// first refresh behind.
+	telemetry.RecordInventory(api.InventoryResponse{})
+
+	if len(telemetry.pvcBackupCount) != 0 || len(telemetry.pvcBackupHealthReason) != 0 {
+		t.Fatalf("expected inventory metrics to reset on empty refresh, got counts=%d reasons=%d",
+			len(telemetry.pvcBackupCount), len(telemetry.pvcBackupHealthReason))
+	}
+}
+
+// TestTelemetryRecordB2UsageTracksBucketsAndFallbackTimestamp records a
+// successful B2 usage scan with two buckets and checks the account-level and
+// per-bucket values, then records an unavailable scan with an unparseable
+// ScannedAt and checks the failure marker, the duration, the timestamp
+// fallback and the cleared bucket maps.
+func TestTelemetryRecordB2UsageTracksBucketsAndFallbackTimestamp(t *testing.T) {
+	telemetry := newTelemetry()
+	scannedAt := time.Date(2026, 4, 20, 16, 30, 0, 0, time.UTC)
+	lastModifiedAt := scannedAt.Add(-45 * time.Minute)
+
+	telemetry.RecordB2Usage(api.B2UsageResponse{
+		Available:        true,
+		ScannedAt:        scannedAt.Format(time.RFC3339),
+		ScanDurationMS:   2750,
+		TotalObjects:     99,
+		TotalBytes:       123456,
+		RecentObjects24h: 7,
+		RecentBytes24h:   890,
+		Buckets: []api.B2BucketUsage{
+			{
+				Name:             "atlas-backups",
+				ObjectCount:      44,
+				TotalBytes:       1000,
+				RecentObjects24h: 3,
+				RecentBytes24h:   250,
+				LastModifiedAt:   lastModifiedAt.Format(time.RFC3339),
+			},
+			// LastModifiedAt is deliberately not a timestamp; the test expects
+			// no last-modified sample for this bucket.
+			{
+				Name:             "atlas-logs",
+				ObjectCount:      55,
+				TotalBytes:       2000,
+				RecentObjects24h: 4,
+				RecentBytes24h:   640,
+				LastModifiedAt:   "invalid",
+			},
+		},
+	})
+
+	backupKey := metricKey(map[string]string{"bucket": "atlas-backups"})
+	logsKey := metricKey(map[string]string{"bucket": "atlas-logs"})
+
+	if telemetry.b2ScanSuccess != 1 {
+		t.Fatalf("expected successful scan marker, got %v", telemetry.b2ScanSuccess)
+	}
+	if telemetry.b2ScanTimestamp != float64(scannedAt.Unix()) {
+		t.Fatalf("expected scan timestamp %d, got %v", scannedAt.Unix(), telemetry.b2ScanTimestamp)
+	}
+	// ScanDurationMS of 2750 is expected to surface as 2.75 seconds.
+	if telemetry.b2ScanDurationSeconds != 2.75 {
+		t.Fatalf("expected scan duration 2.75s, got %v", telemetry.b2ScanDurationSeconds)
+	}
+	if telemetry.b2AccountObjects != 99 || telemetry.b2AccountBytes != 123456 {
+		t.Fatalf("unexpected account usage totals: objects=%v bytes=%v", telemetry.b2AccountObjects, telemetry.b2AccountBytes)
+	}
+	if telemetry.b2AccountRecentObjects != 7 || telemetry.b2AccountRecentBytes != 890 {
+		t.Fatalf("unexpected recent account usage: objects=%v bytes=%v", telemetry.b2AccountRecentObjects, telemetry.b2AccountRecentBytes)
+	}
+	if got := telemetry.b2BucketObjects[backupKey].value; got != 44 {
+		t.Fatalf("expected atlas-backups object count 44, got %v", got)
+	}
+	if got := telemetry.b2BucketBytes[logsKey].value; got != 2000 {
+		t.Fatalf("expected atlas-logs bytes 2000, got %v", got)
+	}
+	if got := telemetry.b2BucketRecentBytes[backupKey].value; got != 250 {
+		t.Fatalf("expected atlas-backups recent bytes 250, got %v", got)
+	}
+	if got := telemetry.b2BucketLastModified[backupKey].value; got != float64(lastModifiedAt.Unix()) {
+		t.Fatalf("expected atlas-backups last modified timestamp %d, got %v", lastModifiedAt.Unix(), got)
+	}
+	if _, ok := telemetry.b2BucketLastModified[logsKey]; ok {
+		t.Fatalf("expected invalid bucket last modified timestamp to be ignored")
+	}
+
+	// Bracket the second call with wall-clock readings so the fallback
+	// timestamp can be checked against the [before, after] window.
+	before := time.Now().Unix()
+	telemetry.RecordB2Usage(api.B2UsageResponse{
+		Available:      false,
+		ScannedAt:      "definitely-invalid",
+		ScanDurationMS: 500,
+	})
+	after := time.Now().Unix()
+
+	if telemetry.b2ScanSuccess != 0 {
+		t.Fatalf("expected failed scan marker after unavailable refresh, got %v", telemetry.b2ScanSuccess)
+	}
+	if telemetry.b2ScanDurationSeconds != 0.5 {
+		t.Fatalf("expected scan duration 0.5s, got %v", telemetry.b2ScanDurationSeconds)
+	}
+	if telemetry.b2ScanTimestamp < float64(before) || telemetry.b2ScanTimestamp > float64(after) {
+		t.Fatalf("expected invalid scanned_at to fall back to now, got %v not in [%d,%d]",
+			telemetry.b2ScanTimestamp, before, after)
+	}
+	// The second response carried no buckets, so nothing from the first scan
+	// may remain in the per-bucket maps.
+	if len(telemetry.b2BucketObjects) != 0 || len(telemetry.b2BucketLastModified) != 0 {
+		t.Fatalf("expected bucket metrics to reset when no buckets are provided")
+	}
+}
--- a/internal/server/server_utilities_test.go
+++ b/internal/server/server_utilities_test.go
@ -0,0 +1,270 @@
+package server
+
+import (
+	"math"
+	"regexp"
+	"strings"
+	"testing"
+	"time"
+
+	"scm.bstein.dev/bstein/soteria/internal/k8s"
+	"scm.bstein.dev/bstein/soteria/internal/longhorn"
+)
+
+// TestBuildBackupRecordsSortsAndMarksLatestCompleted passes buildBackupRecords
+// three backups out of order: a completed one with an unparseable Created
+// value, a completed one at 10:00 and a failed one at 11:00. It expects the
+// records newest-first with the unparseable entry last, and the Latest flag set
+// only on backup-a, the completed backup with a parseable timestamp.
+func TestBuildBackupRecordsSortsAndMarksLatestCompleted(t *testing.T) {
+	records := buildBackupRecords([]longhorn.Backup{
+		{
+			Name:         "backup-b",
+			SnapshotName: "snap-b",
+			Created:      "not-a-time",
+			State:        "Completed",
+			URL:          "s3://bucket/backup-b",
+			Size:         "2Gi",
+		},
+		{
+			Name:         "backup-a",
+			SnapshotName: "snap-a",
+			Created:      "2026-04-20T10:00:00Z",
+			State:        "Completed",
+			URL:          "s3://bucket/backup-a",
+			Size:         "1Gi",
+		},
+		{
+			Name:         "backup-c",
+			SnapshotName: "snap-c",
+			Created:      "2026-04-20T11:00:00Z",
+			State:        "Failed",
+			URL:          "s3://bucket/backup-c",
+			Size:         "3Gi",
+		},
+	})
+
+	if len(records) != 3 {
+		t.Fatalf("expected three backup records, got %#v", records)
+	}
+	if records[0].Name != "backup-c" || records[1].Name != "backup-a" || records[2].Name != "backup-b" {
+		t.Fatalf("expected records sorted newest-first with invalid timestamps last, got %#v", records)
+	}
+	// backup-c is newer but failed, so the marker belongs on backup-a.
+	if !records[1].Latest {
+		t.Fatalf("expected latest completed backup to be marked latest, got %#v", records[1])
+	}
+	if records[0].Latest || records[2].Latest {
+		t.Fatalf("expected only the latest completed backup to be marked latest, got %#v", records)
+	}
+}
+
+// TestBuildBackupRecordsWithoutCompletedBackupsLeavesLatestUnset passes only
+// failed and pending backups and expects that no resulting record carries the
+// Latest flag.
+func TestBuildBackupRecordsWithoutCompletedBackupsLeavesLatestUnset(t *testing.T) {
+	input := []longhorn.Backup{
+		{Name: "backup-b", Created: "2026-04-20T10:00:00Z", State: "Failed"},
+		{Name: "backup-a", Created: "2026-04-20T09:00:00Z", State: "Pending"},
+	}
+	records := buildBackupRecords(input)
+
+	if len(records) != 2 {
+		t.Fatalf("expected two backup records, got %#v", records)
+	}
+	for i := range records {
+		if records[i].Latest {
+			t.Fatalf("expected no latest marker when no completed backups exist, got %#v", records)
+		}
+	}
+}
+
+// TestBuildBackupRecordsFallsBackToNameOrderForInvalidTimes passes two
+// completed backups whose Created values cannot be parsed. It expects backup-z
+// to be ordered before backup-a, and the Latest flag to sit on backup-a, the
+// entry that came first in the input.
+func TestBuildBackupRecordsFallsBackToNameOrderForInvalidTimes(t *testing.T) {
+	input := []longhorn.Backup{
+		{Name: "backup-a", Created: "invalid-a", State: "Completed"},
+		{Name: "backup-z", Created: "invalid-z", State: "Completed"},
+	}
+	records := buildBackupRecords(input)
+
+	if len(records) != 2 {
+		t.Fatalf("expected two backup records, got %#v", records)
+	}
+
+	first, second := records[0], records[1]
+	if !(first.Name == "backup-z" && second.Name == "backup-a") {
+		t.Fatalf("expected invalid timestamps to sort by name fallback, got %#v", records)
+	}
+	if !(!first.Latest && second.Latest) {
+		t.Fatalf("expected first invalid completed backup encountered pre-sort to retain latest marker, got %#v", records)
+	}
+}
+
+// TestDecodeResticSelectorCoversValidAndInvalidValues walks decodeResticSelector
+// through an empty string, the bare "latest" selector, a value produced by
+// encodeResticSelector, and a list of inputs that must be rejected with empty
+// results.
+func TestDecodeResticSelectorCoversValidAndInvalidValues(t *testing.T) {
+	repository, snapshot, ok := decodeResticSelector("")
+	if ok || repository != "" || snapshot != "" {
+		t.Fatalf("expected empty selector to be invalid, got repo=%q snapshot=%q ok=%v", repository, snapshot, ok)
+	}
+
+	repository, snapshot, ok = decodeResticSelector("latest")
+	if !ok || repository != "" || snapshot != "latest" {
+		t.Fatalf("expected latest selector to decode, got repo=%q snapshot=%q ok=%v", repository, snapshot, ok)
+	}
+
+	// The repository is encoded with surrounding spaces and is expected to
+	// come back trimmed.
+	encoded := encodeResticSelector(" s3://bucket/repository ")
+	repository, snapshot, ok = decodeResticSelector(encoded)
+	if !ok || repository != "s3://bucket/repository" || snapshot != "latest" {
+		t.Fatalf("expected encoded selector to round-trip, got repo=%q snapshot=%q ok=%v", repository, snapshot, ok)
+	}
+
+	rejected := []string{
+		"plain-text",
+		resticSelectorPrefix,
+		resticSelectorPrefix + "###",
+		// "ICAg" is three spaces in standard base64 — presumably the
+		// blank-repository case; confirm against decodeResticSelector.
+		resticSelectorPrefix + "ICAg",
+	}
+	for _, input := range rejected {
+		repository, snapshot, ok = decodeResticSelector(input)
+		if !(!ok && repository == "" && snapshot == "") {
+			t.Fatalf("expected invalid selector %q to fail decode, got repo=%q snapshot=%q ok=%v", input, repository, snapshot, ok)
+		}
+	}
+}
+
+func TestBackupJobTimestampPrefersCompletionTime(t *testing.T) {
+	createdAt := time.Date(2026, 4, 20, 9, 0, 0, 0, time.UTC)
+	completedAt := createdAt.Add(15 * time.Minute)
+
+	if got := backupJobTimestamp(k8sBackupJobSummary(createdAt, completedAt)); !got.Equal(completedAt) {
+		t.Fatalf("expected completion timestamp to win, got %s", got)
+	}
+	if got := backupJobTimestamp(k8sBackupJobSummary(createdAt, time.Time{})); !got.Equal(createdAt) {
+		t.Fatalf("expected created timestamp fallback, got %s", got)
+	}
+}
+
+func TestBackupNameTruncatesToKubernetesSafeLength(t *testing.T) {
+	name := backupName("backup", strings.Repeat("very-long-volume-name-", 6))
+
+	if len(name) > 63 {
+		t.Fatalf("expected backup name <= 63 chars, got %d: %q", len(name), name)
+	}
+	if !strings.HasPrefix(name, "soteria-backup-") {
+		t.Fatalf("expected sanitized backup name prefix, got %q", name)
+	}
+	if matched, err := regexp.MatchString(`^[a-z0-9-]+-\d{8}-\d{6}$`, name); err != nil || !matched {
+		t.Fatalf("expected backup name to end with timestamp, got %q (matched=%v err=%v)", name, matched, err)
+	}
+}
+
+func TestBackupNameKeepsShortNamesReadable(t *testing.T) {
+	name := backupName("restore", "My.Volume_Name")
+
+	if len(name) > 63 {
+		t.Fatalf("expected short backup name <= 63 chars, got %d: %q", len(name), name)
+	}
+	if !strings.HasPrefix(name, "soteria-restore-my-volume-name-") {
+		t.Fatalf("expected readable sanitized prefix, got %q", name)
+	}
+}
+
+// TestBackupJobProgressPctCoversKnownStates pins the percentage that
+// backupJobProgressPct reports for each listed job state, including the 0
+// expected for the "unknown" state.
+//
+// Each row runs as its own subtest so a failure for one state does not stop the
+// remaining rows from being checked.
+func TestBackupJobProgressPctCoversKnownStates(t *testing.T) {
+	testCases := []struct {
+		state    string
+		expected int
+	}{
+		{state: "pending", expected: 20},
+		{state: "running", expected: 70},
+		{state: "completed", expected: 100},
+		{state: "failed", expected: 100},
+		{state: "unknown", expected: 0},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.state, func(t *testing.T) {
+			if got := backupJobProgressPct(tc.state); got != tc.expected {
+				t.Fatalf("state=%q: expected %d, got %d", tc.state, tc.expected, got)
+			}
+		})
+	}
+}
+
+// TestParseSizeBytesHandlesNumericQuantityAndInvalidForms pins parseSizeBytes
+// for a blank string, a plain integer, a float (expected to truncate to 42), a
+// negative float (expected 0), the quantity "2Gi" and an unparseable string
+// (expected 0).
+//
+// Each row runs as a named subtest so one failing input does not hide the
+// others.
+func TestParseSizeBytesHandlesNumericQuantityAndInvalidForms(t *testing.T) {
+	testCases := []struct {
+		name  string
+		raw   string
+		value int64
+	}{
+		{name: "blank", raw: "", value: 0},
+		{name: "int", raw: "123", value: 123},
+		{name: "float", raw: "42.9", value: 42},
+		{name: "negative-float", raw: "-1.5", value: 0},
+		{name: "quantity", raw: "2Gi", value: 2147483648},
+		{name: "invalid", raw: "definitely-not-a-size", value: 0},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := parseSizeBytes(tc.raw); got != tc.value {
+				t.Fatalf("%s: expected %d, got %d", tc.name, tc.value, got)
+			}
+		})
+	}
+}
+
+// TestFormatBytesIECCoversEdgeCasesAndUnits pins formatBytesIEC for zero, NaN
+// and +Inf (all expected to render as "0 B"), a plain byte count, and 1536
+// bytes (expected "1.50 KiB").
+//
+// Each row runs as a named subtest so one failing value does not hide the
+// others.
+func TestFormatBytesIECCoversEdgeCasesAndUnits(t *testing.T) {
+	testCases := []struct {
+		name  string
+		value float64
+		want  string
+	}{
+		{name: "zero", value: 0, want: "0 B"},
+		{name: "nan", value: nanValue(), want: "0 B"},
+		{name: "inf", value: infValue(), want: "0 B"},
+		{name: "bytes", value: 12, want: "12 B"},
+		{name: "kib", value: 1536, want: "1.50 KiB"},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := formatBytesIEC(tc.value); got != tc.want {
+				t.Fatalf("%s: expected %q, got %q", tc.name, tc.want, got)
+			}
+		})
+	}
+}
+
+// TestKeepLastDefaultAndValidateKeepLast covers two helpers. keepLastDefault
+// is expected to return 0 for nil and for a negative value, and to pass a
+// positive value through. validateKeepLast is expected to accept nil and
+// maxPolicyKeepLast, and to reject -1 and maxPolicyKeepLast+1 with messages
+// containing ">= 0" and "<=" respectively.
+func TestKeepLastDefaultAndValidateKeepLast(t *testing.T) {
+	negative, positive := intPtr(-3), intPtr(7)
+
+	if got := keepLastDefault(nil); got != 0 {
+		t.Fatalf("expected nil keep_last default to 0, got %d", got)
+	}
+	if got := keepLastDefault(negative); got != 0 {
+		t.Fatalf("expected negative keep_last default to clamp to 0, got %d", got)
+	}
+	if got := keepLastDefault(positive); got != 7 {
+		t.Fatalf("expected positive keep_last to pass through, got %d", got)
+	}
+
+	err := validateKeepLast(nil)
+	if err != nil {
+		t.Fatalf("expected nil keep_last to validate, got %v", err)
+	}
+
+	err = validateKeepLast(intPtr(-1))
+	if err == nil || !strings.Contains(err.Error(), ">= 0") {
+		t.Fatalf("expected negative keep_last validation error, got %v", err)
+	}
+
+	// One past the limit must fail; the limit itself must pass.
+	err = validateKeepLast(intPtr(maxPolicyKeepLast + 1))
+	if err == nil || !strings.Contains(err.Error(), "<=") {
+		t.Fatalf("expected too-large keep_last validation error, got %v", err)
+	}
+
+	err = validateKeepLast(intPtr(maxPolicyKeepLast))
+	if err != nil {
+		t.Fatalf("expected max valid keep_last to pass, got %v", err)
+	}
+}
+
+// TestKeepLastStricterTruthTable pins keepLastStricter(candidate, current) for
+// six combinations of zero, negative and positive values.
+//
+// NOTE(review): the rows suggest a current value of 0 is treated as "no limit",
+// so any non-zero candidate counts as stricter — confirm against
+// keepLastStricter.
+//
+// Mismatches are reported with t.Errorf so every failing row is listed in a
+// single run instead of stopping at the first one.
+func TestKeepLastStricterTruthTable(t *testing.T) {
+	testCases := []struct {
+		candidate int
+		current   int
+		expected  bool
+	}{
+		{candidate: 5, current: 0, expected: true},
+		{candidate: -1, current: 0, expected: true},
+		{candidate: 0, current: 5, expected: false},
+		{candidate: 0, current: 0, expected: false},
+		{candidate: 5, current: 10, expected: true},
+		{candidate: 10, current: 5, expected: false},
+	}
+
+	for _, tc := range testCases {
+		if got := keepLastStricter(tc.candidate, tc.current); got != tc.expected {
+			t.Errorf("candidate=%d current=%d: expected %v, got %v", tc.candidate, tc.current, tc.expected, got)
+		}
+	}
+}
+
+// nanValue returns math.NaN() so table rows can name a NaN input.
+func nanValue() float64 {
+	nan := math.NaN()
+	return nan
+}
+
+func infValue() float64 {
+	return math.Inf(1)
+}
+
+func k8sBackupJobSummary(createdAt, completedAt time.Time) k8s.BackupJobSummary {
+	return k8s.BackupJobSummary{
+		CreatedAt:      createdAt,
+		CompletionTime: completedAt,
+	}
+}