test(soteria): cover policy cycle and start loops
This commit is contained in:
parent
7b592edb6d
commit
5c9cca4420
@ -2,10 +2,14 @@ package server
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/json"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"scm.bstein.dev/bstein/soteria/internal/api"
|
||||||
"scm.bstein.dev/bstein/soteria/internal/config"
|
"scm.bstein.dev/bstein/soteria/internal/config"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -62,3 +66,52 @@ func TestRefreshB2UsageRecordsScanErrorsAfterCredentialsResolve(t *testing.T) {
|
|||||||
t.Fatalf("expected failure metrics after scan error, got success=%f duration=%f", srv.metrics.b2ScanSuccess, srv.metrics.b2ScanDurationSeconds)
|
t.Fatalf("expected failure metrics after scan error, got success=%f duration=%f", srv.metrics.b2ScanSuccess, srv.metrics.b2ScanDurationSeconds)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestHandleB2UsageRefreshesWhenForcedAndQueryBoolRecognizesTruthyValues(t *testing.T) {
|
||||||
|
for _, raw := range []string{"1", "true", "yes", "y", "on", "TRUE"} {
|
||||||
|
if !queryBool(raw) {
|
||||||
|
t.Fatalf("expected %q to be treated as truthy", raw)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if queryBool("nope") {
|
||||||
|
t.Fatalf("expected invalid truthy value to be false")
|
||||||
|
}
|
||||||
|
|
||||||
|
srv := &Server{
|
||||||
|
cfg: &config.Config{
|
||||||
|
B2Enabled: true,
|
||||||
|
B2Endpoint: "https://",
|
||||||
|
B2AccessKeyID: "atlas-key",
|
||||||
|
B2SecretAccessKey: "atlas-secret",
|
||||||
|
B2ScanTimeout: time.Second,
|
||||||
|
},
|
||||||
|
client: &fakeKubeClient{},
|
||||||
|
metrics: newTelemetry(),
|
||||||
|
}
|
||||||
|
srv.handler = http.HandlerFunc(srv.route)
|
||||||
|
srv.setB2Usage(api.B2UsageResponse{
|
||||||
|
Enabled: true,
|
||||||
|
Available: true,
|
||||||
|
ScannedAt: time.Now().UTC().Add(-1 * time.Hour).Format(time.RFC3339),
|
||||||
|
Endpoint: "cached-endpoint",
|
||||||
|
})
|
||||||
|
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/v1/b2?refresh=yes", nil)
|
||||||
|
res := httptest.NewRecorder()
|
||||||
|
srv.Handler().ServeHTTP(res, req)
|
||||||
|
|
||||||
|
if res.Code != http.StatusOK {
|
||||||
|
t.Fatalf("expected forced refresh request to succeed, got %d: %s", res.Code, res.Body.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
var payload api.B2UsageResponse
|
||||||
|
if err := json.Unmarshal(res.Body.Bytes(), &payload); err != nil {
|
||||||
|
t.Fatalf("decode forced refresh payload: %v", err)
|
||||||
|
}
|
||||||
|
if payload.Error == "" || !strings.Contains(payload.Error, "S3 endpoint host is empty") {
|
||||||
|
t.Fatalf("expected refreshed B2 failure payload, got %#v", payload)
|
||||||
|
}
|
||||||
|
if payload.Endpoint != "https://" {
|
||||||
|
t.Fatalf("expected refreshed endpoint to replace cached snapshot, got %#v", payload)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
156
internal/server/policy_cycle_test.go
Normal file
156
internal/server/policy_cycle_test.go
Normal file
@ -0,0 +1,156 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"scm.bstein.dev/bstein/soteria/internal/api"
|
||||||
|
"scm.bstein.dev/bstein/soteria/internal/config"
|
||||||
|
"scm.bstein.dev/bstein/soteria/internal/k8s"
|
||||||
|
)
|
||||||
|
|
||||||
|
type policyCycleTestKubeClient struct {
|
||||||
|
*inventoryTestKubeClient
|
||||||
|
createBackupErrForPVC map[string]error
|
||||||
|
backupRequests []api.BackupRequest
|
||||||
|
}
|
||||||
|
|
||||||
|
func (k *policyCycleTestKubeClient) CreateBackupJob(ctx context.Context, cfg *config.Config, req api.BackupRequest) (string, string, error) {
|
||||||
|
k.backupRequests = append(k.backupRequests, req)
|
||||||
|
if err := k.createBackupErrForPVC[req.PVC]; err != nil {
|
||||||
|
return "", "", err
|
||||||
|
}
|
||||||
|
return k.inventoryTestKubeClient.fakeKubeClient.CreateBackupJob(ctx, cfg, req)
|
||||||
|
}
|
||||||
|
|
||||||
|
func metricCount(samples map[string]metricSample, labels map[string]string) float64 {
|
||||||
|
sample, ok := samples[metricKey(labels)]
|
||||||
|
if !ok {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return sample.value
|
||||||
|
}
|
||||||
|
|
||||||
|
func findBackupRequestByPVC(items []api.BackupRequest, pvc string) (api.BackupRequest, bool) {
|
||||||
|
for _, item := range items {
|
||||||
|
if item.PVC == pvc {
|
||||||
|
return item, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return api.BackupRequest{}, false
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRunPolicyCycleCoversInventoryErrorAndConcurrentGuard(t *testing.T) {
|
||||||
|
client := &policyCycleTestKubeClient{
|
||||||
|
inventoryTestKubeClient: &inventoryTestKubeClient{
|
||||||
|
fakeKubeClient: &fakeKubeClient{},
|
||||||
|
listPVCsErr: errors.New("inventory exploded"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
srv := &Server{
|
||||||
|
cfg: &config.Config{
|
||||||
|
BackupDriver: "restic",
|
||||||
|
BackupMaxAge: 24 * time.Hour,
|
||||||
|
PolicyEvalInterval: time.Minute,
|
||||||
|
},
|
||||||
|
client: client,
|
||||||
|
longhorn: &fakeLonghornClient{},
|
||||||
|
metrics: newTelemetry(),
|
||||||
|
policies: map[string]api.BackupPolicy{
|
||||||
|
"apps__all": {ID: "apps__all", Namespace: "apps", IntervalHours: 6, Enabled: true, Dedupe: true},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
srv.runPolicyCycle(context.Background())
|
||||||
|
if got := metricCount(srv.metrics.policyBackups, map[string]string{"result": "inventory_error"}); got != 1 {
|
||||||
|
t.Fatalf("expected inventory_error policy metric, got %f", got)
|
||||||
|
}
|
||||||
|
if srv.running {
|
||||||
|
t.Fatalf("expected policy runner lock to be released after inventory failure")
|
||||||
|
}
|
||||||
|
|
||||||
|
srv.running = true
|
||||||
|
srv.runPolicyCycle(context.Background())
|
||||||
|
if got := metricCount(srv.metrics.policyBackups, map[string]string{"result": "inventory_error"}); got != 1 {
|
||||||
|
t.Fatalf("expected concurrent guard to skip second run, got %f", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRunPolicyCycleCoversEffectivePoliciesAndResultTracking(t *testing.T) {
|
||||||
|
now := time.Now().UTC()
|
||||||
|
recent := now.Add(-30 * time.Minute)
|
||||||
|
client := &policyCycleTestKubeClient{
|
||||||
|
inventoryTestKubeClient: &inventoryTestKubeClient{
|
||||||
|
fakeKubeClient: &fakeKubeClient{
|
||||||
|
pvcs: []k8s.PVCSummary{
|
||||||
|
{Namespace: "apps", Name: "data", VolumeName: "vol-data", Phase: "Bound"},
|
||||||
|
{Namespace: "apps", Name: "busy", VolumeName: "vol-busy", Phase: "Bound"},
|
||||||
|
{Namespace: "apps", Name: "recent", VolumeName: "vol-recent", Phase: "Bound"},
|
||||||
|
{Namespace: "apps", Name: "attempted", VolumeName: "vol-attempted", Phase: "Bound"},
|
||||||
|
{Namespace: "apps", Name: "fail", VolumeName: "vol-fail", Phase: "Bound"},
|
||||||
|
},
|
||||||
|
backupJobs: map[string][]k8s.BackupJobSummary{
|
||||||
|
"apps/busy": {
|
||||||
|
{Name: "job-busy", Namespace: "apps", PVC: "busy", State: "Running", CreatedAt: recent},
|
||||||
|
},
|
||||||
|
"apps/recent": {
|
||||||
|
{Name: "job-recent", Namespace: "apps", PVC: "recent", State: "Completed", CreatedAt: recent, CompletionTime: recent, KeepLast: 1},
|
||||||
|
},
|
||||||
|
"apps/attempted": {
|
||||||
|
{Name: "job-attempted", Namespace: "apps", PVC: "attempted", State: "Failed", CreatedAt: recent, KeepLast: 1},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
createBackupErrForPVC: map[string]error{
|
||||||
|
"fail": errors.New("create backup job exploded"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
srv := &Server{
|
||||||
|
cfg: &config.Config{
|
||||||
|
BackupDriver: "restic",
|
||||||
|
BackupMaxAge: 24 * time.Hour,
|
||||||
|
ResticRepository: "s3:https://repo/root",
|
||||||
|
},
|
||||||
|
client: client,
|
||||||
|
longhorn: &fakeLonghornClient{},
|
||||||
|
metrics: newTelemetry(),
|
||||||
|
policies: map[string]api.BackupPolicy{
|
||||||
|
"apps__all": {ID: "apps__all", Namespace: "apps", IntervalHours: 6, Enabled: true, Dedupe: true, KeepLast: 1},
|
||||||
|
"apps__data": {ID: "apps__data", Namespace: "apps", PVC: "data", IntervalHours: 1, Enabled: true, Dedupe: false, KeepLast: 2},
|
||||||
|
"apps__disabled": {ID: "apps__disabled", Namespace: "apps", PVC: "disabled", IntervalHours: 1, Enabled: false, Dedupe: false, KeepLast: 5},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
srv.runPolicyCycle(context.Background())
|
||||||
|
|
||||||
|
if len(client.backupRequests) != 2 {
|
||||||
|
t.Fatalf("expected two executed backup requests, got %#v", client.backupRequests)
|
||||||
|
}
|
||||||
|
dataReq, ok := findBackupRequestByPVC(client.backupRequests, "data")
|
||||||
|
if !ok || dataReq.Dedupe == nil || *dataReq.Dedupe != false || dataReq.KeepLast == nil || *dataReq.KeepLast != 2 {
|
||||||
|
t.Fatalf("expected stricter PVC policy to win for data, got %#v", client.backupRequests)
|
||||||
|
}
|
||||||
|
|
||||||
|
if got := metricCount(srv.metrics.policyBackups, map[string]string{"result": "success"}); got != 1 {
|
||||||
|
t.Fatalf("expected one successful policy backup, got %f", got)
|
||||||
|
}
|
||||||
|
if got := metricCount(srv.metrics.policyBackups, map[string]string{"result": "backend_error"}); got != 1 {
|
||||||
|
t.Fatalf("expected one backend error policy backup, got %f", got)
|
||||||
|
}
|
||||||
|
if got := metricCount(srv.metrics.policyBackups, map[string]string{"result": "in_progress"}); got != 1 {
|
||||||
|
t.Fatalf("expected one in-progress policy backup, got %f", got)
|
||||||
|
}
|
||||||
|
if got := metricCount(srv.metrics.policyBackups, map[string]string{"result": "not_due"}); got != 2 {
|
||||||
|
t.Fatalf("expected two not-due policy backups, got %f", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
if got := metricCount(srv.metrics.backupRequests, map[string]string{"driver": "restic", "result": "success"}); got != 1 {
|
||||||
|
t.Fatalf("expected one successful executed backup request, got %f", got)
|
||||||
|
}
|
||||||
|
if got := metricCount(srv.metrics.backupRequests, map[string]string{"driver": "restic", "result": "backend_error"}); got != 1 {
|
||||||
|
t.Fatalf("expected one failed executed backup request, got %f", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
125
internal/server/server_start_test.go
Normal file
125
internal/server/server_start_test.go
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"scm.bstein.dev/bstein/soteria/internal/api"
|
||||||
|
"scm.bstein.dev/bstein/soteria/internal/config"
|
||||||
|
"scm.bstein.dev/bstein/soteria/internal/k8s"
|
||||||
|
)
|
||||||
|
|
||||||
|
type startTestKubeClient struct {
|
||||||
|
*fakeKubeClient
|
||||||
|
mu sync.Mutex
|
||||||
|
loadCalls int
|
||||||
|
listPVCCalls int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (k *startTestKubeClient) LoadSecretData(ctx context.Context, namespace, secretName, key string) ([]byte, error) {
|
||||||
|
k.mu.Lock()
|
||||||
|
k.loadCalls++
|
||||||
|
k.mu.Unlock()
|
||||||
|
return k.fakeKubeClient.LoadSecretData(ctx, namespace, secretName, key)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (k *startTestKubeClient) ListBoundPVCs(ctx context.Context) ([]k8s.PVCSummary, error) {
|
||||||
|
k.mu.Lock()
|
||||||
|
k.listPVCCalls++
|
||||||
|
k.mu.Unlock()
|
||||||
|
return k.fakeKubeClient.ListBoundPVCs(ctx)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (k *startTestKubeClient) counts() (int, int) {
|
||||||
|
k.mu.Lock()
|
||||||
|
defer k.mu.Unlock()
|
||||||
|
return k.loadCalls, k.listPVCCalls
|
||||||
|
}
|
||||||
|
|
||||||
|
func newStartTestServer(cfg *config.Config, client kubeClient) *Server {
|
||||||
|
return &Server{
|
||||||
|
cfg: cfg,
|
||||||
|
client: client,
|
||||||
|
longhorn: &fakeLonghornClient{},
|
||||||
|
metrics: newTelemetry(),
|
||||||
|
ui: newUIRenderer(),
|
||||||
|
policies: map[string]api.BackupPolicy{},
|
||||||
|
jobUsage: map[string]resticJobUsageCacheEntry{},
|
||||||
|
usageStore: map[string]resticPersistedUsageEntry{},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStartRunsInitialLoadAndTickerLoopWithoutB2(t *testing.T) {
|
||||||
|
client := &startTestKubeClient{
|
||||||
|
fakeKubeClient: &fakeKubeClient{
|
||||||
|
pvcs: []k8s.PVCSummary{{Namespace: "apps", Name: "data", VolumeName: "vol-data", Phase: "Bound"}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
srv := newStartTestServer(&config.Config{
|
||||||
|
Namespace: "atlas",
|
||||||
|
PolicySecretName: "soteria-policies",
|
||||||
|
UsageSecretName: "soteria-usage",
|
||||||
|
BackupDriver: "longhorn",
|
||||||
|
BackupMaxAge: 24 * time.Hour,
|
||||||
|
MetricsRefreshInterval: 10 * time.Millisecond,
|
||||||
|
PolicyEvalInterval: 10 * time.Millisecond,
|
||||||
|
B2Enabled: false,
|
||||||
|
}, client)
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
srv.Start(ctx)
|
||||||
|
time.Sleep(35 * time.Millisecond)
|
||||||
|
cancel()
|
||||||
|
time.Sleep(20 * time.Millisecond)
|
||||||
|
|
||||||
|
loadCalls, listPVCCalls := client.counts()
|
||||||
|
if loadCalls < 2 {
|
||||||
|
t.Fatalf("expected initial policy/usage secret loads, got %d", loadCalls)
|
||||||
|
}
|
||||||
|
if listPVCCalls < 2 {
|
||||||
|
t.Fatalf("expected inventory refresh to run initially and on ticker, got %d", listPVCCalls)
|
||||||
|
}
|
||||||
|
if srv.metrics.inventoryRefreshTime == 0 {
|
||||||
|
t.Fatalf("expected inventory metrics to be recorded after start loop")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStartRunsB2TickerAndStoresRefreshFailures(t *testing.T) {
|
||||||
|
client := &startTestKubeClient{
|
||||||
|
fakeKubeClient: &fakeKubeClient{
|
||||||
|
pvcs: []k8s.PVCSummary{{Namespace: "apps", Name: "data", VolumeName: "vol-data", Phase: "Bound"}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
srv := newStartTestServer(&config.Config{
|
||||||
|
Namespace: "atlas",
|
||||||
|
PolicySecretName: "soteria-policies",
|
||||||
|
UsageSecretName: "soteria-usage",
|
||||||
|
BackupDriver: "longhorn",
|
||||||
|
BackupMaxAge: 24 * time.Hour,
|
||||||
|
MetricsRefreshInterval: 10 * time.Millisecond,
|
||||||
|
PolicyEvalInterval: 10 * time.Millisecond,
|
||||||
|
B2Enabled: true,
|
||||||
|
B2Endpoint: "https://",
|
||||||
|
B2AccessKeyID: "atlas-key",
|
||||||
|
B2SecretAccessKey: "atlas-secret",
|
||||||
|
B2ScanInterval: 10 * time.Millisecond,
|
||||||
|
B2ScanTimeout: 10 * time.Millisecond,
|
||||||
|
}, client)
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
srv.Start(ctx)
|
||||||
|
time.Sleep(35 * time.Millisecond)
|
||||||
|
cancel()
|
||||||
|
time.Sleep(20 * time.Millisecond)
|
||||||
|
|
||||||
|
usage := srv.getB2Usage()
|
||||||
|
if !usage.Enabled || usage.Error == "" || !strings.Contains(usage.Error, "S3 endpoint host is empty") {
|
||||||
|
t.Fatalf("expected B2 ticker refresh failure snapshot, got %#v", usage)
|
||||||
|
}
|
||||||
|
if srv.metrics.b2ScanTimestamp == 0 {
|
||||||
|
t.Fatalf("expected B2 scan metrics to be recorded during start loop")
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user