diff --git a/internal/api/types.go b/internal/api/types.go index d7b9c9a..678df2c 100644 --- a/internal/api/types.go +++ b/internal/api/types.go @@ -99,6 +99,7 @@ type AuthInfoResponse struct { User string `json:"user,omitempty"` Email string `json:"email,omitempty"` Groups []string `json:"groups,omitempty"` + AllowedGroups []string `json:"allowed_groups,omitempty"` } type BackupPolicy struct { diff --git a/internal/k8s/jobs.go b/internal/k8s/jobs.go index d25e238..5132b8e 100644 --- a/internal/k8s/jobs.go +++ b/internal/k8s/jobs.go @@ -32,6 +32,26 @@ type BackupJobSummary struct { State string } +func (c *Client) ListBackupJobs(ctx context.Context, namespace string) ([]BackupJobSummary, error) { + selector := fmt.Sprintf("%s=soteria,%s=backup,%s=backup", labelAppName, labelComponent, labelAction) + jobs, err := c.Clientset.BatchV1().Jobs(namespace).List(ctx, metav1.ListOptions{LabelSelector: selector}) + if err != nil { + return nil, fmt.Errorf("list backup jobs for namespace %s: %w", namespace, err) + } + + out := make([]BackupJobSummary, 0, len(jobs.Items)) + for _, job := range jobs.Items { + pvc := strings.TrimSpace(job.Labels[labelPVC]) + if pvc == "" { + continue + } + out = append(out, summarizeBackupJob(job, pvc)) + } + + sortBackupJobSummaries(out) + return out, nil +} + func (c *Client) ListBackupJobsForPVC(ctx context.Context, namespace, pvc string) ([]BackupJobSummary, error) { selector := fmt.Sprintf("%s=soteria,%s=backup,%s=backup,%s=%s", labelAppName, labelComponent, labelAction, labelPVC, pvc) jobs, err := c.Clientset.BatchV1().Jobs(namespace).List(ctx, metav1.ListOptions{LabelSelector: selector}) @@ -41,43 +61,51 @@ func (c *Client) ListBackupJobsForPVC(ctx context.Context, namespace, pvc string out := make([]BackupJobSummary, 0, len(jobs.Items)) for _, job := range jobs.Items { - summary := BackupJobSummary{ - Name: job.Name, - Namespace: job.Namespace, - PVC: pvc, - CreatedAt: job.CreationTimestamp.Time, - State: "Pending", - } - if 
job.Status.CompletionTime != nil { - summary.CompletionTime = job.Status.CompletionTime.Time - } - switch { - case job.Status.Succeeded > 0: - summary.State = "Completed" - case job.Status.Failed > 0: - summary.State = "Failed" - case job.Status.Active > 0: - summary.State = "Running" - } - out = append(out, summary) + out = append(out, summarizeBackupJob(job, pvc)) } - sort.Slice(out, func(i, j int) bool { - left := out[i].CompletionTime + sortBackupJobSummaries(out) + + return out, nil +} + +func summarizeBackupJob(job batchv1.Job, pvc string) BackupJobSummary { + summary := BackupJobSummary{ + Name: job.Name, + Namespace: job.Namespace, + PVC: pvc, + CreatedAt: job.CreationTimestamp.Time, + State: "Pending", + } + if job.Status.CompletionTime != nil { + summary.CompletionTime = job.Status.CompletionTime.Time + } + switch { + case job.Status.Succeeded > 0: + summary.State = "Completed" + case job.Status.Failed > 0: + summary.State = "Failed" + case job.Status.Active > 0: + summary.State = "Running" + } + return summary +} + +func sortBackupJobSummaries(items []BackupJobSummary) { + sort.Slice(items, func(i, j int) bool { + left := items[i].CompletionTime if left.IsZero() { - left = out[i].CreatedAt + left = items[i].CreatedAt } - right := out[j].CompletionTime + right := items[j].CompletionTime if right.IsZero() { - right = out[j].CreatedAt + right = items[j].CreatedAt } if left.Equal(right) { - return out[i].Name > out[j].Name + return items[i].Name > items[j].Name } return left.After(right) }) - - return out, nil } func (c *Client) CreateBackupJob(ctx context.Context, cfg *config.Config, req api.BackupRequest) (string, string, error) { diff --git a/internal/server/server.go b/internal/server/server.go index b7b98fb..0c64517 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -28,6 +28,7 @@ type kubeClient interface { ResolvePVCVolume(ctx context.Context, namespace, pvcName string) (string, *corev1.PersistentVolumeClaim, 
*corev1.PersistentVolume, error) CreateBackupJob(ctx context.Context, cfg *config.Config, req api.BackupRequest) (string, string, error) CreateRestoreJob(ctx context.Context, cfg *config.Config, req api.RestoreTestRequest) (string, string, error) + ListBackupJobs(ctx context.Context, namespace string) ([]k8s.BackupJobSummary, error) ListBackupJobsForPVC(ctx context.Context, namespace, pvc string) ([]k8s.BackupJobSummary, error) ListBoundPVCs(ctx context.Context) ([]k8s.PVCSummary, error) PersistentVolumeClaimExists(ctx context.Context, namespace, pvcName string) (bool, error) @@ -227,6 +228,7 @@ func (s *Server) handleWhoAmI(w http.ResponseWriter, r *http.Request) { User: identity.User, Email: identity.Email, Groups: identity.Groups, + AllowedGroups: s.cfg.AllowedGroups, }) } @@ -847,6 +849,8 @@ func (s *Server) buildInventory(ctx context.Context) (api.InventoryResponse, err return api.InventoryResponse{}, err } + resticJobsByPVC, resticLookupErrors := s.prefetchResticBackupJobs(ctx, pvcs) + groups := make(map[string][]api.PVCInventory) for _, summary := range pvcs { entry := api.PVCInventory{ @@ -859,7 +863,7 @@ func (s *Server) buildInventory(ctx context.Context) (api.InventoryResponse, err AccessModes: summary.AccessModes, Driver: s.cfg.BackupDriver, } - s.enrichPVCInventory(ctx, &entry) + s.enrichPVCInventory(ctx, &entry, resticJobsByPVC, resticLookupErrors) groups[summary.Namespace] = append(groups[summary.Namespace], entry) } @@ -882,7 +886,49 @@ func (s *Server) buildInventory(ctx context.Context) (api.InventoryResponse, err return response, nil } -func (s *Server) enrichPVCInventory(ctx context.Context, entry *api.PVCInventory) { +func (s *Server) prefetchResticBackupJobs(ctx context.Context, pvcs []k8s.PVCSummary) (map[string][]k8s.BackupJobSummary, map[string]error) { + if s.cfg.BackupDriver != "restic" { + return nil, nil + } + + namespaces := map[string]struct{}{} + for _, pvc := range pvcs { + namespaces[pvc.Namespace] = struct{}{} + } + + 
namespaceNames := make([]string, 0, len(namespaces)) + for namespace := range namespaces { + namespaceNames = append(namespaceNames, namespace) + } + sort.Strings(namespaceNames) + + jobsByPVC := map[string][]k8s.BackupJobSummary{} + lookupErrors := map[string]error{} + for _, namespace := range namespaceNames { + jobs, err := s.client.ListBackupJobs(ctx, namespace) + if err != nil { + lookupErrors[namespace] = err + continue + } + for _, job := range jobs { + key := job.Namespace + "/" + job.PVC + jobsByPVC[key] = append(jobsByPVC[key], job) + } + } + + for key := range jobsByPVC { + sortBackupJobsNewestFirst(jobsByPVC[key]) + } + + return jobsByPVC, lookupErrors +} + +func (s *Server) enrichPVCInventory( + ctx context.Context, + entry *api.PVCInventory, + resticJobsByPVC map[string][]k8s.BackupJobSummary, + resticLookupErrors map[string]error, +) { switch s.cfg.BackupDriver { case "longhorn": backups, err := s.longhorn.ListBackups(ctx, entry.Volume) @@ -928,13 +974,18 @@ func (s *Server) enrichPVCInventory(ctx context.Context, entry *api.PVCInventory entry.HealthReason = "stale" } case "restic": - jobs, err := s.client.ListBackupJobsForPVC(ctx, entry.Namespace, entry.PVC) - if err != nil { + if err, hasErr := resticLookupErrors[entry.Namespace]; hasErr { entry.Healthy = false entry.HealthReason = "lookup_failed" entry.Error = err.Error() return } + + key := entry.Namespace + "/" + entry.PVC + jobs := resticJobsByPVC[key] + if jobs == nil { + jobs = []k8s.BackupJobSummary{} + } entry.BackupCount = len(jobs) if len(jobs) > 0 { entry.LastJobName = jobs[0].Name @@ -990,6 +1041,23 @@ func (s *Server) enrichPVCInventory(ctx context.Context, entry *api.PVCInventory } } +func sortBackupJobsNewestFirst(items []k8s.BackupJobSummary) { + sort.Slice(items, func(i, j int) bool { + left := items[i].CompletionTime + if left.IsZero() { + left = items[i].CreatedAt + } + right := items[j].CompletionTime + if right.IsZero() { + right = items[j].CreatedAt + } + if left.Equal(right) 
{ + return items[i].Name > items[j].Name + } + return left.After(right) + }) +} + func (s *Server) refreshTelemetry(ctx context.Context) { refreshCtx, cancel := context.WithTimeout(ctx, 2*time.Minute) defer cancel() diff --git a/internal/server/server_test.go b/internal/server/server_test.go index 7901681..20f03f7 100644 --- a/internal/server/server_test.go +++ b/internal/server/server_test.go @@ -52,6 +52,30 @@ func (f *fakeKubeClient) ListBackupJobsForPVC(_ context.Context, namespace, pvc return out, nil } +func (f *fakeKubeClient) ListBackupJobs(_ context.Context, namespace string) ([]k8s.BackupJobSummary, error) { + if f.backupJobs == nil { + return nil, nil + } + out := []k8s.BackupJobSummary{} + for key, items := range f.backupJobs { + prefix := namespace + "/" + if !strings.HasPrefix(key, prefix) { + continue + } + for _, item := range items { + copyItem := item + if copyItem.Namespace == "" { + copyItem.Namespace = namespace + } + if copyItem.PVC == "" { + copyItem.PVC = strings.TrimPrefix(key, prefix) + } + out = append(out, copyItem) + } + } + return out, nil +} + func (f *fakeKubeClient) PersistentVolumeClaimExists(_ context.Context, _, _ string) (bool, error) { return f.targetExists, nil } diff --git a/web/src/App.tsx b/web/src/App.tsx index ac51a6a..f8a3291 100644 --- a/web/src/App.tsx +++ b/web/src/App.tsx @@ -6,6 +6,7 @@ interface AuthInfo { user?: string; email?: string; groups?: string[]; + allowed_groups?: string[]; } interface BackupRecord { @@ -113,6 +114,27 @@ const EMPTY_B2: B2UsageResponse = { buckets: [] }; +function looksLikeHTML(value: string): boolean { + const sample = value.trim().slice(0, 512).toLowerCase(); + return sample.startsWith('<!doctype html') || sample.startsWith('<html'); +} + +function extractHTMLTitle(value: string): string { + const match = value.match(/<title[^>]*>\s*([^<]+)\s*<\/title>/i); + return match?.[1]?.trim() || ''; +} + +function extractRequestID(value: string): string { + const match = value.match(/Request ID:\s*([0-9a-f-]+)/i); + return match?.[1]?.trim() || ''; +} + +function delay(ms: number): Promise<void> { + return new Promise((resolve) => { + 
window.setTimeout(resolve, ms); + }); +} + async function fetchJSON<T>(input: string, init?: RequestInit): Promise<T> { const response = await fetch(input, init); const text = await response.text(); @@ -125,16 +147,27 @@ } } if (!response.ok) { - const message = typeof payload === 'object' && payload !== null && 'error' in payload + let message = typeof payload === 'object' && payload !== null && 'error' in payload ? String((payload as { error: unknown }).error) : `${response.status} ${response.statusText}`; + if (looksLikeHTML(text)) { + const title = extractHTMLTitle(text); + const requestID = extractRequestID(text); + message = `upstream gateway error (${response.status}${title ? ` ${title}` : ''})`; + if (requestID) { + message = `${message}; request id ${requestID}`; + } + } throw new Error(message); } return payload as T; } function formatBytes(value?: number): string { - if (!value || value <= 0) { + if (value === undefined || value === null || Number.isNaN(value)) { + return 'n/a'; + } + if (value <= 0) { return '0 B'; } const units = ['B', 'KiB', 'MiB', 'GiB', 'TiB']; @@ -278,16 +311,31 @@ }; const loadInventory = async (): Promise<void> => { + const fetchInventory = async (): Promise<InventoryResponse> => fetchJSON<InventoryResponse>('/v1/inventory'); try { - const payload = await fetchJSON<InventoryResponse>('/v1/inventory'); + const payload = await fetchInventory(); setInventory(payload); setInventoryError(''); if (!policyNamespace && payload.namespaces.length > 0) { setPolicyNamespace(payload.namespaces[0].name); } } catch (error) { - setInventory(null); - setInventoryError(error instanceof Error ? 
error.message : 'failed to load inventory'); + let message = error instanceof Error ? error.message : 'failed to load inventory'; + if (message.includes('upstream gateway error')) { + try { + await delay(1000); + const retry = await fetchInventory(); + setInventory(retry); + setInventoryError(''); + if (!policyNamespace && retry.namespaces.length > 0) { + setPolicyNamespace(retry.namespaces[0].name); + } + return; + } catch (retryError) { + message = retryError instanceof Error ? retryError.message : message; + } + } + setInventoryError(message); } }; @@ -510,8 +558,11 @@ }; const authLabel = auth - ? `${auth.user || auth.email || 'authenticated'} (${(auth.groups || []).join(', ') || 'no groups'})` + ? `${auth.user || auth.email || 'authenticated'} | groups: ${(auth.groups || []).join(', ') || 'none'}` : authError || 'anonymous'; + const allowedGroupLabel = auth?.allowed_groups && auth.allowed_groups.length > 0 + ? `Access requires: ${auth.allowed_groups.join(', ')}` + : 'Access requires: any authenticated user'; return (
@@ -522,6 +573,7 @@ function App() {
{authLabel} + {allowedGroupLabel} {activeBackupCount > 0 && ( {activeBackupCount} backup job{activeBackupCount === 1 ? '' : 's'} active @@ -561,6 +613,8 @@ function App() { const progressPct = Math.max(0, Math.min(100, Number(pvc.last_job_progress_pct || 0))); const progressClass = progressChipClass(pvc.last_job_state); const showProgress = Boolean(pvc.last_job_name) || (pvc.active_backups || 0) > 0; + const latestSizeLabel = pvc.driver === 'restic' ? 'n/a' : formatBytes(pvc.last_backup_size_bytes); + const totalStoredLabel = pvc.driver === 'restic' ? 'n/a' : formatBytes(pvc.total_backup_size_bytes); return (
@@ -577,8 +631,11 @@ function App() { Last backup: {pvc.last_backup_at ? `${formatTimestamp(pvc.last_backup_at)} (${(pvc.last_backup_age_hours || 0).toFixed(1)}h ago)` : 'never'}

- Backups: {pvc.completed_backups}/{pvc.backup_count} completed | Latest size: {formatBytes(pvc.last_backup_size_bytes)} | Total stored: {formatBytes(pvc.total_backup_size_bytes)} + Backups: {pvc.completed_backups}/{pvc.backup_count} completed | Latest size: {latestSizeLabel} | Total stored: {totalStoredLabel}

+ {pvc.driver === 'restic' && ( +

Per-PVC size is not currently emitted for restic snapshots because repository storage is deduplicated and shared.

+ )} {showProgress && (
@@ -752,6 +809,7 @@ function App() {

Backup Policies

+

Policy backups create new restic snapshots, but unchanged blocks are deduplicated, so repeated runs do not re-upload identical data.