ui+inventory: clarify auth, size telemetry, and harden inventory fetch

This commit is contained in:
Brad Stein 2026-04-13 12:03:14 -03:00
parent 3f203b2c14
commit 9e210ceffb
5 changed files with 217 additions and 38 deletions

View File

@ -99,6 +99,7 @@ type AuthInfoResponse struct {
User string `json:"user,omitempty"`
Email string `json:"email,omitempty"`
Groups []string `json:"groups,omitempty"`
AllowedGroups []string `json:"allowed_groups,omitempty"`
}
type BackupPolicy struct {

View File

@ -32,6 +32,26 @@ type BackupJobSummary struct {
State string
}
// ListBackupJobs returns one summary per soteria backup Job found in
// namespace. Jobs whose PVC label is missing or blank are skipped. The result
// is ordered by sortBackupJobSummaries before it is returned.
//
// A failure of the underlying List call is wrapped with the namespace name.
func (c *Client) ListBackupJobs(ctx context.Context, namespace string) ([]BackupJobSummary, error) {
	opts := metav1.ListOptions{
		LabelSelector: fmt.Sprintf("%s=soteria,%s=backup,%s=backup", labelAppName, labelComponent, labelAction),
	}
	list, err := c.Clientset.BatchV1().Jobs(namespace).List(ctx, opts)
	if err != nil {
		return nil, fmt.Errorf("list backup jobs for namespace %s: %w", namespace, err)
	}

	summaries := make([]BackupJobSummary, 0, len(list.Items))
	for _, job := range list.Items {
		// The PVC label is the only link between a Job and its volume here.
		if claim := strings.TrimSpace(job.Labels[labelPVC]); claim != "" {
			summaries = append(summaries, summarizeBackupJob(job, claim))
		}
	}
	sortBackupJobSummaries(summaries)
	return summaries, nil
}
func (c *Client) ListBackupJobsForPVC(ctx context.Context, namespace, pvc string) ([]BackupJobSummary, error) {
selector := fmt.Sprintf("%s=soteria,%s=backup,%s=backup,%s=%s", labelAppName, labelComponent, labelAction, labelPVC, pvc)
jobs, err := c.Clientset.BatchV1().Jobs(namespace).List(ctx, metav1.ListOptions{LabelSelector: selector})
@ -41,43 +61,51 @@ func (c *Client) ListBackupJobsForPVC(ctx context.Context, namespace, pvc string
out := make([]BackupJobSummary, 0, len(jobs.Items))
for _, job := range jobs.Items {
summary := BackupJobSummary{
Name: job.Name,
Namespace: job.Namespace,
PVC: pvc,
CreatedAt: job.CreationTimestamp.Time,
State: "Pending",
}
if job.Status.CompletionTime != nil {
summary.CompletionTime = job.Status.CompletionTime.Time
}
switch {
case job.Status.Succeeded > 0:
summary.State = "Completed"
case job.Status.Failed > 0:
summary.State = "Failed"
case job.Status.Active > 0:
summary.State = "Running"
}
out = append(out, summary)
out = append(out, summarizeBackupJob(job, pvc))
}
sort.Slice(out, func(i, j int) bool {
left := out[i].CompletionTime
sortBackupJobSummaries(out)
return out, nil
}
// summarizeBackupJob condenses a Kubernetes Job into a BackupJobSummary that
// is attributed to pvc.
//
// State starts as "Pending" and is derived from the Job's pod counters:
//   - "Completed" when at least one pod has succeeded;
//   - "Running" when pods are still active, even if earlier attempts failed,
//     because a non-zero Failed count alongside active pods means the Job is
//     being retried and has not reached a terminal state;
//   - "Failed" when pods have failed and none are active.
//
// CompletionTime stays at its zero value unless the Job reports one.
func summarizeBackupJob(job batchv1.Job, pvc string) BackupJobSummary {
	summary := BackupJobSummary{
		Name:      job.Name,
		Namespace: job.Namespace,
		PVC:       pvc,
		CreatedAt: job.CreationTimestamp.Time,
		State:     "Pending",
	}
	if job.Status.CompletionTime != nil {
		summary.CompletionTime = job.Status.CompletionTime.Time
	}
	switch {
	case job.Status.Succeeded > 0:
		summary.State = "Completed"
	case job.Status.Active > 0:
		// Checked before Failed: Status.Failed counts failed pods, not a
		// failed Job, so a retry in progress must still read as running.
		summary.State = "Running"
	case job.Status.Failed > 0:
		summary.State = "Failed"
	}
	return summary
}
// sortBackupJobSummaries sorts items in place, newest first. Each entry is
// ranked by its CompletionTime, falling back to CreatedAt when the completion
// time is zero. Entries with equal timestamps are ordered by Name, descending.
//
// NOTE(review): this span is rendered from a diff without +/- markers, so the
// superseded `out[...]` lines (and the trailing `return out, nil`) appear next
// to their `items[...]` replacements; only the `items[...]` lines belong to
// this function.
func sortBackupJobSummaries(items []BackupJobSummary) {
sort.Slice(items, func(i, j int) bool {
left := items[i].CompletionTime
if left.IsZero() {
left = out[i].CreatedAt
left = items[i].CreatedAt
}
right := out[j].CompletionTime
right := items[j].CompletionTime
if right.IsZero() {
right = out[j].CreatedAt
right = items[j].CreatedAt
}
if left.Equal(right) {
return out[i].Name > out[j].Name
return items[i].Name > items[j].Name
}
return left.After(right)
})
return out, nil
}
func (c *Client) CreateBackupJob(ctx context.Context, cfg *config.Config, req api.BackupRequest) (string, string, error) {

View File

@ -28,6 +28,7 @@ type kubeClient interface {
ResolvePVCVolume(ctx context.Context, namespace, pvcName string) (string, *corev1.PersistentVolumeClaim, *corev1.PersistentVolume, error)
CreateBackupJob(ctx context.Context, cfg *config.Config, req api.BackupRequest) (string, string, error)
CreateRestoreJob(ctx context.Context, cfg *config.Config, req api.RestoreTestRequest) (string, string, error)
ListBackupJobs(ctx context.Context, namespace string) ([]k8s.BackupJobSummary, error)
ListBackupJobsForPVC(ctx context.Context, namespace, pvc string) ([]k8s.BackupJobSummary, error)
ListBoundPVCs(ctx context.Context) ([]k8s.PVCSummary, error)
PersistentVolumeClaimExists(ctx context.Context, namespace, pvcName string) (bool, error)
@ -227,6 +228,7 @@ func (s *Server) handleWhoAmI(w http.ResponseWriter, r *http.Request) {
User: identity.User,
Email: identity.Email,
Groups: identity.Groups,
AllowedGroups: s.cfg.AllowedGroups,
})
}
@ -847,6 +849,8 @@ func (s *Server) buildInventory(ctx context.Context) (api.InventoryResponse, err
return api.InventoryResponse{}, err
}
resticJobsByPVC, resticLookupErrors := s.prefetchResticBackupJobs(ctx, pvcs)
groups := make(map[string][]api.PVCInventory)
for _, summary := range pvcs {
entry := api.PVCInventory{
@ -859,7 +863,7 @@ func (s *Server) buildInventory(ctx context.Context) (api.InventoryResponse, err
AccessModes: summary.AccessModes,
Driver: s.cfg.BackupDriver,
}
s.enrichPVCInventory(ctx, &entry)
s.enrichPVCInventory(ctx, &entry, resticJobsByPVC, resticLookupErrors)
groups[summary.Namespace] = append(groups[summary.Namespace], entry)
}
@ -882,7 +886,49 @@ func (s *Server) buildInventory(ctx context.Context) (api.InventoryResponse, err
return response, nil
}
func (s *Server) enrichPVCInventory(ctx context.Context, entry *api.PVCInventory) {
// prefetchResticBackupJobs lists backup jobs once per distinct namespace found
// in pvcs, instead of once per PVC.
//
// It returns two maps:
//   - jobs grouped under the key "<namespace>/<pvc>", each group sorted with
//     sortBackupJobsNewestFirst;
//   - the listing error for every namespace whose lookup failed, keyed by
//     namespace name.
//
// Both results are nil when the configured backup driver is not "restic".
func (s *Server) prefetchResticBackupJobs(ctx context.Context, pvcs []k8s.PVCSummary) (map[string][]k8s.BackupJobSummary, map[string]error) {
	if s.cfg.BackupDriver != "restic" {
		return nil, nil
	}

	// Collect each namespace once, then sort so lookups run in a stable order.
	seen := map[string]struct{}{}
	ordered := []string{}
	for _, pvc := range pvcs {
		if _, dup := seen[pvc.Namespace]; dup {
			continue
		}
		seen[pvc.Namespace] = struct{}{}
		ordered = append(ordered, pvc.Namespace)
	}
	sort.Strings(ordered)

	grouped := map[string][]k8s.BackupJobSummary{}
	failures := map[string]error{}
	for _, ns := range ordered {
		listed, err := s.client.ListBackupJobs(ctx, ns)
		if err != nil {
			// Record the failure and keep going so other namespaces still load.
			failures[ns] = err
			continue
		}
		for _, job := range listed {
			id := job.Namespace + "/" + job.PVC
			grouped[id] = append(grouped[id], job)
		}
	}

	for _, bucket := range grouped {
		sortBackupJobsNewestFirst(bucket)
	}
	return grouped, failures
}
func (s *Server) enrichPVCInventory(
ctx context.Context,
entry *api.PVCInventory,
resticJobsByPVC map[string][]k8s.BackupJobSummary,
resticLookupErrors map[string]error,
) {
switch s.cfg.BackupDriver {
case "longhorn":
backups, err := s.longhorn.ListBackups(ctx, entry.Volume)
@ -928,13 +974,18 @@ func (s *Server) enrichPVCInventory(ctx context.Context, entry *api.PVCInventory
entry.HealthReason = "stale"
}
case "restic":
jobs, err := s.client.ListBackupJobsForPVC(ctx, entry.Namespace, entry.PVC)
if err != nil {
if err, hasErr := resticLookupErrors[entry.Namespace]; hasErr {
entry.Healthy = false
entry.HealthReason = "lookup_failed"
entry.Error = err.Error()
return
}
key := entry.Namespace + "/" + entry.PVC
jobs := resticJobsByPVC[key]
if jobs == nil {
jobs = []k8s.BackupJobSummary{}
}
entry.BackupCount = len(jobs)
if len(jobs) > 0 {
entry.LastJobName = jobs[0].Name
@ -990,6 +1041,23 @@ func (s *Server) enrichPVCInventory(ctx context.Context, entry *api.PVCInventory
}
}
// sortBackupJobsNewestFirst reorders items in place so the most recent job
// comes first. A job is ranked by its CompletionTime, or by CreatedAt when the
// completion time is zero. Jobs with equal timestamps are ordered by Name,
// descending.
func sortBackupJobsNewestFirst(items []k8s.BackupJobSummary) {
	sort.Slice(items, func(i, j int) bool {
		a, b := items[i], items[j]

		aTime, bTime := a.CompletionTime, b.CompletionTime
		if aTime.IsZero() {
			aTime = a.CreatedAt
		}
		if bTime.IsZero() {
			bTime = b.CreatedAt
		}

		if !aTime.Equal(bTime) {
			return aTime.After(bTime)
		}
		return a.Name > b.Name
	})
}
func (s *Server) refreshTelemetry(ctx context.Context) {
refreshCtx, cancel := context.WithTimeout(ctx, 2*time.Minute)
defer cancel()

View File

@ -52,6 +52,30 @@ func (f *fakeKubeClient) ListBackupJobsForPVC(_ context.Context, namespace, pvc
return out, nil
}
// ListBackupJobs is the fake's namespace-wide job listing. It returns a copy
// of every job stored under a backupJobs key that begins with
// "<namespace>/". A blank Namespace on a copy is filled with namespace, and a
// blank PVC with the remainder of the key. It returns (nil, nil) when
// backupJobs is nil and never returns an error.
func (f *fakeKubeClient) ListBackupJobs(_ context.Context, namespace string) ([]k8s.BackupJobSummary, error) {
	if f.backupJobs == nil {
		return nil, nil
	}

	prefix := namespace + "/"
	matched := []k8s.BackupJobSummary{}
	for key, stored := range f.backupJobs {
		if !strings.HasPrefix(key, prefix) {
			continue
		}
		claim := key[len(prefix):]
		for _, job := range stored {
			// job is a per-iteration copy, so the stored fixture is untouched.
			if job.Namespace == "" {
				job.Namespace = namespace
			}
			if job.PVC == "" {
				job.PVC = claim
			}
			matched = append(matched, job)
		}
	}
	return matched, nil
}
func (f *fakeKubeClient) PersistentVolumeClaimExists(_ context.Context, _, _ string) (bool, error) {
return f.targetExists, nil
}

View File

@ -6,6 +6,7 @@ interface AuthInfo {
user?: string;
email?: string;
groups?: string[];
allowed_groups?: string[];
}
interface BackupRecord {
@ -113,6 +114,27 @@ const EMPTY_B2: B2UsageResponse = {
buckets: []
};
/**
 * Reports whether a response body appears to be an HTML document.
 *
 * Only the first 512 characters of the trimmed body are inspected,
 * case-insensitively, for a leading doctype or an `<html` tag.
 */
function looksLikeHTML(value: string): boolean {
  const head = value.trim().slice(0, 512).toLowerCase();
  if (head.startsWith('<!doctype html')) {
    return true;
  }
  return head.indexOf('<html') !== -1;
}
/**
 * Returns the trimmed text of the first attribute-less `<title>` element in
 * `value` (tag name matched case-insensitively), or an empty string when no
 * such element is found.
 */
function extractHTMLTitle(value: string): string {
  const found = /<title>\s*([^<]+)\s*<\/title>/i.exec(value);
  if (!found || found[1] === undefined) {
    return '';
  }
  return found[1].trim();
}
/**
 * Extracts the identifier that follows a "Request ID:" label in `value`.
 *
 * The label is matched case-insensitively and the identifier is the run of
 * hex digits and dashes after it. Returns an empty string when the label is
 * absent.
 */
function extractRequestID(value: string): string {
  const found = /Request ID:\s*([0-9a-f-]+)/i.exec(value);
  if (!found || found[1] === undefined) {
    return '';
  }
  return found[1].trim();
}
/**
 * Returns a promise that resolves once a `window.setTimeout` timer of `ms`
 * milliseconds fires. The promise never rejects.
 */
function delay(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    window.setTimeout(() => done(), ms);
  });
}
async function fetchJSON<T>(input: string, init?: RequestInit): Promise<T> {
const response = await fetch(input, init);
const text = await response.text();
@ -125,16 +147,27 @@ async function fetchJSON<T>(input: string, init?: RequestInit): Promise<T> {
}
}
if (!response.ok) {
const message = typeof payload === 'object' && payload !== null && 'error' in payload
let message = typeof payload === 'object' && payload !== null && 'error' in payload
? String((payload as { error: unknown }).error)
: `${response.status} ${response.statusText}`;
if (looksLikeHTML(text)) {
const title = extractHTMLTitle(text);
const requestID = extractRequestID(text);
message = `upstream gateway error (${response.status}${title ? ` ${title}` : ''})`;
if (requestID) {
message = `${message}; request id ${requestID}`;
}
}
throw new Error(message);
}
return payload as T;
}
function formatBytes(value?: number): string {
if (!value || value <= 0) {
if (value === undefined || value === null || Number.isNaN(value)) {
return 'n/a';
}
if (value <= 0) {
return '0 B';
}
const units = ['B', 'KiB', 'MiB', 'GiB', 'TiB'];
@ -278,16 +311,31 @@ function App() {
};
// loadInventory fetches `/v1/inventory` and publishes the result to component
// state. On success it stores the payload, clears the inventory error, and,
// when no policy namespace is selected yet, selects the first namespace in the
// payload. When the failure message contains 'upstream gateway error' it waits
// 1000 ms and retries once; if the retry also fails, the retry's message is
// the one reported.
//
// NOTE(review): this span is rendered from a diff without +/- markers, so
// superseded lines (the direct fetchJSON call and the first two statements of
// the catch block) appear next to their replacements.
const loadInventory = async (): Promise<void> => {
const fetchInventory = async (): Promise<InventoryResponse> => fetchJSON<InventoryResponse>('/v1/inventory');
try {
const payload = await fetchJSON<InventoryResponse>('/v1/inventory');
const payload = await fetchInventory();
setInventory(payload);
setInventoryError('');
if (!policyNamespace && payload.namespaces.length > 0) {
setPolicyNamespace(payload.namespaces[0].name);
}
} catch (error) {
setInventory(null);
setInventoryError(error instanceof Error ? error.message : 'failed to load inventory');
let message = error instanceof Error ? error.message : 'failed to load inventory';
// Only failures that fetchJSON labelled as gateway errors are retried.
if (message.includes('upstream gateway error')) {
try {
await delay(1000);
const retry = await fetchInventory();
setInventory(retry);
setInventoryError('');
if (!policyNamespace && retry.namespaces.length > 0) {
setPolicyNamespace(retry.namespaces[0].name);
}
return;
} catch (retryError) {
// Prefer the retry's message; keep the first one if the retry threw a non-Error.
message = retryError instanceof Error ? retryError.message : message;
}
}
setInventoryError(message);
}
};
@ -510,8 +558,11 @@ function App() {
};
const authLabel = auth
? `${auth.user || auth.email || 'authenticated'} (${(auth.groups || []).join(', ') || 'no groups'})`
? `${auth.user || auth.email || 'authenticated'} | groups: ${(auth.groups || []).join(', ') || 'none'}`
: authError || 'anonymous';
const allowedGroupLabel = auth?.allowed_groups && auth.allowed_groups.length > 0
? `Access requires: ${auth.allowed_groups.join(', ')}`
: 'Access requires: any authenticated user';
return (
<div className="app-shell">
@ -522,6 +573,7 @@ function App() {
</div>
<div className="toolbar">
<span className={`chip ${auth ? 'good' : 'warn'}`}>{authLabel}</span>
<span className="chip">{allowedGroupLabel}</span>
{activeBackupCount > 0 && (
<span className="chip warn">
{activeBackupCount} backup job{activeBackupCount === 1 ? '' : 's'} active
@ -561,6 +613,8 @@ function App() {
const progressPct = Math.max(0, Math.min(100, Number(pvc.last_job_progress_pct || 0)));
const progressClass = progressChipClass(pvc.last_job_state);
const showProgress = Boolean(pvc.last_job_name) || (pvc.active_backups || 0) > 0;
const latestSizeLabel = pvc.driver === 'restic' ? 'n/a' : formatBytes(pvc.last_backup_size_bytes);
const totalStoredLabel = pvc.driver === 'restic' ? 'n/a' : formatBytes(pvc.total_backup_size_bytes);
return (
<article key={`${pvc.namespace}/${pvc.pvc}`} className="pvc-card">
@ -577,8 +631,11 @@ function App() {
Last backup: {pvc.last_backup_at ? `${formatTimestamp(pvc.last_backup_at)} (${(pvc.last_backup_age_hours || 0).toFixed(1)}h ago)` : 'never'}
</p>
<p className="subtle tiny">
Backups: {pvc.completed_backups}/{pvc.backup_count} completed | Latest size: {formatBytes(pvc.last_backup_size_bytes)} | Total stored: {formatBytes(pvc.total_backup_size_bytes)}
Backups: {pvc.completed_backups}/{pvc.backup_count} completed | Latest size: {latestSizeLabel} | Total stored: {totalStoredLabel}
</p>
{pvc.driver === 'restic' && (
<p className="subtle tiny">Per-PVC size is not currently emitted for restic snapshots because repository storage is deduplicated and shared.</p>
)}
{showProgress && (
<div className="backup-progress">
<div className="progress-header">
@ -752,6 +809,7 @@ function App() {
<section className="panel scroll-panel">
<h2>Backup Policies</h2>
<p className="subtle tiny">Policy backups create new restic snapshots, but unchanged blocks are deduplicated, so repeated runs do not re-upload identical data.</p>
<div className="stack">
<label>
Namespace