package server

import (
	"fmt"
	"net/http"
	"sort"
	"strings"
	"sync"
	"time"

	"scm.bstein.dev/bstein/soteria/internal/api"
)

// labelValueEscaper applies the only three escapes the Prometheus text
// exposition format defines inside a quoted label value: backslash,
// line feed and double quote.
var labelValueEscaper = strings.NewReplacer(`\`, `\\`, "\n", `\n`, `"`, `\"`)

// metricSample is one exposed series: a label set and its current value.
type metricSample struct {
	labels map[string]string
	value  float64
}

// telemetry is an in-memory metric registry that renders itself in the
// Prometheus text exposition format. Each sample map is keyed by the rendered
// label set (see metricKey).
type telemetry struct {
	// mu guards every field below.
	mu sync.RWMutex

	// Request and denial counters.
	backupRequests  map[string]metricSample
	restoreRequests map[string]metricSample
	authzDenials    map[string]metricSample

	// inventoryRefreshFailure counts calls to RecordInventoryFailure.
	inventoryRefreshFailure float64

	// inventoryRefreshTime is the Unix time, in seconds, of the latest
	// RecordInventory call; it stays 0 until the first call.
	inventoryRefreshTime float64

	// Per-PVC gauges, rebuilt from scratch on every RecordInventory call.
	pvcBackupAgeHours    map[string]metricSample
	pvcBackupHealth      map[string]metricSample
	pvcBackupLastSuccess map[string]metricSample
	pvcBackupCount       map[string]metricSample
}

// newTelemetry returns a telemetry registry with every sample map allocated,
// so the Record* methods can write to it immediately.
func newTelemetry() *telemetry {
	return &telemetry{
		backupRequests:       map[string]metricSample{},
		restoreRequests:      map[string]metricSample{},
		authzDenials:         map[string]metricSample{},
		pvcBackupAgeHours:    map[string]metricSample{},
		pvcBackupHealth:      map[string]metricSample{},
		pvcBackupLastSuccess: map[string]metricSample{},
		pvcBackupCount:       map[string]metricSample{},
	}
}

// Handler returns an http.Handler that answers every request with the current
// metrics rendered as Prometheus text format version 0.0.4.
func (t *telemetry) Handler() http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
		// A failed write is deliberately ignored: the handler has no other
		// channel on which to report it to the client.
		_, _ = w.Write([]byte(t.render()))
	})
}

// RecordBackupRequest increments the backup request counter for the given
// driver and result labels.
func (t *telemetry) RecordBackupRequest(driver, result string) {
	t.mu.Lock()
	defer t.mu.Unlock()
	incMetric(t.backupRequests, map[string]string{"driver": driver, "result": result})
}

// RecordRestoreRequest increments the restore request counter for the given
// driver and result labels.
func (t *telemetry) RecordRestoreRequest(driver, result string) {
	t.mu.Lock()
	defer t.mu.Unlock()
	incMetric(t.restoreRequests, map[string]string{"driver": driver, "result": result})
}

// RecordAuthzDenied increments the authorization denial counter for the given
// reason label.
func (t *telemetry) RecordAuthzDenied(reason string) {
	t.mu.Lock()
	defer t.mu.Unlock()
	incMetric(t.authzDenials, map[string]string{"reason": reason})
}

// RecordInventoryFailure increments the inventory refresh failure counter.
func (t *telemetry) RecordInventoryFailure() {
	t.mu.Lock()
	defer t.mu.Unlock()
	t.inventoryRefreshFailure++
}

// RecordInventory replaces all per-PVC gauges with the contents of inv and
// stamps the refresh time with the current wall clock.
//
// Every PVC gets a backup count and a health sample. The age sample is written
// only when LastBackupAt is non-empty, and the last-success sample only when
// parseBackupTime (defined elsewhere in this package) additionally reports ok
// for that value.
func (t *telemetry) RecordInventory(inv api.InventoryResponse) {
	t.mu.Lock()
	defer t.mu.Unlock()

	// Start from empty maps so PVCs missing from inv stop being exported.
	t.pvcBackupAgeHours = map[string]metricSample{}
	t.pvcBackupHealth = map[string]metricSample{}
	t.pvcBackupLastSuccess = map[string]metricSample{}
	t.pvcBackupCount = map[string]metricSample{}

	for _, namespace := range inv.Namespaces {
		for _, pvc := range namespace.PVCs {
			labels := map[string]string{
				"namespace": pvc.Namespace,
				"pvc":       pvc.PVC,
				"volume":    pvc.Volume,
				"driver":    pvc.Driver,
			}

			setMetric(t.pvcBackupCount, labels, float64(pvc.BackupCount))

			health := 0.0
			if pvc.Healthy {
				health = 1
			}
			setMetric(t.pvcBackupHealth, labels, health)

			// Without a recorded backup there is no age or timestamp to export.
			if pvc.LastBackupAt == "" {
				continue
			}
			setMetric(t.pvcBackupAgeHours, labels, pvc.LastBackupAgeHours)
			if ts, ok := parseBackupTime(pvc.LastBackupAt); ok {
				setMetric(t.pvcBackupLastSuccess, labels, float64(ts.Unix()))
			}
		}
	}

	t.inventoryRefreshTime = float64(time.Now().Unix())
}

// render returns every metric family as Prometheus text exposition output.
// Families are written in a fixed order and samples within a family are sorted
// by rendered label set, so the output is deterministic for a given state.
func (t *telemetry) render() string {
	t.mu.RLock()
	defer t.mu.RUnlock()

	var b strings.Builder
	writeMetricFamily(&b, "soteria_backup_requests_total", "counter",
		"Backup requests handled by Soteria.",
		metricValues(t.backupRequests))
	writeMetricFamily(&b, "soteria_restore_requests_total", "counter",
		"Restore requests handled by Soteria.",
		metricValues(t.restoreRequests))
	writeMetricFamily(&b, "soteria_authz_denials_total", "counter",
		"Authorization denials emitted by Soteria.",
		metricValues(t.authzDenials))
	writeMetricFamily(&b, "soteria_inventory_refresh_failures_total", "counter",
		"Inventory refresh failures while computing PVC backup telemetry.",
		[]metricSample{{value: t.inventoryRefreshFailure}})
	writeMetricFamily(&b, "soteria_inventory_refresh_timestamp_seconds", "gauge",
		"Unix timestamp of the last successful inventory refresh.",
		[]metricSample{{value: t.inventoryRefreshTime}})
	writeMetricFamily(&b, "pvc_backup_age_hours", "gauge",
		"Age in hours of the latest successful PVC backup known to Soteria.",
		metricValues(t.pvcBackupAgeHours))
	writeMetricFamily(&b, "pvc_backup_health", "gauge",
		"PVC backup health according to Soteria: 1=fresh backup within policy, 0=missing/stale/error.",
		metricValues(t.pvcBackupHealth))
	writeMetricFamily(&b, "pvc_backup_last_success_timestamp_seconds", "gauge",
		"Unix timestamp of the latest successful PVC backup known to Soteria.",
		metricValues(t.pvcBackupLastSuccess))
	writeMetricFamily(&b, "pvc_backup_count", "gauge",
		"Count of backup records discovered for a PVC.",
		metricValues(t.pvcBackupCount))
	return b.String()
}

// metricValues returns the samples of source ordered by map key, giving a
// stable order despite Go's randomized map iteration.
func metricValues(source map[string]metricSample) []metricSample {
	keys := make([]string, 0, len(source))
	for key := range source {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	values := make([]metricSample, 0, len(keys))
	for _, key := range keys {
		values = append(values, source[key])
	}
	return values
}

// writeMetricFamily appends one metric family to b: a "# HELP" line, a
// "# TYPE" line, then one line per sample in the order given. A family with no
// samples still gets its two header lines.
func writeMetricFamily(b *strings.Builder, name, metricType, help string, samples []metricSample) {
	b.WriteString("# HELP " + name + " " + help + "\n")
	b.WriteString("# TYPE " + name + " " + metricType + "\n")
	for _, sample := range samples {
		b.WriteString(name)
		b.WriteString(renderLabels(sample.labels))
		fmt.Fprintf(b, " %g\n", sample.value)
	}
}

// renderLabels formats labels as a Prometheus label block such as
// {driver="x",result="ok"}, with label names sorted. It returns "" for an
// empty or nil map.
//
// Values are escaped per the text exposition format rather than with Go's %q.
// %q emits Go-only escapes such as \t or \x80 for control characters and
// invalid UTF-8, which the exposition format does not define.
func renderLabels(labels map[string]string) string {
	if len(labels) == 0 {
		return ""
	}
	keys := make([]string, 0, len(labels))
	for key := range labels {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	var b strings.Builder
	b.WriteByte('{')
	for i, key := range keys {
		if i > 0 {
			b.WriteByte(',')
		}
		b.WriteString(key)
		b.WriteString(`="`)
		b.WriteString(escapeLabelValue(labels[key]))
		b.WriteByte('"')
	}
	b.WriteByte('}')
	return b.String()
}

// escapeLabelValue makes value safe to place between the double quotes of a
// label: invalid UTF-8 sequences are replaced with U+FFFD, then backslash,
// double quote and line feed are escaped.
func escapeLabelValue(value string) string {
	return labelValueEscaper.Replace(strings.ToValidUTF8(value, "\uFFFD"))
}

// metricKey returns the map key under which a sample with these labels is
// stored. It is the rendered label block, so two label sets share a key
// exactly when they would be exposed as the same series.
func metricKey(labels map[string]string) string {
	return renderLabels(labels)
}

// incMetric adds 1 to the sample in target identified by labels, creating it
// with value 1 on first use.
func incMetric(target map[string]metricSample, labels map[string]string) {
	key := metricKey(labels)
	sample, ok := target[key]
	if !ok {
		target[key] = metricSample{labels: cloneLabels(labels), value: 1}
		return
	}
	sample.value++
	target[key] = sample
}

// setMetric stores value for the sample in target identified by labels,
// overwriting any previous sample with the same label set.
func setMetric(target map[string]metricSample, labels map[string]string, value float64) {
	target[metricKey(labels)] = metricSample{labels: cloneLabels(labels), value: value}
}

// cloneLabels returns a copy of labels so stored samples are unaffected if the
// caller later mutates the map it passed in.
func cloneLabels(labels map[string]string) map[string]string {
	out := make(map[string]string, len(labels))
	for key, value := range labels {
		out[key] = value
	}
	return out
}