2026-04-12 11:09:49 -03:00
package server
import (
"fmt"
"net/http"
"sort"
"strings"
"sync"
"time"
"scm.bstein.dev/bstein/soteria/internal/api"
)
// metricSample is a single exported series: one label set and its current value.
type metricSample struct {
	labels map[string]string
	value  float64
}

// telemetry holds the in-memory state behind the metrics endpoint.
//
// Every map is keyed by the rendered label set (see metricKey), so each
// distinct label combination owns exactly one sample. All fields are guarded
// by mu.
type telemetry struct {
	mu sync.RWMutex

	// Counters incremented by the Record* methods.
	backupRequests          map[string]metricSample
	restoreRequests         map[string]metricSample
	policyBackups           map[string]metricSample
	namespaceBackupRequests map[string]metricSample
	namespaceRestoreReqs    map[string]metricSample
	authzDenials            map[string]metricSample

	// Inventory refresh bookkeeping: a failure counter and the Unix time of
	// the last RecordInventory call.
	inventoryRefreshFailure float64
	inventoryRefreshTime    float64

	// Per-PVC gauges, rebuilt from scratch by RecordInventory.
	pvcBackupAgeHours    map[string]metricSample
	pvcBackupHealth      map[string]metricSample
	pvcBackupLastSuccess map[string]metricSample
	pvcBackupCount       map[string]metricSample
}

// newTelemetry returns a telemetry value whose sample maps are all allocated
// and empty, so the Record* methods can write to them without nil checks.
func newTelemetry() *telemetry {
	return &telemetry{
		backupRequests:          map[string]metricSample{},
		restoreRequests:         map[string]metricSample{},
		policyBackups:           map[string]metricSample{},
		namespaceBackupRequests: map[string]metricSample{},
		namespaceRestoreReqs:    map[string]metricSample{},
		authzDenials:            map[string]metricSample{},
		pvcBackupAgeHours:       map[string]metricSample{},
		pvcBackupHealth:         map[string]metricSample{},
		pvcBackupLastSuccess:    map[string]metricSample{},
		pvcBackupCount:          map[string]metricSample{},
	}
}
// Handler returns an http.Handler that answers every request with the
// current metrics rendered as text. The request itself is not inspected.
func (t *telemetry) Handler() http.Handler {
	serve := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
		payload := []byte(t.render())
		// Write errors are deliberately discarded: nothing useful can be
		// done once the response has started.
		_, _ = w.Write(payload)
	}
	return http.HandlerFunc(serve)
}
// RecordBackupRequest adds one to the backup request counter identified by
// the given driver and result labels.
func (t *telemetry) RecordBackupRequest(driver, result string) {
	labels := map[string]string{
		"driver": driver,
		"result": result,
	}

	t.mu.Lock()
	defer t.mu.Unlock()
	incMetric(t.backupRequests, labels)
}
// RecordRestoreRequest adds one to the restore request counter identified by
// the given driver and result labels.
func (t *telemetry) RecordRestoreRequest(driver, result string) {
	labels := map[string]string{
		"driver": driver,
		"result": result,
	}

	t.mu.Lock()
	defer t.mu.Unlock()
	incMetric(t.restoreRequests, labels)
}
2026-04-12 14:32:39 -03:00
// RecordPolicyBackup adds one to the policy backup counter for the given
// result label.
func (t *telemetry) RecordPolicyBackup(result string) {
	labels := map[string]string{"result": result}

	t.mu.Lock()
	defer t.mu.Unlock()
	incMetric(t.policyBackups, labels)
}
// RecordNamespaceBackupRequest adds one to the namespace-level backup request
// counter identified by the given driver and result labels.
func (t *telemetry) RecordNamespaceBackupRequest(driver, result string) {
	labels := map[string]string{
		"driver": driver,
		"result": result,
	}

	t.mu.Lock()
	defer t.mu.Unlock()
	incMetric(t.namespaceBackupRequests, labels)
}
// RecordNamespaceRestoreRequest adds one to the namespace-level restore
// request counter identified by the given driver and result labels.
func (t *telemetry) RecordNamespaceRestoreRequest(driver, result string) {
	labels := map[string]string{
		"driver": driver,
		"result": result,
	}

	t.mu.Lock()
	defer t.mu.Unlock()
	incMetric(t.namespaceRestoreReqs, labels)
}
2026-04-12 11:09:49 -03:00
// RecordAuthzDenied adds one to the authorization denial counter for the
// given reason label.
func (t *telemetry) RecordAuthzDenied(reason string) {
	labels := map[string]string{"reason": reason}

	t.mu.Lock()
	defer t.mu.Unlock()
	incMetric(t.authzDenials, labels)
}
// RecordInventoryFailure adds one to the inventory refresh failure counter.
func (t *telemetry) RecordInventoryFailure() {
	t.mu.Lock()
	defer t.mu.Unlock()

	t.inventoryRefreshFailure += 1
}
// RecordInventory rebuilds the per-PVC gauges from inv and stamps the refresh
// time.
//
// The four PVC maps are replaced with fresh empty maps before the walk, so a
// PVC that is absent from inv no longer has samples afterwards. For every PVC
// the backup count and health (1 when Healthy, otherwise 0) are always set.
// The age gauge is only set when LastBackupAt is non-empty, and the
// last-success timestamp additionally requires parseBackupTime to accept that
// value.
func (t *telemetry) RecordInventory(inv api.InventoryResponse) {
	t.mu.Lock()
	defer t.mu.Unlock()

	t.pvcBackupAgeHours = map[string]metricSample{}
	t.pvcBackupHealth = map[string]metricSample{}
	t.pvcBackupLastSuccess = map[string]metricSample{}
	t.pvcBackupCount = map[string]metricSample{}

	for _, ns := range inv.Namespaces {
		for _, entry := range ns.PVCs {
			labels := map[string]string{
				"namespace": entry.Namespace,
				"pvc":       entry.PVC,
				"volume":    entry.Volume,
				"driver":    entry.Driver,
			}

			health := 0.0
			if entry.Healthy {
				health = 1
			}
			setMetric(t.pvcBackupCount, labels, float64(entry.BackupCount))
			setMetric(t.pvcBackupHealth, labels, health)

			// Without a recorded backup there is no age or timestamp to report.
			if entry.LastBackupAt != "" {
				setMetric(t.pvcBackupAgeHours, labels, entry.LastBackupAgeHours)
				if ts, ok := parseBackupTime(entry.LastBackupAt); ok {
					setMetric(t.pvcBackupLastSuccess, labels, float64(ts.Unix()))
				}
			}
		}
	}

	t.inventoryRefreshTime = float64(time.Now().Unix())
}
func ( t * telemetry ) render ( ) string {
t . mu . RLock ( )
defer t . mu . RUnlock ( )
var b strings . Builder
writeMetricFamily ( & b , "soteria_backup_requests_total" , "counter" , "Backup requests handled by Soteria." , metricValues ( t . backupRequests ) )
writeMetricFamily ( & b , "soteria_restore_requests_total" , "counter" , "Restore requests handled by Soteria." , metricValues ( t . restoreRequests ) )
2026-04-12 14:32:39 -03:00
writeMetricFamily ( & b , "soteria_policy_backups_total" , "counter" , "Policy scheduler backup execution outcomes." , metricValues ( t . policyBackups ) )
writeMetricFamily ( & b , "soteria_namespace_backup_requests_total" , "counter" , "Namespace-level backup request outcomes." , metricValues ( t . namespaceBackupRequests ) )
writeMetricFamily ( & b , "soteria_namespace_restore_requests_total" , "counter" , "Namespace-level restore request outcomes." , metricValues ( t . namespaceRestoreReqs ) )
2026-04-12 11:09:49 -03:00
writeMetricFamily ( & b , "soteria_authz_denials_total" , "counter" , "Authorization denials emitted by Soteria." , metricValues ( t . authzDenials ) )
writeMetricFamily ( & b , "soteria_inventory_refresh_failures_total" , "counter" , "Inventory refresh failures while computing PVC backup telemetry." , [ ] metricSample { { value : t . inventoryRefreshFailure } } )
writeMetricFamily ( & b , "soteria_inventory_refresh_timestamp_seconds" , "gauge" , "Unix timestamp of the last successful inventory refresh." , [ ] metricSample { { value : t . inventoryRefreshTime } } )
writeMetricFamily ( & b , "pvc_backup_age_hours" , "gauge" , "Age in hours of the latest successful PVC backup known to Soteria." , metricValues ( t . pvcBackupAgeHours ) )
writeMetricFamily ( & b , "pvc_backup_health" , "gauge" , "PVC backup health according to Soteria: 1=fresh backup within policy, 0=missing/stale/error." , metricValues ( t . pvcBackupHealth ) )
writeMetricFamily ( & b , "pvc_backup_last_success_timestamp_seconds" , "gauge" , "Unix timestamp of the latest successful PVC backup known to Soteria." , metricValues ( t . pvcBackupLastSuccess ) )
writeMetricFamily ( & b , "pvc_backup_count" , "gauge" , "Count of backup records discovered for a PVC." , metricValues ( t . pvcBackupCount ) )
return b . String ( )
}
// metricValues returns the samples of source ordered by their map key, which
// gives the rendered output a stable order regardless of map iteration.
// The result is always non-nil, even for an empty or nil source.
func metricValues(source map[string]metricSample) []metricSample {
	ordered := make([]string, 0, len(source))
	for k := range source {
		ordered = append(ordered, k)
	}
	sort.Strings(ordered)

	samples := make([]metricSample, len(ordered))
	for i, k := range ordered {
		samples[i] = source[k]
	}
	return samples
}
// writeMetricFamily appends one metric family to b: a "# HELP" line, a
// "# TYPE" line, then one "name{labels} value" line per sample. The header
// lines are written even when samples is empty. Values are formatted with %g.
func writeMetricFamily(b *strings.Builder, name, metricType, help string, samples []metricSample) {
	fmt.Fprintf(b, "# HELP %s %s\n", name, help)
	fmt.Fprintf(b, "# TYPE %s %s\n", name, metricType)
	for i := range samples {
		fmt.Fprintf(b, "%s%s %g\n", name, renderLabels(samples[i].labels), samples[i].value)
	}
}
// renderLabels serializes labels as a text-format label block such as
// {driver="longhorn",result="ok"}, with the label names in sorted order so
// the output is deterministic. An empty or nil map renders as "".
//
// Label values are escaped the way the Prometheus text exposition format
// requires: only backslash, double quote and line feed are escaped (as \\,
// \" and \n). Go's %q verb is not suitable here because it also emits Go-only
// escapes such as \t, \x.. and \u...., which scrapers reject. Invalid UTF-8
// in a value is replaced by U+FFFD so a single bad value cannot make the
// whole payload unparseable.
//
// The same string is used as the map key for samples (see metricKey); the
// escaping keeps distinct valid label sets distinct.
func renderLabels(labels map[string]string) string {
	if len(labels) == 0 {
		return ""
	}

	keys := make([]string, 0, len(labels))
	for key := range labels {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	var b strings.Builder
	b.WriteByte('{')
	for i, key := range keys {
		if i > 0 {
			b.WriteByte(',')
		}
		b.WriteString(key)
		b.WriteString(`="`)

		value := strings.ToValidUTF8(labels[key], "\uFFFD")
		// Byte-wise scanning is safe: the three escaped characters are ASCII
		// and never occur inside a multi-byte UTF-8 sequence.
		for j := 0; j < len(value); j++ {
			switch c := value[j]; c {
			case '\\':
				b.WriteString(`\\`)
			case '"':
				b.WriteString(`\"`)
			case '\n':
				b.WriteString(`\n`)
			default:
				b.WriteByte(c)
			}
		}

		b.WriteByte('"')
	}
	b.WriteByte('}')
	return b.String()
}
// metricKey derives the map key under which a label set's sample is stored.
// The key is simply the rendered label block, so equal label sets share a key.
func metricKey(labels map[string]string) string {
	key := renderLabels(labels)
	return key
}
// incMetric adds one to the sample for labels in target. A label set seen for
// the first time is stored with value 1 and its own copy of labels, so later
// changes to the caller's map do not affect the stored sample.
func incMetric(target map[string]metricSample, labels map[string]string) {
	key := metricKey(labels)
	if existing, found := target[key]; found {
		existing.value++
		target[key] = existing
		return
	}
	target[key] = metricSample{labels: cloneLabels(labels), value: 1}
}
// setMetric stores value as the sample for labels in target, overwriting any
// previous sample with the same label set. The stored sample keeps its own
// copy of labels.
func setMetric(target map[string]metricSample, labels map[string]string, value float64) {
	sample := metricSample{
		labels: cloneLabels(labels),
		value:  value,
	}
	target[metricKey(labels)] = sample
}
// cloneLabels returns a new map holding the same key/value pairs as labels.
// The result is never nil, even when labels is nil or empty.
func cloneLabels(labels map[string]string) map[string]string {
	dup := make(map[string]string, len(labels))
	for name := range labels {
		dup[name] = labels[name]
	}
	return dup
}