204 lines
6.4 KiB
Go
204 lines
6.4 KiB
Go
package server
|
|
|
|
import (
|
|
"fmt"
|
|
"net/http"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"scm.bstein.dev/bstein/soteria/internal/api"
|
|
)
|
|
|
|
// metricSample is one sample of a metric family: a value together with the
// label set that identifies its series.
type metricSample struct {
	// labels holds the label name/value pairs of the series. It is nil for
	// families that expose a single unlabelled sample.
	labels map[string]string
	// value is the current counter or gauge reading.
	value float64
}
|
|
|
|
type telemetry struct {
|
|
mu sync.RWMutex
|
|
backupRequests map[string]metricSample
|
|
restoreRequests map[string]metricSample
|
|
authzDenials map[string]metricSample
|
|
inventoryRefreshFailure float64
|
|
inventoryRefreshTime float64
|
|
pvcBackupAgeHours map[string]metricSample
|
|
pvcBackupHealth map[string]metricSample
|
|
pvcBackupLastSuccess map[string]metricSample
|
|
pvcBackupCount map[string]metricSample
|
|
}
|
|
|
|
func newTelemetry() *telemetry {
|
|
return &telemetry{
|
|
backupRequests: map[string]metricSample{},
|
|
restoreRequests: map[string]metricSample{},
|
|
authzDenials: map[string]metricSample{},
|
|
pvcBackupAgeHours: map[string]metricSample{},
|
|
pvcBackupHealth: map[string]metricSample{},
|
|
pvcBackupLastSuccess: map[string]metricSample{},
|
|
pvcBackupCount: map[string]metricSample{},
|
|
}
|
|
}
|
|
|
|
func (t *telemetry) Handler() http.Handler {
|
|
return http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
|
w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
|
|
_, _ = w.Write([]byte(t.render()))
|
|
})
|
|
}
|
|
|
|
func (t *telemetry) RecordBackupRequest(driver, result string) {
|
|
t.mu.Lock()
|
|
defer t.mu.Unlock()
|
|
incMetric(t.backupRequests, map[string]string{"driver": driver, "result": result})
|
|
}
|
|
|
|
func (t *telemetry) RecordRestoreRequest(driver, result string) {
|
|
t.mu.Lock()
|
|
defer t.mu.Unlock()
|
|
incMetric(t.restoreRequests, map[string]string{"driver": driver, "result": result})
|
|
}
|
|
|
|
func (t *telemetry) RecordAuthzDenied(reason string) {
|
|
t.mu.Lock()
|
|
defer t.mu.Unlock()
|
|
incMetric(t.authzDenials, map[string]string{"reason": reason})
|
|
}
|
|
|
|
func (t *telemetry) RecordInventoryFailure() {
|
|
t.mu.Lock()
|
|
defer t.mu.Unlock()
|
|
t.inventoryRefreshFailure++
|
|
}
|
|
|
|
func (t *telemetry) RecordInventory(inv api.InventoryResponse) {
|
|
t.mu.Lock()
|
|
defer t.mu.Unlock()
|
|
|
|
t.pvcBackupAgeHours = map[string]metricSample{}
|
|
t.pvcBackupHealth = map[string]metricSample{}
|
|
t.pvcBackupLastSuccess = map[string]metricSample{}
|
|
t.pvcBackupCount = map[string]metricSample{}
|
|
|
|
for _, namespace := range inv.Namespaces {
|
|
for _, pvc := range namespace.PVCs {
|
|
labels := map[string]string{
|
|
"namespace": pvc.Namespace,
|
|
"pvc": pvc.PVC,
|
|
"volume": pvc.Volume,
|
|
"driver": pvc.Driver,
|
|
}
|
|
setMetric(t.pvcBackupCount, labels, float64(pvc.BackupCount))
|
|
if pvc.Healthy {
|
|
setMetric(t.pvcBackupHealth, labels, 1)
|
|
} else {
|
|
setMetric(t.pvcBackupHealth, labels, 0)
|
|
}
|
|
if pvc.LastBackupAt == "" {
|
|
continue
|
|
}
|
|
setMetric(t.pvcBackupAgeHours, labels, pvc.LastBackupAgeHours)
|
|
if ts, ok := parseBackupTime(pvc.LastBackupAt); ok {
|
|
setMetric(t.pvcBackupLastSuccess, labels, float64(ts.Unix()))
|
|
}
|
|
}
|
|
}
|
|
|
|
t.inventoryRefreshTime = float64(time.Now().Unix())
|
|
}
|
|
|
|
func (t *telemetry) render() string {
|
|
t.mu.RLock()
|
|
defer t.mu.RUnlock()
|
|
|
|
var b strings.Builder
|
|
writeMetricFamily(&b, "soteria_backup_requests_total", "counter", "Backup requests handled by Soteria.", metricValues(t.backupRequests))
|
|
writeMetricFamily(&b, "soteria_restore_requests_total", "counter", "Restore requests handled by Soteria.", metricValues(t.restoreRequests))
|
|
writeMetricFamily(&b, "soteria_authz_denials_total", "counter", "Authorization denials emitted by Soteria.", metricValues(t.authzDenials))
|
|
writeMetricFamily(&b, "soteria_inventory_refresh_failures_total", "counter", "Inventory refresh failures while computing PVC backup telemetry.", []metricSample{{value: t.inventoryRefreshFailure}})
|
|
writeMetricFamily(&b, "soteria_inventory_refresh_timestamp_seconds", "gauge", "Unix timestamp of the last successful inventory refresh.", []metricSample{{value: t.inventoryRefreshTime}})
|
|
writeMetricFamily(&b, "pvc_backup_age_hours", "gauge", "Age in hours of the latest successful PVC backup known to Soteria.", metricValues(t.pvcBackupAgeHours))
|
|
writeMetricFamily(&b, "pvc_backup_health", "gauge", "PVC backup health according to Soteria: 1=fresh backup within policy, 0=missing/stale/error.", metricValues(t.pvcBackupHealth))
|
|
writeMetricFamily(&b, "pvc_backup_last_success_timestamp_seconds", "gauge", "Unix timestamp of the latest successful PVC backup known to Soteria.", metricValues(t.pvcBackupLastSuccess))
|
|
writeMetricFamily(&b, "pvc_backup_count", "gauge", "Count of backup records discovered for a PVC.", metricValues(t.pvcBackupCount))
|
|
return b.String()
|
|
}
|
|
|
|
func metricValues(source map[string]metricSample) []metricSample {
|
|
keys := make([]string, 0, len(source))
|
|
for key := range source {
|
|
keys = append(keys, key)
|
|
}
|
|
sort.Strings(keys)
|
|
values := make([]metricSample, 0, len(keys))
|
|
for _, key := range keys {
|
|
values = append(values, source[key])
|
|
}
|
|
return values
|
|
}
|
|
|
|
func writeMetricFamily(b *strings.Builder, name, metricType, help string, samples []metricSample) {
|
|
b.WriteString("# HELP ")
|
|
b.WriteString(name)
|
|
b.WriteString(" ")
|
|
b.WriteString(help)
|
|
b.WriteString("\n")
|
|
b.WriteString("# TYPE ")
|
|
b.WriteString(name)
|
|
b.WriteString(" ")
|
|
b.WriteString(metricType)
|
|
b.WriteString("\n")
|
|
for _, sample := range samples {
|
|
b.WriteString(name)
|
|
b.WriteString(renderLabels(sample.labels))
|
|
b.WriteString(" ")
|
|
b.WriteString(fmt.Sprintf("%g", sample.value))
|
|
b.WriteString("\n")
|
|
}
|
|
}
|
|
|
|
// labelValueEscaper applies the only escapes the text exposition format
// defines inside a quoted label value: backslash, double quote and line feed.
var labelValueEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`, "\n", `\n`)

// renderLabels formats labels as `{name="value",...}` with names in ascending
// order, or returns "" when there are no labels.
//
// Values are escaped with labelValueEscaper rather than Go's %q. %q emits Go
// string-literal escapes such as \t, \r, \x1b or \u200b; the exposition format
// does not define those, so a value containing such a character would be
// written in a form a scraper cannot parse. Every other character is written
// through unchanged.
//
// Label names are written as-is; callers pass fixed identifiers.
func renderLabels(labels map[string]string) string {
	if len(labels) == 0 {
		return ""
	}

	names := make([]string, 0, len(labels))
	for name := range labels {
		names = append(names, name)
	}
	sort.Strings(names)

	var b strings.Builder
	b.WriteByte('{')
	for i, name := range names {
		if i > 0 {
			b.WriteByte(',')
		}
		b.WriteString(name)
		b.WriteString(`="`)
		b.WriteString(labelValueEscaper.Replace(labels[name]))
		b.WriteByte('"')
	}
	b.WriteByte('}')
	return b.String()
}
|
|
|
|
func metricKey(labels map[string]string) string {
|
|
return renderLabels(labels)
|
|
}
|
|
|
|
func incMetric(target map[string]metricSample, labels map[string]string) {
|
|
key := metricKey(labels)
|
|
sample, ok := target[key]
|
|
if !ok {
|
|
target[key] = metricSample{labels: cloneLabels(labels), value: 1}
|
|
return
|
|
}
|
|
sample.value++
|
|
target[key] = sample
|
|
}
|
|
|
|
func setMetric(target map[string]metricSample, labels map[string]string, value float64) {
|
|
key := metricKey(labels)
|
|
target[key] = metricSample{labels: cloneLabels(labels), value: value}
|
|
}
|
|
|
|
// cloneLabels returns a new map holding the same entries as labels. The
// result is always non-nil, including when labels is nil or empty.
func cloneLabels(labels map[string]string) map[string]string {
	dup := make(map[string]string, len(labels))
	for name := range labels {
		dup[name] = labels[name]
	}
	return dup
}
|