package service import ( "fmt" "io" "sort" "strings" "sync" "time" "metis/pkg/facts" ) // Metrics captures the small Prometheus surface exported by Metis. type Metrics struct { mu sync.RWMutex builds map[string]int flashes map[string]int snapshots map[string]int lastSnapshotUnix map[string]float64 watches map[string]int lastWatchSuccess float64 classDriftCounts map[string]int lastWatchChangeSize float64 } // NewMetrics builds a zero-value metrics registry. func NewMetrics() *Metrics { return &Metrics{ builds: map[string]int{}, flashes: map[string]int{}, snapshots: map[string]int{}, lastSnapshotUnix: map[string]float64{}, watches: map[string]int{}, classDriftCounts: map[string]int{}, } } func (m *Metrics) RecordBuild(node, status string) { m.mu.Lock() defer m.mu.Unlock() m.builds[counterKey(node, status)]++ } func (m *Metrics) RecordFlash(node, host, status string) { m.mu.Lock() defer m.mu.Unlock() m.flashes[counterKey(node, host, status)]++ } func (m *Metrics) RecordSnapshot(node, status string, ts time.Time) { m.mu.Lock() defer m.mu.Unlock() m.snapshots[counterKey(node, status)]++ if !ts.IsZero() { m.lastSnapshotUnix[node] = float64(ts.Unix()) } } func (m *Metrics) RecordWatch(status string) { m.mu.Lock() defer m.mu.Unlock() m.watches[counterKey(status)]++ if status == "ok" { m.lastWatchSuccess = float64(time.Now().UTC().Unix()) } } func (m *Metrics) SetDriftTargets(targets map[string]facts.Targets, changed int) { m.mu.Lock() defer m.mu.Unlock() m.classDriftCounts = map[string]int{} for class, target := range targets { count := 0 if strings.TrimSpace(target.Kernel) != "" { count++ } if strings.TrimSpace(target.OSImage) != "" { count++ } if strings.TrimSpace(target.Containerd) != "" { count++ } if strings.TrimSpace(target.K3sVersion) != "" { count++ } count += len(target.Packages) m.classDriftCounts[class] = count } m.lastWatchChangeSize = float64(changed) } // Render writes a Prometheus text exposition response. func (m *Metrics) Render(w io.Writer) { m.mu.RLock() defer m.mu.RUnlock() fmt.Fprintln(w, "# HELP metis_builds_total Replacement image builds by node and status") fmt.Fprintln(w, "# TYPE metis_builds_total counter") for _, key := range sortedKeys(m.builds) { parts := splitKey(key, 2) node, status := parts[0], parts[1] fmt.Fprintf(w, "metis_builds_total{node=%q,status=%q} %d\n", node, status, m.builds[key]) } fmt.Fprintln(w, "# HELP metis_flashes_total Replacement flashes by node, host, and status") fmt.Fprintln(w, "# TYPE metis_flashes_total counter") for _, key := range sortedKeys(m.flashes) { parts := splitKey(key, 3) node, host, status := parts[0], parts[1], parts[2] fmt.Fprintf(w, "metis_flashes_total{node=%q,host=%q,status=%q} %d\n", node, host, status, m.flashes[key]) } fmt.Fprintln(w, "# HELP metis_sentinel_snapshots_total Sentinel snapshots accepted by node and status") fmt.Fprintln(w, "# TYPE metis_sentinel_snapshots_total counter") for _, key := range sortedKeys(m.snapshots) { parts := splitKey(key, 2) node, status := parts[0], parts[1] fmt.Fprintf(w, "metis_sentinel_snapshots_total{node=%q,status=%q} %d\n", node, status, m.snapshots[key]) } fmt.Fprintln(w, "# HELP metis_sentinel_snapshot_timestamp_seconds Last accepted sentinel snapshot timestamp by node") fmt.Fprintln(w, "# TYPE metis_sentinel_snapshot_timestamp_seconds gauge") for _, node := range sortedFloatKeys(m.lastSnapshotUnix) { fmt.Fprintf(w, "metis_sentinel_snapshot_timestamp_seconds{node=%q} %.0f\n", node, m.lastSnapshotUnix[node]) } fmt.Fprintln(w, "# HELP metis_sentinel_watch_total Sentinel watch runs by status") fmt.Fprintln(w, "# TYPE metis_sentinel_watch_total counter") for _, key := range sortedKeys(m.watches) { status := splitKey(key, 1)[0] fmt.Fprintf(w, "metis_sentinel_watch_total{status=%q} %d\n", status, m.watches[key]) } fmt.Fprintln(w, "# HELP metis_sentinel_watch_last_success_timestamp_seconds Last successful sentinel watch timestamp") fmt.Fprintln(w, "# TYPE metis_sentinel_watch_last_success_timestamp_seconds gauge") fmt.Fprintf(w, "metis_sentinel_watch_last_success_timestamp_seconds %.0f\n", m.lastWatchSuccess) fmt.Fprintln(w, "# HELP metis_sentinel_watch_changed_classes Number of class target sets changed by the last watch") fmt.Fprintln(w, "# TYPE metis_sentinel_watch_changed_classes gauge") fmt.Fprintf(w, "metis_sentinel_watch_changed_classes %.0f\n", m.lastWatchChangeSize) fmt.Fprintln(w, "# HELP metis_class_target_fields Count of populated target fields per class") fmt.Fprintln(w, "# TYPE metis_class_target_fields gauge") for _, class := range sortedFloatKeysInt(m.classDriftCounts) { fmt.Fprintf(w, "metis_class_target_fields{class=%q} %d\n", class, m.classDriftCounts[class]) } } func counterKey(parts ...string) string { return strings.Join(parts, "\x00") } func splitKey(key string, want int) []string { parts := strings.Split(key, "\x00") for len(parts) < want { parts = append(parts, "") } return parts } func sortedKeys[T any](m map[string]T) []string { keys := make([]string, 0, len(m)) for key := range m { keys = append(keys, key) } sort.Strings(keys) return keys } func sortedFloatKeys(m map[string]float64) []string { keys := make([]string, 0, len(m)) for key := range m { keys = append(keys, key) } sort.Strings(keys) return keys } func sortedFloatKeysInt(m map[string]int) []string { keys := make([]string, 0, len(m)) for key := range m { keys = append(keys, key) } sort.Strings(keys) return keys }