monitoring: export gitops state from ananke
This commit is contained in:
parent
3cc980795a
commit
087728d481
4
Jenkinsfile
vendored
4
Jenkinsfile
vendored
@ -311,6 +311,10 @@ PY
|
|||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
coverage_path = Path("build/coverage-percent.txt")
|
||||||
|
if coverage_path.exists():
|
||||||
|
print(coverage_path.read_text(encoding="utf-8", errors="ignore").strip() or "0.0")
|
||||||
|
raise SystemExit(0)
|
||||||
log_path = Path("build/quality-gate.out")
|
log_path = Path("build/quality-gate.out")
|
||||||
text = log_path.read_text(encoding="utf-8", errors="ignore") if log_path.exists() else ""
|
text = log_path.read_text(encoding="utf-8", errors="ignore") if log_path.exists() else ""
|
||||||
values = [float(match.group(1)) for match in re.finditer(r"([0-9]+(?:\\.[0-9]+)?)%", text)]
|
values = [float(match.group(1)) for match in re.finditer(r"([0-9]+(?:\\.[0-9]+)?)%", text)]
|
||||||
|
|||||||
@ -197,6 +197,9 @@ metrics:
|
|||||||
enabled: true
|
enabled: true
|
||||||
bind_addr: 0.0.0.0:9560
|
bind_addr: 0.0.0.0:9560
|
||||||
path: /metrics
|
path: /metrics
|
||||||
|
gitops:
|
||||||
|
enabled: true
|
||||||
|
poll_seconds: 60
|
||||||
state:
|
state:
|
||||||
dir: /var/lib/ananke
|
dir: /var/lib/ananke
|
||||||
reports_dir: /var/lib/ananke/reports
|
reports_dir: /var/lib/ananke/reports
|
||||||
|
|||||||
@ -329,6 +329,9 @@ metrics:
|
|||||||
enabled: true
|
enabled: true
|
||||||
bind_addr: 0.0.0.0:9560
|
bind_addr: 0.0.0.0:9560
|
||||||
path: /metrics
|
path: /metrics
|
||||||
|
gitops:
|
||||||
|
enabled: true
|
||||||
|
poll_seconds: 60
|
||||||
state:
|
state:
|
||||||
dir: /var/lib/ananke
|
dir: /var/lib/ananke
|
||||||
reports_dir: /var/lib/ananke/reports
|
reports_dir: /var/lib/ananke/reports
|
||||||
|
|||||||
@ -329,6 +329,9 @@ metrics:
|
|||||||
enabled: true
|
enabled: true
|
||||||
bind_addr: 0.0.0.0:9560
|
bind_addr: 0.0.0.0:9560
|
||||||
path: /metrics
|
path: /metrics
|
||||||
|
gitops:
|
||||||
|
enabled: true
|
||||||
|
poll_seconds: 60
|
||||||
state:
|
state:
|
||||||
dir: /var/lib/ananke
|
dir: /var/lib/ananke
|
||||||
reports_dir: /var/lib/ananke/reports
|
reports_dir: /var/lib/ananke/reports
|
||||||
|
|||||||
@ -270,6 +270,9 @@ func (c *Config) applyDefaults() {
|
|||||||
if c.Metrics.Path == "" {
|
if c.Metrics.Path == "" {
|
||||||
c.Metrics.Path = "/metrics"
|
c.Metrics.Path = "/metrics"
|
||||||
}
|
}
|
||||||
|
if c.GitOps.PollSeconds <= 0 {
|
||||||
|
c.GitOps.PollSeconds = 60
|
||||||
|
}
|
||||||
if c.State.Dir == "" {
|
if c.State.Dir == "" {
|
||||||
c.State.Dir = "/var/lib/ananke"
|
c.State.Dir = "/var/lib/ananke"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -156,6 +156,10 @@ func defaults() Config {
|
|||||||
BindAddr: "0.0.0.0:9560",
|
BindAddr: "0.0.0.0:9560",
|
||||||
Path: "/metrics",
|
Path: "/metrics",
|
||||||
},
|
},
|
||||||
|
GitOps: GitOps{
|
||||||
|
Enabled: true,
|
||||||
|
PollSeconds: 60,
|
||||||
|
},
|
||||||
State: State{
|
State: State{
|
||||||
Dir: "/var/lib/ananke",
|
Dir: "/var/lib/ananke",
|
||||||
ReportsDir: "/var/lib/ananke/reports",
|
ReportsDir: "/var/lib/ananke/reports",
|
||||||
|
|||||||
@ -23,6 +23,7 @@ type Config struct {
|
|||||||
UPS UPS `yaml:"ups"`
|
UPS UPS `yaml:"ups"`
|
||||||
Coordination Coordination `yaml:"coordination"`
|
Coordination Coordination `yaml:"coordination"`
|
||||||
Metrics Metrics `yaml:"metrics"`
|
Metrics Metrics `yaml:"metrics"`
|
||||||
|
GitOps GitOps `yaml:"gitops"`
|
||||||
State State `yaml:"state"`
|
State State `yaml:"state"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -174,6 +175,11 @@ type Metrics struct {
|
|||||||
Path string `yaml:"path"`
|
Path string `yaml:"path"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type GitOps struct {
|
||||||
|
Enabled bool `yaml:"enabled"`
|
||||||
|
PollSeconds int `yaml:"poll_seconds"`
|
||||||
|
}
|
||||||
|
|
||||||
type State struct {
|
type State struct {
|
||||||
Dir string `yaml:"dir"`
|
Dir string `yaml:"dir"`
|
||||||
ReportsDir string `yaml:"reports_dir"`
|
ReportsDir string `yaml:"reports_dir"`
|
||||||
|
|||||||
@ -27,6 +27,50 @@ type Sample struct {
|
|||||||
UpdatedAt time.Time
|
UpdatedAt time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type GitOpsSnapshot struct {
|
||||||
|
UpdatedAt time.Time
|
||||||
|
ScrapeSuccess bool
|
||||||
|
Errors map[string]string
|
||||||
|
FluxSources []GitOpsFluxSource
|
||||||
|
Kustomizations []GitOpsKustomization
|
||||||
|
HelmReleases []GitOpsHelmRelease
|
||||||
|
}
|
||||||
|
|
||||||
|
type GitOpsFluxSource struct {
|
||||||
|
Namespace string
|
||||||
|
Name string
|
||||||
|
URL string
|
||||||
|
Branch string
|
||||||
|
Revision string
|
||||||
|
Ready bool
|
||||||
|
Reason string
|
||||||
|
Suspended bool
|
||||||
|
}
|
||||||
|
|
||||||
|
type GitOpsKustomization struct {
|
||||||
|
Namespace string
|
||||||
|
Name string
|
||||||
|
Path string
|
||||||
|
SourceNamespace string
|
||||||
|
SourceName string
|
||||||
|
Revision string
|
||||||
|
Ready bool
|
||||||
|
Reason string
|
||||||
|
Suspended bool
|
||||||
|
}
|
||||||
|
|
||||||
|
type GitOpsHelmRelease struct {
|
||||||
|
Namespace string
|
||||||
|
Name string
|
||||||
|
Chart string
|
||||||
|
Version string
|
||||||
|
AppVersion string
|
||||||
|
Revision string
|
||||||
|
Ready bool
|
||||||
|
Reason string
|
||||||
|
Suspended bool
|
||||||
|
}
|
||||||
|
|
||||||
type Exporter struct {
|
type Exporter struct {
|
||||||
mu sync.RWMutex
|
mu sync.RWMutex
|
||||||
shutdownBudgetSec int
|
shutdownBudgetSec int
|
||||||
@ -34,6 +78,7 @@ type Exporter struct {
|
|||||||
lastShutdownReason string
|
lastShutdownReason string
|
||||||
lastShutdownAt time.Time
|
lastShutdownAt time.Time
|
||||||
samples map[string]Sample
|
samples map[string]Sample
|
||||||
|
gitops GitOpsSnapshot
|
||||||
}
|
}
|
||||||
|
|
||||||
// New runs one orchestration or CLI step.
|
// New runs one orchestration or CLI step.
|
||||||
@ -66,6 +111,22 @@ func (e *Exporter) UpdateSample(s Sample) {
|
|||||||
e.samples[s.Name] = s
|
e.samples[s.Name] = s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UpdateGitOpsSnapshot records the most recent Flux object-state scrape.
|
||||||
|
// Signature: (e *Exporter) UpdateGitOpsSnapshot(snapshot GitOpsSnapshot).
|
||||||
|
// Why: Grafana needs object readiness and branch/revision state, while Flux's
|
||||||
|
// controller metrics only expose controller health in this cluster.
|
||||||
|
func (e *Exporter) UpdateGitOpsSnapshot(snapshot GitOpsSnapshot) {
|
||||||
|
e.mu.Lock()
|
||||||
|
defer e.mu.Unlock()
|
||||||
|
if snapshot.UpdatedAt.IsZero() {
|
||||||
|
snapshot.UpdatedAt = time.Now().UTC()
|
||||||
|
}
|
||||||
|
if snapshot.Errors == nil {
|
||||||
|
snapshot.Errors = map[string]string{}
|
||||||
|
}
|
||||||
|
e.gitops = snapshot
|
||||||
|
}
|
||||||
|
|
||||||
// MarkShutdown runs one orchestration or CLI step.
|
// MarkShutdown runs one orchestration or CLI step.
|
||||||
// Signature: (e *Exporter) MarkShutdown(reason string).
|
// Signature: (e *Exporter) MarkShutdown(reason string).
|
||||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
@ -164,11 +225,99 @@ func (e *Exporter) serveMetrics(w http.ResponseWriter, _ *http.Request) {
|
|||||||
}
|
}
|
||||||
b.WriteString(fmt.Sprintf("ananke_ups_error%s %d\n", labels, boolNum(s.LastError != "")))
|
b.WriteString(fmt.Sprintf("ananke_ups_error%s %d\n", labels, boolNum(s.LastError != "")))
|
||||||
}
|
}
|
||||||
|
appendGitOpsMetrics(&b, e.gitops)
|
||||||
appendQualityGateMetrics(&b)
|
appendQualityGateMetrics(&b)
|
||||||
|
|
||||||
_, _ = w.Write([]byte(b.String()))
|
_, _ = w.Write([]byte(b.String()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// appendGitOpsMetrics writes Flux object-state metrics collected by the
|
||||||
|
// long-running daemon loop.
|
||||||
|
// Signature: appendGitOpsMetrics(dst *strings.Builder, snapshot GitOpsSnapshot).
|
||||||
|
// Why: this keeps the expensive Kubernetes API reads out of the HTTP scrape path
|
||||||
|
// while still making current GitOps health cheap for Grafana to query.
|
||||||
|
func appendGitOpsMetrics(dst *strings.Builder, snapshot GitOpsSnapshot) {
|
||||||
|
if dst.Len() > 0 {
|
||||||
|
dst.WriteString("\n")
|
||||||
|
}
|
||||||
|
dst.WriteString("# HELP ananke_gitops_last_scrape_timestamp_seconds Unix timestamp of the latest GitOps object-state scrape.\n")
|
||||||
|
dst.WriteString("# TYPE ananke_gitops_last_scrape_timestamp_seconds gauge\n")
|
||||||
|
if snapshot.UpdatedAt.IsZero() {
|
||||||
|
dst.WriteString("ananke_gitops_last_scrape_timestamp_seconds 0\n")
|
||||||
|
} else {
|
||||||
|
dst.WriteString(fmt.Sprintf("ananke_gitops_last_scrape_timestamp_seconds %d\n", snapshot.UpdatedAt.Unix()))
|
||||||
|
}
|
||||||
|
dst.WriteString("# HELP ananke_gitops_scrape_success Whether the latest GitOps object-state scrape completed without errors.\n")
|
||||||
|
dst.WriteString("# TYPE ananke_gitops_scrape_success gauge\n")
|
||||||
|
dst.WriteString(fmt.Sprintf("ananke_gitops_scrape_success %d\n", boolNum(snapshot.ScrapeSuccess)))
|
||||||
|
dst.WriteString("# HELP ananke_gitops_scrape_error Whether a GitOps resource family failed during the latest scrape.\n")
|
||||||
|
dst.WriteString("# TYPE ananke_gitops_scrape_error gauge\n")
|
||||||
|
resources := []string{"gitrepository", "kustomization", "helmrelease"}
|
||||||
|
for _, resource := range resources {
|
||||||
|
_, failed := snapshot.Errors[resource]
|
||||||
|
dst.WriteString(fmt.Sprintf("ananke_gitops_scrape_error{resource=%q} %d\n", resource, boolNum(failed)))
|
||||||
|
}
|
||||||
|
|
||||||
|
dst.WriteString("# HELP ananke_gitops_flux_source_info Current Flux GitRepository source metadata.\n")
|
||||||
|
dst.WriteString("# TYPE ananke_gitops_flux_source_info gauge\n")
|
||||||
|
dst.WriteString("# HELP ananke_gitops_flux_source_ready Whether a Flux GitRepository source is Ready.\n")
|
||||||
|
dst.WriteString("# TYPE ananke_gitops_flux_source_ready gauge\n")
|
||||||
|
dst.WriteString("# HELP ananke_gitops_flux_source_suspended Whether a Flux GitRepository source is suspended.\n")
|
||||||
|
dst.WriteString("# TYPE ananke_gitops_flux_source_suspended gauge\n")
|
||||||
|
sort.Slice(snapshot.FluxSources, func(i, j int) bool {
|
||||||
|
return snapshot.FluxSources[i].Namespace+"/"+snapshot.FluxSources[i].Name < snapshot.FluxSources[j].Namespace+"/"+snapshot.FluxSources[j].Name
|
||||||
|
})
|
||||||
|
for _, source := range snapshot.FluxSources {
|
||||||
|
infoLabels := fmt.Sprintf("{namespace=%q,name=%q,url=%q,branch=%q,revision=%q,ready=%q,reason=%q}",
|
||||||
|
safe(source.Namespace), safe(source.Name), safe(source.URL), safe(source.Branch),
|
||||||
|
safe(source.Revision), readyLabel(source.Ready), safe(defaultLabel(source.Reason, "unknown")))
|
||||||
|
baseLabels := fmt.Sprintf("{namespace=%q,name=%q}", safe(source.Namespace), safe(source.Name))
|
||||||
|
dst.WriteString(fmt.Sprintf("ananke_gitops_flux_source_info%s 1\n", infoLabels))
|
||||||
|
dst.WriteString(fmt.Sprintf("ananke_gitops_flux_source_ready%s %d\n", baseLabels, boolNum(source.Ready)))
|
||||||
|
dst.WriteString(fmt.Sprintf("ananke_gitops_flux_source_suspended%s %d\n", baseLabels, boolNum(source.Suspended)))
|
||||||
|
}
|
||||||
|
|
||||||
|
dst.WriteString("# HELP ananke_gitops_kustomization_info Current Flux Kustomization metadata.\n")
|
||||||
|
dst.WriteString("# TYPE ananke_gitops_kustomization_info gauge\n")
|
||||||
|
dst.WriteString("# HELP ananke_gitops_kustomization_ready Whether a Flux Kustomization is Ready.\n")
|
||||||
|
dst.WriteString("# TYPE ananke_gitops_kustomization_ready gauge\n")
|
||||||
|
dst.WriteString("# HELP ananke_gitops_kustomization_suspended Whether a Flux Kustomization is suspended.\n")
|
||||||
|
dst.WriteString("# TYPE ananke_gitops_kustomization_suspended gauge\n")
|
||||||
|
sort.Slice(snapshot.Kustomizations, func(i, j int) bool {
|
||||||
|
return snapshot.Kustomizations[i].Namespace+"/"+snapshot.Kustomizations[i].Name < snapshot.Kustomizations[j].Namespace+"/"+snapshot.Kustomizations[j].Name
|
||||||
|
})
|
||||||
|
for _, kustomization := range snapshot.Kustomizations {
|
||||||
|
infoLabels := fmt.Sprintf("{namespace=%q,name=%q,path=%q,source_namespace=%q,source_name=%q,revision=%q,ready=%q,reason=%q}",
|
||||||
|
safe(kustomization.Namespace), safe(kustomization.Name), safe(kustomization.Path),
|
||||||
|
safe(kustomization.SourceNamespace), safe(kustomization.SourceName), safe(kustomization.Revision),
|
||||||
|
readyLabel(kustomization.Ready), safe(defaultLabel(kustomization.Reason, "unknown")))
|
||||||
|
baseLabels := fmt.Sprintf("{namespace=%q,name=%q}", safe(kustomization.Namespace), safe(kustomization.Name))
|
||||||
|
dst.WriteString(fmt.Sprintf("ananke_gitops_kustomization_info%s 1\n", infoLabels))
|
||||||
|
dst.WriteString(fmt.Sprintf("ananke_gitops_kustomization_ready%s %d\n", baseLabels, boolNum(kustomization.Ready)))
|
||||||
|
dst.WriteString(fmt.Sprintf("ananke_gitops_kustomization_suspended%s %d\n", baseLabels, boolNum(kustomization.Suspended)))
|
||||||
|
}
|
||||||
|
|
||||||
|
dst.WriteString("# HELP ananke_gitops_helmrelease_info Current Flux HelmRelease metadata.\n")
|
||||||
|
dst.WriteString("# TYPE ananke_gitops_helmrelease_info gauge\n")
|
||||||
|
dst.WriteString("# HELP ananke_gitops_helmrelease_ready Whether a Flux HelmRelease is Ready.\n")
|
||||||
|
dst.WriteString("# TYPE ananke_gitops_helmrelease_ready gauge\n")
|
||||||
|
dst.WriteString("# HELP ananke_gitops_helmrelease_suspended Whether a Flux HelmRelease is suspended.\n")
|
||||||
|
dst.WriteString("# TYPE ananke_gitops_helmrelease_suspended gauge\n")
|
||||||
|
sort.Slice(snapshot.HelmReleases, func(i, j int) bool {
|
||||||
|
return snapshot.HelmReleases[i].Namespace+"/"+snapshot.HelmReleases[i].Name < snapshot.HelmReleases[j].Namespace+"/"+snapshot.HelmReleases[j].Name
|
||||||
|
})
|
||||||
|
for _, release := range snapshot.HelmReleases {
|
||||||
|
infoLabels := fmt.Sprintf("{namespace=%q,name=%q,chart=%q,version=%q,app_version=%q,revision=%q,ready=%q,reason=%q}",
|
||||||
|
safe(release.Namespace), safe(release.Name), safe(release.Chart), safe(release.Version),
|
||||||
|
safe(release.AppVersion), safe(release.Revision), readyLabel(release.Ready),
|
||||||
|
safe(defaultLabel(release.Reason, "unknown")))
|
||||||
|
baseLabels := fmt.Sprintf("{namespace=%q,name=%q}", safe(release.Namespace), safe(release.Name))
|
||||||
|
dst.WriteString(fmt.Sprintf("ananke_gitops_helmrelease_info%s 1\n", infoLabels))
|
||||||
|
dst.WriteString(fmt.Sprintf("ananke_gitops_helmrelease_ready%s %d\n", baseLabels, boolNum(release.Ready)))
|
||||||
|
dst.WriteString(fmt.Sprintf("ananke_gitops_helmrelease_suspended%s %d\n", baseLabels, boolNum(release.Suspended)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// appendQualityGateMetrics runs one orchestration or CLI step.
|
// appendQualityGateMetrics runs one orchestration or CLI step.
|
||||||
// Signature: appendQualityGateMetrics(dst *strings.Builder).
|
// Signature: appendQualityGateMetrics(dst *strings.Builder).
|
||||||
// Why: quality-gate pass/fail telemetry should appear alongside UPS metrics so
|
// Why: quality-gate pass/fail telemetry should appear alongside UPS metrics so
|
||||||
@ -228,3 +377,26 @@ func safe(in string) string {
|
|||||||
out := strings.ReplaceAll(in, "\\", "\\\\")
|
out := strings.ReplaceAll(in, "\\", "\\\\")
|
||||||
return strings.ReplaceAll(out, "\"", "\\\"")
|
return strings.ReplaceAll(out, "\"", "\\\"")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// readyLabel formats a boolean readiness state as a stable label value.
|
||||||
|
// Signature: readyLabel(ready bool) string.
|
||||||
|
// Why: info metrics need a human-readable status label without changing the
|
||||||
|
// numeric ready gauges used for alerting and aggregation.
|
||||||
|
func readyLabel(ready bool) string {
|
||||||
|
if ready {
|
||||||
|
return "true"
|
||||||
|
}
|
||||||
|
return "false"
|
||||||
|
}
|
||||||
|
|
||||||
|
// defaultLabel returns a safe fallback for empty metric label values.
|
||||||
|
// Signature: defaultLabel(value string, fallback string) string.
|
||||||
|
// Why: Flux status fields can be absent during startup, but Prometheus labels
|
||||||
|
// should remain explicit rather than silently becoming empty strings.
|
||||||
|
func defaultLabel(value string, fallback string) string {
|
||||||
|
value = strings.TrimSpace(value)
|
||||||
|
if value == "" {
|
||||||
|
return fallback
|
||||||
|
}
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
|||||||
@ -97,6 +97,9 @@ func (d *Daemon) Run(ctx context.Context) error {
|
|||||||
onBatterySince := map[string]time.Time{}
|
onBatterySince := map[string]time.Time{}
|
||||||
breachCount := map[string]int{}
|
breachCount := map[string]int{}
|
||||||
lastAutoHeal := time.Time{}
|
lastAutoHeal := time.Time{}
|
||||||
|
lastGitOpsPoll := time.Time{}
|
||||||
|
gitOpsPollRunning := false
|
||||||
|
gitOpsDone := make(chan struct{}, 1)
|
||||||
for _, t := range d.targets {
|
for _, t := range d.targets {
|
||||||
lastGood[t.Name] = time.Now()
|
lastGood[t.Name] = time.Now()
|
||||||
}
|
}
|
||||||
@ -108,6 +111,8 @@ func (d *Daemon) Run(ctx context.Context) error {
|
|||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return ctx.Err()
|
return ctx.Err()
|
||||||
|
case <-gitOpsDone:
|
||||||
|
gitOpsPollRunning = false
|
||||||
case <-t.C:
|
case <-t.C:
|
||||||
budget := d.orch.EstimatedEmergencyShutdownSeconds()
|
budget := d.orch.EstimatedEmergencyShutdownSeconds()
|
||||||
threshold := int(math.Ceil(float64(budget) * d.cfg.UPS.RuntimeSafetyFactor))
|
threshold := int(math.Ceil(float64(budget) * d.cfg.UPS.RuntimeSafetyFactor))
|
||||||
@ -201,6 +206,7 @@ func (d *Daemon) Run(ctx context.Context) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
d.maybeRunPostStartAutoHeal(ctx, &lastAutoHeal, anyOnBattery)
|
d.maybeRunPostStartAutoHeal(ctx, &lastAutoHeal, anyOnBattery)
|
||||||
|
gitOpsPollRunning = d.maybeStartGitOpsSnapshot(ctx, &lastGitOpsPoll, gitOpsPollRunning, gitOpsDone)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
355
internal/service/gitops_snapshot.go
Normal file
355
internal/service/gitops_snapshot.go
Normal file
@ -0,0 +1,355 @@
|
|||||||
|
package service
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os/exec"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/config"
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
var gitOpsKubectlOutput = runGitOpsKubectlOutput
|
||||||
|
|
||||||
|
type gitOpsResourceList struct {
|
||||||
|
Items []gitOpsResource `json:"items"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type gitOpsResource struct {
|
||||||
|
Metadata struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Namespace string `json:"namespace"`
|
||||||
|
} `json:"metadata"`
|
||||||
|
Spec struct {
|
||||||
|
Suspend bool `json:"suspend"`
|
||||||
|
Ref struct {
|
||||||
|
Branch string `json:"branch"`
|
||||||
|
Tag string `json:"tag"`
|
||||||
|
SemVer string `json:"semver"`
|
||||||
|
} `json:"ref"`
|
||||||
|
URL string `json:"url"`
|
||||||
|
Path string `json:"path"`
|
||||||
|
SourceRef struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Namespace string `json:"namespace"`
|
||||||
|
} `json:"sourceRef"`
|
||||||
|
Chart struct {
|
||||||
|
Spec struct {
|
||||||
|
Chart string `json:"chart"`
|
||||||
|
Version string `json:"version"`
|
||||||
|
} `json:"spec"`
|
||||||
|
} `json:"chart"`
|
||||||
|
} `json:"spec"`
|
||||||
|
Status struct {
|
||||||
|
Artifact struct {
|
||||||
|
Revision string `json:"revision"`
|
||||||
|
} `json:"artifact"`
|
||||||
|
LastAppliedRevision string `json:"lastAppliedRevision"`
|
||||||
|
LastAttemptedRevision string `json:"lastAttemptedRevision"`
|
||||||
|
Conditions []struct {
|
||||||
|
Type string `json:"type"`
|
||||||
|
Status string `json:"status"`
|
||||||
|
Reason string `json:"reason"`
|
||||||
|
} `json:"conditions"`
|
||||||
|
History []struct {
|
||||||
|
ChartName string `json:"chartName"`
|
||||||
|
ChartVersion string `json:"chartVersion"`
|
||||||
|
AppVersion string `json:"appVersion"`
|
||||||
|
Digest string `json:"digest"`
|
||||||
|
} `json:"history"`
|
||||||
|
} `json:"status"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// maybeStartGitOpsSnapshot starts a bounded background scrape when the GitOps
|
||||||
|
// sample interval has elapsed.
|
||||||
|
// Signature: (d *Daemon) maybeStartGitOpsSnapshot(ctx context.Context, lastRun *time.Time, running bool, done chan<- struct{}) bool.
|
||||||
|
// Why: Kubernetes API reads must not block UPS polling, especially during
|
||||||
|
// emergency power events when the daemon's first job is still safe shutdown.
|
||||||
|
func (d *Daemon) maybeStartGitOpsSnapshot(ctx context.Context, lastRun *time.Time, running bool, done chan<- struct{}) bool {
|
||||||
|
if !d.cfg.GitOps.Enabled || d.exporter == nil || running {
|
||||||
|
return running
|
||||||
|
}
|
||||||
|
interval := time.Duration(d.cfg.GitOps.PollSeconds) * time.Second
|
||||||
|
if interval <= 0 {
|
||||||
|
interval = 60 * time.Second
|
||||||
|
}
|
||||||
|
now := time.Now()
|
||||||
|
if lastRun != nil && !lastRun.IsZero() && now.Sub(*lastRun) < interval {
|
||||||
|
return running
|
||||||
|
}
|
||||||
|
if lastRun != nil {
|
||||||
|
*lastRun = now
|
||||||
|
}
|
||||||
|
cfg := d.cfg
|
||||||
|
exporter := d.exporter
|
||||||
|
logger := d.log
|
||||||
|
go func() {
|
||||||
|
defer func() {
|
||||||
|
select {
|
||||||
|
case done <- struct{}{}:
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
scrapeCtx, cancel := context.WithTimeout(ctx, 25*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
snapshot := collectGitOpsSnapshot(scrapeCtx, cfg)
|
||||||
|
exporter.UpdateGitOpsSnapshot(snapshot)
|
||||||
|
if !snapshot.ScrapeSuccess && logger != nil {
|
||||||
|
logger.Printf("warning: gitops metrics scrape partial: %s", gitOpsErrorSummary(snapshot.Errors))
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// collectGitOpsSnapshot queries Flux custom resources and converts them into
|
||||||
|
// Prometheus-safe metric state.
|
||||||
|
// Signature: collectGitOpsSnapshot(ctx context.Context, cfg config.Config) metrics.GitOpsSnapshot.
|
||||||
|
// Why: the Grafana overview needs current branch/readiness/suspend state, which
|
||||||
|
// is object status rather than controller operational telemetry.
|
||||||
|
func collectGitOpsSnapshot(ctx context.Context, cfg config.Config) metrics.GitOpsSnapshot {
|
||||||
|
snapshot := metrics.GitOpsSnapshot{
|
||||||
|
UpdatedAt: time.Now().UTC(),
|
||||||
|
ScrapeSuccess: true,
|
||||||
|
Errors: map[string]string{},
|
||||||
|
}
|
||||||
|
|
||||||
|
if raw, err := gitOpsKubectlJSON(ctx, cfg, "get", "gitrepositories.source.toolkit.fluxcd.io", "-A", "-o", "json"); err != nil {
|
||||||
|
snapshot.Errors["gitrepository"] = err.Error()
|
||||||
|
} else if sources, err := parseGitRepositories(raw); err != nil {
|
||||||
|
snapshot.Errors["gitrepository"] = err.Error()
|
||||||
|
} else {
|
||||||
|
snapshot.FluxSources = sources
|
||||||
|
}
|
||||||
|
|
||||||
|
if raw, err := gitOpsKubectlJSON(ctx, cfg, "get", "kustomizations.kustomize.toolkit.fluxcd.io", "-A", "-o", "json"); err != nil {
|
||||||
|
snapshot.Errors["kustomization"] = err.Error()
|
||||||
|
} else if kustomizations, err := parseKustomizations(raw); err != nil {
|
||||||
|
snapshot.Errors["kustomization"] = err.Error()
|
||||||
|
} else {
|
||||||
|
snapshot.Kustomizations = kustomizations
|
||||||
|
}
|
||||||
|
|
||||||
|
if raw, err := gitOpsKubectlJSON(ctx, cfg, "get", "helmreleases.helm.toolkit.fluxcd.io", "-A", "-o", "json"); err != nil {
|
||||||
|
snapshot.Errors["helmrelease"] = err.Error()
|
||||||
|
} else if releases, err := parseHelmReleases(raw); err != nil {
|
||||||
|
snapshot.Errors["helmrelease"] = err.Error()
|
||||||
|
} else {
|
||||||
|
snapshot.HelmReleases = releases
|
||||||
|
}
|
||||||
|
|
||||||
|
snapshot.ScrapeSuccess = len(snapshot.Errors) == 0
|
||||||
|
return snapshot
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseGitRepositories extracts Git source branch/revision/readiness from Flux
|
||||||
|
// GitRepository JSON.
|
||||||
|
// Signature: parseGitRepositories(raw []byte) ([]metrics.GitOpsFluxSource, error).
|
||||||
|
// Why: keeping status parsing isolated makes it testable without a live Flux
|
||||||
|
// API and protects the daemon loop from brittle JSON traversal code.
|
||||||
|
func parseGitRepositories(raw []byte) ([]metrics.GitOpsFluxSource, error) {
|
||||||
|
var list gitOpsResourceList
|
||||||
|
if err := json.Unmarshal(raw, &list); err != nil {
|
||||||
|
return nil, fmt.Errorf("decode gitrepositories: %w", err)
|
||||||
|
}
|
||||||
|
out := make([]metrics.GitOpsFluxSource, 0, len(list.Items))
|
||||||
|
for _, item := range list.Items {
|
||||||
|
ready, reason := fluxReady(item.Status.Conditions)
|
||||||
|
out = append(out, metrics.GitOpsFluxSource{
|
||||||
|
Namespace: defaultString(item.Metadata.Namespace, "default"),
|
||||||
|
Name: item.Metadata.Name,
|
||||||
|
URL: item.Spec.URL,
|
||||||
|
Branch: sourceBranch(item),
|
||||||
|
Revision: item.Status.Artifact.Revision,
|
||||||
|
Ready: ready,
|
||||||
|
Reason: reason,
|
||||||
|
Suspended: item.Spec.Suspend,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
sort.Slice(out, func(i, j int) bool {
|
||||||
|
return out[i].Namespace+"/"+out[i].Name < out[j].Namespace+"/"+out[j].Name
|
||||||
|
})
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseKustomizations extracts Kustomization health and source metadata from
|
||||||
|
// Flux Kustomization JSON.
|
||||||
|
// Signature: parseKustomizations(raw []byte) ([]metrics.GitOpsKustomization, error).
|
||||||
|
// Why: Kustomization status drives both the overview summary and the detailed
|
||||||
|
// GitOps table, so the parser keeps the metric contract in one place.
|
||||||
|
func parseKustomizations(raw []byte) ([]metrics.GitOpsKustomization, error) {
|
||||||
|
var list gitOpsResourceList
|
||||||
|
if err := json.Unmarshal(raw, &list); err != nil {
|
||||||
|
return nil, fmt.Errorf("decode kustomizations: %w", err)
|
||||||
|
}
|
||||||
|
out := make([]metrics.GitOpsKustomization, 0, len(list.Items))
|
||||||
|
for _, item := range list.Items {
|
||||||
|
ready, reason := fluxReady(item.Status.Conditions)
|
||||||
|
namespace := defaultString(item.Metadata.Namespace, "default")
|
||||||
|
out = append(out, metrics.GitOpsKustomization{
|
||||||
|
Namespace: namespace,
|
||||||
|
Name: item.Metadata.Name,
|
||||||
|
Path: item.Spec.Path,
|
||||||
|
SourceNamespace: defaultString(item.Spec.SourceRef.Namespace, namespace),
|
||||||
|
SourceName: item.Spec.SourceRef.Name,
|
||||||
|
Revision: firstNonEmpty(item.Status.LastAppliedRevision, item.Status.LastAttemptedRevision),
|
||||||
|
Ready: ready,
|
||||||
|
Reason: reason,
|
||||||
|
Suspended: item.Spec.Suspend,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
sort.Slice(out, func(i, j int) bool {
|
||||||
|
return out[i].Namespace+"/"+out[i].Name < out[j].Namespace+"/"+out[j].Name
|
||||||
|
})
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseHelmReleases extracts HelmRelease health and chart metadata from Flux
|
||||||
|
// HelmRelease JSON.
|
||||||
|
// Signature: parseHelmReleases(raw []byte) ([]metrics.GitOpsHelmRelease, error).
|
||||||
|
// Why: HelmRelease status has different chart fields than Kustomizations, and
|
||||||
|
// isolating it keeps dashboard metrics stable as Flux payloads evolve.
|
||||||
|
func parseHelmReleases(raw []byte) ([]metrics.GitOpsHelmRelease, error) {
|
||||||
|
var list gitOpsResourceList
|
||||||
|
if err := json.Unmarshal(raw, &list); err != nil {
|
||||||
|
return nil, fmt.Errorf("decode helmreleases: %w", err)
|
||||||
|
}
|
||||||
|
out := make([]metrics.GitOpsHelmRelease, 0, len(list.Items))
|
||||||
|
for _, item := range list.Items {
|
||||||
|
ready, reason := fluxReady(item.Status.Conditions)
|
||||||
|
chart := item.Spec.Chart.Spec.Chart
|
||||||
|
version := item.Spec.Chart.Spec.Version
|
||||||
|
appVersion := ""
|
||||||
|
revision := firstNonEmpty(item.Status.LastAppliedRevision, item.Status.LastAttemptedRevision)
|
||||||
|
if len(item.Status.History) > 0 {
|
||||||
|
latest := item.Status.History[0]
|
||||||
|
chart = firstNonEmpty(latest.ChartName, chart)
|
||||||
|
version = firstNonEmpty(latest.ChartVersion, version)
|
||||||
|
appVersion = latest.AppVersion
|
||||||
|
revision = firstNonEmpty(latest.Digest, revision)
|
||||||
|
}
|
||||||
|
out = append(out, metrics.GitOpsHelmRelease{
|
||||||
|
Namespace: defaultString(item.Metadata.Namespace, "default"),
|
||||||
|
Name: item.Metadata.Name,
|
||||||
|
Chart: chart,
|
||||||
|
Version: version,
|
||||||
|
AppVersion: appVersion,
|
||||||
|
Revision: revision,
|
||||||
|
Ready: ready,
|
||||||
|
Reason: reason,
|
||||||
|
Suspended: item.Spec.Suspend,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
sort.Slice(out, func(i, j int) bool {
|
||||||
|
return out[i].Namespace+"/"+out[i].Name < out[j].Namespace+"/"+out[j].Name
|
||||||
|
})
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// gitOpsKubectlJSON runs kubectl with the configured kubeconfig and a short
|
||||||
|
// request timeout.
|
||||||
|
// Signature: gitOpsKubectlJSON(ctx context.Context, cfg config.Config, args ...string) ([]byte, error).
|
||||||
|
// Why: every GitOps scrape should fail quickly instead of stealing time from
|
||||||
|
// UPS safety polling during outage recovery.
|
||||||
|
func gitOpsKubectlJSON(ctx context.Context, cfg config.Config, args ...string) ([]byte, error) {
|
||||||
|
finalArgs := []string{}
|
||||||
|
if strings.TrimSpace(cfg.Kubeconfig) != "" {
|
||||||
|
finalArgs = append(finalArgs, "--kubeconfig", cfg.Kubeconfig)
|
||||||
|
}
|
||||||
|
finalArgs = append(finalArgs, "--request-timeout=8s")
|
||||||
|
finalArgs = append(finalArgs, args...)
|
||||||
|
return gitOpsKubectlOutput(ctx, "kubectl", finalArgs...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// runGitOpsKubectlOutput is the production command runner for GitOps snapshot
|
||||||
|
// collection.
|
||||||
|
// Signature: runGitOpsKubectlOutput(ctx context.Context, name string, args ...string) ([]byte, error).
|
||||||
|
// Why: preserving stderr in errors makes live diagnosis possible when Flux CRD
|
||||||
|
// reads fail on a recovering control plane.
|
||||||
|
func runGitOpsKubectlOutput(ctx context.Context, name string, args ...string) ([]byte, error) {
|
||||||
|
cmd := exec.CommandContext(ctx, name, args...)
|
||||||
|
out, err := cmd.CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
trimmed := strings.TrimSpace(string(out))
|
||||||
|
if trimmed == "" {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("%w: %s", err, trimmed)
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// fluxReady resolves a Flux Ready condition into a boolean and reason.
|
||||||
|
// Signature: fluxReady(conditions []struct { Type string; Status string; Reason string }) (bool, string).
|
||||||
|
// Why: Flux resources all use Ready conditions, so one helper keeps status
|
||||||
|
// interpretation consistent across GitRepository, Kustomization, and HelmRelease.
|
||||||
|
func fluxReady(conditions []struct {
|
||||||
|
Type string `json:"type"`
|
||||||
|
Status string `json:"status"`
|
||||||
|
Reason string `json:"reason"`
|
||||||
|
}) (bool, string) {
|
||||||
|
for _, condition := range conditions {
|
||||||
|
if condition.Type == "Ready" {
|
||||||
|
return strings.EqualFold(condition.Status, "True"), defaultString(condition.Reason, "ReadyUnknown")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false, "ReadyMissing"
|
||||||
|
}
|
||||||
|
|
||||||
|
// sourceBranch returns the most useful source reference label for a
|
||||||
|
// GitRepository.
|
||||||
|
// Signature: sourceBranch(item gitOpsResource) string.
|
||||||
|
// Why: branch is preferred for Atlas, but tags and semver refs should still
|
||||||
|
// render clearly if Flux is ever pinned differently.
|
||||||
|
func sourceBranch(item gitOpsResource) string {
|
||||||
|
return firstNonEmpty(item.Spec.Ref.Branch, item.Spec.Ref.Tag, item.Spec.Ref.SemVer, "unknown")
|
||||||
|
}
|
||||||
|
|
||||||
|
// defaultString trims a value and returns a fallback when it is empty.
|
||||||
|
// Signature: defaultString(value string, fallback string) string.
|
||||||
|
// Why: Flux omits some optional fields; dashboards are easier to read with
|
||||||
|
// explicit fallback labels.
|
||||||
|
func defaultString(value string, fallback string) string {
|
||||||
|
value = strings.TrimSpace(value)
|
||||||
|
if value == "" {
|
||||||
|
return fallback
|
||||||
|
}
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
|
||||||
|
// firstNonEmpty returns the first non-empty trimmed value.
|
||||||
|
// Signature: firstNonEmpty(values ...string) string.
|
||||||
|
// Why: Flux has several revision fields depending on resource kind and
|
||||||
|
// reconciliation stage, so callers can express preferred fallback order.
|
||||||
|
func firstNonEmpty(values ...string) string {
|
||||||
|
for _, value := range values {
|
||||||
|
value = strings.TrimSpace(value)
|
||||||
|
if value != "" {
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// gitOpsErrorSummary renders resource-family scrape errors for daemon logs.
|
||||||
|
// Signature: gitOpsErrorSummary(errors map[string]string) string.
|
||||||
|
// Why: a compact sorted summary keeps recurring scrape warnings readable in
|
||||||
|
// journald during partial cluster recovery.
|
||||||
|
func gitOpsErrorSummary(errors map[string]string) string {
|
||||||
|
if len(errors) == 0 {
|
||||||
|
return "none"
|
||||||
|
}
|
||||||
|
keys := make([]string, 0, len(errors))
|
||||||
|
for key := range errors {
|
||||||
|
keys = append(keys, key)
|
||||||
|
}
|
||||||
|
sort.Strings(keys)
|
||||||
|
parts := make([]string, 0, len(keys))
|
||||||
|
for _, key := range keys {
|
||||||
|
parts = append(parts, key+"="+errors[key])
|
||||||
|
}
|
||||||
|
return strings.Join(parts, "; ")
|
||||||
|
}
|
||||||
127
internal/service/testing_hooks_gitops.go
Normal file
127
internal/service/testing_hooks_gitops.go
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
package service
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"log"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/config"
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
type TestHookGitOpsRunner func(context.Context, string, ...string) ([]byte, error)
|
||||||
|
|
||||||
|
// TestHookParseGitRepositories exposes GitRepository parsing to the top-level
|
||||||
|
// testing module.
|
||||||
|
// Signature: TestHookParseGitRepositories(raw []byte) ([]metrics.GitOpsFluxSource, error).
|
||||||
|
// Why: Ananke keeps split-module tests outside internal packages, but the
|
||||||
|
// parser needs direct contract coverage without a live cluster.
|
||||||
|
func TestHookParseGitRepositories(raw []byte) ([]metrics.GitOpsFluxSource, error) {
|
||||||
|
return parseGitRepositories(raw)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookParseKustomizations exposes Kustomization parsing to the top-level
|
||||||
|
// testing module.
|
||||||
|
// Signature: TestHookParseKustomizations(raw []byte) ([]metrics.GitOpsKustomization, error).
|
||||||
|
// Why: dashboard-facing labels should be verified from representative Flux JSON
|
||||||
|
// without relying on the current production cluster shape.
|
||||||
|
func TestHookParseKustomizations(raw []byte) ([]metrics.GitOpsKustomization, error) {
|
||||||
|
return parseKustomizations(raw)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookParseHelmReleases exposes HelmRelease parsing to the top-level
|
||||||
|
// testing module.
|
||||||
|
// Signature: TestHookParseHelmReleases(raw []byte) ([]metrics.GitOpsHelmRelease, error).
|
||||||
|
// Why: Helm status fields vary by reconciliation phase, and split-module tests
|
||||||
|
// need a stable seam for parser coverage.
|
||||||
|
func TestHookParseHelmReleases(raw []byte) ([]metrics.GitOpsHelmRelease, error) {
|
||||||
|
return parseHelmReleases(raw)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookCollectGitOpsSnapshotWithRunner runs snapshot collection with an
|
||||||
|
// injected kubectl runner.
|
||||||
|
// Signature: TestHookCollectGitOpsSnapshotWithRunner(ctx context.Context, cfg config.Config, runner TestHookGitOpsRunner) metrics.GitOpsSnapshot.
|
||||||
|
// Why: this covers success and partial-failure collection paths without shelling
|
||||||
|
// out to kubectl from unit tests.
|
||||||
|
func TestHookCollectGitOpsSnapshotWithRunner(ctx context.Context, cfg config.Config, runner TestHookGitOpsRunner) metrics.GitOpsSnapshot {
|
||||||
|
original := gitOpsKubectlOutput
|
||||||
|
defer func() { gitOpsKubectlOutput = original }()
|
||||||
|
gitOpsKubectlOutput = runner
|
||||||
|
return collectGitOpsSnapshot(ctx, cfg)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookMaybeStartGitOpsSnapshot starts the daemon's background GitOps scrape
|
||||||
|
// from split-module tests.
|
||||||
|
// Signature: TestHookMaybeStartGitOpsSnapshot(ctx context.Context, cfg config.Config, exporter *metrics.Exporter, logger *log.Logger, lastRun *time.Time, running bool, done chan<- struct{}) bool.
|
||||||
|
// Why: the daemon intentionally keeps scraping asynchronous so GitOps telemetry
|
||||||
|
// cannot delay UPS polling; the behavior needs direct coverage.
|
||||||
|
func TestHookMaybeStartGitOpsSnapshot(ctx context.Context, cfg config.Config, exporter *metrics.Exporter, logger *log.Logger, lastRun *time.Time, running bool, done chan<- struct{}) bool {
|
||||||
|
d := &Daemon{cfg: cfg, exporter: exporter, log: logger}
|
||||||
|
return d.maybeStartGitOpsSnapshot(ctx, lastRun, running, done)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookMaybeStartGitOpsSnapshotWithRunner starts a background GitOps scrape
|
||||||
|
// with an injected runner and restores the production runner after completion.
|
||||||
|
// Signature: TestHookMaybeStartGitOpsSnapshotWithRunner(ctx context.Context, cfg config.Config, exporter *metrics.Exporter, logger *log.Logger, lastRun *time.Time, running bool, done chan<- struct{}, runner TestHookGitOpsRunner) bool.
|
||||||
|
// Why: the scrape is asynchronous, so split-module tests need a seam that keeps
|
||||||
|
// fake kubectl behavior installed until the goroutine exits.
|
||||||
|
func TestHookMaybeStartGitOpsSnapshotWithRunner(ctx context.Context, cfg config.Config, exporter *metrics.Exporter, logger *log.Logger, lastRun *time.Time, running bool, done chan<- struct{}, runner TestHookGitOpsRunner) bool {
|
||||||
|
original := gitOpsKubectlOutput
|
||||||
|
gitOpsKubectlOutput = runner
|
||||||
|
proxyDone := make(chan struct{}, 1)
|
||||||
|
d := &Daemon{cfg: cfg, exporter: exporter, log: logger}
|
||||||
|
started := d.maybeStartGitOpsSnapshot(ctx, lastRun, running, proxyDone)
|
||||||
|
if !started || running {
|
||||||
|
gitOpsKubectlOutput = original
|
||||||
|
return started
|
||||||
|
}
|
||||||
|
go func() {
|
||||||
|
<-proxyDone
|
||||||
|
gitOpsKubectlOutput = original
|
||||||
|
select {
|
||||||
|
case done <- struct{}{}:
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
return started
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookRunGitOpsKubectlOutput exposes the production command runner to
|
||||||
|
// split-module tests.
|
||||||
|
// Signature: TestHookRunGitOpsKubectlOutput(ctx context.Context, name string, args ...string) ([]byte, error).
|
||||||
|
// Why: stderr preservation and empty-stderr failure behavior are part of the
|
||||||
|
// operator-facing diagnostics contract.
|
||||||
|
func TestHookRunGitOpsKubectlOutput(ctx context.Context, name string, args ...string) ([]byte, error) {
|
||||||
|
return runGitOpsKubectlOutput(ctx, name, args...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookGitOpsDefaultString exposes defaultString to split-module tests.
|
||||||
|
// Signature: TestHookGitOpsDefaultString(value string, fallback string) string.
|
||||||
|
// Why: fallback label behavior is intentionally tiny but important for readable
|
||||||
|
// Grafana tables during startup.
|
||||||
|
func TestHookGitOpsDefaultString(value string, fallback string) string {
|
||||||
|
return defaultString(value, fallback)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookGitOpsFirstNonEmpty exposes firstNonEmpty to split-module tests.
|
||||||
|
// Signature: TestHookGitOpsFirstNonEmpty(values ...string) string.
|
||||||
|
// Why: revision fallback order should remain deterministic as Flux status
|
||||||
|
// payloads change.
|
||||||
|
func TestHookGitOpsFirstNonEmpty(values ...string) string {
|
||||||
|
return firstNonEmpty(values...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookGitOpsErrorSummary exposes gitOpsErrorSummary to split-module tests.
|
||||||
|
// Signature: TestHookGitOpsErrorSummary(errors map[string]string) string.
|
||||||
|
// Why: sorted scrape warnings are easier to inspect in journald during recovery.
|
||||||
|
func TestHookGitOpsErrorSummary(errors map[string]string) string {
|
||||||
|
return gitOpsErrorSummary(errors)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookFluxReadyMissing exposes the missing Ready-condition branch to tests.
|
||||||
|
// Signature: TestHookFluxReadyMissing() (bool, string).
|
||||||
|
// Why: absent Ready conditions should be treated as not-ready and labelled
|
||||||
|
// explicitly instead of being mistaken for healthy.
|
||||||
|
func TestHookFluxReadyMissing() (bool, string) {
|
||||||
|
return fluxReady(nil)
|
||||||
|
}
|
||||||
@ -172,6 +172,14 @@ def _read_coverage_percent(path: str) -> float:
|
|||||||
return 0.0
|
return 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_repo_path(repo_root: Path, path: str) -> Path:
|
||||||
|
"""Resolve quality-gate artifact paths relative to the repository root."""
|
||||||
|
candidate = Path(path)
|
||||||
|
if candidate.is_absolute():
|
||||||
|
return candidate
|
||||||
|
return repo_root / candidate
|
||||||
|
|
||||||
|
|
||||||
def _iter_source_files(repo_root: Path):
|
def _iter_source_files(repo_root: Path):
|
||||||
for rel_root in SOURCE_SCAN_ROOTS:
|
for rel_root in SOURCE_SCAN_ROOTS:
|
||||||
base = repo_root / rel_root
|
base = repo_root / rel_root
|
||||||
@ -367,7 +375,9 @@ def main(argv: list[str] | None = None) -> int:
|
|||||||
args = parse_args(argv or sys.argv[1:])
|
args = parse_args(argv or sys.argv[1:])
|
||||||
repo_root = Path(__file__).resolve().parents[1]
|
repo_root = Path(__file__).resolve().parents[1]
|
||||||
build_dir = repo_root / "build"
|
build_dir = repo_root / "build"
|
||||||
gate_rc = _read_exit_code(Path(os.getenv("ANANKE_QUALITY_EXIT_CODE_PATH", str(build_dir / "quality-gate.rc"))))
|
gate_rc = _read_exit_code(
|
||||||
|
_resolve_repo_path(repo_root, os.getenv("ANANKE_QUALITY_EXIT_CODE_PATH", str(build_dir / "quality-gate.rc")))
|
||||||
|
)
|
||||||
current_ok = 1 if gate_rc == 0 else 0
|
current_ok = 1 if gate_rc == 0 else 0
|
||||||
current_failed = 0 if gate_rc == 0 else 1
|
current_failed = 0 if gate_rc == 0 else 1
|
||||||
|
|
||||||
@ -420,13 +430,18 @@ def main(argv: list[str] | None = None) -> int:
|
|||||||
elif not already_recorded:
|
elif not already_recorded:
|
||||||
resolved_ok += current_ok
|
resolved_ok += current_ok
|
||||||
resolved_failed += current_failed
|
resolved_failed += current_failed
|
||||||
coverage_percent = _read_coverage_percent(args.coverage_percent_file)
|
coverage_percent = _read_coverage_percent(str(_resolve_repo_path(repo_root, args.coverage_percent_file)))
|
||||||
source_files_total = _count_source_files(repo_root)
|
source_files_total = _count_source_files(repo_root)
|
||||||
source_lines_over_500 = _count_source_files_over_limit(repo_root, max_lines=500)
|
source_lines_over_500 = _count_source_files_over_limit(repo_root, max_lines=500)
|
||||||
quality_output = Path(os.getenv("ANANKE_QUALITY_OUTPUT_FILE", str(build_dir / "quality-gate.out")))
|
quality_output = _resolve_repo_path(
|
||||||
|
repo_root,
|
||||||
|
os.getenv("ANANKE_QUALITY_OUTPUT_FILE", str(build_dir / "quality-gate.out")),
|
||||||
|
)
|
||||||
tests = _parse_go_test_counts(quality_output)
|
tests = _parse_go_test_counts(quality_output)
|
||||||
test_cases = _parse_go_test_cases(quality_output)
|
test_cases = _parse_go_test_cases(quality_output)
|
||||||
docs_status = _read_status(Path(os.getenv("ANANKE_QUALITY_DOCS_STATUS_PATH", str(build_dir / "docs-naming.status"))))
|
docs_status = _read_status(
|
||||||
|
_resolve_repo_path(repo_root, os.getenv("ANANKE_QUALITY_DOCS_STATUS_PATH", str(build_dir / "docs-naming.status")))
|
||||||
|
)
|
||||||
unit_tests_failed = _unit_tests_failed(quality_output, coverage_percent)
|
unit_tests_failed = _unit_tests_failed(quality_output, coverage_percent)
|
||||||
checks = {
|
checks = {
|
||||||
"tests": "failed" if unit_tests_failed or tests["failed"] > 0 or tests["errors"] > 0 else "ok",
|
"tests": "failed" if unit_tests_failed or tests["failed"] > 0 or tests["errors"] > 0 else "ok",
|
||||||
|
|||||||
@ -61,6 +61,18 @@ class PublishQualityMetricsTest(unittest.TestCase):
|
|||||||
self.server.server_close()
|
self.server.server_close()
|
||||||
self.thread.join(timeout=5)
|
self.thread.join(timeout=5)
|
||||||
|
|
||||||
|
def test_relative_artifact_paths_are_repo_rooted(self) -> None:
|
||||||
|
repo_root = Path("/tmp/ananke-repo")
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
publisher._resolve_repo_path(repo_root, "build/coverage-percent.txt"),
|
||||||
|
repo_root / "build" / "coverage-percent.txt",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
publisher._resolve_repo_path(repo_root, "/tmp/coverage-percent.txt"),
|
||||||
|
Path("/tmp/coverage-percent.txt"),
|
||||||
|
)
|
||||||
|
|
||||||
def _env_for_gate_status(self, status: int = 0) -> dict[str, str]:
|
def _env_for_gate_status(self, status: int = 0) -> dict[str, str]:
|
||||||
tmp_dir = tempfile.TemporaryDirectory()
|
tmp_dir = tempfile.TemporaryDirectory()
|
||||||
self.addCleanup(tmp_dir.cleanup)
|
self.addCleanup(tmp_dir.cleanup)
|
||||||
|
|||||||
@ -56,12 +56,13 @@ read_quality_counter() {
|
|||||||
write_quality_metrics() {
|
write_quality_metrics() {
|
||||||
local exit_code="$1"
|
local exit_code="$1"
|
||||||
|
|
||||||
local metrics_dir state_dir
|
local metrics_dir state_dir write_metrics
|
||||||
metrics_dir="$(dirname "${QUALITY_METRICS_FILE}")"
|
metrics_dir="$(dirname "${QUALITY_METRICS_FILE}")"
|
||||||
state_dir="$(dirname "${QUALITY_STATE_FILE}")"
|
state_dir="$(dirname "${QUALITY_STATE_FILE}")"
|
||||||
|
write_metrics="${QUALITY_METRICS_ENABLED}"
|
||||||
mkdir -p "${state_dir}" >/dev/null 2>&1 || return 0
|
mkdir -p "${state_dir}" >/dev/null 2>&1 || return 0
|
||||||
if [[ "${QUALITY_METRICS_ENABLED}" == "1" ]]; then
|
if [[ "${write_metrics}" == "1" ]]; then
|
||||||
mkdir -p "${metrics_dir}" >/dev/null 2>&1 || return 0
|
mkdir -p "${metrics_dir}" >/dev/null 2>&1 || write_metrics=0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
local ok failed total last_success now success_percent
|
local ok failed total last_success now success_percent
|
||||||
@ -84,12 +85,12 @@ write_quality_metrics() {
|
|||||||
QUALITY_SUCCESS_PERCENT="${success_percent}"
|
QUALITY_SUCCESS_PERCENT="${success_percent}"
|
||||||
|
|
||||||
local tmp_metrics="" tmp_state
|
local tmp_metrics="" tmp_state
|
||||||
if [[ "${QUALITY_METRICS_ENABLED}" == "1" ]]; then
|
if [[ "${write_metrics}" == "1" ]]; then
|
||||||
tmp_metrics="$(mktemp "${metrics_dir}/quality-gate.prom.XXXXXX")"
|
tmp_metrics="$(mktemp "${metrics_dir}/quality-gate.prom.XXXXXX")"
|
||||||
fi
|
fi
|
||||||
tmp_state="$(mktemp "${state_dir}/quality-gate.state.XXXXXX")"
|
tmp_state="$(mktemp "${state_dir}/quality-gate.state.XXXXXX")"
|
||||||
|
|
||||||
if [[ "${QUALITY_METRICS_ENABLED}" == "1" ]]; then
|
if [[ "${write_metrics}" == "1" ]]; then
|
||||||
cat > "${tmp_metrics}" <<EOF
|
cat > "${tmp_metrics}" <<EOF
|
||||||
# HELP ananke_quality_gate_runs_total Total Ananke quality gate runs by status.
|
# HELP ananke_quality_gate_runs_total Total Ananke quality gate runs by status.
|
||||||
# TYPE ananke_quality_gate_runs_total counter
|
# TYPE ananke_quality_gate_runs_total counter
|
||||||
@ -115,7 +116,7 @@ last_run=${now}
|
|||||||
EOF
|
EOF
|
||||||
|
|
||||||
mv -f "${tmp_state}" "${QUALITY_STATE_FILE}"
|
mv -f "${tmp_state}" "${QUALITY_STATE_FILE}"
|
||||||
if [[ "${QUALITY_METRICS_ENABLED}" == "1" ]]; then
|
if [[ "${write_metrics}" == "1" ]]; then
|
||||||
mv -f "${tmp_metrics}" "${QUALITY_METRICS_FILE}"
|
mv -f "${tmp_metrics}" "${QUALITY_METRICS_FILE}"
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
@ -146,6 +147,8 @@ publish_quality_metrics() {
|
|||||||
quality_gate_finalize() {
|
quality_gate_finalize() {
|
||||||
local exit_code="$1"
|
local exit_code="$1"
|
||||||
set +e
|
set +e
|
||||||
|
printf '%s\n' "${exit_code}" > "${BUILD_DIR}/quality-gate.rc" 2>/dev/null || true
|
||||||
|
cd "${REPO_DIR}" 2>/dev/null || true
|
||||||
write_quality_metrics "${exit_code}" || true
|
write_quality_metrics "${exit_code}" || true
|
||||||
publish_quality_metrics || true
|
publish_quality_metrics || true
|
||||||
exit "${exit_code}"
|
exit "${exit_code}"
|
||||||
@ -193,4 +196,7 @@ printf '%s\n' "${coverage_percent}" > "${COVERAGE_PERCENT_FILE}"
|
|||||||
|
|
||||||
echo "[quality] per-file coverage gate (95%)"
|
echo "[quality] per-file coverage gate (95%)"
|
||||||
cd testing
|
cd testing
|
||||||
ANANKE_ENFORCE_COVERAGE=1 ANANKE_PER_FILE_COVERAGE_TARGET=95 go test ./coverage -run TestPerFileCoverageReport -count=1 -v
|
ANANKE_ENFORCE_COVERAGE=1 \
|
||||||
|
ANANKE_PER_FILE_COVERAGE_TARGET=95 \
|
||||||
|
ANANKE_PER_FILE_COVERAGE_PERCENT_FILE="${COVERAGE_PERCENT_FILE}" \
|
||||||
|
go test ./coverage -run TestPerFileCoverageReport -count=1 -v
|
||||||
|
|||||||
@ -121,6 +121,7 @@ func TestPerFileCoverageReport(t *testing.T) {
|
|||||||
"./metrics",
|
"./metrics",
|
||||||
"./hygiene",
|
"./hygiene",
|
||||||
"./orchestrator",
|
"./orchestrator",
|
||||||
|
"./service",
|
||||||
"./state",
|
"./state",
|
||||||
"./sshutil",
|
"./sshutil",
|
||||||
"./ups",
|
"./ups",
|
||||||
@ -167,17 +168,26 @@ func TestPerFileCoverageReport(t *testing.T) {
|
|||||||
var report bytes.Buffer
|
var report bytes.Buffer
|
||||||
report.WriteString("per-file coverage\n")
|
report.WriteString("per-file coverage\n")
|
||||||
under := make([]string, 0)
|
under := make([]string, 0)
|
||||||
|
minPercent := 100.0
|
||||||
for _, file := range keys {
|
for _, file := range keys {
|
||||||
fc := byFile[file]
|
fc := byFile[file]
|
||||||
pct := 100.0
|
pct := 100.0
|
||||||
if fc.total > 0 {
|
if fc.total > 0 {
|
||||||
pct = (fc.covered / fc.total) * 100.0
|
pct = (fc.covered / fc.total) * 100.0
|
||||||
}
|
}
|
||||||
|
if pct < minPercent {
|
||||||
|
minPercent = pct
|
||||||
|
}
|
||||||
report.WriteString(fmt.Sprintf("- %s: %.1f%%\n", file, pct))
|
report.WriteString(fmt.Sprintf("- %s: %.1f%%\n", file, pct))
|
||||||
if pct < target {
|
if pct < target {
|
||||||
under = append(under, fmt.Sprintf("%s (%.1f%% < %.1f%%)", file, pct, target))
|
under = append(under, fmt.Sprintf("%s (%.1f%% < %.1f%%)", file, pct, target))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if outputPath := strings.TrimSpace(os.Getenv("ANANKE_PER_FILE_COVERAGE_PERCENT_FILE")); outputPath != "" {
|
||||||
|
if err := os.WriteFile(outputPath, []byte(fmt.Sprintf("%.1f\n", minPercent)), 0o644); err != nil {
|
||||||
|
t.Fatalf("write per-file coverage percent: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
t.Log(report.String())
|
t.Log(report.String())
|
||||||
|
|
||||||
if len(under) > 0 && enforce {
|
if len(under) > 0 && enforce {
|
||||||
|
|||||||
@ -104,3 +104,64 @@ func TestExporterHelperContracts(t *testing.T) {
|
|||||||
t.Fatalf("unexpected escaped string: %q", got)
|
t.Fatalf("unexpected escaped string: %q", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestExporterEmitsGitOpsMetrics runs one orchestration or CLI step.
|
||||||
|
// Signature: TestExporterEmitsGitOpsMetrics(t *testing.T).
|
||||||
|
// Why: the overview dashboard depends on Ananke-owned Flux object-state metrics
|
||||||
|
// rather than the narrower controller metrics exposed by Flux itself.
|
||||||
|
func TestExporterEmitsGitOpsMetrics(t *testing.T) {
|
||||||
|
exporter := metrics.New()
|
||||||
|
exporter.UpdateGitOpsSnapshot(metrics.GitOpsSnapshot{
|
||||||
|
UpdatedAt: time.Unix(1710000100, 0).UTC(),
|
||||||
|
ScrapeSuccess: true,
|
||||||
|
FluxSources: []metrics.GitOpsFluxSource{{
|
||||||
|
Namespace: "flux-system",
|
||||||
|
Name: "flux-system",
|
||||||
|
URL: "ssh://git@example/repo.git",
|
||||||
|
Branch: "main",
|
||||||
|
Revision: "main@sha1:abc123",
|
||||||
|
Ready: true,
|
||||||
|
Reason: "Succeeded",
|
||||||
|
}},
|
||||||
|
Kustomizations: []metrics.GitOpsKustomization{{
|
||||||
|
Namespace: "flux-system",
|
||||||
|
Name: "monitoring",
|
||||||
|
Path: "./services/monitoring",
|
||||||
|
SourceNamespace: "flux-system",
|
||||||
|
SourceName: "flux-system",
|
||||||
|
Revision: "main@sha1:abc123",
|
||||||
|
Ready: true,
|
||||||
|
Reason: "ReconciliationSucceeded",
|
||||||
|
}},
|
||||||
|
HelmReleases: []metrics.GitOpsHelmRelease{{
|
||||||
|
Namespace: "monitoring",
|
||||||
|
Name: "grafana",
|
||||||
|
Chart: "grafana",
|
||||||
|
Version: "8.5.0",
|
||||||
|
AppVersion: "11.4.0",
|
||||||
|
Revision: "sha256:abc123",
|
||||||
|
Ready: false,
|
||||||
|
Reason: "Progressing",
|
||||||
|
Suspended: true,
|
||||||
|
}},
|
||||||
|
})
|
||||||
|
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
exporter.Handler("/metrics").ServeHTTP(rr, req)
|
||||||
|
body := rr.Body.String()
|
||||||
|
|
||||||
|
mustContain := []string{
|
||||||
|
"ananke_gitops_last_scrape_timestamp_seconds 1710000100",
|
||||||
|
"ananke_gitops_scrape_success 1",
|
||||||
|
`ananke_gitops_flux_source_info{namespace="flux-system",name="flux-system",url="ssh://git@example/repo.git",branch="main",revision="main@sha1:abc123",ready="true",reason="Succeeded"} 1`,
|
||||||
|
`ananke_gitops_kustomization_ready{namespace="flux-system",name="monitoring"} 1`,
|
||||||
|
`ananke_gitops_helmrelease_info{namespace="monitoring",name="grafana",chart="grafana",version="8.5.0",app_version="11.4.0",revision="sha256:abc123",ready="false",reason="Progressing"} 1`,
|
||||||
|
`ananke_gitops_helmrelease_suspended{namespace="monitoring",name="grafana"} 1`,
|
||||||
|
}
|
||||||
|
for _, fragment := range mustContain {
|
||||||
|
if !strings.Contains(body, fragment) {
|
||||||
|
t.Fatalf("missing GitOps metric fragment %q in output:\n%s", fragment, body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
316
testing/service/gitops_snapshot_test.go
Normal file
316
testing/service/gitops_snapshot_test.go
Normal file
@ -0,0 +1,316 @@
|
|||||||
|
package servicequality
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/config"
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/metrics"
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/service"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestParseGitRepositories runs one orchestration or CLI step.
|
||||||
|
// Signature: TestParseGitRepositories(t *testing.T).
|
||||||
|
// Why: branch, revision, and Ready status are the labels the Grafana GitOps
|
||||||
|
// panels rely on for the current Flux source.
|
||||||
|
func TestParseGitRepositories(t *testing.T) {
|
||||||
|
raw := []byte(`{
|
||||||
|
"items": [{
|
||||||
|
"metadata": {"name": "flux-system", "namespace": "flux-system"},
|
||||||
|
"spec": {"url": "ssh://git@example/repo.git", "ref": {"branch": "main"}},
|
||||||
|
"status": {
|
||||||
|
"artifact": {"revision": "main@sha1:abc123"},
|
||||||
|
"conditions": [{"type": "Ready", "status": "True", "reason": "Succeeded"}]
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
}`)
|
||||||
|
got, err := service.TestHookParseGitRepositories(raw)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parseGitRepositories failed: %v", err)
|
||||||
|
}
|
||||||
|
if len(got) != 1 || got[0].Branch != "main" || got[0].Revision != "main@sha1:abc123" || !got[0].Ready {
|
||||||
|
t.Fatalf("unexpected GitRepository parse result: %+v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestParseKustomizations runs one orchestration or CLI step.
|
||||||
|
// Signature: TestParseKustomizations(t *testing.T).
|
||||||
|
// Why: Kustomization source and suspend labels power the detailed GitOps table.
|
||||||
|
func TestParseKustomizations(t *testing.T) {
|
||||||
|
raw := []byte(`{
|
||||||
|
"items": [{
|
||||||
|
"metadata": {"name": "monitoring", "namespace": "flux-system"},
|
||||||
|
"spec": {
|
||||||
|
"path": "./services/monitoring",
|
||||||
|
"sourceRef": {"name": "flux-system"},
|
||||||
|
"suspend": true
|
||||||
|
},
|
||||||
|
"status": {
|
||||||
|
"lastAppliedRevision": "main@sha1:def456",
|
||||||
|
"conditions": [{"type": "Ready", "status": "False", "reason": "DependencyNotReady"}]
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
}`)
|
||||||
|
got, err := service.TestHookParseKustomizations(raw)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parseKustomizations failed: %v", err)
|
||||||
|
}
|
||||||
|
if len(got) != 1 || got[0].SourceNamespace != "flux-system" || got[0].Ready || !got[0].Suspended {
|
||||||
|
t.Fatalf("unexpected Kustomization parse result: %+v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestParseHelmReleases runs one orchestration or CLI step.
|
||||||
|
// Signature: TestParseHelmReleases(t *testing.T).
|
||||||
|
// Why: HelmRelease chart fields have separate status fallbacks from plain Flux
|
||||||
|
// Kustomizations and need their own contract coverage.
|
||||||
|
func TestParseHelmReleases(t *testing.T) {
|
||||||
|
raw := []byte(`{
|
||||||
|
"items": [{
|
||||||
|
"metadata": {"name": "grafana", "namespace": "monitoring"},
|
||||||
|
"spec": {
|
||||||
|
"chart": {"spec": {"chart": "grafana", "version": "~8.5.0"}}
|
||||||
|
},
|
||||||
|
"status": {
|
||||||
|
"lastAttemptedRevision": "8.5.8",
|
||||||
|
"history": [{"chartName": "grafana", "chartVersion": "8.5.8", "appVersion": "11.4.0", "digest": "sha256:abc"}],
|
||||||
|
"conditions": [{"type": "Ready", "status": "True", "reason": "InstallSucceeded"}]
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
}`)
|
||||||
|
got, err := service.TestHookParseHelmReleases(raw)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parseHelmReleases failed: %v", err)
|
||||||
|
}
|
||||||
|
if len(got) != 1 || got[0].Chart != "grafana" || got[0].Version != "8.5.8" || got[0].Revision != "sha256:abc" || !got[0].Ready {
|
||||||
|
t.Fatalf("unexpected HelmRelease parse result: %+v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCollectGitOpsSnapshotPartialFailure runs one orchestration or CLI step.
|
||||||
|
// Signature: TestCollectGitOpsSnapshotPartialFailure(t *testing.T).
|
||||||
|
// Why: a recovering cluster may serve some Flux resource families before
|
||||||
|
// others, and Ananke should publish a partial snapshot instead of failing closed.
|
||||||
|
func TestCollectGitOpsSnapshotPartialFailure(t *testing.T) {
|
||||||
|
runner := func(_ context.Context, _ string, args ...string) ([]byte, error) {
|
||||||
|
joined := strings.Join(args, " ")
|
||||||
|
switch {
|
||||||
|
case strings.Contains(joined, "gitrepositories"):
|
||||||
|
return []byte(`{"items":[]}`), nil
|
||||||
|
case strings.Contains(joined, "kustomizations"):
|
||||||
|
return nil, errors.New("api unavailable")
|
||||||
|
case strings.Contains(joined, "helmreleases"):
|
||||||
|
return []byte(`{"items":[]}`), nil
|
||||||
|
default:
|
||||||
|
return nil, errors.New("unexpected command")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
snapshot := service.TestHookCollectGitOpsSnapshotWithRunner(context.Background(), config.Config{Kubeconfig: "/tmp/kubeconfig"}, runner)
|
||||||
|
if snapshot.ScrapeSuccess {
|
||||||
|
t.Fatalf("expected partial failure")
|
||||||
|
}
|
||||||
|
if snapshot.Errors["kustomization"] == "" {
|
||||||
|
t.Fatalf("expected kustomization error, got %+v", snapshot.Errors)
|
||||||
|
}
|
||||||
|
if summary := service.TestHookGitOpsErrorSummary(snapshot.Errors); !strings.Contains(summary, "kustomization=api unavailable") {
|
||||||
|
t.Fatalf("unexpected error summary: %s", summary)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCollectGitOpsSnapshotParseFailures runs one orchestration or CLI step.
|
||||||
|
// Signature: TestCollectGitOpsSnapshotParseFailures(t *testing.T).
|
||||||
|
// Why: malformed API payloads should be surfaced per resource family so the
|
||||||
|
// exporter can distinguish data errors from transport failures.
|
||||||
|
func TestCollectGitOpsSnapshotParseFailures(t *testing.T) {
|
||||||
|
runner := func(_ context.Context, _ string, _ ...string) ([]byte, error) {
|
||||||
|
return []byte(`not-json`), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
snapshot := service.TestHookCollectGitOpsSnapshotWithRunner(context.Background(), config.Config{}, runner)
|
||||||
|
if snapshot.ScrapeSuccess {
|
||||||
|
t.Fatalf("expected parse failures")
|
||||||
|
}
|
||||||
|
for _, key := range []string{"gitrepository", "kustomization", "helmrelease"} {
|
||||||
|
if snapshot.Errors[key] == "" {
|
||||||
|
t.Fatalf("expected %s parse error, got %+v", key, snapshot.Errors)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCollectGitOpsSnapshotSuccess runs one orchestration or CLI step.
|
||||||
|
// Signature: TestCollectGitOpsSnapshotSuccess(t *testing.T).
|
||||||
|
// Why: successful collection should preserve non-branch Git refs as the source
|
||||||
|
// label rather than hiding them as unknown.
|
||||||
|
func TestCollectGitOpsSnapshotSuccess(t *testing.T) {
|
||||||
|
runner := func(_ context.Context, _ string, args ...string) ([]byte, error) {
|
||||||
|
joined := strings.Join(args, " ")
|
||||||
|
switch {
|
||||||
|
case strings.Contains(joined, "gitrepositories"):
|
||||||
|
return []byte(`{"items":[{"metadata":{"name":"flux-system","namespace":"flux-system"},"spec":{"ref":{"tag":"v1.0.0"}}}]}`), nil
|
||||||
|
case strings.Contains(joined, "kustomizations"):
|
||||||
|
return []byte(`{"items":[]}`), nil
|
||||||
|
case strings.Contains(joined, "helmreleases"):
|
||||||
|
return []byte(`{"items":[]}`), nil
|
||||||
|
default:
|
||||||
|
return nil, errors.New("unexpected command")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
snapshot := service.TestHookCollectGitOpsSnapshotWithRunner(context.Background(), config.Config{}, runner)
|
||||||
|
if !snapshot.ScrapeSuccess || len(snapshot.FluxSources) != 1 || snapshot.FluxSources[0].Branch != "v1.0.0" {
|
||||||
|
t.Fatalf("unexpected snapshot: %+v", snapshot)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestMaybeStartGitOpsSnapshot runs one orchestration or CLI step.
|
||||||
|
// Signature: TestMaybeStartGitOpsSnapshot(t *testing.T).
|
||||||
|
// Why: Ananke must collect GitOps status asynchronously so a slow Kubernetes API
|
||||||
|
// cannot delay UPS polling.
|
||||||
|
func TestMaybeStartGitOpsSnapshot(t *testing.T) {
|
||||||
|
runner := func(_ context.Context, _ string, _ ...string) ([]byte, error) {
|
||||||
|
return []byte(`{"items":[]}`), nil
|
||||||
|
}
|
||||||
|
exporter := metrics.New()
|
||||||
|
done := make(chan struct{}, 1)
|
||||||
|
lastRun := time.Time{}
|
||||||
|
cfg := config.Config{GitOps: config.GitOps{Enabled: true, PollSeconds: 1}}
|
||||||
|
withRunner := func() bool {
|
||||||
|
return service.TestHookCollectGitOpsSnapshotWithRunner(context.Background(), config.Config{}, runner).ScrapeSuccess
|
||||||
|
}
|
||||||
|
if !withRunner() {
|
||||||
|
t.Fatalf("expected injected runner sanity check to pass")
|
||||||
|
}
|
||||||
|
if running := service.TestHookMaybeStartGitOpsSnapshotWithRunner(context.Background(), cfg, exporter, log.New(io.Discard, "", 0), &lastRun, false, done, runner); !running {
|
||||||
|
t.Fatalf("expected scrape to start")
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-done:
|
||||||
|
case <-time.After(2 * time.Second):
|
||||||
|
t.Fatalf("timed out waiting for scrape")
|
||||||
|
}
|
||||||
|
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
exporter.Handler("/metrics").ServeHTTP(rr, req)
|
||||||
|
if !strings.Contains(rr.Body.String(), "ananke_gitops_scrape_success") {
|
||||||
|
t.Fatalf("expected GitOps scrape metric, got:\n%s", rr.Body.String())
|
||||||
|
}
|
||||||
|
if running := service.TestHookMaybeStartGitOpsSnapshot(context.Background(), cfg, exporter, log.New(io.Discard, "", 0), &lastRun, true, done); !running {
|
||||||
|
t.Fatalf("running scrape should stay marked running")
|
||||||
|
}
|
||||||
|
cfg.GitOps.Enabled = false
|
||||||
|
if running := service.TestHookMaybeStartGitOpsSnapshot(context.Background(), cfg, exporter, nil, &lastRun, false, done); running {
|
||||||
|
t.Fatalf("disabled GitOps scrape should not start")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRunGitOpsKubectlOutput runs one orchestration or CLI step.
|
||||||
|
// Signature: TestRunGitOpsKubectlOutput(t *testing.T).
|
||||||
|
// Why: command diagnostics should preserve stderr for operator-visible errors.
|
||||||
|
func TestRunGitOpsKubectlOutput(t *testing.T) {
|
||||||
|
out, err := service.TestHookRunGitOpsKubectlOutput(context.Background(), "sh", "-c", "printf ok")
|
||||||
|
if err != nil || string(out) != "ok" {
|
||||||
|
t.Fatalf("unexpected command success result: out=%q err=%v", out, err)
|
||||||
|
}
|
||||||
|
if _, err := service.TestHookRunGitOpsKubectlOutput(context.Background(), "sh", "-c", "echo bad >&2; exit 7"); err == nil || !strings.Contains(err.Error(), "bad") {
|
||||||
|
t.Fatalf("expected stderr to be preserved in error, got %v", err)
|
||||||
|
}
|
||||||
|
if _, err := service.TestHookRunGitOpsKubectlOutput(context.Background(), "sh", "-c", "exit 8"); err == nil {
|
||||||
|
t.Fatalf("expected empty-stderr command failure")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestGitOpsParseErrors runs one orchestration or CLI step.
|
||||||
|
// Signature: TestGitOpsParseErrors(t *testing.T).
|
||||||
|
// Why: invalid JSON should return parse errors for every Flux resource parser.
|
||||||
|
func TestGitOpsParseErrors(t *testing.T) {
|
||||||
|
if _, err := service.TestHookParseGitRepositories([]byte(`not-json`)); err == nil {
|
||||||
|
t.Fatalf("expected GitRepository parse error")
|
||||||
|
}
|
||||||
|
if _, err := service.TestHookParseKustomizations([]byte(`not-json`)); err == nil {
|
||||||
|
t.Fatalf("expected Kustomization parse error")
|
||||||
|
}
|
||||||
|
if _, err := service.TestHookParseHelmReleases([]byte(`not-json`)); err == nil {
|
||||||
|
t.Fatalf("expected HelmRelease parse error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestMaybeStartGitOpsSnapshotSkipsFreshSampleAndNilExporter runs one
|
||||||
|
// orchestration or CLI step.
|
||||||
|
// Signature: TestMaybeStartGitOpsSnapshotSkipsFreshSampleAndNilExporter(t *testing.T).
|
||||||
|
// Why: disabled or already-fresh scrapes should be cheap no-ops in the daemon
|
||||||
|
// loop.
|
||||||
|
func TestMaybeStartGitOpsSnapshotSkipsFreshSampleAndNilExporter(t *testing.T) {
|
||||||
|
cfg := config.Config{GitOps: config.GitOps{Enabled: true, PollSeconds: 60}}
|
||||||
|
done := make(chan struct{}, 1)
|
||||||
|
lastRun := time.Now()
|
||||||
|
if running := service.TestHookMaybeStartGitOpsSnapshot(context.Background(), cfg, nil, nil, &lastRun, false, done); running {
|
||||||
|
t.Fatalf("nil exporter should not start scrape")
|
||||||
|
}
|
||||||
|
if running := service.TestHookMaybeStartGitOpsSnapshot(context.Background(), cfg, metrics.New(), nil, &lastRun, false, done); running {
|
||||||
|
t.Fatalf("fresh sample should not start another scrape")
|
||||||
|
}
|
||||||
|
runner := func(_ context.Context, _ string, _ ...string) ([]byte, error) {
|
||||||
|
return []byte(`{"items":[]}`), nil
|
||||||
|
}
|
||||||
|
if running := service.TestHookMaybeStartGitOpsSnapshotWithRunner(context.Background(), cfg, metrics.New(), nil, &lastRun, true, done, runner); !running {
|
||||||
|
t.Fatalf("already-running scrape should remain marked running")
|
||||||
|
}
|
||||||
|
cfg.GitOps.Enabled = false
|
||||||
|
lastRun = time.Time{}
|
||||||
|
if running := service.TestHookMaybeStartGitOpsSnapshotWithRunner(context.Background(), cfg, metrics.New(), nil, &lastRun, false, done, runner); running {
|
||||||
|
t.Fatalf("disabled scrape should not start through runner hook")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestMaybeStartGitOpsSnapshotLogsPartialFailureWithoutLogger runs one
|
||||||
|
// orchestration or CLI step.
|
||||||
|
// Signature: TestMaybeStartGitOpsSnapshotLogsPartialFailureWithoutLogger(t *testing.T).
|
||||||
|
// Why: scrape failure handling should not depend on a logger being present.
|
||||||
|
func TestMaybeStartGitOpsSnapshotLogsPartialFailureWithoutLogger(t *testing.T) {
|
||||||
|
done := make(chan struct{}, 1)
|
||||||
|
lastRun := time.Time{}
|
||||||
|
cfg := config.Config{GitOps: config.GitOps{Enabled: true, PollSeconds: 1}}
|
||||||
|
runner := func(_ context.Context, _ string, _ ...string) ([]byte, error) {
|
||||||
|
return nil, errors.New("api down")
|
||||||
|
}
|
||||||
|
if running := service.TestHookMaybeStartGitOpsSnapshotWithRunner(context.Background(), cfg, metrics.New(), nil, &lastRun, false, done, runner); !running {
|
||||||
|
t.Fatalf("expected scrape to start")
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-done:
|
||||||
|
case <-time.After(2 * time.Second):
|
||||||
|
t.Fatalf("timed out waiting for failed scrape")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestGitOpsHelpers runs one orchestration or CLI step.
|
||||||
|
// Signature: TestGitOpsHelpers(t *testing.T).
|
||||||
|
// Why: tiny label helpers carry dashboard readability contracts and are worth
|
||||||
|
// locking down.
|
||||||
|
func TestGitOpsHelpers(t *testing.T) {
|
||||||
|
if got := service.TestHookGitOpsDefaultString(" ", "fallback"); got != "fallback" {
|
||||||
|
t.Fatalf("unexpected defaultString fallback: %q", got)
|
||||||
|
}
|
||||||
|
if got := service.TestHookGitOpsFirstNonEmpty(" ", "second"); got != "second" {
|
||||||
|
t.Fatalf("unexpected firstNonEmpty result: %q", got)
|
||||||
|
}
|
||||||
|
if got := service.TestHookGitOpsFirstNonEmpty(" ", ""); got != "" {
|
||||||
|
t.Fatalf("unexpected empty firstNonEmpty result: %q", got)
|
||||||
|
}
|
||||||
|
if got := service.TestHookGitOpsErrorSummary(nil); got != "none" {
|
||||||
|
t.Fatalf("unexpected empty summary: %q", got)
|
||||||
|
}
|
||||||
|
ready, reason := service.TestHookFluxReadyMissing()
|
||||||
|
if ready || reason != "ReadyMissing" {
|
||||||
|
t.Fatalf("unexpected empty Ready condition parse: ready=%t reason=%s", ready, reason)
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user