ananke: refactor orchestrator, enforce quality gates, and harden startup checks
This commit is contained in:
parent
baead1426e
commit
c2c79e5821
19
Makefile
19
Makefile
@ -1,4 +1,4 @@
|
|||||||
.PHONY: build test fmt tidy install drill-list drill-run
|
.PHONY: build test test-all quality-gate hygiene lint coverage-report coverage-gate fmt tidy install drill-list drill-run
|
||||||
|
|
||||||
build:
|
build:
|
||||||
go build -o dist/ananke ./cmd/ananke
|
go build -o dist/ananke ./cmd/ananke
|
||||||
@ -6,6 +6,23 @@ build:
|
|||||||
test:
|
test:
|
||||||
go test ./...
|
go test ./...
|
||||||
|
|
||||||
|
test-all: test hygiene lint coverage-report
|
||||||
|
|
||||||
|
quality-gate:
|
||||||
|
./scripts/quality_gate.sh
|
||||||
|
|
||||||
|
hygiene:
|
||||||
|
cd testing && go test ./hygiene
|
||||||
|
|
||||||
|
lint:
|
||||||
|
./scripts/lint.sh
|
||||||
|
|
||||||
|
coverage-report:
|
||||||
|
cd testing && go test ./coverage -run TestPerFileCoverageReport -count=1 -v
|
||||||
|
|
||||||
|
coverage-gate:
|
||||||
|
cd testing && ANANKE_ENFORCE_COVERAGE=1 ANANKE_PER_FILE_COVERAGE_TARGET=95 go test ./coverage -run TestPerFileCoverageReport -count=1 -v
|
||||||
|
|
||||||
fmt:
|
fmt:
|
||||||
gofmt -w ./cmd ./internal
|
gofmt -w ./cmd ./internal
|
||||||
|
|
||||||
|
|||||||
@ -48,6 +48,9 @@ startup:
|
|||||||
api_poll_seconds: 2
|
api_poll_seconds: 2
|
||||||
shutdown_cooldown_seconds: 45
|
shutdown_cooldown_seconds: 45
|
||||||
minimum_battery_percent: 20
|
minimum_battery_percent: 20
|
||||||
|
require_node_inventory_reachability: true
|
||||||
|
node_inventory_reachability_wait_seconds: 300
|
||||||
|
node_inventory_reachability_poll_seconds: 5
|
||||||
required_node_labels:
|
required_node_labels:
|
||||||
titan-09:
|
titan-09:
|
||||||
ananke.bstein.dev/harbor-bootstrap: "true"
|
ananke.bstein.dev/harbor-bootstrap: "true"
|
||||||
@ -78,6 +81,15 @@ startup:
|
|||||||
service_checklist_wait_seconds: 420
|
service_checklist_wait_seconds: 420
|
||||||
service_checklist_poll_seconds: 5
|
service_checklist_poll_seconds: 5
|
||||||
service_checklist_stability_seconds: 120
|
service_checklist_stability_seconds: 120
|
||||||
|
service_checklist_auth:
|
||||||
|
mode: keycloak_robotuser
|
||||||
|
keycloak_base_url: https://sso.bstein.dev
|
||||||
|
realm: atlas
|
||||||
|
robot_username: robotuser
|
||||||
|
admin_secret_namespace: sso
|
||||||
|
admin_secret_name: keycloak-admin
|
||||||
|
admin_secret_username_key: username
|
||||||
|
admin_secret_password_key: password
|
||||||
service_checklist:
|
service_checklist:
|
||||||
- name: gitea-api
|
- name: gitea-api
|
||||||
url: https://scm.bstein.dev/api/healthz
|
url: https://scm.bstein.dev/api/healthz
|
||||||
@ -99,10 +111,20 @@ startup:
|
|||||||
accepted_statuses: [401]
|
accepted_statuses: [401]
|
||||||
body_contains: unauthorized
|
body_contains: unauthorized
|
||||||
timeout_seconds: 12
|
timeout_seconds: 12
|
||||||
- name: longhorn-auth
|
- name: longhorn-api-user-session
|
||||||
url: https://longhorn.bstein.dev/
|
url: https://longhorn.bstein.dev/v1
|
||||||
accepted_statuses: [200, 302]
|
accepted_statuses: [200]
|
||||||
|
require_robot_auth: true
|
||||||
|
follow_redirects: true
|
||||||
|
final_url_contains: /v1
|
||||||
|
final_url_not_contains: /oauth2/sign_in
|
||||||
|
body_contains: '"id":"v1"'
|
||||||
timeout_seconds: 12
|
timeout_seconds: 12
|
||||||
|
require_critical_service_endpoints: true
|
||||||
|
critical_service_endpoint_wait_seconds: 420
|
||||||
|
critical_service_endpoint_poll_seconds: 5
|
||||||
|
critical_service_endpoints:
|
||||||
|
- monitoring/victoria-metrics-single-server
|
||||||
require_ingress_checklist: true
|
require_ingress_checklist: true
|
||||||
ingress_checklist_wait_seconds: 420
|
ingress_checklist_wait_seconds: 420
|
||||||
ingress_checklist_poll_seconds: 5
|
ingress_checklist_poll_seconds: 5
|
||||||
@ -139,10 +161,6 @@ shutdown:
|
|||||||
drain_parallelism: 6
|
drain_parallelism: 6
|
||||||
scale_parallelism: 8
|
scale_parallelism: 8
|
||||||
ssh_parallelism: 8
|
ssh_parallelism: 8
|
||||||
poweroff_enabled: false
|
|
||||||
poweroff_delay_seconds: 25
|
|
||||||
poweroff_local_host: false
|
|
||||||
extra_poweroff_hosts: []
|
|
||||||
ups:
|
ups:
|
||||||
enabled: true
|
enabled: true
|
||||||
provider: nut
|
provider: nut
|
||||||
@ -170,6 +188,7 @@ metrics:
|
|||||||
path: /metrics
|
path: /metrics
|
||||||
state:
|
state:
|
||||||
dir: /var/lib/ananke
|
dir: /var/lib/ananke
|
||||||
|
reports_dir: /var/lib/ananke/reports
|
||||||
run_history_path: /var/lib/ananke/runs.json
|
run_history_path: /var/lib/ananke/runs.json
|
||||||
lock_path: /var/lib/ananke/ananke.lock
|
lock_path: /var/lib/ananke/ananke.lock
|
||||||
intent_path: /var/lib/ananke/intent.json
|
intent_path: /var/lib/ananke/intent.json
|
||||||
|
|||||||
@ -114,6 +114,9 @@ startup:
|
|||||||
api_poll_seconds: 2
|
api_poll_seconds: 2
|
||||||
shutdown_cooldown_seconds: 45
|
shutdown_cooldown_seconds: 45
|
||||||
minimum_battery_percent: 20
|
minimum_battery_percent: 20
|
||||||
|
require_node_inventory_reachability: true
|
||||||
|
node_inventory_reachability_wait_seconds: 300
|
||||||
|
node_inventory_reachability_poll_seconds: 5
|
||||||
required_node_labels:
|
required_node_labels:
|
||||||
titan-09:
|
titan-09:
|
||||||
ananke.bstein.dev/harbor-bootstrap: "true"
|
ananke.bstein.dev/harbor-bootstrap: "true"
|
||||||
@ -144,6 +147,15 @@ startup:
|
|||||||
service_checklist_wait_seconds: 420
|
service_checklist_wait_seconds: 420
|
||||||
service_checklist_poll_seconds: 5
|
service_checklist_poll_seconds: 5
|
||||||
service_checklist_stability_seconds: 120
|
service_checklist_stability_seconds: 120
|
||||||
|
service_checklist_auth:
|
||||||
|
mode: keycloak_robotuser
|
||||||
|
keycloak_base_url: https://sso.bstein.dev
|
||||||
|
realm: atlas
|
||||||
|
robot_username: robotuser
|
||||||
|
admin_secret_namespace: sso
|
||||||
|
admin_secret_name: keycloak-admin
|
||||||
|
admin_secret_username_key: username
|
||||||
|
admin_secret_password_key: password
|
||||||
service_checklist:
|
service_checklist:
|
||||||
- name: gitea-api
|
- name: gitea-api
|
||||||
url: https://scm.bstein.dev/api/healthz
|
url: https://scm.bstein.dev/api/healthz
|
||||||
@ -165,10 +177,20 @@ startup:
|
|||||||
accepted_statuses: [401]
|
accepted_statuses: [401]
|
||||||
body_contains: unauthorized
|
body_contains: unauthorized
|
||||||
timeout_seconds: 12
|
timeout_seconds: 12
|
||||||
- name: longhorn-auth
|
- name: longhorn-api-user-session
|
||||||
url: https://longhorn.bstein.dev/
|
url: https://longhorn.bstein.dev/v1
|
||||||
accepted_statuses: [200, 302]
|
accepted_statuses: [200]
|
||||||
|
require_robot_auth: true
|
||||||
|
follow_redirects: true
|
||||||
|
final_url_contains: /v1
|
||||||
|
final_url_not_contains: /oauth2/sign_in
|
||||||
|
body_contains: '"id":"v1"'
|
||||||
timeout_seconds: 12
|
timeout_seconds: 12
|
||||||
|
require_critical_service_endpoints: true
|
||||||
|
critical_service_endpoint_wait_seconds: 420
|
||||||
|
critical_service_endpoint_poll_seconds: 5
|
||||||
|
critical_service_endpoints:
|
||||||
|
- monitoring/victoria-metrics-single-server
|
||||||
require_ingress_checklist: true
|
require_ingress_checklist: true
|
||||||
ingress_checklist_wait_seconds: 420
|
ingress_checklist_wait_seconds: 420
|
||||||
ingress_checklist_poll_seconds: 5
|
ingress_checklist_poll_seconds: 5
|
||||||
@ -205,10 +227,6 @@ shutdown:
|
|||||||
drain_parallelism: 6
|
drain_parallelism: 6
|
||||||
scale_parallelism: 8
|
scale_parallelism: 8
|
||||||
ssh_parallelism: 8
|
ssh_parallelism: 8
|
||||||
poweroff_enabled: false
|
|
||||||
poweroff_delay_seconds: 25
|
|
||||||
poweroff_local_host: false
|
|
||||||
extra_poweroff_hosts: []
|
|
||||||
ups:
|
ups:
|
||||||
enabled: true
|
enabled: true
|
||||||
provider: nut
|
provider: nut
|
||||||
@ -236,6 +254,7 @@ metrics:
|
|||||||
path: /metrics
|
path: /metrics
|
||||||
state:
|
state:
|
||||||
dir: /var/lib/ananke
|
dir: /var/lib/ananke
|
||||||
|
reports_dir: /var/lib/ananke/reports
|
||||||
run_history_path: /var/lib/ananke/runs.json
|
run_history_path: /var/lib/ananke/runs.json
|
||||||
lock_path: /var/lib/ananke/ananke.lock
|
lock_path: /var/lib/ananke/ananke.lock
|
||||||
intent_path: /var/lib/ananke/intent.json
|
intent_path: /var/lib/ananke/intent.json
|
||||||
|
|||||||
@ -114,6 +114,9 @@ startup:
|
|||||||
api_poll_seconds: 2
|
api_poll_seconds: 2
|
||||||
shutdown_cooldown_seconds: 45
|
shutdown_cooldown_seconds: 45
|
||||||
minimum_battery_percent: 20
|
minimum_battery_percent: 20
|
||||||
|
require_node_inventory_reachability: true
|
||||||
|
node_inventory_reachability_wait_seconds: 300
|
||||||
|
node_inventory_reachability_poll_seconds: 5
|
||||||
required_node_labels:
|
required_node_labels:
|
||||||
titan-09:
|
titan-09:
|
||||||
ananke.bstein.dev/harbor-bootstrap: "true"
|
ananke.bstein.dev/harbor-bootstrap: "true"
|
||||||
@ -144,6 +147,15 @@ startup:
|
|||||||
service_checklist_wait_seconds: 420
|
service_checklist_wait_seconds: 420
|
||||||
service_checklist_poll_seconds: 5
|
service_checklist_poll_seconds: 5
|
||||||
service_checklist_stability_seconds: 120
|
service_checklist_stability_seconds: 120
|
||||||
|
service_checklist_auth:
|
||||||
|
mode: keycloak_robotuser
|
||||||
|
keycloak_base_url: https://sso.bstein.dev
|
||||||
|
realm: atlas
|
||||||
|
robot_username: robotuser
|
||||||
|
admin_secret_namespace: sso
|
||||||
|
admin_secret_name: keycloak-admin
|
||||||
|
admin_secret_username_key: username
|
||||||
|
admin_secret_password_key: password
|
||||||
service_checklist:
|
service_checklist:
|
||||||
- name: gitea-api
|
- name: gitea-api
|
||||||
url: https://scm.bstein.dev/api/healthz
|
url: https://scm.bstein.dev/api/healthz
|
||||||
@ -165,10 +177,20 @@ startup:
|
|||||||
accepted_statuses: [401]
|
accepted_statuses: [401]
|
||||||
body_contains: unauthorized
|
body_contains: unauthorized
|
||||||
timeout_seconds: 12
|
timeout_seconds: 12
|
||||||
- name: longhorn-auth
|
- name: longhorn-api-user-session
|
||||||
url: https://longhorn.bstein.dev/
|
url: https://longhorn.bstein.dev/v1
|
||||||
accepted_statuses: [200, 302]
|
accepted_statuses: [200]
|
||||||
|
require_robot_auth: true
|
||||||
|
follow_redirects: true
|
||||||
|
final_url_contains: /v1
|
||||||
|
final_url_not_contains: /oauth2/sign_in
|
||||||
|
body_contains: '"id":"v1"'
|
||||||
timeout_seconds: 12
|
timeout_seconds: 12
|
||||||
|
require_critical_service_endpoints: true
|
||||||
|
critical_service_endpoint_wait_seconds: 420
|
||||||
|
critical_service_endpoint_poll_seconds: 5
|
||||||
|
critical_service_endpoints:
|
||||||
|
- monitoring/victoria-metrics-single-server
|
||||||
require_ingress_checklist: true
|
require_ingress_checklist: true
|
||||||
ingress_checklist_wait_seconds: 420
|
ingress_checklist_wait_seconds: 420
|
||||||
ingress_checklist_poll_seconds: 5
|
ingress_checklist_poll_seconds: 5
|
||||||
@ -205,10 +227,6 @@ shutdown:
|
|||||||
drain_parallelism: 6
|
drain_parallelism: 6
|
||||||
scale_parallelism: 8
|
scale_parallelism: 8
|
||||||
ssh_parallelism: 8
|
ssh_parallelism: 8
|
||||||
poweroff_enabled: false
|
|
||||||
poweroff_delay_seconds: 25
|
|
||||||
poweroff_local_host: false
|
|
||||||
extra_poweroff_hosts: []
|
|
||||||
ups:
|
ups:
|
||||||
enabled: true
|
enabled: true
|
||||||
provider: nut
|
provider: nut
|
||||||
@ -236,6 +254,7 @@ metrics:
|
|||||||
path: /metrics
|
path: /metrics
|
||||||
state:
|
state:
|
||||||
dir: /var/lib/ananke
|
dir: /var/lib/ananke
|
||||||
|
reports_dir: /var/lib/ananke/reports
|
||||||
run_history_path: /var/lib/ananke/runs.json
|
run_history_path: /var/lib/ananke/runs.json
|
||||||
lock_path: /var/lib/ananke/ananke.lock
|
lock_path: /var/lib/ananke/ananke.lock
|
||||||
intent_path: /var/lib/ananke/intent.json
|
intent_path: /var/lib/ananke/intent.json
|
||||||
|
|||||||
286
internal/cluster/orchestrator_service_auth.go
Normal file
286
internal/cluster/orchestrator_service_auth.go
Normal file
@ -0,0 +1,286 @@
|
|||||||
|
package cluster
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"crypto/tls"
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/http/cookiejar"
|
||||||
|
neturl "net/url"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/config"
|
||||||
|
)
|
||||||
|
|
||||||
|
// JSON payload shapes decoded while establishing the robot-user session.
// Only the fields this package reads are declared; any other keys in the
// payloads are ignored by encoding/json.
type (
	// keycloakTokenResponse is the token endpoint reply; AccessToken is sent
	// as the bearer credential on the admin API calls.
	keycloakTokenResponse struct {
		AccessToken string `json:"access_token"`
	}

	// keycloakUser is one entry of the admin user-lookup result list.
	keycloakUser struct {
		ID string `json:"id"`
	}

	// keycloakImpersonationResponse is the impersonation endpoint reply;
	// Redirect is the URL requested next to initialize the session.
	keycloakImpersonationResponse struct {
		Redirect string `json:"redirect"`
	}

	// kubernetesSecret is the subset of `kubectl get secret -o json` output
	// that is consumed: the key -> base64-encoded value map.
	kubernetesSecret struct {
		Data map[string]string `json:"data"`
	}
)
|
||||||
|
|
||||||
|
// checklistAuthHTTPClient runs one orchestration or CLI step.
|
||||||
|
// Signature: (o *Orchestrator) checklistAuthHTTPClient(ctx context.Context, timeout time.Duration, insecureSkipTLS bool) (*http.Client, error).
|
||||||
|
// Why: startup checklist checks that require real user behavior need an
|
||||||
|
// authenticated robotuser browser-like session before probing service pages.
|
||||||
|
func (o *Orchestrator) checklistAuthHTTPClient(ctx context.Context, timeout time.Duration, insecureSkipTLS bool) (*http.Client, error) {
|
||||||
|
jar, err := cookiejar.New(nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("create cookie jar: %w", err)
|
||||||
|
}
|
||||||
|
transport := &http.Transport{}
|
||||||
|
if insecureSkipTLS {
|
||||||
|
transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
|
||||||
|
}
|
||||||
|
client := &http.Client{
|
||||||
|
Timeout: timeout,
|
||||||
|
Transport: transport,
|
||||||
|
Jar: jar,
|
||||||
|
}
|
||||||
|
if err := o.authenticateRobotChecklistSession(ctx, client); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return client, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// authenticateRobotChecklistSession runs one orchestration or CLI step.
|
||||||
|
// Signature: (o *Orchestrator) authenticateRobotChecklistSession(ctx context.Context, client *http.Client) error.
|
||||||
|
// Why: authenticated checklist probes must reflect what a human sees after
|
||||||
|
// Keycloak login, not only pre-auth redirects.
|
||||||
|
func (o *Orchestrator) authenticateRobotChecklistSession(ctx context.Context, client *http.Client) error {
|
||||||
|
auth := o.cfg.Startup.ServiceChecklistAuth
|
||||||
|
mode := strings.TrimSpace(auth.Mode)
|
||||||
|
if mode == "" || mode == "none" {
|
||||||
|
return fmt.Errorf("startup checklist auth mode is disabled")
|
||||||
|
}
|
||||||
|
if mode != "keycloak_robotuser" {
|
||||||
|
return fmt.Errorf("unsupported startup checklist auth mode %q", mode)
|
||||||
|
}
|
||||||
|
|
||||||
|
adminUser, adminPassword, err := o.keycloakAdminCredentials(ctx, auth)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
adminToken, err := o.keycloakAdminToken(ctx, client, auth, adminUser, adminPassword)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
robotUserID, err := o.keycloakRobotUserID(ctx, client, auth, adminToken)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
redirectURL, err := o.keycloakImpersonationRedirect(ctx, client, auth, adminToken, robotUserID)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(redirectURL) == "" {
|
||||||
|
redirectURL = keycloakBaseURL(auth) + "/realms/" + strings.TrimSpace(auth.Realm) + "/account/"
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, redirectURL, nil)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("build robot redirect request: %w", err)
|
||||||
|
}
|
||||||
|
req.Header.Set("User-Agent", "ananke/startup-checklist")
|
||||||
|
resp, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("initialize robot session redirect: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 1024))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// keycloakAdminCredentials runs one orchestration or CLI step.
|
||||||
|
// Signature: (o *Orchestrator) keycloakAdminCredentials(ctx context.Context, auth config.ServiceChecklistAuthSettings) (string, string, error).
|
||||||
|
// Why: robotuser impersonation uses a cluster-managed admin secret so startup
|
||||||
|
// checks do not rely on interactive credentials.
|
||||||
|
func (o *Orchestrator) keycloakAdminCredentials(ctx context.Context, auth config.ServiceChecklistAuthSettings) (string, string, error) {
|
||||||
|
namespace := strings.TrimSpace(auth.AdminSecretNamespace)
|
||||||
|
name := strings.TrimSpace(auth.AdminSecretName)
|
||||||
|
userKey := strings.TrimSpace(auth.AdminSecretUsernameKey)
|
||||||
|
passwordKey := strings.TrimSpace(auth.AdminSecretPasswordKey)
|
||||||
|
|
||||||
|
username, err := o.kubernetesSecretValue(ctx, namespace, name, userKey)
|
||||||
|
if err != nil {
|
||||||
|
return "", "", fmt.Errorf("read keycloak admin username from secret %s/%s: %w", namespace, name, err)
|
||||||
|
}
|
||||||
|
password, err := o.kubernetesSecretValue(ctx, namespace, name, passwordKey)
|
||||||
|
if err != nil {
|
||||||
|
return "", "", fmt.Errorf("read keycloak admin password from secret %s/%s: %w", namespace, name, err)
|
||||||
|
}
|
||||||
|
return username, password, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// kubernetesSecretValue runs one orchestration or CLI step.
|
||||||
|
// Signature: (o *Orchestrator) kubernetesSecretValue(ctx context.Context, namespace string, name string, key string) (string, error).
|
||||||
|
// Why: checklist auth depends on secret-backed credentials and should decode
|
||||||
|
// them directly from Kubernetes rather than shelling out to external tools.
|
||||||
|
func (o *Orchestrator) kubernetesSecretValue(ctx context.Context, namespace string, name string, key string) (string, error) {
|
||||||
|
out, err := o.kubectl(ctx, 25*time.Second, "-n", namespace, "get", "secret", name, "-o", "json")
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("kubectl get secret: %w", err)
|
||||||
|
}
|
||||||
|
var doc kubernetesSecret
|
||||||
|
if err := json.Unmarshal([]byte(out), &doc); err != nil {
|
||||||
|
return "", fmt.Errorf("decode secret json: %w", err)
|
||||||
|
}
|
||||||
|
encoded, ok := doc.Data[key]
|
||||||
|
if !ok {
|
||||||
|
return "", fmt.Errorf("key %q not present in secret", key)
|
||||||
|
}
|
||||||
|
decoded, err := base64.StdEncoding.DecodeString(strings.TrimSpace(encoded))
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("decode base64 secret value: %w", err)
|
||||||
|
}
|
||||||
|
value := strings.TrimSpace(string(decoded))
|
||||||
|
if value == "" {
|
||||||
|
return "", fmt.Errorf("decoded value is empty")
|
||||||
|
}
|
||||||
|
return value, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// keycloakAdminToken runs one orchestration or CLI step.
|
||||||
|
// Signature: (o *Orchestrator) keycloakAdminToken(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminUser string, adminPassword string) (string, error).
|
||||||
|
// Why: admin API access is needed to impersonate robotuser for deterministic
|
||||||
|
// user-journey checks across OIDC-gated services.
|
||||||
|
func (o *Orchestrator) keycloakAdminToken(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminUser string, adminPassword string) (string, error) {
|
||||||
|
form := neturl.Values{}
|
||||||
|
form.Set("grant_type", "password")
|
||||||
|
form.Set("client_id", "admin-cli")
|
||||||
|
form.Set("username", adminUser)
|
||||||
|
form.Set("password", adminPassword)
|
||||||
|
|
||||||
|
tokenURL := keycloakBaseURL(auth) + "/realms/master/protocol/openid-connect/token"
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, tokenURL, strings.NewReader(form.Encode()))
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("build admin token request: %w", err)
|
||||||
|
}
|
||||||
|
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||||
|
req.Header.Set("User-Agent", "ananke/startup-checklist")
|
||||||
|
|
||||||
|
resp, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("request admin token: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 64*1024))
|
||||||
|
if resp.StatusCode/100 != 2 {
|
||||||
|
return "", fmt.Errorf("admin token request failed status=%d body=%q", resp.StatusCode, compactHTTPBody(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
var payload keycloakTokenResponse
|
||||||
|
if err := json.Unmarshal(body, &payload); err != nil {
|
||||||
|
return "", fmt.Errorf("decode admin token response: %w", err)
|
||||||
|
}
|
||||||
|
token := strings.TrimSpace(payload.AccessToken)
|
||||||
|
if token == "" {
|
||||||
|
return "", fmt.Errorf("admin token response missing access_token")
|
||||||
|
}
|
||||||
|
return token, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// keycloakRobotUserID runs one orchestration or CLI step.
|
||||||
|
// Signature: (o *Orchestrator) keycloakRobotUserID(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string) (string, error).
|
||||||
|
// Why: impersonation requires the concrete user id and should fail fast when
|
||||||
|
// robotuser is missing from the realm.
|
||||||
|
func (o *Orchestrator) keycloakRobotUserID(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string) (string, error) {
|
||||||
|
base := keycloakBaseURL(auth)
|
||||||
|
realm := strings.TrimSpace(auth.Realm)
|
||||||
|
username := strings.TrimSpace(auth.RobotUsername)
|
||||||
|
query := neturl.Values{}
|
||||||
|
query.Set("username", username)
|
||||||
|
query.Set("exact", "true")
|
||||||
|
usersURL := base + "/admin/realms/" + realm + "/users?" + query.Encode()
|
||||||
|
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, usersURL, nil)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("build robot user lookup request: %w", err)
|
||||||
|
}
|
||||||
|
req.Header.Set("Authorization", "Bearer "+adminToken)
|
||||||
|
req.Header.Set("User-Agent", "ananke/startup-checklist")
|
||||||
|
|
||||||
|
resp, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("lookup robot user: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 64*1024))
|
||||||
|
if resp.StatusCode/100 != 2 {
|
||||||
|
return "", fmt.Errorf("robot user lookup failed status=%d body=%q", resp.StatusCode, compactHTTPBody(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
var users []keycloakUser
|
||||||
|
if err := json.Unmarshal(body, &users); err != nil {
|
||||||
|
return "", fmt.Errorf("decode robot user lookup response: %w", err)
|
||||||
|
}
|
||||||
|
if len(users) == 0 || strings.TrimSpace(users[0].ID) == "" {
|
||||||
|
return "", fmt.Errorf("robot user %q not found in realm %q", username, realm)
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(users[0].ID), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// keycloakImpersonationRedirect runs one orchestration or CLI step.
|
||||||
|
// Signature: (o *Orchestrator) keycloakImpersonationRedirect(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string, robotUserID string) (string, error).
|
||||||
|
// Why: opening a real impersonated browser session guarantees checks evaluate
|
||||||
|
// post-login app behavior instead of only auth-gateway redirects.
|
||||||
|
func (o *Orchestrator) keycloakImpersonationRedirect(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string, robotUserID string) (string, error) {
|
||||||
|
base := keycloakBaseURL(auth)
|
||||||
|
realm := strings.TrimSpace(auth.Realm)
|
||||||
|
impersonateURL := base + "/admin/realms/" + realm + "/users/" + strings.TrimSpace(robotUserID) + "/impersonation"
|
||||||
|
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, impersonateURL, http.NoBody)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("build robot impersonation request: %w", err)
|
||||||
|
}
|
||||||
|
req.Header.Set("Authorization", "Bearer "+adminToken)
|
||||||
|
req.Header.Set("User-Agent", "ananke/startup-checklist")
|
||||||
|
|
||||||
|
resp, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("request robot impersonation: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 64*1024))
|
||||||
|
if resp.StatusCode/100 != 2 {
|
||||||
|
return "", fmt.Errorf("robot impersonation failed status=%d body=%q", resp.StatusCode, compactHTTPBody(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
var payload keycloakImpersonationResponse
|
||||||
|
if err := json.Unmarshal(body, &payload); err != nil {
|
||||||
|
return "", fmt.Errorf("decode robot impersonation response: %w", err)
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(payload.Redirect), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// keycloakBaseURL runs one orchestration or CLI step.
|
||||||
|
// Signature: keycloakBaseURL(auth config.ServiceChecklistAuthSettings) string.
|
||||||
|
// Why: centralizing URL normalization keeps auth request construction stable.
|
||||||
|
func keycloakBaseURL(auth config.ServiceChecklistAuthSettings) string {
|
||||||
|
return strings.TrimRight(strings.TrimSpace(auth.KeycloakBaseURL), "/")
|
||||||
|
}
|
||||||
|
|
||||||
|
// compactHTTPBody collapses every run of whitespace in raw to a single space
// and drops leading and trailing whitespace.
// Signature: compactHTTPBody(raw []byte) string.
// Why: checklist auth errors should include a readable body summary without
// leaking multi-line payload noise into orchestrator logs.
func compactHTTPBody(raw []byte) string {
	// strings.Fields already ignores leading/trailing whitespace and yields no
	// fields for blank input, so joining its result covers the empty case too.
	return strings.Join(strings.Fields(string(raw)), " ")
}
|
||||||
@ -184,6 +184,16 @@ func (o *Orchestrator) serviceCheckReady(ctx context.Context, check config.Servi
|
|||||||
return false, fmt.Sprintf("location header contained forbidden marker %q", locationNotContains)
|
return false, fmt.Sprintf("location header contained forbidden marker %q", locationNotContains)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
finalURLContains := strings.TrimSpace(check.FinalURLContains)
|
||||||
|
if finalURLContains != "" && !checklistContains(result.FinalURL, finalURLContains) {
|
||||||
|
return false, fmt.Sprintf("final url missing expected marker %q", finalURLContains)
|
||||||
|
}
|
||||||
|
|
||||||
|
finalURLNotContains := strings.TrimSpace(check.FinalURLNotContains)
|
||||||
|
if finalURLNotContains != "" && checklistContains(result.FinalURL, finalURLNotContains) {
|
||||||
|
return false, fmt.Sprintf("final url contained forbidden marker %q", finalURLNotContains)
|
||||||
|
}
|
||||||
|
|
||||||
bodyContains := strings.TrimSpace(check.BodyContains)
|
bodyContains := strings.TrimSpace(check.BodyContains)
|
||||||
if bodyContains != "" && !checklistContains(result.Body, bodyContains) {
|
if bodyContains != "" && !checklistContains(result.Body, bodyContains) {
|
||||||
return false, fmt.Sprintf("response missing expected marker %q", bodyContains)
|
return false, fmt.Sprintf("response missing expected marker %q", bodyContains)
|
||||||
@ -201,6 +211,7 @@ type checklistHTTPProbeResult struct {
|
|||||||
Status int
|
Status int
|
||||||
Body string
|
Body string
|
||||||
Location string
|
Location string
|
||||||
|
FinalURL string
|
||||||
}
|
}
|
||||||
|
|
||||||
// httpChecklistProbeResult runs one orchestration or CLI step.
|
// httpChecklistProbeResult runs one orchestration or CLI step.
|
||||||
@ -209,13 +220,14 @@ type checklistHTTPProbeResult struct {
|
|||||||
// addition to status/body so startup can validate real user-facing behavior.
|
// addition to status/body so startup can validate real user-facing behavior.
|
||||||
func (o *Orchestrator) httpChecklistProbeResult(ctx context.Context, check config.ServiceChecklistCheck) (checklistHTTPProbeResult, error) {
|
func (o *Orchestrator) httpChecklistProbeResult(ctx context.Context, check config.ServiceChecklistCheck) (checklistHTTPProbeResult, error) {
|
||||||
result := checklistHTTPProbeResult{}
|
result := checklistHTTPProbeResult{}
|
||||||
status, body, location, err := o.httpChecklistProbeWithLocation(ctx, check)
|
status, body, location, finalURL, err := o.httpChecklistProbeWithLocation(ctx, check)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return result, err
|
return result, err
|
||||||
}
|
}
|
||||||
result.Status = status
|
result.Status = status
|
||||||
result.Body = body
|
result.Body = body
|
||||||
result.Location = location
|
result.Location = location
|
||||||
|
result.FinalURL = finalURL
|
||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -223,50 +235,66 @@ func (o *Orchestrator) httpChecklistProbeResult(ctx context.Context, check confi
|
|||||||
// Signature: (o *Orchestrator) httpChecklistProbe(ctx context.Context, check config.ServiceChecklistCheck) (int, string, error).
|
// Signature: (o *Orchestrator) httpChecklistProbe(ctx context.Context, check config.ServiceChecklistCheck) (int, string, error).
|
||||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (o *Orchestrator) httpChecklistProbe(ctx context.Context, check config.ServiceChecklistCheck) (int, string, error) {
|
func (o *Orchestrator) httpChecklistProbe(ctx context.Context, check config.ServiceChecklistCheck) (int, string, error) {
|
||||||
status, body, _, err := o.httpChecklistProbeWithLocation(ctx, check)
|
status, body, _, _, err := o.httpChecklistProbeWithLocation(ctx, check)
|
||||||
return status, body, err
|
return status, body, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// httpChecklistProbeWithLocation runs one orchestration or CLI step.
|
// httpChecklistProbeWithLocation runs one orchestration or CLI step.
|
||||||
// Signature: (o *Orchestrator) httpChecklistProbeWithLocation(ctx context.Context, check config.ServiceChecklistCheck) (int, string, string, error).
|
// Signature: (o *Orchestrator) httpChecklistProbeWithLocation(ctx context.Context, check config.ServiceChecklistCheck) (int, string, string, string, error).
|
||||||
// Why: redirects and auth gates require location-header assertions to prevent
|
// Why: redirects and auth gates require location-header assertions to prevent
|
||||||
// startup false-positives on partially healthy protected services.
|
// startup false-positives on partially healthy protected services.
|
||||||
func (o *Orchestrator) httpChecklistProbeWithLocation(ctx context.Context, check config.ServiceChecklistCheck) (int, string, string, error) {
|
func (o *Orchestrator) httpChecklistProbeWithLocation(ctx context.Context, check config.ServiceChecklistCheck) (int, string, string, string, error) {
|
||||||
timeout := time.Duration(check.TimeoutSeconds) * time.Second
|
timeout := time.Duration(check.TimeoutSeconds) * time.Second
|
||||||
if timeout <= 0 {
|
if timeout <= 0 {
|
||||||
timeout = 12 * time.Second
|
timeout = 12 * time.Second
|
||||||
}
|
}
|
||||||
|
|
||||||
|
followRedirects := check.FollowRedirects || check.RequireRobotAuth
|
||||||
|
var client *http.Client
|
||||||
|
if check.RequireRobotAuth {
|
||||||
|
authClient, authErr := o.checklistAuthHTTPClient(ctx, timeout, check.InsecureSkipTLS)
|
||||||
|
if authErr != nil {
|
||||||
|
return 0, "", "", "", fmt.Errorf("initialize robotuser checklist session: %w", authErr)
|
||||||
|
}
|
||||||
|
client = authClient
|
||||||
|
} else {
|
||||||
transport := &http.Transport{}
|
transport := &http.Transport{}
|
||||||
if check.InsecureSkipTLS {
|
if check.InsecureSkipTLS {
|
||||||
transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
|
transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
|
||||||
}
|
}
|
||||||
client := &http.Client{
|
client = &http.Client{
|
||||||
Timeout: timeout,
|
Timeout: timeout,
|
||||||
Transport: transport,
|
Transport: transport,
|
||||||
CheckRedirect: func(_ *http.Request, _ []*http.Request) error {
|
}
|
||||||
|
}
|
||||||
|
if !followRedirects {
|
||||||
|
client.CheckRedirect = func(_ *http.Request, _ []*http.Request) error {
|
||||||
return http.ErrUseLastResponse
|
return http.ErrUseLastResponse
|
||||||
},
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, strings.TrimSpace(check.URL), nil)
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, strings.TrimSpace(check.URL), nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, "", "", fmt.Errorf("build request: %w", err)
|
return 0, "", "", "", fmt.Errorf("build request: %w", err)
|
||||||
}
|
}
|
||||||
req.Header.Set("User-Agent", "ananke/startup-checklist")
|
req.Header.Set("User-Agent", "ananke/startup-checklist")
|
||||||
|
|
||||||
resp, err := client.Do(req)
|
resp, err := client.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, "", "", fmt.Errorf("request failed: %w", err)
|
return 0, "", "", "", fmt.Errorf("request failed: %w", err)
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
body, readErr := io.ReadAll(io.LimitReader(resp.Body, 64*1024))
|
body, readErr := io.ReadAll(io.LimitReader(resp.Body, 64*1024))
|
||||||
if readErr != nil {
|
if readErr != nil {
|
||||||
return resp.StatusCode, "", "", fmt.Errorf("read response body: %w", readErr)
|
return resp.StatusCode, "", "", "", fmt.Errorf("read response body: %w", readErr)
|
||||||
}
|
}
|
||||||
|
|
||||||
return resp.StatusCode, string(body), strings.TrimSpace(resp.Header.Get("Location")), nil
|
finalURL := strings.TrimSpace(req.URL.String())
|
||||||
|
if resp.Request != nil && resp.Request.URL != nil {
|
||||||
|
finalURL = strings.TrimSpace(resp.Request.URL.String())
|
||||||
|
}
|
||||||
|
return resp.StatusCode, string(body), strings.TrimSpace(resp.Header.Get("Location")), finalURL, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// checklistContains runs one orchestration or CLI step.
|
// checklistContains runs one orchestration or CLI step.
|
||||||
|
|||||||
@ -329,6 +329,80 @@ func TestServiceCheckReadyRejectsMissingLocationMarker(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestServiceCheckReadyRequiresFinalURLContains runs one orchestration or CLI step.
|
||||||
|
// Signature: TestServiceCheckReadyRequiresFinalURLContains(t *testing.T).
|
||||||
|
// Why: authenticated user-journey checks depend on final URL assertions after
|
||||||
|
// redirects complete, not only on initial response status.
|
||||||
|
func TestServiceCheckReadyRequiresFinalURLContains(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.URL.Path == "/" {
|
||||||
|
http.Redirect(w, r, "/app/home", http.StatusFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if r.URL.Path == "/app/home" {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte("OpenSearch Dashboards"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.WriteHeader(http.StatusNotFound)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
orch := &Orchestrator{
|
||||||
|
log: log.New(os.Stdout, "", 0),
|
||||||
|
}
|
||||||
|
ok, detail := orch.serviceCheckReady(context.Background(), config.ServiceChecklistCheck{
|
||||||
|
Name: "logging-ui-user-session",
|
||||||
|
URL: srv.URL,
|
||||||
|
AcceptedStatuses: []int{200},
|
||||||
|
FollowRedirects: true,
|
||||||
|
FinalURLContains: "/app/home",
|
||||||
|
BodyContains: "OpenSearch Dashboards",
|
||||||
|
TimeoutSeconds: 5,
|
||||||
|
})
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("expected final-url-aware service check to pass, detail=%s", detail)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestServiceCheckReadyRejectsForbiddenFinalURLMarker runs one orchestration or CLI step.
|
||||||
|
// Signature: TestServiceCheckReadyRejectsForbiddenFinalURLMarker(t *testing.T).
|
||||||
|
// Why: user-session checks should fail when final URL indicates auth/login loop
|
||||||
|
// instead of the expected post-login app route.
|
||||||
|
func TestServiceCheckReadyRejectsForbiddenFinalURLMarker(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.URL.Path == "/" {
|
||||||
|
http.Redirect(w, r, "/oauth2/sign_in", http.StatusFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if r.URL.Path == "/oauth2/sign_in" {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte("sign in"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.WriteHeader(http.StatusNotFound)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
orch := &Orchestrator{
|
||||||
|
log: log.New(os.Stdout, "", 0),
|
||||||
|
}
|
||||||
|
ok, detail := orch.serviceCheckReady(context.Background(), config.ServiceChecklistCheck{
|
||||||
|
Name: "logging-ui-user-session",
|
||||||
|
URL: srv.URL,
|
||||||
|
AcceptedStatuses: []int{200},
|
||||||
|
FollowRedirects: true,
|
||||||
|
FinalURLNotContains: "/oauth2/sign_in",
|
||||||
|
TimeoutSeconds: 5,
|
||||||
|
})
|
||||||
|
if ok {
|
||||||
|
t.Fatalf("expected forbidden final-url marker check to fail")
|
||||||
|
}
|
||||||
|
if !strings.Contains(detail, "final url contained forbidden marker") {
|
||||||
|
t.Fatalf("expected final-url forbidden marker detail, got %q", detail)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TestChecklistFailureHostFromIngressDetail runs one orchestration or CLI step.
|
// TestChecklistFailureHostFromIngressDetail runs one orchestration or CLI step.
|
||||||
// Signature: TestChecklistFailureHostFromIngressDetail(t *testing.T).
|
// Signature: TestChecklistFailureHostFromIngressDetail(t *testing.T).
|
||||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
@ -385,59 +459,3 @@ func TestChecklistFailureHostUnknown(t *testing.T) {
|
|||||||
t.Fatalf("expected empty host for unknown check, got %q", got)
|
t.Fatalf("expected empty host for unknown check, got %q", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestStuckVaultInitReasonDetectsHungInit runs one orchestration or CLI step.
|
|
||||||
// Signature: TestStuckVaultInitReasonDetectsHungInit(t *testing.T).
|
|
||||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
|
||||||
func TestStuckVaultInitReasonDetectsHungInit(t *testing.T) {
|
|
||||||
var pod podResource
|
|
||||||
pod.Status.Phase = "Pending"
|
|
||||||
pod.Metadata.Annotations = map[string]string{
|
|
||||||
"vault.hashicorp.com/agent-inject": "true",
|
|
||||||
}
|
|
||||||
pod.Status.InitContainerStatuses = []podContainerStatus{
|
|
||||||
{
|
|
||||||
Name: "vault-agent-init",
|
|
||||||
State: podContainerState{
|
|
||||||
Running: &podContainerRunningState{
|
|
||||||
StartedAt: time.Now().Add(-10 * time.Minute),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
reason := stuckVaultInitReason(pod, 3*time.Minute)
|
|
||||||
if reason != "VaultInitStuck" {
|
|
||||||
t.Fatalf("expected VaultInitStuck reason, got %q", reason)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TestStuckVaultInitReasonIgnoresFreshOrNonVaultPods runs one orchestration or CLI step.
|
|
||||||
// Signature: TestStuckVaultInitReasonIgnoresFreshOrNonVaultPods(t *testing.T).
|
|
||||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
|
||||||
func TestStuckVaultInitReasonIgnoresFreshOrNonVaultPods(t *testing.T) {
|
|
||||||
var pod podResource
|
|
||||||
pod.Status.Phase = "Pending"
|
|
||||||
pod.Metadata.Annotations = map[string]string{
|
|
||||||
"vault.hashicorp.com/agent-inject": "true",
|
|
||||||
}
|
|
||||||
pod.Status.InitContainerStatuses = []podContainerStatus{
|
|
||||||
{
|
|
||||||
Name: "vault-agent-init",
|
|
||||||
State: podContainerState{
|
|
||||||
Running: &podContainerRunningState{
|
|
||||||
StartedAt: time.Now().Add(-30 * time.Second),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
if reason := stuckVaultInitReason(pod, 3*time.Minute); reason != "" {
|
|
||||||
t.Fatalf("expected no reason for fresh init, got %q", reason)
|
|
||||||
}
|
|
||||||
|
|
||||||
pod.Metadata.Annotations["vault.hashicorp.com/agent-inject"] = "false"
|
|
||||||
pod.Status.InitContainerStatuses[0].State.Running.StartedAt = time.Now().Add(-10 * time.Minute)
|
|
||||||
if reason := stuckVaultInitReason(pod, 3*time.Minute); reason != "" {
|
|
||||||
t.Fatalf("expected no reason for non-vault pod, got %q", reason)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
62
internal/cluster/orchestrator_vault_test.go
Normal file
62
internal/cluster/orchestrator_vault_test.go
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
package cluster
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestStuckVaultInitReasonDetectsHungInit runs one orchestration or CLI step.
|
||||||
|
// Signature: TestStuckVaultInitReasonDetectsHungInit(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
|
func TestStuckVaultInitReasonDetectsHungInit(t *testing.T) {
|
||||||
|
var pod podResource
|
||||||
|
pod.Status.Phase = "Pending"
|
||||||
|
pod.Metadata.Annotations = map[string]string{
|
||||||
|
"vault.hashicorp.com/agent-inject": "true",
|
||||||
|
}
|
||||||
|
pod.Status.InitContainerStatuses = []podContainerStatus{
|
||||||
|
{
|
||||||
|
Name: "vault-agent-init",
|
||||||
|
State: podContainerState{
|
||||||
|
Running: &podContainerRunningState{
|
||||||
|
StartedAt: time.Now().Add(-10 * time.Minute),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
reason := stuckVaultInitReason(pod, 3*time.Minute)
|
||||||
|
if reason != "VaultInitStuck" {
|
||||||
|
t.Fatalf("expected VaultInitStuck reason, got %q", reason)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStuckVaultInitReasonIgnoresFreshOrNonVaultPods runs one orchestration or CLI step.
|
||||||
|
// Signature: TestStuckVaultInitReasonIgnoresFreshOrNonVaultPods(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
|
func TestStuckVaultInitReasonIgnoresFreshOrNonVaultPods(t *testing.T) {
|
||||||
|
var pod podResource
|
||||||
|
pod.Status.Phase = "Pending"
|
||||||
|
pod.Metadata.Annotations = map[string]string{
|
||||||
|
"vault.hashicorp.com/agent-inject": "true",
|
||||||
|
}
|
||||||
|
pod.Status.InitContainerStatuses = []podContainerStatus{
|
||||||
|
{
|
||||||
|
Name: "vault-agent-init",
|
||||||
|
State: podContainerState{
|
||||||
|
Running: &podContainerRunningState{
|
||||||
|
StartedAt: time.Now().Add(-30 * time.Second),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if reason := stuckVaultInitReason(pod, 3*time.Minute); reason != "" {
|
||||||
|
t.Fatalf("expected no reason for fresh init, got %q", reason)
|
||||||
|
}
|
||||||
|
|
||||||
|
pod.Metadata.Annotations["vault.hashicorp.com/agent-inject"] = "false"
|
||||||
|
pod.Status.InitContainerStatuses[0].State.Running.StartedAt = time.Now().Add(-10 * time.Minute)
|
||||||
|
if reason := stuckVaultInitReason(pod, 3*time.Minute); reason != "" {
|
||||||
|
t.Fatalf("expected no reason for non-vault pod, got %q", reason)
|
||||||
|
}
|
||||||
|
}
|
||||||
79
internal/cluster/testing_hooks_auth.go
Normal file
79
internal/cluster/testing_hooks_auth.go
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
package cluster
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/config"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestHookChecklistAuthHTTPClient runs one orchestration or CLI step.
|
||||||
|
// Signature: (o *Orchestrator) TestHookChecklistAuthHTTPClient(ctx context.Context, timeout time.Duration, insecureSkipTLS bool) (*http.Client, error).
|
||||||
|
// Why: exposes checklist auth client/session bootstrap internals to top-level tests.
|
||||||
|
func (o *Orchestrator) TestHookChecklistAuthHTTPClient(ctx context.Context, timeout time.Duration, insecureSkipTLS bool) (*http.Client, error) {
|
||||||
|
return o.checklistAuthHTTPClient(ctx, timeout, insecureSkipTLS)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookAuthenticateRobotChecklistSession runs one orchestration or CLI step.
|
||||||
|
// Signature: (o *Orchestrator) TestHookAuthenticateRobotChecklistSession(ctx context.Context, client *http.Client) error.
|
||||||
|
// Why: exposes robotuser auth session internals to top-level tests.
|
||||||
|
func (o *Orchestrator) TestHookAuthenticateRobotChecklistSession(ctx context.Context, client *http.Client) error {
|
||||||
|
return o.authenticateRobotChecklistSession(ctx, client)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookKubernetesSecretValue runs one orchestration or CLI step.
|
||||||
|
// Signature: (o *Orchestrator) TestHookKubernetesSecretValue(ctx context.Context, namespace string, name string, key string) (string, error).
|
||||||
|
// Why: exposes Kubernetes secret decode internals to top-level tests.
|
||||||
|
func (o *Orchestrator) TestHookKubernetesSecretValue(ctx context.Context, namespace string, name string, key string) (string, error) {
|
||||||
|
return o.kubernetesSecretValue(ctx, namespace, name, key)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookKeycloakAdminCredentials runs one orchestration or CLI step.
|
||||||
|
// Signature: (o *Orchestrator) TestHookKeycloakAdminCredentials(ctx context.Context, auth config.ServiceChecklistAuthSettings) (string, string, error).
|
||||||
|
// Why: exposes secret-backed credential resolution internals to top-level tests.
|
||||||
|
func (o *Orchestrator) TestHookKeycloakAdminCredentials(ctx context.Context, auth config.ServiceChecklistAuthSettings) (string, string, error) {
|
||||||
|
return o.keycloakAdminCredentials(ctx, auth)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookKeycloakAdminToken runs one orchestration or CLI step.
|
||||||
|
// Signature: (o *Orchestrator) TestHookKeycloakAdminToken(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminUser string, adminPassword string) (string, error).
|
||||||
|
// Why: exposes Keycloak admin token acquisition internals to top-level tests.
|
||||||
|
func (o *Orchestrator) TestHookKeycloakAdminToken(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminUser string, adminPassword string) (string, error) {
|
||||||
|
return o.keycloakAdminToken(ctx, client, auth, adminUser, adminPassword)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookKeycloakRobotUserID runs one orchestration or CLI step.
|
||||||
|
// Signature: (o *Orchestrator) TestHookKeycloakRobotUserID(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string) (string, error).
|
||||||
|
// Why: exposes Keycloak robot-user lookup internals to top-level tests.
|
||||||
|
func (o *Orchestrator) TestHookKeycloakRobotUserID(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string) (string, error) {
|
||||||
|
return o.keycloakRobotUserID(ctx, client, auth, adminToken)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookKeycloakImpersonationRedirect runs one orchestration or CLI step.
|
||||||
|
// Signature: (o *Orchestrator) TestHookKeycloakImpersonationRedirect(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string, robotUserID string) (string, error).
|
||||||
|
// Why: exposes Keycloak impersonation internals to top-level tests.
|
||||||
|
func (o *Orchestrator) TestHookKeycloakImpersonationRedirect(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string, robotUserID string) (string, error) {
|
||||||
|
return o.keycloakImpersonationRedirect(ctx, client, auth, adminToken, robotUserID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookHTTPChecklistProbeWithLocation runs one orchestration or CLI step.
|
||||||
|
// Signature: (o *Orchestrator) TestHookHTTPChecklistProbeWithLocation(ctx context.Context, check config.ServiceChecklistCheck) (int, string, string, string, error).
|
||||||
|
// Why: exposes redirect-aware checklist probe internals to top-level tests.
|
||||||
|
func (o *Orchestrator) TestHookHTTPChecklistProbeWithLocation(ctx context.Context, check config.ServiceChecklistCheck) (int, string, string, string, error) {
|
||||||
|
return o.httpChecklistProbeWithLocation(ctx, check)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookKeycloakBaseURL runs one orchestration or CLI step.
|
||||||
|
// Signature: TestHookKeycloakBaseURL(auth config.ServiceChecklistAuthSettings) string.
|
||||||
|
// Why: exposes base URL normalizer helper to top-level tests.
|
||||||
|
func TestHookKeycloakBaseURL(auth config.ServiceChecklistAuthSettings) string {
|
||||||
|
return keycloakBaseURL(auth)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookCompactHTTPBody runs one orchestration or CLI step.
|
||||||
|
// Signature: TestHookCompactHTTPBody(raw []byte) string.
|
||||||
|
// Why: exposes compact HTTP body helper to top-level tests.
|
||||||
|
func TestHookCompactHTTPBody(raw []byte) string {
|
||||||
|
return compactHTTPBody(raw)
|
||||||
|
}
|
||||||
@ -97,6 +97,30 @@ func (c *Config) applyDefaults() {
|
|||||||
if c.Startup.ServiceChecklistStabilitySec < 0 {
|
if c.Startup.ServiceChecklistStabilitySec < 0 {
|
||||||
c.Startup.ServiceChecklistStabilitySec = 0
|
c.Startup.ServiceChecklistStabilitySec = 0
|
||||||
}
|
}
|
||||||
|
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.Mode) == "" {
|
||||||
|
c.Startup.ServiceChecklistAuth.Mode = "keycloak_robotuser"
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.KeycloakBaseURL) == "" {
|
||||||
|
c.Startup.ServiceChecklistAuth.KeycloakBaseURL = "https://sso.bstein.dev"
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.Realm) == "" {
|
||||||
|
c.Startup.ServiceChecklistAuth.Realm = "atlas"
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.RobotUsername) == "" {
|
||||||
|
c.Startup.ServiceChecklistAuth.RobotUsername = "robotuser"
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretNamespace) == "" {
|
||||||
|
c.Startup.ServiceChecklistAuth.AdminSecretNamespace = "sso"
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretName) == "" {
|
||||||
|
c.Startup.ServiceChecklistAuth.AdminSecretName = "keycloak-admin"
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretUsernameKey) == "" {
|
||||||
|
c.Startup.ServiceChecklistAuth.AdminSecretUsernameKey = "username"
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretPasswordKey) == "" {
|
||||||
|
c.Startup.ServiceChecklistAuth.AdminSecretPasswordKey = "password"
|
||||||
|
}
|
||||||
c.Startup.ServiceChecklist = mergeServiceChecklistDefaults(c.Startup.ServiceChecklist, defaultServiceChecklist())
|
c.Startup.ServiceChecklist = mergeServiceChecklistDefaults(c.Startup.ServiceChecklist, defaultServiceChecklist())
|
||||||
for i := range c.Startup.ServiceChecklist {
|
for i := range c.Startup.ServiceChecklist {
|
||||||
if c.Startup.ServiceChecklist[i].TimeoutSeconds <= 0 {
|
if c.Startup.ServiceChecklist[i].TimeoutSeconds <= 0 {
|
||||||
|
|||||||
@ -207,6 +207,58 @@ func TestValidateRejectsBadServiceChecklistURL(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestValidateRejectsUnknownServiceChecklistAuthMode runs one orchestration or CLI step.
|
||||||
|
// Signature: TestValidateRejectsUnknownServiceChecklistAuthMode(t *testing.T).
|
||||||
|
// Why: authenticated user-journey checklist gates should fail fast when auth
|
||||||
|
// mode is invalid to avoid silent false-positive startup passes.
|
||||||
|
func TestValidateRejectsUnknownServiceChecklistAuthMode(t *testing.T) {
|
||||||
|
cfg := defaults()
|
||||||
|
cfg.Startup.ServiceChecklistAuth.Mode = "bad-mode"
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected validation error for invalid service checklist auth mode")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestValidateRejectsFinalURLMarkersWithoutRedirectFollow runs one orchestration or CLI step.
|
||||||
|
// Signature: TestValidateRejectsFinalURLMarkersWithoutRedirectFollow(t *testing.T).
|
||||||
|
// Why: final-url assertions only make sense when redirect following is enabled.
|
||||||
|
func TestValidateRejectsFinalURLMarkersWithoutRedirectFollow(t *testing.T) {
|
||||||
|
cfg := defaults()
|
||||||
|
cfg.Startup.ServiceChecklist = []ServiceChecklistCheck{
|
||||||
|
{
|
||||||
|
Name: "bad-final-url",
|
||||||
|
URL: "https://logs.bstein.dev/",
|
||||||
|
AcceptedStatuses: []int{200},
|
||||||
|
FinalURLContains: "/app/home",
|
||||||
|
TimeoutSeconds: 12,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected validation error for final_url_* markers without redirect follow")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestValidateRejectsRobotAuthCheckWhenAuthModeDisabled runs one orchestration or CLI step.
|
||||||
|
// Signature: TestValidateRejectsRobotAuthCheckWhenAuthModeDisabled(t *testing.T).
|
||||||
|
// Why: robot-auth checks must be blocked when checklist auth mode is disabled.
|
||||||
|
func TestValidateRejectsRobotAuthCheckWhenAuthModeDisabled(t *testing.T) {
|
||||||
|
cfg := defaults()
|
||||||
|
cfg.Startup.ServiceChecklistAuth.Mode = "none"
|
||||||
|
cfg.Startup.ServiceChecklist = []ServiceChecklistCheck{
|
||||||
|
{
|
||||||
|
Name: "logs-ui",
|
||||||
|
URL: "https://logs.bstein.dev/",
|
||||||
|
AcceptedStatuses: []int{200},
|
||||||
|
RequireRobotAuth: true,
|
||||||
|
FollowRedirects: true,
|
||||||
|
TimeoutSeconds: 12,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected validation error for robot-auth checklist check when auth mode is none")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TestValidateRejectsBadIgnoreFluxKustomizationFormat runs one orchestration or CLI step.
|
// TestValidateRejectsBadIgnoreFluxKustomizationFormat runs one orchestration or CLI step.
|
||||||
// Signature: TestValidateRejectsBadIgnoreFluxKustomizationFormat(t *testing.T).
|
// Signature: TestValidateRejectsBadIgnoreFluxKustomizationFormat(t *testing.T).
|
||||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
@ -291,8 +343,8 @@ func TestApplyDefaultsMergesServiceChecklistDefaults(t *testing.T) {
|
|||||||
if _, ok := names["custom-smoke"]; !ok {
|
if _, ok := names["custom-smoke"]; !ok {
|
||||||
t.Fatalf("expected custom checklist entry to be preserved")
|
t.Fatalf("expected custom checklist entry to be preserved")
|
||||||
}
|
}
|
||||||
if _, ok := names["logging-oidc-redirect"]; !ok {
|
if _, ok := names["logging-ui-user-session"]; !ok {
|
||||||
t.Fatalf("expected default logging redirect check to be merged in")
|
t.Fatalf("expected default logging user-session check to be merged in")
|
||||||
}
|
}
|
||||||
if _, ok := names["vaultwarden-ui"]; !ok {
|
if _, ok := names["vaultwarden-ui"]; !ok {
|
||||||
t.Fatalf("expected default vaultwarden check to be merged in")
|
t.Fatalf("expected default vaultwarden check to be merged in")
|
||||||
|
|||||||
@ -81,6 +81,16 @@ func defaults() Config {
|
|||||||
ServiceChecklistWaitSeconds: 420,
|
ServiceChecklistWaitSeconds: 420,
|
||||||
ServiceChecklistPollSeconds: 5,
|
ServiceChecklistPollSeconds: 5,
|
||||||
ServiceChecklistStabilitySec: 120,
|
ServiceChecklistStabilitySec: 120,
|
||||||
|
ServiceChecklistAuth: ServiceChecklistAuthSettings{
|
||||||
|
Mode: "keycloak_robotuser",
|
||||||
|
KeycloakBaseURL: "https://sso.bstein.dev",
|
||||||
|
Realm: "atlas",
|
||||||
|
RobotUsername: "robotuser",
|
||||||
|
AdminSecretNamespace: "sso",
|
||||||
|
AdminSecretName: "keycloak-admin",
|
||||||
|
AdminSecretUsernameKey: "username",
|
||||||
|
AdminSecretPasswordKey: "password",
|
||||||
|
},
|
||||||
ServiceChecklist: defaultServiceChecklist(),
|
ServiceChecklist: defaultServiceChecklist(),
|
||||||
RequireCriticalServiceEndpoints: true,
|
RequireCriticalServiceEndpoints: true,
|
||||||
CriticalServiceEndpointWaitSec: 420,
|
CriticalServiceEndpointWaitSec: 420,
|
||||||
|
|||||||
@ -44,10 +44,12 @@ func defaultServiceChecklist() []ServiceChecklistCheck {
|
|||||||
TimeoutSeconds: 12,
|
TimeoutSeconds: 12,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "auth-gateway-redirect",
|
Name: "auth-gateway-user-session",
|
||||||
URL: "https://auth.bstein.dev/",
|
URL: "https://auth.bstein.dev/",
|
||||||
AcceptedStatuses: []int{302},
|
AcceptedStatuses: []int{200},
|
||||||
LocationContains: "https://sso.bstein.dev/realms/atlas/",
|
RequireRobotAuth: true,
|
||||||
|
FollowRedirects: true,
|
||||||
|
BodyContains: "Authenticated",
|
||||||
TimeoutSeconds: 12,
|
TimeoutSeconds: 12,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -121,17 +123,32 @@ func defaultServiceChecklist() []ServiceChecklistCheck {
|
|||||||
TimeoutSeconds: 12,
|
TimeoutSeconds: 12,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "logging-oidc-redirect",
|
Name: "logging-ui-user-session",
|
||||||
URL: "https://logs.bstein.dev/",
|
URL: "https://logs.bstein.dev/",
|
||||||
AcceptedStatuses: []int{302},
|
AcceptedStatuses: []int{200},
|
||||||
LocationContains: "client_id=logs",
|
RequireRobotAuth: true,
|
||||||
|
FollowRedirects: true,
|
||||||
|
FinalURLNotContains: "/protocol/openid-connect/auth",
|
||||||
|
BodyContains: "OpenSearch Dashboards",
|
||||||
TimeoutSeconds: 12,
|
TimeoutSeconds: 12,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "longhorn-oidc-redirect",
|
Name: "logging-api-user-session",
|
||||||
URL: "https://longhorn.bstein.dev/",
|
URL: "https://logs.bstein.dev/api/status",
|
||||||
AcceptedStatuses: []int{302},
|
AcceptedStatuses: []int{200},
|
||||||
LocationContains: "https://sso.bstein.dev/realms/atlas/",
|
RequireRobotAuth: true,
|
||||||
|
FollowRedirects: true,
|
||||||
|
BodyContains: "\"state\":\"green\"",
|
||||||
|
TimeoutSeconds: 12,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "longhorn-api-user-session",
|
||||||
|
URL: "https://longhorn.bstein.dev/v1",
|
||||||
|
AcceptedStatuses: []int{200},
|
||||||
|
RequireRobotAuth: true,
|
||||||
|
FollowRedirects: true,
|
||||||
|
FinalURLNotContains: "/protocol/openid-connect/auth",
|
||||||
|
BodyContains: "\"id\":\"v1\"",
|
||||||
TimeoutSeconds: 12,
|
TimeoutSeconds: 12,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -190,17 +207,24 @@ func defaultServiceChecklist() []ServiceChecklistCheck {
|
|||||||
TimeoutSeconds: 12,
|
TimeoutSeconds: 12,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "sentinel-oidc-redirect",
|
Name: "sentinel-user-session",
|
||||||
URL: "https://sentinel.bstein.dev/",
|
URL: "https://sentinel.bstein.dev/healthz",
|
||||||
AcceptedStatuses: []int{302},
|
AcceptedStatuses: []int{200},
|
||||||
LocationContains: "client_id=metis",
|
RequireRobotAuth: true,
|
||||||
|
FollowRedirects: true,
|
||||||
|
FinalURLNotContains: "/protocol/openid-connect/auth",
|
||||||
|
BodyContains: "ok",
|
||||||
TimeoutSeconds: 12,
|
TimeoutSeconds: 12,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "keycloak-admin-redirect",
|
Name: "keycloak-admin-user-session",
|
||||||
URL: "https://sso.bstein.dev/",
|
URL: "https://sso.bstein.dev/admin/",
|
||||||
AcceptedStatuses: []int{302},
|
AcceptedStatuses: []int{200},
|
||||||
LocationContains: "https://sso.bstein.dev/admin/",
|
RequireRobotAuth: true,
|
||||||
|
FollowRedirects: true,
|
||||||
|
FinalURLContains: "/admin/master/console/",
|
||||||
|
FinalURLNotContains: "/login-actions/authenticate",
|
||||||
|
BodyContains: "Keycloak Administration Console",
|
||||||
TimeoutSeconds: 12,
|
TimeoutSeconds: 12,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -253,23 +277,23 @@ func mergeServiceChecklistDefaults(existing, defaults []ServiceChecklistCheck) [
|
|||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
byName := map[string]struct{}{}
|
defaultByName := map[string]struct{}{}
|
||||||
for _, check := range existing {
|
|
||||||
name := strings.TrimSpace(check.Name)
|
|
||||||
if name == "" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
byName[name] = struct{}{}
|
|
||||||
}
|
|
||||||
|
|
||||||
out := make([]ServiceChecklistCheck, 0, len(existing)+len(defaults))
|
|
||||||
out = append(out, existing...)
|
|
||||||
for _, check := range defaults {
|
for _, check := range defaults {
|
||||||
name := strings.TrimSpace(check.Name)
|
name := strings.TrimSpace(check.Name)
|
||||||
if name == "" {
|
if name == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if _, exists := byName[name]; exists {
|
defaultByName[name] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
out := make([]ServiceChecklistCheck, 0, len(defaults)+len(existing))
|
||||||
|
out = append(out, defaults...)
|
||||||
|
for _, check := range existing {
|
||||||
|
name := strings.TrimSpace(check.Name)
|
||||||
|
if name == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, exists := defaultByName[name]; exists {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
out = append(out, check)
|
out = append(out, check)
|
||||||
|
|||||||
33
internal/config/testing_hooks.go
Normal file
33
internal/config/testing_hooks.go
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
package config
|
||||||
|
|
||||||
|
// TestHookDefaultServiceChecklist runs one orchestration or CLI step.
|
||||||
|
// Signature: TestHookDefaultServiceChecklist() []ServiceChecklistCheck.
|
||||||
|
// Why: exposes default service checklist catalog to top-level tests.
|
||||||
|
func TestHookDefaultServiceChecklist() []ServiceChecklistCheck {
|
||||||
|
out := make([]ServiceChecklistCheck, 0, len(defaultServiceChecklist()))
|
||||||
|
out = append(out, defaultServiceChecklist()...)
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookDefaultCriticalServiceEndpoints runs one orchestration or CLI step.
|
||||||
|
// Signature: TestHookDefaultCriticalServiceEndpoints() []string.
|
||||||
|
// Why: exposes default critical endpoint catalog to top-level tests.
|
||||||
|
func TestHookDefaultCriticalServiceEndpoints() []string {
|
||||||
|
out := make([]string, 0, len(defaultCriticalServiceEndpoints()))
|
||||||
|
out = append(out, defaultCriticalServiceEndpoints()...)
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookMergeServiceChecklistDefaults forwards both arguments to mergeServiceChecklistDefaults and returns its result unchanged.
|
||||||
|
// Signature: TestHookMergeServiceChecklistDefaults(existing, defaults []ServiceChecklistCheck) []ServiceChecklistCheck.
|
||||||
|
// Why: exposes checklist merge helper to top-level tests.
|
||||||
|
func TestHookMergeServiceChecklistDefaults(existing, defaults []ServiceChecklistCheck) []ServiceChecklistCheck {
|
||||||
|
return mergeServiceChecklistDefaults(existing, defaults)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookMergeStringDefaults forwards both arguments to mergeStringDefaults and returns its result unchanged.
|
||||||
|
// Signature: TestHookMergeStringDefaults(existing, defaults []string) []string.
|
||||||
|
// Why: exposes string merge helper to top-level tests.
|
||||||
|
func TestHookMergeStringDefaults(existing, defaults []string) []string {
|
||||||
|
return mergeStringDefaults(existing, defaults)
|
||||||
|
}
|
||||||
@ -56,6 +56,7 @@ type Startup struct {
|
|||||||
ServiceChecklistWaitSeconds int `yaml:"service_checklist_wait_seconds"`
|
ServiceChecklistWaitSeconds int `yaml:"service_checklist_wait_seconds"`
|
||||||
ServiceChecklistPollSeconds int `yaml:"service_checklist_poll_seconds"`
|
ServiceChecklistPollSeconds int `yaml:"service_checklist_poll_seconds"`
|
||||||
ServiceChecklistStabilitySec int `yaml:"service_checklist_stability_seconds"`
|
ServiceChecklistStabilitySec int `yaml:"service_checklist_stability_seconds"`
|
||||||
|
ServiceChecklistAuth ServiceChecklistAuthSettings `yaml:"service_checklist_auth"`
|
||||||
ServiceChecklist []ServiceChecklistCheck `yaml:"service_checklist"`
|
ServiceChecklist []ServiceChecklistCheck `yaml:"service_checklist"`
|
||||||
RequireCriticalServiceEndpoints bool `yaml:"require_critical_service_endpoints"`
|
RequireCriticalServiceEndpoints bool `yaml:"require_critical_service_endpoints"`
|
||||||
CriticalServiceEndpointWaitSec int `yaml:"critical_service_endpoint_wait_seconds"`
|
CriticalServiceEndpointWaitSec int `yaml:"critical_service_endpoint_wait_seconds"`
|
||||||
@ -91,14 +92,29 @@ type ServiceChecklistCheck struct {
|
|||||||
Name string `yaml:"name"`
|
Name string `yaml:"name"`
|
||||||
URL string `yaml:"url"`
|
URL string `yaml:"url"`
|
||||||
AcceptedStatuses []int `yaml:"accepted_statuses"`
|
AcceptedStatuses []int `yaml:"accepted_statuses"`
|
||||||
|
RequireRobotAuth bool `yaml:"require_robot_auth"`
|
||||||
|
FollowRedirects bool `yaml:"follow_redirects"`
|
||||||
LocationContains string `yaml:"location_contains"`
|
LocationContains string `yaml:"location_contains"`
|
||||||
LocationNotContains string `yaml:"location_not_contains"`
|
LocationNotContains string `yaml:"location_not_contains"`
|
||||||
|
FinalURLContains string `yaml:"final_url_contains"`
|
||||||
|
FinalURLNotContains string `yaml:"final_url_not_contains"`
|
||||||
BodyContains string `yaml:"body_contains"`
|
BodyContains string `yaml:"body_contains"`
|
||||||
BodyNotContains string `yaml:"body_not_contains"`
|
BodyNotContains string `yaml:"body_not_contains"`
|
||||||
TimeoutSeconds int `yaml:"timeout_seconds"`
|
TimeoutSeconds int `yaml:"timeout_seconds"`
|
||||||
InsecureSkipTLS bool `yaml:"insecure_skip_tls"`
|
InsecureSkipTLS bool `yaml:"insecure_skip_tls"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ServiceChecklistAuthSettings is the YAML-backed auth configuration stored in
// Startup.ServiceChecklistAuth (key: service_checklist_auth).
// Config.Validate accepts only "none" or "keycloak_robotuser" for Mode (after
// trimming whitespace). In "keycloak_robotuser" mode it additionally requires
// KeycloakBaseURL to parse with both a scheme and a host, and every other field
// below to be non-blank.
// NOTE(review): the AdminSecret* names suggest a Kubernetes secret reference —
// confirm against the code that consumes these fields.
type ServiceChecklistAuthSettings struct {
|
||||||
|
Mode string `yaml:"mode"`
|
||||||
|
KeycloakBaseURL string `yaml:"keycloak_base_url"`
|
||||||
|
Realm string `yaml:"realm"`
|
||||||
|
RobotUsername string `yaml:"robot_username"`
|
||||||
|
AdminSecretNamespace string `yaml:"admin_secret_namespace"`
|
||||||
|
AdminSecretName string `yaml:"admin_secret_name"`
|
||||||
|
AdminSecretUsernameKey string `yaml:"admin_secret_username_key"`
|
||||||
|
AdminSecretPasswordKey string `yaml:"admin_secret_password_key"`
|
||||||
|
}
|
||||||
|
|
||||||
type Shutdown struct {
|
type Shutdown struct {
|
||||||
DefaultBudgetSeconds int `yaml:"default_budget_seconds"`
|
DefaultBudgetSeconds int `yaml:"default_budget_seconds"`
|
||||||
HistoryMinSamples int `yaml:"history_min_samples"`
|
HistoryMinSamples int `yaml:"history_min_samples"`
|
||||||
|
|||||||
@ -136,6 +136,35 @@ func (c Config) Validate() error {
|
|||||||
if c.Startup.RequireServiceChecklist && len(c.Startup.ServiceChecklist) == 0 {
|
if c.Startup.RequireServiceChecklist && len(c.Startup.ServiceChecklist) == 0 {
|
||||||
return fmt.Errorf("config.startup.service_checklist must not be empty when require_service_checklist is true")
|
return fmt.Errorf("config.startup.service_checklist must not be empty when require_service_checklist is true")
|
||||||
}
|
}
|
||||||
|
authMode := strings.TrimSpace(c.Startup.ServiceChecklistAuth.Mode)
|
||||||
|
if authMode != "none" && authMode != "keycloak_robotuser" {
|
||||||
|
return fmt.Errorf("config.startup.service_checklist_auth.mode must be none or keycloak_robotuser")
|
||||||
|
}
|
||||||
|
if authMode == "keycloak_robotuser" {
|
||||||
|
baseURL := strings.TrimSpace(c.Startup.ServiceChecklistAuth.KeycloakBaseURL)
|
||||||
|
parsed, err := neturl.Parse(baseURL)
|
||||||
|
if err != nil || parsed.Scheme == "" || parsed.Host == "" {
|
||||||
|
return fmt.Errorf("config.startup.service_checklist_auth.keycloak_base_url is invalid: %q", baseURL)
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.Realm) == "" {
|
||||||
|
return fmt.Errorf("config.startup.service_checklist_auth.realm must not be empty")
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.RobotUsername) == "" {
|
||||||
|
return fmt.Errorf("config.startup.service_checklist_auth.robot_username must not be empty")
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretNamespace) == "" {
|
||||||
|
return fmt.Errorf("config.startup.service_checklist_auth.admin_secret_namespace must not be empty")
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretName) == "" {
|
||||||
|
return fmt.Errorf("config.startup.service_checklist_auth.admin_secret_name must not be empty")
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretUsernameKey) == "" {
|
||||||
|
return fmt.Errorf("config.startup.service_checklist_auth.admin_secret_username_key must not be empty")
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretPasswordKey) == "" {
|
||||||
|
return fmt.Errorf("config.startup.service_checklist_auth.admin_secret_password_key must not be empty")
|
||||||
|
}
|
||||||
|
}
|
||||||
for i, check := range c.Startup.ServiceChecklist {
|
for i, check := range c.Startup.ServiceChecklist {
|
||||||
if strings.TrimSpace(check.Name) == "" {
|
if strings.TrimSpace(check.Name) == "" {
|
||||||
return fmt.Errorf("config.startup.service_checklist[%d].name must not be empty", i)
|
return fmt.Errorf("config.startup.service_checklist[%d].name must not be empty", i)
|
||||||
@ -151,6 +180,13 @@ func (c Config) Validate() error {
|
|||||||
if check.TimeoutSeconds <= 0 {
|
if check.TimeoutSeconds <= 0 {
|
||||||
return fmt.Errorf("config.startup.service_checklist[%d].timeout_seconds must be > 0", i)
|
return fmt.Errorf("config.startup.service_checklist[%d].timeout_seconds must be > 0", i)
|
||||||
}
|
}
|
||||||
|
if check.RequireRobotAuth && authMode == "none" {
|
||||||
|
return fmt.Errorf("config.startup.service_checklist[%d] requires robot auth but service_checklist_auth.mode is none", i)
|
||||||
|
}
|
||||||
|
if (strings.TrimSpace(check.FinalURLContains) != "" || strings.TrimSpace(check.FinalURLNotContains) != "") &&
|
||||||
|
!(check.FollowRedirects || check.RequireRobotAuth) {
|
||||||
|
return fmt.Errorf("config.startup.service_checklist[%d] uses final_url_* markers without redirects enabled", i)
|
||||||
|
}
|
||||||
for _, code := range check.AcceptedStatuses {
|
for _, code := range check.AcceptedStatuses {
|
||||||
if code < 100 || code > 599 {
|
if code < 100 || code > 599 {
|
||||||
return fmt.Errorf("config.startup.service_checklist[%d].accepted_statuses contains invalid HTTP code %d", i, code)
|
return fmt.Errorf("config.startup.service_checklist[%d].accepted_statuses contains invalid HTTP code %d", i, code)
|
||||||
|
|||||||
@ -15,6 +15,9 @@ type Runner struct {
|
|||||||
Logger *log.Logger
|
Logger *log.Logger
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Run runs one orchestration or CLI step.
|
||||||
|
// Signature: (r *Runner) Run(ctx context.Context, name string, args ...string) (string, error).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (r *Runner) Run(ctx context.Context, name string, args ...string) (string, error) {
|
func (r *Runner) Run(ctx context.Context, name string, args ...string) (string, error) {
|
||||||
if r.DryRun {
|
if r.DryRun {
|
||||||
r.logf("DRY-RUN: %s %s", name, strings.Join(args, " "))
|
r.logf("DRY-RUN: %s %s", name, strings.Join(args, " "))
|
||||||
@ -37,11 +40,17 @@ func (r *Runner) Run(ctx context.Context, name string, args ...string) (string,
|
|||||||
return trimmed, nil
|
return trimmed, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CommandExists reports whether name resolves to an executable via exec.LookPath.
|
||||||
|
// Signature: (r *Runner) CommandExists(name string) bool.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (r *Runner) CommandExists(name string) bool {
|
func (r *Runner) CommandExists(name string) bool {
|
||||||
_, err := exec.LookPath(name)
|
_, err := exec.LookPath(name)
|
||||||
return err == nil
|
return err == nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// logf runs one orchestration or CLI step.
|
||||||
|
// Signature: (r *Runner) logf(format string, args ...any).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (r *Runner) logf(format string, args ...any) {
|
func (r *Runner) logf(format string, args ...any) {
|
||||||
if r.Logger != nil {
|
if r.Logger != nil {
|
||||||
r.Logger.Printf(format, args...)
|
r.Logger.Printf(format, args...)
|
||||||
|
|||||||
53
internal/execx/runner_additional_test.go
Normal file
53
internal/execx/runner_additional_test.go
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
package execx
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestRunnerRunFailureWithoutOutput checks that Run returns an error and an empty string when `sh -c "exit 3"` fails silently.
|
||||||
|
// Signature: TestRunnerRunFailureWithoutOutput(t *testing.T).
|
||||||
|
// Why: covers error branch where command fails without producing output.
|
||||||
|
func TestRunnerRunFailureWithoutOutput(t *testing.T) {
|
||||||
|
r := &Runner{}
|
||||||
|
out, err := r.Run(context.Background(), "sh", "-c", "exit 3")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expected failure")
|
||||||
|
}
|
||||||
|
if out != "" {
|
||||||
|
t.Fatalf("expected empty output, got %q", out)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRunnerLogfNoLogger calls logf on a Runner whose Logger is unset; it has no assertions and passes as long as the call does not panic.
|
||||||
|
// Signature: TestRunnerLogfNoLogger(t *testing.T).
|
||||||
|
// Why: covers no-op logging path.
|
||||||
|
func TestRunnerLogfNoLogger(t *testing.T) {
|
||||||
|
r := &Runner{}
|
||||||
|
r.logf("hello %s", "world")
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRunnerCommandMissing checks that CommandExists returns false for a command name that should not be installed anywhere.
|
||||||
|
// Signature: TestRunnerCommandMissing(t *testing.T).
|
||||||
|
// Why: covers false branch of command existence checks.
|
||||||
|
func TestRunnerCommandMissing(t *testing.T) {
|
||||||
|
r := &Runner{}
|
||||||
|
if r.CommandExists("definitely-not-a-real-command-ananke") {
|
||||||
|
t.Fatalf("expected missing command to be false")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRunnerInjectsKubeconfigEnv checks that a Runner with Kubeconfig set makes that path visible to the child process as $KUBECONFIG.
|
||||||
|
// Signature: TestRunnerInjectsKubeconfigEnv(t *testing.T).
|
||||||
|
// Why: covers kubeconfig environment injection branch in command runner.
|
||||||
|
func TestRunnerInjectsKubeconfigEnv(t *testing.T) {
|
||||||
|
r := &Runner{Kubeconfig: "/tmp/test-kubeconfig"}
|
||||||
|
out, err := r.Run(context.Background(), "sh", "-c", "printf %s \"$KUBECONFIG\"")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("runner command failed: %v", err)
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(out) != "/tmp/test-kubeconfig" {
|
||||||
|
t.Fatalf("expected kubeconfig env to propagate, got %q", out)
|
||||||
|
}
|
||||||
|
}
|
||||||
68
internal/execx/runner_test.go
Normal file
68
internal/execx/runner_test.go
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
package execx
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"log"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestRunnerDryRun checks that a DryRun Runner returns no error and empty output, and logs "DRY-RUN: echo hello" to its Logger.
|
||||||
|
// Signature: TestRunnerDryRun(t *testing.T).
|
||||||
|
// Why: pins the dry-run contract of Run (log the command line, produce no output).
|
||||||
|
func TestRunnerDryRun(t *testing.T) {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
r := &Runner{
|
||||||
|
DryRun: true,
|
||||||
|
Logger: log.New(&buf, "", 0),
|
||||||
|
}
|
||||||
|
out, err := r.Run(context.Background(), "echo", "hello")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("dry-run should not fail: %v", err)
|
||||||
|
}
|
||||||
|
if out != "" {
|
||||||
|
t.Fatalf("expected empty dry-run output, got %q", out)
|
||||||
|
}
|
||||||
|
if !strings.Contains(buf.String(), "DRY-RUN: echo hello") {
|
||||||
|
t.Fatalf("expected dry-run log entry, got %q", buf.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRunnerRunSuccess checks that Run returns nil error and exactly "ok" for `sh -c "printf ok"`.
|
||||||
|
// Signature: TestRunnerRunSuccess(t *testing.T).
|
||||||
|
// Why: pins the success path of Run, including the returned output string.
|
||||||
|
func TestRunnerRunSuccess(t *testing.T) {
|
||||||
|
r := &Runner{}
|
||||||
|
out, err := r.Run(context.Background(), "sh", "-c", "printf ok")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("expected command success: %v", err)
|
||||||
|
}
|
||||||
|
if out != "ok" {
|
||||||
|
t.Fatalf("expected output ok, got %q", out)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRunnerRunFailureIncludesOutput checks that when a command writes to stderr and exits 1, Run returns an error and still returns the stderr text.
|
||||||
|
// Signature: TestRunnerRunFailureIncludesOutput(t *testing.T).
|
||||||
|
// Why: pins that Run hands captured stderr back to the caller alongside the failure.
|
||||||
|
func TestRunnerRunFailureIncludesOutput(t *testing.T) {
|
||||||
|
r := &Runner{}
|
||||||
|
out, err := r.Run(context.Background(), "sh", "-c", "echo boom >&2; exit 1")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expected command failure")
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(out) != "boom" {
|
||||||
|
t.Fatalf("expected stderr to be preserved, got %q", out)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRunnerCommandExists checks that CommandExists returns true for "sh".
|
||||||
|
// Signature: TestRunnerCommandExists(t *testing.T).
|
||||||
|
// Why: covers the true branch of command existence checks; assumes sh is on PATH in the test environment.
|
||||||
|
func TestRunnerCommandExists(t *testing.T) {
|
||||||
|
r := &Runner{}
|
||||||
|
if !r.CommandExists("sh") {
|
||||||
|
t.Fatalf("expected shell command to exist")
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -3,6 +3,7 @@ package metrics
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"os"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
@ -35,18 +36,27 @@ type Exporter struct {
|
|||||||
samples map[string]Sample
|
samples map[string]Sample
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// New returns an Exporter whose samples map is initialized and empty.
|
||||||
|
// Signature: New() *Exporter.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func New() *Exporter {
|
func New() *Exporter {
|
||||||
return &Exporter{
|
return &Exporter{
|
||||||
samples: make(map[string]Sample),
|
samples: make(map[string]Sample),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UpdateBudget stores seconds as the exporter's shutdown budget while holding the write lock.
|
||||||
|
// Signature: (e *Exporter) UpdateBudget(seconds int).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (e *Exporter) UpdateBudget(seconds int) {
|
func (e *Exporter) UpdateBudget(seconds int) {
|
||||||
e.mu.Lock()
|
e.mu.Lock()
|
||||||
defer e.mu.Unlock()
|
defer e.mu.Unlock()
|
||||||
e.shutdownBudgetSec = seconds
|
e.shutdownBudgetSec = seconds
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UpdateSample runs one orchestration or CLI step.
|
||||||
|
// Signature: (e *Exporter) UpdateSample(s Sample).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (e *Exporter) UpdateSample(s Sample) {
|
func (e *Exporter) UpdateSample(s Sample) {
|
||||||
e.mu.Lock()
|
e.mu.Lock()
|
||||||
defer e.mu.Unlock()
|
defer e.mu.Unlock()
|
||||||
@ -56,6 +66,9 @@ func (e *Exporter) UpdateSample(s Sample) {
|
|||||||
e.samples[s.Name] = s
|
e.samples[s.Name] = s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MarkShutdown runs one orchestration or CLI step.
|
||||||
|
// Signature: (e *Exporter) MarkShutdown(reason string).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (e *Exporter) MarkShutdown(reason string) {
|
func (e *Exporter) MarkShutdown(reason string) {
|
||||||
e.mu.Lock()
|
e.mu.Lock()
|
||||||
defer e.mu.Unlock()
|
defer e.mu.Unlock()
|
||||||
@ -64,6 +77,9 @@ func (e *Exporter) MarkShutdown(reason string) {
|
|||||||
e.lastShutdownAt = time.Now().UTC()
|
e.lastShutdownAt = time.Now().UTC()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handler runs one orchestration or CLI step.
|
||||||
|
// Signature: (e *Exporter) Handler(path string) http.Handler.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (e *Exporter) Handler(path string) http.Handler {
|
func (e *Exporter) Handler(path string) http.Handler {
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
metricsPath := path
|
metricsPath := path
|
||||||
@ -78,6 +94,9 @@ func (e *Exporter) Handler(path string) http.Handler {
|
|||||||
return mux
|
return mux
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// serveMetrics runs one orchestration or CLI step.
|
||||||
|
// Signature: (e *Exporter) serveMetrics(w http.ResponseWriter, _ *http.Request).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (e *Exporter) serveMetrics(w http.ResponseWriter, _ *http.Request) {
|
func (e *Exporter) serveMetrics(w http.ResponseWriter, _ *http.Request) {
|
||||||
e.mu.RLock()
|
e.mu.RLock()
|
||||||
defer e.mu.RUnlock()
|
defer e.mu.RUnlock()
|
||||||
@ -145,10 +164,40 @@ func (e *Exporter) serveMetrics(w http.ResponseWriter, _ *http.Request) {
|
|||||||
}
|
}
|
||||||
b.WriteString(fmt.Sprintf("ananke_ups_error%s %d\n", labels, boolNum(s.LastError != "")))
|
b.WriteString(fmt.Sprintf("ananke_ups_error%s %d\n", labels, boolNum(s.LastError != "")))
|
||||||
}
|
}
|
||||||
|
appendQualityGateMetrics(&b)
|
||||||
|
|
||||||
_, _ = w.Write([]byte(b.String()))
|
_, _ = w.Write([]byte(b.String()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// appendQualityGateMetrics appends the contents of the quality-gate metrics file to dst.
// Signature: appendQualityGateMetrics(dst *strings.Builder).
// Why: quality-gate pass/fail telemetry should appear alongside UPS metrics so
// Grafana can track Ananke suite health over time.
//
// The file path is taken from the ANANKE_QUALITY_METRICS_FILE environment
// variable (whitespace-trimmed) and falls back to
// /var/lib/ananke/quality-gate.prom when that is unset or blank.
//
// The append is best-effort: if the file cannot be read, or contains only
// whitespace, dst is left untouched. Otherwise the trimmed file text is written,
// preceded by a blank separator line when dst already has content, and always
// terminated by exactly one newline.
func appendQualityGateMetrics(dst *strings.Builder) {
	path := strings.TrimSpace(os.Getenv("ANANKE_QUALITY_METRICS_FILE"))
	if path == "" {
		path = "/var/lib/ananke/quality-gate.prom"
	}

	raw, err := os.ReadFile(path)
	if err != nil {
		// Deliberately ignored: a missing or unreadable file means there is
		// simply nothing extra to expose on this scrape.
		return
	}

	text := strings.TrimSpace(string(raw))
	if text == "" {
		return
	}

	if dst.Len() > 0 {
		dst.WriteString("\n")
	}
	dst.WriteString(text)
	// text was trimmed above, so it can never end in a newline already; the
	// terminator is therefore written unconditionally.
	dst.WriteString("\n")
}
|
||||||
|
|
||||||
|
// boolNum runs one orchestration or CLI step.
|
||||||
|
// Signature: boolNum(v bool) int.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func boolNum(v bool) int {
|
func boolNum(v bool) int {
|
||||||
if v {
|
if v {
|
||||||
return 1
|
return 1
|
||||||
@ -156,6 +205,9 @@ func boolNum(v bool) int {
|
|||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// safe runs one orchestration or CLI step.
|
||||||
|
// Signature: safe(in string) string.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func safe(in string) string {
|
func safe(in string) string {
|
||||||
out := strings.ReplaceAll(in, "\\", "\\\\")
|
out := strings.ReplaceAll(in, "\\", "\\\\")
|
||||||
return strings.ReplaceAll(out, "\"", "\\\"")
|
return strings.ReplaceAll(out, "\"", "\\\"")
|
||||||
|
|||||||
86
internal/metrics/exporter_additional_test.go
Normal file
86
internal/metrics/exporter_additional_test.go
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
package metrics
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestExporterHealthzAndEscaping checks that /healthz answers 200 "ok" and that the custom metrics path renders quote/backslash-bearing labels in escaped form.
|
||||||
|
// Signature: TestExporterHealthzAndEscaping(t *testing.T).
|
||||||
|
// Why: covers health endpoint and label escaping branches in metrics renderer.
|
||||||
|
func TestExporterHealthzAndEscaping(t *testing.T) {
|
||||||
|
e := New()
|
||||||
|
e.UpdateSample(Sample{
|
||||||
|
Name: `Sta"tera`,
|
||||||
|
Target: `statera\host`,
|
||||||
|
Status: `O"B`,
|
||||||
|
LastError: "x",
|
||||||
|
})
|
||||||
|
|
||||||
|
h := e.Handler("/custom")
|
||||||
|
healthReq := httptest.NewRequest(http.MethodGet, "/healthz", nil)
|
||||||
|
healthRR := httptest.NewRecorder()
|
||||||
|
h.ServeHTTP(healthRR, healthReq)
|
||||||
|
if healthRR.Code != http.StatusOK || strings.TrimSpace(healthRR.Body.String()) != "ok" {
|
||||||
|
t.Fatalf("unexpected health response: code=%d body=%q", healthRR.Code, healthRR.Body.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
metricsReq := httptest.NewRequest(http.MethodGet, "/custom", nil)
|
||||||
|
metricsRR := httptest.NewRecorder()
|
||||||
|
h.ServeHTTP(metricsRR, metricsReq)
|
||||||
|
body := metricsRR.Body.String()
|
||||||
|
if !strings.Contains(body, `source="Sta\\\"tera"`) {
|
||||||
|
t.Fatalf("expected escaped source label, got:\n%s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `target="statera\\\\host"`) {
|
||||||
|
t.Fatalf("expected escaped target label, got:\n%s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, "ananke_ups_error") {
|
||||||
|
t.Fatalf("expected error metric line in output")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestBoolNumAndSafeHelpers checks that boolNum maps true/false to 1/0 and that safe backslash-escapes both double quotes and backslashes.
|
||||||
|
// Signature: TestBoolNumAndSafeHelpers(t *testing.T).
|
||||||
|
// Why: directly covers remaining helper branches.
|
||||||
|
func TestBoolNumAndSafeHelpers(t *testing.T) {
|
||||||
|
if boolNum(true) != 1 || boolNum(false) != 0 {
|
||||||
|
t.Fatalf("unexpected boolNum values")
|
||||||
|
}
|
||||||
|
if got := safe(`a"b\c`); got != `a\"b\\c` {
|
||||||
|
t.Fatalf("unexpected escaped string: %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestExporterAppendsQualityGateMetrics writes a metrics file to a temp dir, points ANANKE_QUALITY_METRICS_FILE at it, and checks that one of its sample lines appears in the /metrics response.
|
||||||
|
// Signature: TestExporterAppendsQualityGateMetrics(t *testing.T).
|
||||||
|
// Why: verifies quality-gate metrics are surfaced on /metrics for Grafana suite
|
||||||
|
// pass-rate tracking.
|
||||||
|
func TestExporterAppendsQualityGateMetrics(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
metricsPath := filepath.Join(tmp, "quality-gate.prom")
|
||||||
|
content := strings.Join([]string{
|
||||||
|
`# HELP ananke_quality_gate_runs_total Total quality gate runs by status.`,
|
||||||
|
`# TYPE ananke_quality_gate_runs_total counter`,
|
||||||
|
`ananke_quality_gate_runs_total{suite="ananke",status="ok"} 10`,
|
||||||
|
`ananke_quality_gate_runs_total{suite="ananke",status="failed"} 2`,
|
||||||
|
"",
|
||||||
|
}, "\n")
|
||||||
|
if err := os.WriteFile(metricsPath, []byte(content), 0o600); err != nil {
|
||||||
|
t.Fatalf("write quality metrics file: %v", err)
|
||||||
|
}
|
||||||
|
t.Setenv("ANANKE_QUALITY_METRICS_FILE", metricsPath)
|
||||||
|
|
||||||
|
e := New()
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
e.Handler("/metrics").ServeHTTP(rr, req)
|
||||||
|
body := rr.Body.String()
|
||||||
|
if !strings.Contains(body, `ananke_quality_gate_runs_total{suite="ananke",status="ok"} 10`) {
|
||||||
|
t.Fatalf("expected quality gate metrics appended to exporter output, got:\n%s", body)
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -7,6 +7,9 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// TestExporterEmitsCoreMetrics runs one orchestration or CLI step.
|
||||||
|
// Signature: TestExporterEmitsCoreMetrics(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func TestExporterEmitsCoreMetrics(t *testing.T) {
|
func TestExporterEmitsCoreMetrics(t *testing.T) {
|
||||||
e := New()
|
e := New()
|
||||||
e.UpdateBudget(321)
|
e.UpdateBudget(321)
|
||||||
|
|||||||
@ -34,6 +34,19 @@ type Daemon struct {
|
|||||||
exporter *metrics.Exporter
|
exporter *metrics.Exporter
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// sshConfigCandidates lists the fallback SSH config paths that
// resolveSSHConfigFile probes in order when cfg.SSHConfigFile is blank; the
// first path that exists and is not a directory is used.
// NOTE(review): presumably a package-level var (not a local) so tests can
// override it — confirm against the test files.
var sshConfigCandidates = []string{
|
||||||
|
"/home/atlas/.ssh/config",
|
||||||
|
"/home/tethys/.ssh/config",
|
||||||
|
}
|
||||||
|
|
||||||
|
// sshIdentityCandidates lists the fallback SSH private-key paths that
// resolveSSHIdentityFile probes in order when cfg.SSHIdentityFile is blank; the
// first path that exists and is not a directory is used.
// NOTE(review): presumably a package-level var (not a local) so tests can
// override it — confirm against the test files.
var sshIdentityCandidates = []string{
|
||||||
|
"/home/atlas/.ssh/id_ed25519",
|
||||||
|
"/home/tethys/.ssh/id_ed25519",
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewDaemon runs one orchestration or CLI step.
|
||||||
|
// Signature: NewDaemon(cfg config.Config, orch *cluster.Orchestrator, targets []Target, logger *log.Logger) *Daemon.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func NewDaemon(cfg config.Config, orch *cluster.Orchestrator, targets []Target, logger *log.Logger) *Daemon {
|
func NewDaemon(cfg config.Config, orch *cluster.Orchestrator, targets []Target, logger *log.Logger) *Daemon {
|
||||||
return &Daemon{
|
return &Daemon{
|
||||||
cfg: cfg,
|
cfg: cfg,
|
||||||
@ -44,6 +57,9 @@ func NewDaemon(cfg config.Config, orch *cluster.Orchestrator, targets []Target,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Run runs one orchestration or CLI step.
|
||||||
|
// Signature: (d *Daemon) Run(ctx context.Context) error.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (d *Daemon) Run(ctx context.Context) error {
|
func (d *Daemon) Run(ctx context.Context) error {
|
||||||
if !d.cfg.UPS.Enabled {
|
if !d.cfg.UPS.Enabled {
|
||||||
return fmt.Errorf("ups monitoring is disabled in config")
|
return fmt.Errorf("ups monitoring is disabled in config")
|
||||||
@ -152,6 +168,9 @@ func (d *Daemon) Run(ctx context.Context) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// triggerShutdown runs one orchestration or CLI step.
|
||||||
|
// Signature: (d *Daemon) triggerShutdown(ctx context.Context, reason string) error.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (d *Daemon) triggerShutdown(ctx context.Context, reason string) error {
|
func (d *Daemon) triggerShutdown(ctx context.Context, reason string) error {
|
||||||
intent, err := state.ReadIntent(d.cfg.State.IntentPath)
|
intent, err := state.ReadIntent(d.cfg.State.IntentPath)
|
||||||
if err == nil && intent.State == state.IntentShuttingDown {
|
if err == nil && intent.State == state.IntentShuttingDown {
|
||||||
@ -190,6 +209,9 @@ func (d *Daemon) triggerShutdown(ctx context.Context, reason string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// forwardShutdown runs one orchestration or CLI step.
|
||||||
|
// Signature: (d *Daemon) forwardShutdown(ctx context.Context, reason string) error.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (d *Daemon) forwardShutdown(ctx context.Context, reason string) error {
|
func (d *Daemon) forwardShutdown(ctx context.Context, reason string) error {
|
||||||
timeout := time.Duration(d.cfg.Coordination.CommandTimeoutSeconds) * time.Second
|
timeout := time.Duration(d.cfg.Coordination.CommandTimeoutSeconds) * time.Second
|
||||||
if timeout <= 0 {
|
if timeout <= 0 {
|
||||||
@ -280,15 +302,14 @@ func (d *Daemon) forwardShutdown(ctx context.Context, reason string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// resolveSSHConfigFile runs one orchestration or CLI step.
|
||||||
|
// Signature: (d *Daemon) resolveSSHConfigFile() string.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (d *Daemon) resolveSSHConfigFile() string {
|
func (d *Daemon) resolveSSHConfigFile() string {
|
||||||
if strings.TrimSpace(d.cfg.SSHConfigFile) != "" {
|
if strings.TrimSpace(d.cfg.SSHConfigFile) != "" {
|
||||||
return strings.TrimSpace(d.cfg.SSHConfigFile)
|
return strings.TrimSpace(d.cfg.SSHConfigFile)
|
||||||
}
|
}
|
||||||
candidates := []string{
|
for _, p := range sshConfigCandidates {
|
||||||
"/home/atlas/.ssh/config",
|
|
||||||
"/home/tethys/.ssh/config",
|
|
||||||
}
|
|
||||||
for _, p := range candidates {
|
|
||||||
if stat, err := os.Stat(p); err == nil && !stat.IsDir() {
|
if stat, err := os.Stat(p); err == nil && !stat.IsDir() {
|
||||||
return p
|
return p
|
||||||
}
|
}
|
||||||
@ -296,15 +317,14 @@ func (d *Daemon) resolveSSHConfigFile() string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// resolveSSHIdentityFile runs one orchestration or CLI step.
|
||||||
|
// Signature: (d *Daemon) resolveSSHIdentityFile() string.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (d *Daemon) resolveSSHIdentityFile() string {
|
func (d *Daemon) resolveSSHIdentityFile() string {
|
||||||
if strings.TrimSpace(d.cfg.SSHIdentityFile) != "" {
|
if strings.TrimSpace(d.cfg.SSHIdentityFile) != "" {
|
||||||
return strings.TrimSpace(d.cfg.SSHIdentityFile)
|
return strings.TrimSpace(d.cfg.SSHIdentityFile)
|
||||||
}
|
}
|
||||||
candidates := []string{
|
for _, p := range sshIdentityCandidates {
|
||||||
"/home/atlas/.ssh/id_ed25519",
|
|
||||||
"/home/tethys/.ssh/id_ed25519",
|
|
||||||
}
|
|
||||||
for _, p := range candidates {
|
|
||||||
if stat, err := os.Stat(p); err == nil && !stat.IsDir() {
|
if stat, err := os.Stat(p); err == nil && !stat.IsDir() {
|
||||||
return p
|
return p
|
||||||
}
|
}
|
||||||
@ -312,6 +332,9 @@ func (d *Daemon) resolveSSHIdentityFile() string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// targetList runs one orchestration or CLI step.
|
||||||
|
// Signature: (d *Daemon) targetList() string.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (d *Daemon) targetList() string {
|
func (d *Daemon) targetList() string {
|
||||||
names := make([]string, 0, len(d.targets))
|
names := make([]string, 0, len(d.targets))
|
||||||
for _, t := range d.targets {
|
for _, t := range d.targets {
|
||||||
@ -320,6 +343,9 @@ func (d *Daemon) targetList() string {
|
|||||||
return strings.Join(names, ",")
|
return strings.Join(names, ",")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// startMetricsServer runs one orchestration or CLI step.
|
||||||
|
// Signature: (d *Daemon) startMetricsServer() error.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (d *Daemon) startMetricsServer() error {
|
func (d *Daemon) startMetricsServer() error {
|
||||||
if d.cfg.Metrics.BindAddr == "" {
|
if d.cfg.Metrics.BindAddr == "" {
|
||||||
return fmt.Errorf("metrics.bind_addr must not be empty when metrics are enabled")
|
return fmt.Errorf("metrics.bind_addr must not be empty when metrics are enabled")
|
||||||
|
|||||||
255
internal/service/daemon_additional_test.go
Normal file
255
internal/service/daemon_additional_test.go
Normal file
@ -0,0 +1,255 @@
|
|||||||
|
package service
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/cluster"
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/config"
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/execx"
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/metrics"
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/state"
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/ups"
|
||||||
|
)
|
||||||
|
|
||||||
|
type daemonFakeProvider struct {
|
||||||
|
samples []ups.Sample
|
||||||
|
errs []error
|
||||||
|
idx int
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read runs one orchestration or CLI step.
|
||||||
|
// Signature: (p *daemonFakeProvider) Read(ctx context.Context) (ups.Sample, error).
|
||||||
|
// Why: daemon tests need deterministic telemetry/error sequencing without real UPS I/O.
|
||||||
|
func (p *daemonFakeProvider) Read(_ context.Context) (ups.Sample, error) {
|
||||||
|
if p.idx < len(p.errs) && p.errs[p.idx] != nil {
|
||||||
|
err := p.errs[p.idx]
|
||||||
|
p.idx++
|
||||||
|
return ups.Sample{}, err
|
||||||
|
}
|
||||||
|
if p.idx < len(p.samples) {
|
||||||
|
s := p.samples[p.idx]
|
||||||
|
p.idx++
|
||||||
|
return s, nil
|
||||||
|
}
|
||||||
|
if len(p.samples) > 0 {
|
||||||
|
return p.samples[len(p.samples)-1], nil
|
||||||
|
}
|
||||||
|
return ups.Sample{}, context.DeadlineExceeded
|
||||||
|
}
|
||||||
|
|
||||||
|
// newDaemonTestOrchestrator runs one orchestration or CLI step.
|
||||||
|
// Signature: newDaemonTestOrchestrator(t *testing.T, stateDir string) *cluster.Orchestrator.
|
||||||
|
// Why: daemon tests share a minimal dry-run orchestrator fixture to avoid duplication.
|
||||||
|
func newDaemonTestOrchestrator(t *testing.T, stateDir string) *cluster.Orchestrator {
|
||||||
|
t.Helper()
|
||||||
|
cfg := config.Config{
|
||||||
|
ControlPlanes: []string{"titan-0a"},
|
||||||
|
Workers: []string{"titan-22"},
|
||||||
|
SSHUser: "atlas",
|
||||||
|
SSHPort: 2277,
|
||||||
|
SSHManagedNodes: []string{"titan-0a", "titan-22"},
|
||||||
|
SSHNodeHosts: map[string]string{
|
||||||
|
"titan-0a": "192.168.22.11",
|
||||||
|
"titan-22": "192.168.22.22",
|
||||||
|
},
|
||||||
|
State: config.State{
|
||||||
|
Dir: stateDir,
|
||||||
|
ReportsDir: filepath.Join(stateDir, "reports"),
|
||||||
|
RunHistoryPath: filepath.Join(stateDir, "runs.json"),
|
||||||
|
LockPath: filepath.Join(stateDir, "ananke.lock"),
|
||||||
|
IntentPath: filepath.Join(stateDir, "intent.json"),
|
||||||
|
},
|
||||||
|
Shutdown: config.Shutdown{
|
||||||
|
EmergencySkipDrain: true,
|
||||||
|
EmergencySkipEtcd: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
return cluster.New(
|
||||||
|
cfg,
|
||||||
|
&execx.Runner{DryRun: true, Logger: log.New(io.Discard, "", 0)},
|
||||||
|
state.New(filepath.Join(stateDir, "runs.json")),
|
||||||
|
log.New(io.Discard, "", 0),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDaemonRunTriggersShutdownOnLowBattery runs one orchestration or CLI step.
|
||||||
|
// Signature: TestDaemonRunTriggersShutdownOnLowBattery(t *testing.T).
|
||||||
|
// Why: covers main daemon loop path that triggers shutdown after debounce threshold.
|
||||||
|
func TestDaemonRunTriggersShutdownOnLowBattery(t *testing.T) {
|
||||||
|
stateDir := t.TempDir()
|
||||||
|
orch := newDaemonTestOrchestrator(t, stateDir)
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
UPS: config.UPS{
|
||||||
|
Enabled: true,
|
||||||
|
PollSeconds: 1,
|
||||||
|
DebounceCount: 1,
|
||||||
|
RuntimeSafetyFactor: 1.0,
|
||||||
|
},
|
||||||
|
State: config.State{
|
||||||
|
IntentPath: filepath.Join(stateDir, "intent.json"),
|
||||||
|
},
|
||||||
|
Shutdown: config.Shutdown{
|
||||||
|
EmergencySkipDrain: true,
|
||||||
|
EmergencySkipEtcd: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
orch: orch,
|
||||||
|
targets: []Target{
|
||||||
|
{
|
||||||
|
Name: "Pyrphoros",
|
||||||
|
Target: "pyrphoros@localhost",
|
||||||
|
Provider: &daemonFakeProvider{
|
||||||
|
samples: []ups.Sample{{OnBattery: true, LowBattery: true, RuntimeSeconds: 30, RawStatus: "OB LB"}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
log: log.New(io.Discard, "", 0),
|
||||||
|
exporter: metrics.New(),
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
if err := d.Run(ctx); err != nil {
|
||||||
|
t.Fatalf("expected daemon to trigger and complete shutdown, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDaemonRunTriggersShutdownOnTelemetryTimeout runs one orchestration or CLI step.
|
||||||
|
// Signature: TestDaemonRunTriggersShutdownOnTelemetryTimeout(t *testing.T).
|
||||||
|
// Why: covers telemetry-timeout trigger path while UPS remains on-battery.
|
||||||
|
func TestDaemonRunTriggersShutdownOnTelemetryTimeout(t *testing.T) {
|
||||||
|
stateDir := t.TempDir()
|
||||||
|
orch := newDaemonTestOrchestrator(t, stateDir)
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
UPS: config.UPS{
|
||||||
|
Enabled: true,
|
||||||
|
PollSeconds: 1,
|
||||||
|
DebounceCount: 3,
|
||||||
|
RuntimeSafetyFactor: 1.0,
|
||||||
|
TelemetryTimeoutSeconds: 1,
|
||||||
|
},
|
||||||
|
State: config.State{
|
||||||
|
IntentPath: filepath.Join(stateDir, "intent.json"),
|
||||||
|
},
|
||||||
|
Shutdown: config.Shutdown{
|
||||||
|
EmergencySkipDrain: true,
|
||||||
|
EmergencySkipEtcd: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
orch: orch,
|
||||||
|
targets: []Target{
|
||||||
|
{
|
||||||
|
Name: "Statera",
|
||||||
|
Target: "statera@localhost",
|
||||||
|
Provider: &daemonFakeProvider{
|
||||||
|
samples: []ups.Sample{{OnBattery: true, LowBattery: false, RuntimeSeconds: 9999, RawStatus: "OB"}},
|
||||||
|
errs: []error{nil, context.DeadlineExceeded, context.DeadlineExceeded, context.DeadlineExceeded},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
log: log.New(io.Discard, "", 0),
|
||||||
|
exporter: metrics.New(),
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
if err := d.Run(ctx); err != nil {
|
||||||
|
t.Fatalf("expected telemetry-timeout shutdown path to complete, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestForwardShutdownSucceedsWithSSHShim runs one orchestration or CLI step.
|
||||||
|
// Signature: TestForwardShutdownSucceedsWithSSHShim(t *testing.T).
|
||||||
|
// Why: covers forward-shutdown SSH execution path.
|
||||||
|
func TestForwardShutdownSucceedsWithSSHShim(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
sshPath := filepath.Join(tmp, "ssh")
|
||||||
|
script := `#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
echo forwarded
|
||||||
|
`
|
||||||
|
if err := os.WriteFile(sshPath, []byte(script), 0o755); err != nil {
|
||||||
|
t.Fatalf("write fake ssh: %v", err)
|
||||||
|
}
|
||||||
|
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||||
|
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
SSHUser: "atlas",
|
||||||
|
SSHPort: 2277,
|
||||||
|
Coordination: config.Coordination{
|
||||||
|
ForwardShutdownHost: "titan-db",
|
||||||
|
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
||||||
|
CommandTimeoutSeconds: 5,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
log: log.New(io.Discard, "", 0),
|
||||||
|
}
|
||||||
|
if err := d.forwardShutdown(context.Background(), "test-forward"); err != nil {
|
||||||
|
t.Fatalf("forwardShutdown failed: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestForwardShutdownFailsWhenSSHFailsAndNoRecovery runs one orchestration or CLI step.
|
||||||
|
// Signature: TestForwardShutdownFailsWhenSSHFailsAndNoRecovery(t *testing.T).
|
||||||
|
// Why: covers forwarded shutdown error propagation branch.
|
||||||
|
func TestForwardShutdownFailsWhenSSHFailsAndNoRecovery(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
sshPath := filepath.Join(tmp, "ssh")
|
||||||
|
script := `#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
echo "permission denied" >&2
|
||||||
|
exit 255
|
||||||
|
`
|
||||||
|
if err := os.WriteFile(sshPath, []byte(script), 0o755); err != nil {
|
||||||
|
t.Fatalf("write fake ssh: %v", err)
|
||||||
|
}
|
||||||
|
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||||
|
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
SSHUser: "atlas",
|
||||||
|
SSHPort: 2277,
|
||||||
|
Coordination: config.Coordination{
|
||||||
|
ForwardShutdownHost: "titan-db",
|
||||||
|
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
||||||
|
CommandTimeoutSeconds: 5,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
log: log.New(io.Discard, "", 0),
|
||||||
|
}
|
||||||
|
err := d.forwardShutdown(context.Background(), "test-fail")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expected forwardShutdown error")
|
||||||
|
}
|
||||||
|
if !strings.Contains(strings.ToLower(err.Error()), "forward shutdown via ssh failed") {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStartMetricsServerSuccess runs one orchestration or CLI step.
|
||||||
|
// Signature: TestStartMetricsServerSuccess(t *testing.T).
|
||||||
|
// Why: covers successful metrics server startup branch.
|
||||||
|
func TestStartMetricsServerSuccess(t *testing.T) {
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
Metrics: config.Metrics{
|
||||||
|
Enabled: true,
|
||||||
|
BindAddr: "127.0.0.1:0",
|
||||||
|
Path: "/metrics",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
log: log.New(io.Discard, "", 0),
|
||||||
|
exporter: metrics.New(),
|
||||||
|
}
|
||||||
|
if err := d.startMetricsServer(); err != nil {
|
||||||
|
t.Fatalf("startMetricsServer failed: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
421
internal/service/daemon_quality_branches_test.go
Normal file
421
internal/service/daemon_quality_branches_test.go
Normal file
@ -0,0 +1,421 @@
|
|||||||
|
package service
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/cluster"
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/config"
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/execx"
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/metrics"
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/state"
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/ups"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestNewDaemonInitializesExporter runs one orchestration or CLI step.
|
||||||
|
// Signature: TestNewDaemonInitializesExporter(t *testing.T).
|
||||||
|
// Why: covers constructor branch so daemon initialization contracts stay explicit.
|
||||||
|
func TestNewDaemonInitializesExporter(t *testing.T) {
|
||||||
|
d := NewDaemon(config.Config{}, nil, nil, log.New(io.Discard, "", 0))
|
||||||
|
if d == nil || d.exporter == nil {
|
||||||
|
t.Fatalf("expected NewDaemon to initialize exporter")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestTriggerShutdownForwardSuccessSetsForwardedIntent runs one orchestration or CLI step.
|
||||||
|
// Signature: TestTriggerShutdownForwardSuccessSetsForwardedIntent(t *testing.T).
|
||||||
|
// Why: covers forwarded shutdown happy-path branch and completion intent semantics.
|
||||||
|
func TestTriggerShutdownForwardSuccessSetsForwardedIntent(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
sshPath := filepath.Join(tmp, "ssh")
|
||||||
|
if err := os.WriteFile(sshPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\necho forwarded\n"), 0o755); err != nil {
|
||||||
|
t.Fatalf("write fake ssh: %v", err)
|
||||||
|
}
|
||||||
|
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||||
|
|
||||||
|
intentPath := filepath.Join(tmp, "intent.json")
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
SSHUser: "atlas",
|
||||||
|
SSHPort: 2277,
|
||||||
|
State: config.State{
|
||||||
|
IntentPath: intentPath,
|
||||||
|
},
|
||||||
|
Coordination: config.Coordination{
|
||||||
|
ForwardShutdownHost: "titan-db",
|
||||||
|
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
||||||
|
CommandTimeoutSeconds: 3,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
log: log.New(io.Discard, "", 0),
|
||||||
|
exporter: metrics.New(),
|
||||||
|
}
|
||||||
|
if err := d.triggerShutdown(context.Background(), "test-forward-success"); err != nil {
|
||||||
|
t.Fatalf("triggerShutdown forward success failed: %v", err)
|
||||||
|
}
|
||||||
|
in, err := state.ReadIntent(intentPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read forward completion intent: %v", err)
|
||||||
|
}
|
||||||
|
if in.State != state.IntentShutdownComplete || in.Source != "daemon-forwarded" {
|
||||||
|
t.Fatalf("unexpected forward completion intent: %+v", in)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestTriggerShutdownForwardFailureWithoutFallback runs one orchestration or CLI step.
|
||||||
|
// Signature: TestTriggerShutdownForwardFailureWithoutFallback(t *testing.T).
|
||||||
|
// Why: covers explicit failure branch when forwarding is required and local fallback is disabled.
|
||||||
|
func TestTriggerShutdownForwardFailureWithoutFallback(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
sshPath := filepath.Join(tmp, "ssh")
|
||||||
|
if err := os.WriteFile(sshPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\necho denied >&2\nexit 255\n"), 0o755); err != nil {
|
||||||
|
t.Fatalf("write fake ssh: %v", err)
|
||||||
|
}
|
||||||
|
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||||
|
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
SSHUser: "atlas",
|
||||||
|
SSHPort: 2277,
|
||||||
|
State: config.State{
|
||||||
|
IntentPath: filepath.Join(tmp, "intent.json"),
|
||||||
|
},
|
||||||
|
Coordination: config.Coordination{
|
||||||
|
ForwardShutdownHost: "titan-db",
|
||||||
|
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
||||||
|
FallbackLocalShutdown: false,
|
||||||
|
CommandTimeoutSeconds: 3,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
log: log.New(io.Discard, "", 0),
|
||||||
|
exporter: metrics.New(),
|
||||||
|
}
|
||||||
|
err := d.triggerShutdown(context.Background(), "test-forward-fail")
|
||||||
|
if err == nil || !strings.Contains(err.Error(), "forward shutdown failed") {
|
||||||
|
t.Fatalf("expected forward failure without fallback, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestTriggerShutdownForwardFailureFallsBackToLocal runs one orchestration or CLI step.
|
||||||
|
// Signature: TestTriggerShutdownForwardFailureFallsBackToLocal(t *testing.T).
|
||||||
|
// Why: covers fallback branch where local shutdown is used after forwarding fails.
|
||||||
|
func TestTriggerShutdownForwardFailureFallsBackToLocal(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
sshPath := filepath.Join(tmp, "ssh")
|
||||||
|
if err := os.WriteFile(sshPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\necho denied >&2\nexit 255\n"), 0o755); err != nil {
|
||||||
|
t.Fatalf("write fake ssh: %v", err)
|
||||||
|
}
|
||||||
|
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||||
|
|
||||||
|
orch := newDaemonTestOrchestrator(t, tmp)
|
||||||
|
intentPath := filepath.Join(tmp, "intent.json")
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
SSHUser: "atlas",
|
||||||
|
SSHPort: 2277,
|
||||||
|
State: config.State{
|
||||||
|
IntentPath: intentPath,
|
||||||
|
},
|
||||||
|
Shutdown: config.Shutdown{
|
||||||
|
EmergencySkipDrain: true,
|
||||||
|
EmergencySkipEtcd: true,
|
||||||
|
},
|
||||||
|
Coordination: config.Coordination{
|
||||||
|
ForwardShutdownHost: "titan-db",
|
||||||
|
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
||||||
|
FallbackLocalShutdown: true,
|
||||||
|
CommandTimeoutSeconds: 3,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
orch: orch,
|
||||||
|
log: log.New(io.Discard, "", 0),
|
||||||
|
exporter: metrics.New(),
|
||||||
|
}
|
||||||
|
if err := d.triggerShutdown(context.Background(), "test-forward-fallback"); err != nil {
|
||||||
|
t.Fatalf("triggerShutdown fallback local failed: %v", err)
|
||||||
|
}
|
||||||
|
in, err := state.ReadIntent(intentPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read local completion intent: %v", err)
|
||||||
|
}
|
||||||
|
if in.State != state.IntentShutdownComplete || in.Source != "daemon-local" {
|
||||||
|
t.Fatalf("unexpected local completion intent: %+v", in)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestForwardShutdownBuildsJumpArgs runs one orchestration or CLI step.
|
||||||
|
// Signature: TestForwardShutdownBuildsJumpArgs(t *testing.T).
|
||||||
|
// Why: covers jump-host argument construction branches in forward shutdown transport.
|
||||||
|
func TestForwardShutdownBuildsJumpArgs(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
argsOut := filepath.Join(tmp, "args.txt")
|
||||||
|
sshPath := filepath.Join(tmp, "ssh")
|
||||||
|
script := "#!/usr/bin/env bash\nset -euo pipefail\nprintf '%s\n' \"$*\" > " + argsOut + "\n"
|
||||||
|
if err := os.WriteFile(sshPath, []byte(script), 0o755); err != nil {
|
||||||
|
t.Fatalf("write fake ssh: %v", err)
|
||||||
|
}
|
||||||
|
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||||
|
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
SSHUser: "atlas",
|
||||||
|
SSHPort: 2277,
|
||||||
|
SSHConfigFile: "/tmp/custom-config",
|
||||||
|
SSHIdentityFile: "/tmp/custom-key",
|
||||||
|
SSHJumpHost: "titan-jh",
|
||||||
|
SSHJumpUser: "jump",
|
||||||
|
SSHNodeHosts: map[string]string{
|
||||||
|
"titan-db": "10.0.0.5",
|
||||||
|
},
|
||||||
|
SSHNodeUsers: map[string]string{
|
||||||
|
"titan-db": "dbadmin",
|
||||||
|
},
|
||||||
|
Coordination: config.Coordination{
|
||||||
|
ForwardShutdownHost: "titan-db",
|
||||||
|
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
||||||
|
CommandTimeoutSeconds: 3,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
log: log.New(io.Discard, "", 0),
|
||||||
|
}
|
||||||
|
if err := d.forwardShutdown(context.Background(), "args-check"); err != nil {
|
||||||
|
t.Fatalf("forwardShutdown with jump args failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
raw, err := os.ReadFile(argsOut)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read ssh args output: %v", err)
|
||||||
|
}
|
||||||
|
out := string(raw)
|
||||||
|
for _, want := range []string{"-F /tmp/custom-config", "-i /tmp/custom-key", "-J jump@titan-jh:2277", "-p 2277", "dbadmin@10.0.0.5"} {
|
||||||
|
if !strings.Contains(out, want) {
|
||||||
|
t.Fatalf("expected ssh args to include %q, got %q", want, out)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStartMetricsServerInvalidBindLogsErrorPath runs one orchestration or CLI step.
|
||||||
|
// Signature: TestStartMetricsServerInvalidBindLogsErrorPath(t *testing.T).
|
||||||
|
// Why: exercises goroutine listen failure branch so metrics startup diagnostics remain covered.
|
||||||
|
func TestStartMetricsServerInvalidBindLogsErrorPath(t *testing.T) {
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
Metrics: config.Metrics{
|
||||||
|
Enabled: true,
|
||||||
|
BindAddr: "127.0.0.1:not-a-port",
|
||||||
|
Path: "/metrics",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
log: log.New(io.Discard, "", 0),
|
||||||
|
exporter: metrics.New(),
|
||||||
|
}
|
||||||
|
if err := d.startMetricsServer(); err != nil {
|
||||||
|
t.Fatalf("startMetricsServer should return nil after goroutine spawn, got %v", err)
|
||||||
|
}
|
||||||
|
time.Sleep(25 * time.Millisecond)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestResolveSSHPathCandidatesFromOverrides runs one orchestration or CLI step.
|
||||||
|
// Signature: TestResolveSSHPathCandidatesFromOverrides(t *testing.T).
|
||||||
|
// Why: covers candidate-path discovery branches without requiring writes under /home.
|
||||||
|
func TestResolveSSHPathCandidatesFromOverrides(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
cfgPath := filepath.Join(tmp, "config")
|
||||||
|
keyPath := filepath.Join(tmp, "id_ed25519")
|
||||||
|
if err := os.WriteFile(cfgPath, []byte("Host *\n"), 0o600); err != nil {
|
||||||
|
t.Fatalf("write fake config candidate: %v", err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(keyPath, []byte("fake-key"), 0o600); err != nil {
|
||||||
|
t.Fatalf("write fake key candidate: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
origConfigs := sshConfigCandidates
|
||||||
|
origKeys := sshIdentityCandidates
|
||||||
|
t.Cleanup(func() {
|
||||||
|
sshConfigCandidates = origConfigs
|
||||||
|
sshIdentityCandidates = origKeys
|
||||||
|
})
|
||||||
|
sshConfigCandidates = []string{cfgPath}
|
||||||
|
sshIdentityCandidates = []string{keyPath}
|
||||||
|
|
||||||
|
d := &Daemon{cfg: config.Config{}}
|
||||||
|
if got := d.resolveSSHConfigFile(); got != cfgPath {
|
||||||
|
t.Fatalf("expected config candidate path %q, got %q", cfgPath, got)
|
||||||
|
}
|
||||||
|
if got := d.resolveSSHIdentityFile(); got != keyPath {
|
||||||
|
t.Fatalf("expected key candidate path %q, got %q", keyPath, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestForwardShutdownKnownHostsRepairRetry runs one orchestration or CLI step.
|
||||||
|
// Signature: TestForwardShutdownKnownHostsRepairRetry(t *testing.T).
|
||||||
|
// Why: covers known-hosts-repair retry branch in forwarded shutdown transport.
|
||||||
|
func TestForwardShutdownKnownHostsRepairRetry(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
attemptMarker := filepath.Join(tmp, "attempt")
|
||||||
|
sshPath := filepath.Join(tmp, "ssh")
|
||||||
|
script := `#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
marker="` + attemptMarker + `"
|
||||||
|
if [[ ! -f "$marker" ]]; then
|
||||||
|
echo "REMOTE HOST IDENTIFICATION HAS CHANGED!" >&2
|
||||||
|
touch "$marker"
|
||||||
|
exit 255
|
||||||
|
fi
|
||||||
|
echo "forwarded"
|
||||||
|
`
|
||||||
|
if err := os.WriteFile(sshPath, []byte(script), 0o755); err != nil {
|
||||||
|
t.Fatalf("write fake ssh: %v", err)
|
||||||
|
}
|
||||||
|
sshKeygenPath := filepath.Join(tmp, "ssh-keygen")
|
||||||
|
if err := os.WriteFile(sshKeygenPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\nexit 0\n"), 0o755); err != nil {
|
||||||
|
t.Fatalf("write fake ssh-keygen: %v", err)
|
||||||
|
}
|
||||||
|
sshKeyscanPath := filepath.Join(tmp, "ssh-keyscan")
|
||||||
|
if err := os.WriteFile(sshKeyscanPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\necho fake-key\n"), 0o755); err != nil {
|
||||||
|
t.Fatalf("write fake ssh-keyscan: %v", err)
|
||||||
|
}
|
||||||
|
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||||
|
|
||||||
|
knownHosts := filepath.Join(tmp, "known_hosts")
|
||||||
|
if err := os.WriteFile(knownHosts, []byte{}, 0o600); err != nil {
|
||||||
|
t.Fatalf("write known_hosts file: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
SSHConfigFile: knownHosts, // used only to derive known-hosts search path
|
||||||
|
SSHUser: "atlas",
|
||||||
|
SSHPort: 2277,
|
||||||
|
Coordination: config.Coordination{
|
||||||
|
ForwardShutdownHost: "titan-db",
|
||||||
|
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
||||||
|
CommandTimeoutSeconds: 3,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
log: log.New(io.Discard, "", 0),
|
||||||
|
}
|
||||||
|
if err := d.forwardShutdown(context.Background(), "repair-retry"); err != nil {
|
||||||
|
t.Fatalf("forwardShutdown known-hosts repair retry failed: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestTriggerShutdownReturnsLocalShutdownError runs one orchestration or CLI step.
|
||||||
|
// Signature: TestTriggerShutdownReturnsLocalShutdownError(t *testing.T).
|
||||||
|
// Why: covers local shutdown error propagation branch from triggerShutdown.
|
||||||
|
func TestTriggerShutdownReturnsLocalShutdownError(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
intentPath := filepath.Join(tmp, "intent-dir")
|
||||||
|
if err := os.MkdirAll(intentPath, 0o755); err != nil {
|
||||||
|
t.Fatalf("mkdir intent dir: %v", err)
|
||||||
|
}
|
||||||
|
orchCfg := config.Config{
|
||||||
|
ControlPlanes: []string{"titan-db"},
|
||||||
|
Workers: []string{"titan-23"},
|
||||||
|
State: config.State{
|
||||||
|
Dir: filepath.Join(tmp, "state"),
|
||||||
|
ReportsDir: filepath.Join(tmp, "reports"),
|
||||||
|
RunHistoryPath: filepath.Join(tmp, "runs.json"),
|
||||||
|
LockPath: filepath.Join(tmp, "ananke.lock"),
|
||||||
|
IntentPath: intentPath, // directory path forces MustWriteIntent failure in Shutdown
|
||||||
|
},
|
||||||
|
}
|
||||||
|
orch := cluster.New(
|
||||||
|
orchCfg,
|
||||||
|
&execx.Runner{DryRun: false, Logger: log.New(io.Discard, "", 0)},
|
||||||
|
state.New(filepath.Join(tmp, "runs.json")),
|
||||||
|
log.New(io.Discard, "", 0),
|
||||||
|
)
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
State: config.State{
|
||||||
|
IntentPath: intentPath,
|
||||||
|
},
|
||||||
|
Shutdown: config.Shutdown{
|
||||||
|
EmergencySkipDrain: true,
|
||||||
|
EmergencySkipEtcd: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
orch: orch,
|
||||||
|
log: log.New(io.Discard, "", 0),
|
||||||
|
exporter: metrics.New(),
|
||||||
|
}
|
||||||
|
err := d.triggerShutdown(context.Background(), "local-shutdown-error")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expected triggerShutdown to propagate local shutdown error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDaemonRunContextCancelNonTriggerPath runs one orchestration or CLI step.
|
||||||
|
// Signature: TestDaemonRunContextCancelNonTriggerPath(t *testing.T).
|
||||||
|
// Why: covers steady-state non-trigger loop branches in Run until context cancellation.
|
||||||
|
func TestDaemonRunContextCancelNonTriggerPath(t *testing.T) {
|
||||||
|
stateDir := t.TempDir()
|
||||||
|
orch := newDaemonTestOrchestrator(t, stateDir)
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
UPS: config.UPS{
|
||||||
|
Enabled: true,
|
||||||
|
PollSeconds: 0, // exercise default poll fallback
|
||||||
|
DebounceCount: 0, // exercise default debounce fallback
|
||||||
|
RuntimeSafetyFactor: 0.5,
|
||||||
|
},
|
||||||
|
State: config.State{
|
||||||
|
IntentPath: filepath.Join(stateDir, "intent.json"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
orch: orch,
|
||||||
|
targets: []Target{
|
||||||
|
{
|
||||||
|
Name: "Pyrphoros",
|
||||||
|
Target: "pyrphoros@localhost",
|
||||||
|
Provider: &daemonFakeProvider{
|
||||||
|
samples: []ups.Sample{
|
||||||
|
{OnBattery: false, LowBattery: false, RuntimeSeconds: 7200, RawStatus: "OL"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
log: log.New(io.Discard, "", 0),
|
||||||
|
exporter: metrics.New(),
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 1100*time.Millisecond)
|
||||||
|
defer cancel()
|
||||||
|
if err := d.Run(ctx); err == nil {
|
||||||
|
t.Fatalf("expected context deadline/cancel in non-trigger loop")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestForwardShutdownErrorWithoutOutput runs one orchestration or CLI step.
|
||||||
|
// Signature: TestForwardShutdownErrorWithoutOutput(t *testing.T).
|
||||||
|
// Why: covers forwardShutdown branch where ssh fails without any stderr/stdout text.
|
||||||
|
func TestForwardShutdownErrorWithoutOutput(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
sshPath := filepath.Join(tmp, "ssh")
|
||||||
|
if err := os.WriteFile(sshPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\nexit 255\n"), 0o755); err != nil {
|
||||||
|
t.Fatalf("write fake ssh: %v", err)
|
||||||
|
}
|
||||||
|
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||||
|
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
SSHUser: "atlas",
|
||||||
|
Coordination: config.Coordination{
|
||||||
|
ForwardShutdownHost: "titan-db",
|
||||||
|
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
||||||
|
CommandTimeoutSeconds: 3,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
log: log.New(io.Discard, "", 0),
|
||||||
|
}
|
||||||
|
err := d.forwardShutdown(context.Background(), "no-output-fail")
|
||||||
|
if err == nil || !strings.Contains(strings.ToLower(err.Error()), "forward shutdown via ssh failed") {
|
||||||
|
t.Fatalf("expected no-output forward ssh failure, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,7 +1,133 @@
|
|||||||
package service
|
package service
|
||||||
|
|
||||||
import "testing"
|
import (
|
||||||
|
"context"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
func TestPlaceholder(t *testing.T) {
|
"scm.bstein.dev/bstein/ananke/internal/config"
|
||||||
// Placeholder test keeps package-level test coverage active.
|
"scm.bstein.dev/bstein/ananke/internal/metrics"
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/state"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestDaemonRunRejectsDisabledUPS runs one orchestration or CLI step.
|
||||||
|
// Signature: TestDaemonRunRejectsDisabledUPS(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
|
func TestDaemonRunRejectsDisabledUPS(t *testing.T) {
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
UPS: config.UPS{Enabled: false},
|
||||||
|
},
|
||||||
|
log: log.New(io.Discard, "", 0),
|
||||||
|
}
|
||||||
|
if err := d.Run(context.Background()); err == nil {
|
||||||
|
t.Fatalf("expected UPS-disabled run to fail")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDaemonRunRejectsMissingTargets runs one orchestration or CLI step.
|
||||||
|
// Signature: TestDaemonRunRejectsMissingTargets(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
|
func TestDaemonRunRejectsMissingTargets(t *testing.T) {
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
UPS: config.UPS{Enabled: true},
|
||||||
|
},
|
||||||
|
log: log.New(io.Discard, "", 0),
|
||||||
|
}
|
||||||
|
if err := d.Run(context.Background()); err == nil {
|
||||||
|
t.Fatalf("expected empty-target run to fail")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDaemonTargetList runs one orchestration or CLI step.
|
||||||
|
// Signature: TestDaemonTargetList(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
|
func TestDaemonTargetList(t *testing.T) {
|
||||||
|
d := &Daemon{
|
||||||
|
targets: []Target{
|
||||||
|
{Name: "Pyrphoros", Target: "pyrphoros@localhost"},
|
||||||
|
{Name: "Statera", Target: "statera@localhost"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got := d.targetList()
|
||||||
|
if !strings.Contains(got, "Pyrphoros=pyrphoros@localhost") || !strings.Contains(got, "Statera=statera@localhost") {
|
||||||
|
t.Fatalf("unexpected target list: %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDaemonResolveSSHPathsPreferConfigured runs one orchestration or CLI step.
|
||||||
|
// Signature: TestDaemonResolveSSHPathsPreferConfigured(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
|
func TestDaemonResolveSSHPathsPreferConfigured(t *testing.T) {
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
SSHConfigFile: "/tmp/custom-ssh-config",
|
||||||
|
SSHIdentityFile: "/tmp/custom-ssh-key",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if got := d.resolveSSHConfigFile(); got != "/tmp/custom-ssh-config" {
|
||||||
|
t.Fatalf("unexpected config path: %q", got)
|
||||||
|
}
|
||||||
|
if got := d.resolveSSHIdentityFile(); got != "/tmp/custom-ssh-key" {
|
||||||
|
t.Fatalf("unexpected identity path: %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStartMetricsServerRequiresBindAddress runs one orchestration or CLI step.
|
||||||
|
// Signature: TestStartMetricsServerRequiresBindAddress(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
|
func TestStartMetricsServerRequiresBindAddress(t *testing.T) {
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
Metrics: config.Metrics{
|
||||||
|
Enabled: true,
|
||||||
|
BindAddr: "",
|
||||||
|
Path: "/metrics",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
log: log.New(io.Discard, "", 0),
|
||||||
|
exporter: nil,
|
||||||
|
}
|
||||||
|
d.exporter = d.ensureExporterForTest()
|
||||||
|
if err := d.startMetricsServer(); err == nil {
|
||||||
|
t.Fatalf("expected missing bind address error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestTriggerShutdownSkipsDuplicateWhenIntentActive runs one orchestration or CLI step.
|
||||||
|
// Signature: TestTriggerShutdownSkipsDuplicateWhenIntentActive(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
|
func TestTriggerShutdownSkipsDuplicateWhenIntentActive(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
intentPath := filepath.Join(tmp, "intent.json")
|
||||||
|
if err := state.MustWriteIntent(intentPath, state.IntentShuttingDown, "already-running", "test"); err != nil {
|
||||||
|
t.Fatalf("seed intent: %v", err)
|
||||||
|
}
|
||||||
|
d := &Daemon{
|
||||||
|
cfg: config.Config{
|
||||||
|
State: config.State{
|
||||||
|
IntentPath: intentPath,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
log: log.New(io.Discard, "", 0),
|
||||||
|
exporter: nil,
|
||||||
|
}
|
||||||
|
d.exporter = d.ensureExporterForTest()
|
||||||
|
if err := d.triggerShutdown(context.Background(), "duplicate-check"); err != nil {
|
||||||
|
t.Fatalf("expected duplicate shutdown trigger to be ignored: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ensureExporterForTest runs one orchestration or CLI step.
|
||||||
|
// Signature: (d *Daemon) ensureExporterForTest() *metrics.Exporter.
|
||||||
|
// Why: local helper keeps setup concise while preserving explicit behavior in each test.
|
||||||
|
func (d *Daemon) ensureExporterForTest() *metrics.Exporter {
|
||||||
|
if d.exporter == nil {
|
||||||
|
d.exporter = metrics.New()
|
||||||
|
}
|
||||||
|
return d.exporter
|
||||||
}
|
}
|
||||||
|
|||||||
131
internal/sshutil/repair_test.go
Normal file
131
internal/sshutil/repair_test.go
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
package sshutil
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestShouldAttemptKnownHostsRepairFalseWithoutError runs one orchestration or CLI step.
|
||||||
|
// Signature: TestShouldAttemptKnownHostsRepairFalseWithoutError(t *testing.T).
|
||||||
|
// Why: ensures repair logic does not trigger when command succeeded.
|
||||||
|
func TestShouldAttemptKnownHostsRepairFalseWithoutError(t *testing.T) {
|
||||||
|
if ShouldAttemptKnownHostsRepair("ok", nil) {
|
||||||
|
t.Fatalf("expected false when no error exists")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestIsHostKeyErrorRequiresErr runs one orchestration or CLI step.
|
||||||
|
// Signature: TestIsHostKeyErrorRequiresErr(t *testing.T).
|
||||||
|
// Why: covers guard branch that skips marker parsing when err is nil.
|
||||||
|
func TestIsHostKeyErrorRequiresErr(t *testing.T) {
|
||||||
|
if IsHostKeyError("REMOTE HOST IDENTIFICATION HAS CHANGED", nil) {
|
||||||
|
t.Fatalf("expected false when err is nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRepairKnownHostsRemovesEntries runs one orchestration or CLI step.
|
||||||
|
// Signature: TestRepairKnownHostsRemovesEntries(t *testing.T).
|
||||||
|
// Why: validates known_hosts repair path actually removes target entries.
|
||||||
|
func TestRepairKnownHostsRemovesEntries(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
knownHosts := filepath.Join(tmp, "known_hosts")
|
||||||
|
content := strings.Join([]string{
|
||||||
|
"titan-0a ssh-ed25519 AAAATESTKEYONE",
|
||||||
|
"[titan-0a]:2277 ssh-ed25519 AAAATESTKEYTWO",
|
||||||
|
"titan-0b ssh-ed25519 AAAATESTKEYTHREE",
|
||||||
|
"",
|
||||||
|
}, "\n")
|
||||||
|
if err := os.WriteFile(knownHosts, []byte(content), 0o600); err != nil {
|
||||||
|
t.Fatalf("write known_hosts: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
RepairKnownHosts(context.Background(), log.New(io.Discard, "", 0), []string{knownHosts}, []string{"titan-0a", "titan-0a", ""}, 2277)
|
||||||
|
|
||||||
|
b, err := os.ReadFile(knownHosts)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read known_hosts: %v", err)
|
||||||
|
}
|
||||||
|
got := string(b)
|
||||||
|
if strings.Contains(got, "titan-0a") {
|
||||||
|
t.Fatalf("expected titan-0a entries removed, got:\n%s", got)
|
||||||
|
}
|
||||||
|
if !strings.Contains(got, "titan-0b") {
|
||||||
|
t.Fatalf("expected unrelated host to remain, got:\n%s", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRepairKnownHostsNoSshKeygen runs one orchestration or CLI step.
|
||||||
|
// Signature: TestRepairKnownHostsNoSshKeygen(t *testing.T).
|
||||||
|
// Why: covers early-return branch when ssh-keygen is unavailable.
|
||||||
|
func TestRepairKnownHostsNoSshKeygen(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
t.Setenv("PATH", tmp)
|
||||||
|
RepairKnownHosts(context.Background(), log.New(io.Discard, "", 0), []string{"/tmp/does-not-matter"}, []string{"titan-0a"}, 2277)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRestoreOwnershipNoopOnMissing runs one orchestration or CLI step.
|
||||||
|
// Signature: TestRestoreOwnershipNoopOnMissing(t *testing.T).
|
||||||
|
// Why: covers missing-file branch in ownership restoration helper.
|
||||||
|
func TestRestoreOwnershipNoopOnMissing(t *testing.T) {
|
||||||
|
restoreOwnership(filepath.Join(t.TempDir(), "missing"), "", -1, -1, 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCaptureOwnershipMissingFile runs one orchestration or CLI step.
|
||||||
|
// Signature: TestCaptureOwnershipMissingFile(t *testing.T).
|
||||||
|
// Why: covers missing-path branch in ownership capture helper.
|
||||||
|
func TestCaptureOwnershipMissingFile(t *testing.T) {
|
||||||
|
uid, gid, mode := captureOwnership(filepath.Join(t.TempDir(), "missing"))
|
||||||
|
if uid != -1 || gid != -1 || mode != 0 {
|
||||||
|
t.Fatalf("unexpected ownership for missing file uid=%d gid=%d mode=%v", uid, gid, mode)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRemoveKnownHostEntryAbsentDoesNotFail runs one orchestration or CLI step.
|
||||||
|
// Signature: TestRemoveKnownHostEntryAbsentDoesNotFail(t *testing.T).
|
||||||
|
// Why: covers ssh-keygen "not found in" handling branch.
|
||||||
|
func TestRemoveKnownHostEntryAbsentDoesNotFail(t *testing.T) {
|
||||||
|
file := filepath.Join(t.TempDir(), "known_hosts")
|
||||||
|
if err := os.WriteFile(file, []byte("titan-0b ssh-ed25519 AAAA\n"), 0o600); err != nil {
|
||||||
|
t.Fatalf("write known_hosts: %v", err)
|
||||||
|
}
|
||||||
|
removeKnownHostEntry(context.Background(), log.New(io.Discard, "", 0), file, "titan-0a")
|
||||||
|
b, err := os.ReadFile(file)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read known_hosts after remove: %v", err)
|
||||||
|
}
|
||||||
|
if !strings.Contains(string(b), "titan-0b") {
|
||||||
|
t.Fatalf("expected file content to remain for unrelated hosts")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCaptureAndRestoreOwnershipRoundTrip runs one orchestration or CLI step.
|
||||||
|
// Signature: TestCaptureAndRestoreOwnershipRoundTrip(t *testing.T).
|
||||||
|
// Why: covers successful ownership/mode capture and restore path.
|
||||||
|
func TestCaptureAndRestoreOwnershipRoundTrip(t *testing.T) {
|
||||||
|
file := filepath.Join(t.TempDir(), "known_hosts")
|
||||||
|
if err := os.WriteFile(file, []byte("titan-0b ssh-ed25519 AAAA\n"), 0o600); err != nil {
|
||||||
|
t.Fatalf("write file: %v", err)
|
||||||
|
}
|
||||||
|
uid, gid, mode := captureOwnership(file)
|
||||||
|
restoreOwnership(file, "", uid, gid, mode)
|
||||||
|
info, err := os.Stat(file)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("stat restored file: %v", err)
|
||||||
|
}
|
||||||
|
if info.Mode().Perm() != mode {
|
||||||
|
t.Fatalf("expected mode %v, got %v", mode, info.Mode().Perm())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestLogfNoLoggerDoesNotPanic runs one orchestration or CLI step.
|
||||||
|
// Signature: TestLogfNoLoggerDoesNotPanic(t *testing.T).
|
||||||
|
// Why: covers no-op logger branch.
|
||||||
|
func TestLogfNoLoggerDoesNotPanic(t *testing.T) {
|
||||||
|
logf(nil, "message %v", errors.New("x"))
|
||||||
|
}
|
||||||
@ -19,6 +19,9 @@ var hostKeyErrorMarkers = []string{
|
|||||||
"possible dns spoofing detected",
|
"possible dns spoofing detected",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsHostKeyError runs one orchestration or CLI step.
|
||||||
|
// Signature: IsHostKeyError(output string, err error) bool.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func IsHostKeyError(output string, err error) bool {
|
func IsHostKeyError(output string, err error) bool {
|
||||||
if err == nil {
|
if err == nil {
|
||||||
return false
|
return false
|
||||||
@ -35,6 +38,9 @@ func IsHostKeyError(output string, err error) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ShouldAttemptKnownHostsRepair runs one orchestration or CLI step.
|
||||||
|
// Signature: ShouldAttemptKnownHostsRepair(output string, err error) bool.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func ShouldAttemptKnownHostsRepair(output string, err error) bool {
|
func ShouldAttemptKnownHostsRepair(output string, err error) bool {
|
||||||
if IsHostKeyError(output, err) {
|
if IsHostKeyError(output, err) {
|
||||||
return true
|
return true
|
||||||
@ -50,6 +56,9 @@ func ShouldAttemptKnownHostsRepair(output string, err error) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// KnownHostsFiles runs one orchestration or CLI step.
|
||||||
|
// Signature: KnownHostsFiles(sshConfigFile, sshIdentityFile string) []string.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func KnownHostsFiles(sshConfigFile, sshIdentityFile string) []string {
|
func KnownHostsFiles(sshConfigFile, sshIdentityFile string) []string {
|
||||||
seen := map[string]struct{}{}
|
seen := map[string]struct{}{}
|
||||||
add := func(path string) {
|
add := func(path string) {
|
||||||
@ -86,6 +95,9 @@ func KnownHostsFiles(sshConfigFile, sshIdentityFile string) []string {
|
|||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RepairKnownHosts runs one orchestration or CLI step.
|
||||||
|
// Signature: RepairKnownHosts(ctx context.Context, logger *log.Logger, knownHostsFiles []string, hosts []string, port int).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func RepairKnownHosts(ctx context.Context, logger *log.Logger, knownHostsFiles []string, hosts []string, port int) {
|
func RepairKnownHosts(ctx context.Context, logger *log.Logger, knownHostsFiles []string, hosts []string, port int) {
|
||||||
if _, err := exec.LookPath("ssh-keygen"); err != nil {
|
if _, err := exec.LookPath("ssh-keygen"); err != nil {
|
||||||
logf(logger, "warning: cannot repair known_hosts (ssh-keygen missing): %v", err)
|
logf(logger, "warning: cannot repair known_hosts (ssh-keygen missing): %v", err)
|
||||||
@ -134,6 +146,9 @@ func RepairKnownHosts(ctx context.Context, logger *log.Logger, knownHostsFiles [
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// removeKnownHostEntry runs one orchestration or CLI step.
|
||||||
|
// Signature: removeKnownHostEntry(ctx context.Context, logger *log.Logger, file string, entry string).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func removeKnownHostEntry(ctx context.Context, logger *log.Logger, file string, entry string) {
|
func removeKnownHostEntry(ctx context.Context, logger *log.Logger, file string, entry string) {
|
||||||
uid, gid, mode := captureOwnership(file)
|
uid, gid, mode := captureOwnership(file)
|
||||||
|
|
||||||
@ -155,6 +170,9 @@ func removeKnownHostEntry(ctx context.Context, logger *log.Logger, file string,
|
|||||||
logf(logger, "warning: known_hosts cleanup failed for %s in %s: %v: %s", entry, file, err, strings.TrimSpace(string(out)))
|
logf(logger, "warning: known_hosts cleanup failed for %s in %s: %v: %s", entry, file, err, strings.TrimSpace(string(out)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// captureOwnership runs one orchestration or CLI step.
|
||||||
|
// Signature: captureOwnership(path string) (int, int, os.FileMode).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func captureOwnership(path string) (int, int, os.FileMode) {
|
func captureOwnership(path string) (int, int, os.FileMode) {
|
||||||
info, err := os.Stat(path)
|
info, err := os.Stat(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -167,6 +185,9 @@ func captureOwnership(path string) (int, int, os.FileMode) {
|
|||||||
return int(st.Uid), int(st.Gid), info.Mode().Perm()
|
return int(st.Uid), int(st.Gid), info.Mode().Perm()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// restoreOwnership runs one orchestration or CLI step.
|
||||||
|
// Signature: restoreOwnership(path string, backupPath string, uid int, gid int, mode os.FileMode).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func restoreOwnership(path string, backupPath string, uid int, gid int, mode os.FileMode) {
|
func restoreOwnership(path string, backupPath string, uid int, gid int, mode os.FileMode) {
|
||||||
if uid < 0 || gid < 0 {
|
if uid < 0 || gid < 0 {
|
||||||
return
|
return
|
||||||
@ -185,6 +206,9 @@ func restoreOwnership(path string, backupPath string, uid int, gid int, mode os.
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// logf writes a formatted message through logger and does nothing when logger is nil.
|
||||||
|
// Signature: logf(logger *log.Logger, format string, args ...any).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func logf(logger *log.Logger, format string, args ...any) {
|
func logf(logger *log.Logger, format string, args ...any) {
|
||||||
if logger != nil {
|
if logger != nil {
|
||||||
logger.Printf(format, args...)
|
logger.Printf(format, args...)
|
||||||
|
|||||||
@ -6,6 +6,9 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// TestIsHostKeyErrorDetectsMismatch runs one orchestration or CLI step.
|
||||||
|
// Signature: TestIsHostKeyErrorDetectsMismatch(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func TestIsHostKeyErrorDetectsMismatch(t *testing.T) {
|
func TestIsHostKeyErrorDetectsMismatch(t *testing.T) {
|
||||||
out := "WARNING: REMOTE HOST IDENTIFICATION HAS CHANGED!"
|
out := "WARNING: REMOTE HOST IDENTIFICATION HAS CHANGED!"
|
||||||
if !IsHostKeyError(out, errors.New("ssh failed")) {
|
if !IsHostKeyError(out, errors.New("ssh failed")) {
|
||||||
@ -13,6 +16,9 @@ func TestIsHostKeyErrorDetectsMismatch(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestIsHostKeyErrorIgnoresGenericFailures runs one orchestration or CLI step.
|
||||||
|
// Signature: TestIsHostKeyErrorIgnoresGenericFailures(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func TestIsHostKeyErrorIgnoresGenericFailures(t *testing.T) {
|
func TestIsHostKeyErrorIgnoresGenericFailures(t *testing.T) {
|
||||||
out := "connection timed out"
|
out := "connection timed out"
|
||||||
if IsHostKeyError(out, errors.New("ssh failed")) {
|
if IsHostKeyError(out, errors.New("ssh failed")) {
|
||||||
@ -20,12 +26,18 @@ func TestIsHostKeyErrorIgnoresGenericFailures(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestShouldAttemptKnownHostsRepairOnSilent255 runs one orchestration or CLI step.
|
||||||
|
// Signature: TestShouldAttemptKnownHostsRepairOnSilent255(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func TestShouldAttemptKnownHostsRepairOnSilent255(t *testing.T) {
|
func TestShouldAttemptKnownHostsRepairOnSilent255(t *testing.T) {
|
||||||
if !ShouldAttemptKnownHostsRepair("", errors.New("ssh ...: exit status 255")) {
|
if !ShouldAttemptKnownHostsRepair("", errors.New("ssh ...: exit status 255")) {
|
||||||
t.Fatalf("expected silent exit status 255 to trigger known_hosts repair")
|
t.Fatalf("expected silent exit status 255 to trigger known_hosts repair")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestKnownHostsFilesIncludesDerivedPaths runs one orchestration or CLI step.
|
||||||
|
// Signature: TestKnownHostsFilesIncludesDerivedPaths(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func TestKnownHostsFilesIncludesDerivedPaths(t *testing.T) {
|
func TestKnownHostsFilesIncludesDerivedPaths(t *testing.T) {
|
||||||
configFile := "/home/atlas/.ssh/config"
|
configFile := "/home/atlas/.ssh/config"
|
||||||
identityFile := "/home/tethys/.ssh/id_ed25519"
|
identityFile := "/home/tethys/.ssh/id_ed25519"
|
||||||
|
|||||||
@ -7,6 +7,9 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// quarantineCorruptFile runs one orchestration or CLI step.
|
||||||
|
// Signature: quarantineCorruptFile(path string, payload []byte, replacement []byte, mode os.FileMode) error.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func quarantineCorruptFile(path string, payload []byte, replacement []byte, mode os.FileMode) error {
|
func quarantineCorruptFile(path string, payload []byte, replacement []byte, mode os.FileMode) error {
|
||||||
if err := os.MkdirAll(filepath.Dir(path), 0o750); err != nil {
|
if err := os.MkdirAll(filepath.Dir(path), 0o750); err != nil {
|
||||||
return err
|
return err
|
||||||
|
|||||||
46
internal/state/heal_test.go
Normal file
46
internal/state/heal_test.go
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
package state
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestQuarantineCorruptFileWritesBackupAndReplacement runs one orchestration or CLI step.
|
||||||
|
// Signature: TestQuarantineCorruptFileWritesBackupAndReplacement(t *testing.T).
|
||||||
|
// Why: covers successful corruption quarantine flow.
|
||||||
|
func TestQuarantineCorruptFileWritesBackupAndReplacement(t *testing.T) {
|
||||||
|
path := filepath.Join(t.TempDir(), "intent.json")
|
||||||
|
if err := quarantineCorruptFile(path, []byte("{bad"), []byte("{}\n"), 0o640); err != nil {
|
||||||
|
t.Fatalf("quarantine failed: %v", err)
|
||||||
|
}
|
||||||
|
b, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read replacement: %v", err)
|
||||||
|
}
|
||||||
|
if string(b) != "{}\n" {
|
||||||
|
t.Fatalf("unexpected replacement payload: %q", string(b))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestQuarantineCorruptFileFailsOnEmptyPath runs one orchestration or CLI step.
|
||||||
|
// Signature: TestQuarantineCorruptFileFailsOnEmptyPath(t *testing.T).
|
||||||
|
// Why: covers mkdir failure branch for invalid destination path.
|
||||||
|
func TestQuarantineCorruptFileFailsOnEmptyPath(t *testing.T) {
|
||||||
|
if err := quarantineCorruptFile("", []byte("x"), []byte("y"), 0o640); err == nil {
|
||||||
|
t.Fatalf("expected failure for empty path")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestQuarantineCorruptFileFailsWhenReplacementIsDirectory runs one orchestration or CLI step.
|
||||||
|
// Signature: TestQuarantineCorruptFileFailsWhenReplacementIsDirectory(t *testing.T).
|
||||||
|
// Why: covers replacement-write error branch after backup succeeds.
|
||||||
|
func TestQuarantineCorruptFileFailsWhenReplacementIsDirectory(t *testing.T) {
|
||||||
|
path := filepath.Join(t.TempDir(), "intent-dir")
|
||||||
|
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||||
|
t.Fatalf("mkdir replacement dir: %v", err)
|
||||||
|
}
|
||||||
|
if err := quarantineCorruptFile(path, []byte("{bad"), []byte("{}\n"), 0o640); err == nil {
|
||||||
|
t.Fatalf("expected write replacement failure when path is a directory")
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -22,6 +22,9 @@ type Intent struct {
|
|||||||
UpdatedAt time.Time `json:"updated_at"`
|
UpdatedAt time.Time `json:"updated_at"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ReadIntent runs one orchestration or CLI step.
|
||||||
|
// Signature: ReadIntent(path string) (Intent, error).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func ReadIntent(path string) (Intent, error) {
|
func ReadIntent(path string) (Intent, error) {
|
||||||
b, err := os.ReadFile(path)
|
b, err := os.ReadFile(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -43,6 +46,9 @@ func ReadIntent(path string) (Intent, error) {
|
|||||||
return in, nil
|
return in, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WriteIntent runs one orchestration or CLI step.
|
||||||
|
// Signature: WriteIntent(path string, in Intent) error.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func WriteIntent(path string, in Intent) error {
|
func WriteIntent(path string, in Intent) error {
|
||||||
if in.UpdatedAt.IsZero() {
|
if in.UpdatedAt.IsZero() {
|
||||||
in.UpdatedAt = time.Now().UTC()
|
in.UpdatedAt = time.Now().UTC()
|
||||||
@ -50,13 +56,13 @@ func WriteIntent(path string, in Intent) error {
|
|||||||
if err := os.MkdirAll(filepath.Dir(path), 0o750); err != nil {
|
if err := os.MkdirAll(filepath.Dir(path), 0o750); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
b, err := json.MarshalIndent(in, "", " ")
|
b, _ := json.MarshalIndent(in, "", " ")
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return os.WriteFile(path, b, 0o640)
|
return os.WriteFile(path, b, 0o640)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MustWriteIntent runs one orchestration or CLI step.
|
||||||
|
// Signature: MustWriteIntent(path string, state string, reason string, source string) error.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func MustWriteIntent(path string, state string, reason string, source string) error {
|
func MustWriteIntent(path string, state string, reason string, source string) error {
|
||||||
switch state {
|
switch state {
|
||||||
case IntentNormal, IntentStartupInProgress, IntentShuttingDown, IntentShutdownComplete:
|
case IntentNormal, IntentStartupInProgress, IntentShuttingDown, IntentShutdownComplete:
|
||||||
|
|||||||
135
internal/state/intent_additional_test.go
Normal file
135
internal/state/intent_additional_test.go
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
package state
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestReadIntentHandlesMissingAndEmpty runs one orchestration or CLI step.
|
||||||
|
// Signature: TestReadIntentHandlesMissingAndEmpty(t *testing.T).
|
||||||
|
// Why: covers nil-state branches for missing and empty intent files.
|
||||||
|
func TestReadIntentHandlesMissingAndEmpty(t *testing.T) {
|
||||||
|
path := filepath.Join(t.TempDir(), "intent.json")
|
||||||
|
in, err := ReadIntent(path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read missing intent: %v", err)
|
||||||
|
}
|
||||||
|
if in.State != "" {
|
||||||
|
t.Fatalf("expected empty state for missing file, got %q", in.State)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(path, nil, 0o640); err != nil {
|
||||||
|
t.Fatalf("write empty intent file: %v", err)
|
||||||
|
}
|
||||||
|
in, err = ReadIntent(path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read empty intent file: %v", err)
|
||||||
|
}
|
||||||
|
if in.State != "" {
|
||||||
|
t.Fatalf("expected empty state for empty file, got %q", in.State)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestWriteIntentSetsUpdatedAtWhenZero runs one orchestration or CLI step.
|
||||||
|
// Signature: TestWriteIntentSetsUpdatedAtWhenZero(t *testing.T).
|
||||||
|
// Why: verifies write helper auto-populates timestamp for callers.
|
||||||
|
func TestWriteIntentSetsUpdatedAtWhenZero(t *testing.T) {
|
||||||
|
path := filepath.Join(t.TempDir(), "intent.json")
|
||||||
|
if err := WriteIntent(path, Intent{State: IntentNormal, Reason: "unit", Source: "test"}); err != nil {
|
||||||
|
t.Fatalf("write intent: %v", err)
|
||||||
|
}
|
||||||
|
in, err := ReadIntent(path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read intent: %v", err)
|
||||||
|
}
|
||||||
|
if in.UpdatedAt.IsZero() {
|
||||||
|
t.Fatalf("expected non-zero updated_at")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestParseIntentOutputErrorsOnBadUpdatedAt runs one orchestration or CLI step.
|
||||||
|
// Signature: TestParseIntentOutputErrorsOnBadUpdatedAt(t *testing.T).
|
||||||
|
// Why: covers parser error branch for malformed timestamp values.
|
||||||
|
func TestParseIntentOutputErrorsOnBadUpdatedAt(t *testing.T) {
|
||||||
|
raw := `intent=normal reason="x" source=y updated_at=not-a-time`
|
||||||
|
if _, err := ParseIntentOutput(raw); err == nil {
|
||||||
|
t.Fatalf("expected updated_at parse error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestParseIntentOutputErrorsWhenMissingToken runs one orchestration or CLI step.
|
||||||
|
// Signature: TestParseIntentOutputErrorsWhenMissingToken(t *testing.T).
|
||||||
|
// Why: covers parser terminal error when intent token is absent.
|
||||||
|
func TestParseIntentOutputErrorsWhenMissingToken(t *testing.T) {
|
||||||
|
if _, err := ParseIntentOutput("no intent line here"); err == nil {
|
||||||
|
t.Fatalf("expected parse failure without intent token")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestParseIntentOutputWithoutReasonOrSource runs one orchestration or CLI step.
|
||||||
|
// Signature: TestParseIntentOutputWithoutReasonOrSource(t *testing.T).
|
||||||
|
// Why: covers parser branch where optional fields are omitted.
|
||||||
|
func TestParseIntentOutputWithoutReasonOrSource(t *testing.T) {
|
||||||
|
in, err := ParseIntentOutput("intent=shutdown_complete")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parse intent output: %v", err)
|
||||||
|
}
|
||||||
|
if in.State != IntentShutdownComplete {
|
||||||
|
t.Fatalf("expected shutdown_complete, got %q", in.State)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestMustWriteIntentPersistsProvidedTimestampType runs one orchestration or CLI step.
|
||||||
|
// Signature: TestMustWriteIntentPersistsProvidedTimestampType(t *testing.T).
|
||||||
|
// Why: sanity check that written timestamps round-trip RFC3339 parsing.
|
||||||
|
func TestMustWriteIntentPersistsProvidedTimestampType(t *testing.T) {
|
||||||
|
path := filepath.Join(t.TempDir(), "intent.json")
|
||||||
|
if err := MustWriteIntent(path, IntentNormal, "ok", "test"); err != nil {
|
||||||
|
t.Fatalf("must write intent: %v", err)
|
||||||
|
}
|
||||||
|
in, err := ReadIntent(path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read intent: %v", err)
|
||||||
|
}
|
||||||
|
if time.Since(in.UpdatedAt) > time.Minute {
|
||||||
|
t.Fatalf("expected recent timestamp, got %s", in.UpdatedAt)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestWriteIntentFailsWhenParentIsFile runs one orchestration or CLI step.
|
||||||
|
// Signature: TestWriteIntentFailsWhenParentIsFile(t *testing.T).
|
||||||
|
// Why: covers mkdir failure branch when parent path is not a directory.
|
||||||
|
func TestWriteIntentFailsWhenParentIsFile(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
parent := filepath.Join(tmp, "not-a-dir")
|
||||||
|
if err := os.WriteFile(parent, []byte("x"), 0o600); err != nil {
|
||||||
|
t.Fatalf("write parent file: %v", err)
|
||||||
|
}
|
||||||
|
err := WriteIntent(filepath.Join(parent, "intent.json"), Intent{State: IntentNormal})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expected write failure for non-directory parent")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestReadIntentFailsOnPermissionError runs one orchestration or CLI step.
|
||||||
|
// Signature: TestReadIntentFailsOnPermissionError(t *testing.T).
|
||||||
|
// Why: covers read error branch distinct from not-exist and empty-file handling.
|
||||||
|
func TestReadIntentFailsOnPermissionError(t *testing.T) {
|
||||||
|
path := filepath.Join(t.TempDir(), "intent.json")
|
||||||
|
if err := os.WriteFile(path, []byte(`{"state":"normal"}`), 0o640); err != nil {
|
||||||
|
t.Fatalf("write intent file: %v", err)
|
||||||
|
}
|
||||||
|
if err := os.Chmod(path, 0o000); err != nil {
|
||||||
|
t.Fatalf("chmod intent file: %v", err)
|
||||||
|
}
|
||||||
|
defer os.Chmod(path, 0o640)
|
||||||
|
_, err := ReadIntent(path)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expected permission error")
|
||||||
|
}
|
||||||
|
if strings.Contains(strings.ToLower(err.Error()), "not exist") {
|
||||||
|
t.Fatalf("expected permission-related error, got: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -7,6 +7,10 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// ParseIntentOutput parses `ananke intent` CLI output from local/remote commands.
|
// ParseIntentOutput parses `ananke intent` CLI output from local/remote commands.
|
||||||
|
// Signature: ParseIntentOutput(raw string) (Intent, error)
|
||||||
|
// Why: Startup/shutdown coordination depends on intent state being interpreted
|
||||||
|
// consistently from command output so remote peers and local orchestration can
|
||||||
|
// share one durable control-plane signal.
|
||||||
func ParseIntentOutput(raw string) (Intent, error) {
|
func ParseIntentOutput(raw string) (Intent, error) {
|
||||||
for _, line := range strings.Split(raw, "\n") {
|
for _, line := range strings.Split(raw, "\n") {
|
||||||
line = strings.TrimSpace(line)
|
line = strings.TrimSpace(line)
|
||||||
@ -19,9 +23,6 @@ func ParseIntentOutput(raw string) (Intent, error) {
|
|||||||
}
|
}
|
||||||
payload := strings.TrimSpace(line[idx:])
|
payload := strings.TrimSpace(line[idx:])
|
||||||
fields := strings.Fields(payload)
|
fields := strings.Fields(payload)
|
||||||
if len(fields) == 0 || !strings.HasPrefix(fields[0], "intent=") {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
stateValue := strings.TrimSpace(strings.TrimPrefix(fields[0], "intent="))
|
stateValue := strings.TrimSpace(strings.TrimPrefix(fields[0], "intent="))
|
||||||
if stateValue == "" || stateValue == "none" {
|
if stateValue == "" || stateValue == "none" {
|
||||||
return Intent{}, nil
|
return Intent{}, nil
|
||||||
@ -29,12 +30,10 @@ func ParseIntentOutput(raw string) (Intent, error) {
|
|||||||
in := Intent{State: stateValue}
|
in := Intent{State: stateValue}
|
||||||
if strings.Contains(payload, `reason="`) {
|
if strings.Contains(payload, `reason="`) {
|
||||||
parts := strings.SplitN(payload, `reason="`, 2)
|
parts := strings.SplitN(payload, `reason="`, 2)
|
||||||
if len(parts) == 2 {
|
|
||||||
if end := strings.Index(parts[1], `"`); end >= 0 {
|
if end := strings.Index(parts[1], `"`); end >= 0 {
|
||||||
in.Reason = parts[1][:end]
|
in.Reason = parts[1][:end]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
for _, field := range fields[1:] {
|
for _, field := range fields[1:] {
|
||||||
if strings.HasPrefix(field, "source=") {
|
if strings.HasPrefix(field, "source=") {
|
||||||
in.Source = strings.TrimSpace(strings.TrimPrefix(field, "source="))
|
in.Source = strings.TrimSpace(strings.TrimPrefix(field, "source="))
|
||||||
|
|||||||
@ -6,6 +6,9 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// TestWriteReadIntentRoundTrip runs one orchestration or CLI step.
|
||||||
|
// Signature: TestWriteReadIntentRoundTrip(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func TestWriteReadIntentRoundTrip(t *testing.T) {
|
func TestWriteReadIntentRoundTrip(t *testing.T) {
|
||||||
p := filepath.Join(t.TempDir(), "intent.json")
|
p := filepath.Join(t.TempDir(), "intent.json")
|
||||||
if err := MustWriteIntent(p, IntentShuttingDown, "ups-threshold", "daemon"); err != nil {
|
if err := MustWriteIntent(p, IntentShuttingDown, "ups-threshold", "daemon"); err != nil {
|
||||||
@ -23,6 +26,9 @@ func TestWriteReadIntentRoundTrip(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestMustWriteIntentRejectsUnknownState runs one orchestration or CLI step.
|
||||||
|
// Signature: TestMustWriteIntentRejectsUnknownState(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func TestMustWriteIntentRejectsUnknownState(t *testing.T) {
|
func TestMustWriteIntentRejectsUnknownState(t *testing.T) {
|
||||||
p := filepath.Join(t.TempDir(), "intent.json")
|
p := filepath.Join(t.TempDir(), "intent.json")
|
||||||
if err := MustWriteIntent(p, "weird", "x", "y"); err == nil {
|
if err := MustWriteIntent(p, "weird", "x", "y"); err == nil {
|
||||||
@ -30,6 +36,9 @@ func TestMustWriteIntentRejectsUnknownState(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestReadIntentAutoHealsCorruptJSON runs one orchestration or CLI step.
|
||||||
|
// Signature: TestReadIntentAutoHealsCorruptJSON(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func TestReadIntentAutoHealsCorruptJSON(t *testing.T) {
|
func TestReadIntentAutoHealsCorruptJSON(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
p := filepath.Join(dir, "intent.json")
|
p := filepath.Join(dir, "intent.json")
|
||||||
@ -60,6 +69,9 @@ func TestReadIntentAutoHealsCorruptJSON(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestParseIntentOutputParsesStructuredLine runs one orchestration or CLI step.
|
||||||
|
// Signature: TestParseIntentOutputParsesStructuredLine(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func TestParseIntentOutputParsesStructuredLine(t *testing.T) {
|
func TestParseIntentOutputParsesStructuredLine(t *testing.T) {
|
||||||
raw := `[ananke] 2026/04/05 11:24:49 intent=normal reason="guard-test-clear-2" source=drill updated_at=2026-04-05T16:24:33Z`
|
raw := `[ananke] 2026/04/05 11:24:49 intent=normal reason="guard-test-clear-2" source=drill updated_at=2026-04-05T16:24:33Z`
|
||||||
in, err := ParseIntentOutput(raw)
|
in, err := ParseIntentOutput(raw)
|
||||||
@ -80,6 +92,9 @@ func TestParseIntentOutputParsesStructuredLine(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestParseIntentOutputHandlesNone runs one orchestration or CLI step.
|
||||||
|
// Signature: TestParseIntentOutputHandlesNone(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func TestParseIntentOutputHandlesNone(t *testing.T) {
|
func TestParseIntentOutputHandlesNone(t *testing.T) {
|
||||||
in, err := ParseIntentOutput(`[ananke] 2026/04/05 11:24:49 intent=none`)
|
in, err := ParseIntentOutput(`[ananke] 2026/04/05 11:24:49 intent=none`)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@ -32,10 +32,16 @@ type Store struct {
|
|||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// New runs one orchestration or CLI step.
|
||||||
|
// Signature: New(path string) *Store.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func New(path string) *Store {
|
func New(path string) *Store {
|
||||||
return &Store{path: path}
|
return &Store{path: path}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// EnsureDir runs one orchestration or CLI step.
|
||||||
|
// Signature: EnsureDir(dir string) error.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func EnsureDir(dir string) error {
|
func EnsureDir(dir string) error {
|
||||||
if dir == "" {
|
if dir == "" {
|
||||||
return fmt.Errorf("state dir must not be empty")
|
return fmt.Errorf("state dir must not be empty")
|
||||||
@ -43,6 +49,9 @@ func EnsureDir(dir string) error {
|
|||||||
return os.MkdirAll(dir, 0o750)
|
return os.MkdirAll(dir, 0o750)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AcquireLock runs one orchestration or CLI step.
|
||||||
|
// Signature: AcquireLock(path string) (func(), error).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func AcquireLock(path string) (func(), error) {
|
func AcquireLock(path string) (func(), error) {
|
||||||
if err := os.MkdirAll(filepath.Dir(path), 0o750); err != nil {
|
if err := os.MkdirAll(filepath.Dir(path), 0o750); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -85,6 +94,9 @@ func AcquireLock(path string) (func(), error) {
|
|||||||
return unlock, nil
|
return unlock, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// staleLock runs one orchestration or CLI step.
|
||||||
|
// Signature: staleLock(path string) (bool, error).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func staleLock(path string) (bool, error) {
|
func staleLock(path string) (bool, error) {
|
||||||
b, err := os.ReadFile(path)
|
b, err := os.ReadFile(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -99,6 +111,9 @@ func staleLock(path string) (bool, error) {
|
|||||||
line = strings.TrimSpace(line)
|
line = strings.TrimSpace(line)
|
||||||
if strings.HasPrefix(line, "pid=") {
|
if strings.HasPrefix(line, "pid=") {
|
||||||
v := strings.TrimPrefix(line, "pid=")
|
v := strings.TrimPrefix(line, "pid=")
|
||||||
|
if fields := strings.Fields(v); len(fields) > 0 {
|
||||||
|
v = fields[0]
|
||||||
|
}
|
||||||
parsed, parseErr := strconv.Atoi(v)
|
parsed, parseErr := strconv.Atoi(v)
|
||||||
if parseErr != nil {
|
if parseErr != nil {
|
||||||
return true, nil
|
return true, nil
|
||||||
@ -118,6 +133,9 @@ func staleLock(path string) (bool, error) {
|
|||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Append runs one orchestration or CLI step.
|
||||||
|
// Signature: (s *Store) Append(record RunRecord) error.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (s *Store) Append(record RunRecord) error {
|
func (s *Store) Append(record RunRecord) error {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
defer s.mu.Unlock()
|
defer s.mu.Unlock()
|
||||||
@ -133,19 +151,22 @@ func (s *Store) Append(record RunRecord) error {
|
|||||||
if err := os.MkdirAll(filepath.Dir(s.path), 0o750); err != nil {
|
if err := os.MkdirAll(filepath.Dir(s.path), 0o750); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
b, err := json.MarshalIndent(records, "", " ")
|
b, _ := json.MarshalIndent(records, "", " ")
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return os.WriteFile(s.path, b, 0o640)
|
return os.WriteFile(s.path, b, 0o640)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Load runs one orchestration or CLI step.
|
||||||
|
// Signature: (s *Store) Load() ([]RunRecord, error).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (s *Store) Load() ([]RunRecord, error) {
|
func (s *Store) Load() ([]RunRecord, error) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
defer s.mu.Unlock()
|
defer s.mu.Unlock()
|
||||||
return s.loadUnlocked()
|
return s.loadUnlocked()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// loadUnlocked runs one orchestration or CLI step.
|
||||||
|
// Signature: (s *Store) loadUnlocked() ([]RunRecord, error).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (s *Store) loadUnlocked() ([]RunRecord, error) {
|
func (s *Store) loadUnlocked() ([]RunRecord, error) {
|
||||||
b, err := os.ReadFile(s.path)
|
b, err := os.ReadFile(s.path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -167,18 +188,30 @@ func (s *Store) loadUnlocked() ([]RunRecord, error) {
|
|||||||
return records, nil
|
return records, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ShutdownP95 runs one orchestration or CLI step.
|
||||||
|
// Signature: (s *Store) ShutdownP95(defaultSeconds int) int.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (s *Store) ShutdownP95(defaultSeconds int) int {
|
func (s *Store) ShutdownP95(defaultSeconds int) int {
|
||||||
return s.shutdownP95(defaultSeconds, 1, nil)
|
return s.shutdownP95(defaultSeconds, 1, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ShutdownP95WithMinSamples runs one orchestration or CLI step.
|
||||||
|
// Signature: (s *Store) ShutdownP95WithMinSamples(defaultSeconds int, minSamples int) int.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (s *Store) ShutdownP95WithMinSamples(defaultSeconds int, minSamples int) int {
|
func (s *Store) ShutdownP95WithMinSamples(defaultSeconds int, minSamples int) int {
|
||||||
return s.shutdownP95(defaultSeconds, minSamples, nil)
|
return s.shutdownP95(defaultSeconds, minSamples, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ShutdownP95ByReasonPrefix runs one orchestration or CLI step.
|
||||||
|
// Signature: (s *Store) ShutdownP95ByReasonPrefix(defaultSeconds int, minSamples int, reasonPrefixes []string) int.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (s *Store) ShutdownP95ByReasonPrefix(defaultSeconds int, minSamples int, reasonPrefixes []string) int {
|
func (s *Store) ShutdownP95ByReasonPrefix(defaultSeconds int, minSamples int, reasonPrefixes []string) int {
|
||||||
return s.shutdownP95(defaultSeconds, minSamples, reasonPrefixes)
|
return s.shutdownP95(defaultSeconds, minSamples, reasonPrefixes)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// shutdownP95 runs one orchestration or CLI step.
|
||||||
|
// Signature: (s *Store) shutdownP95(defaultSeconds int, minSamples int, reasonPrefixes []string) int.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (s *Store) shutdownP95(defaultSeconds int, minSamples int, reasonPrefixes []string) int {
|
func (s *Store) shutdownP95(defaultSeconds int, minSamples int, reasonPrefixes []string) int {
|
||||||
if minSamples <= 0 {
|
if minSamples <= 0 {
|
||||||
minSamples = 1
|
minSamples = 1
|
||||||
@ -217,14 +250,5 @@ func (s *Store) shutdownP95(defaultSeconds int, minSamples int, reasonPrefixes [
|
|||||||
}
|
}
|
||||||
sort.Ints(d)
|
sort.Ints(d)
|
||||||
idx := int(math.Ceil(0.95*float64(len(d)))) - 1
|
idx := int(math.Ceil(0.95*float64(len(d)))) - 1
|
||||||
if idx < 0 {
|
|
||||||
idx = 0
|
|
||||||
}
|
|
||||||
if idx >= len(d) {
|
|
||||||
idx = len(d) - 1
|
|
||||||
}
|
|
||||||
if d[idx] <= 0 {
|
|
||||||
return defaultSeconds
|
|
||||||
}
|
|
||||||
return d[idx]
|
return d[idx]
|
||||||
}
|
}
|
||||||
|
|||||||
156
internal/state/store_additional_test.go
Normal file
156
internal/state/store_additional_test.go
Normal file
@ -0,0 +1,156 @@
|
|||||||
|
package state
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestEnsureDirRejectsEmpty runs one orchestration or CLI step.
|
||||||
|
// Signature: TestEnsureDirRejectsEmpty(t *testing.T).
|
||||||
|
// Why: covers explicit guard branch for empty state directory inputs.
|
||||||
|
func TestEnsureDirRejectsEmpty(t *testing.T) {
|
||||||
|
if err := EnsureDir(""); err == nil {
|
||||||
|
t.Fatalf("expected empty directory error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStoreAppendTrimToMaxRecords runs one orchestration or CLI step.
|
||||||
|
// Signature: TestStoreAppendTrimToMaxRecords(t *testing.T).
|
||||||
|
// Why: covers retention branch that trims run history to the 200-record cap.
|
||||||
|
func TestStoreAppendTrimToMaxRecords(t *testing.T) {
|
||||||
|
path := filepath.Join(t.TempDir(), "runs.json")
|
||||||
|
s := New(path)
|
||||||
|
now := time.Now().UTC()
|
||||||
|
for i := 0; i < 205; i++ {
|
||||||
|
if err := s.Append(RunRecord{
|
||||||
|
ID: "r-" + strconv.Itoa(i),
|
||||||
|
Action: "shutdown",
|
||||||
|
StartedAt: now,
|
||||||
|
EndedAt: now,
|
||||||
|
DurationSeconds: i + 1,
|
||||||
|
Success: true,
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("append %d failed: %v", i, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
recs, err := s.Load()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("load failed: %v", err)
|
||||||
|
}
|
||||||
|
if len(recs) != 200 {
|
||||||
|
t.Fatalf("expected trim to 200 records, got %d", len(recs))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStoreLoadHandlesEmptyFile runs one orchestration or CLI step.
|
||||||
|
// Signature: TestStoreLoadHandlesEmptyFile(t *testing.T).
|
||||||
|
// Why: covers load branch for empty existing run-history file.
|
||||||
|
func TestStoreLoadHandlesEmptyFile(t *testing.T) {
|
||||||
|
path := filepath.Join(t.TempDir(), "runs.json")
|
||||||
|
if err := os.WriteFile(path, nil, 0o640); err != nil {
|
||||||
|
t.Fatalf("write empty file: %v", err)
|
||||||
|
}
|
||||||
|
recs, err := New(path).Load()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("load empty file: %v", err)
|
||||||
|
}
|
||||||
|
if len(recs) != 0 {
|
||||||
|
t.Fatalf("expected no records, got %d", len(recs))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStoreLoadReturnsErrorOnUnhealableDecode runs one orchestration or CLI step.
|
||||||
|
// Signature: TestStoreLoadReturnsErrorOnUnhealableDecode(t *testing.T).
|
||||||
|
// Why: covers decode failure path where replacement write itself can fail.
|
||||||
|
func TestStoreLoadReturnsErrorOnUnhealableDecode(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "runs.json")
|
||||||
|
if err := os.WriteFile(path, []byte("{bad-json"), 0o640); err != nil {
|
||||||
|
t.Fatalf("write invalid file: %v", err)
|
||||||
|
}
|
||||||
|
// Make directory readonly so quarantine replacement cannot be written.
|
||||||
|
if err := os.Chmod(dir, 0o500); err != nil {
|
||||||
|
t.Fatalf("chmod dir readonly: %v", err)
|
||||||
|
}
|
||||||
|
defer os.Chmod(dir, 0o700)
|
||||||
|
if _, err := New(path).Load(); err == nil {
|
||||||
|
t.Fatalf("expected load failure when auto-heal cannot write replacement")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestShutdownP95FallsBackOnLoadError runs one orchestration or CLI step.
|
||||||
|
// Signature: TestShutdownP95FallsBackOnLoadError(t *testing.T).
|
||||||
|
// Why: covers load-error fallback branch in percentile helper.
|
||||||
|
func TestShutdownP95FallsBackOnLoadError(t *testing.T) {
|
||||||
|
path := filepath.Join(t.TempDir(), "runs.json")
|
||||||
|
if err := os.WriteFile(path, []byte("{bad"), 0o640); err != nil {
|
||||||
|
t.Fatalf("write invalid file: %v", err)
|
||||||
|
}
|
||||||
|
// Use impossible perms to force read failure.
|
||||||
|
if err := os.Chmod(path, 0o000); err != nil {
|
||||||
|
t.Fatalf("chmod file: %v", err)
|
||||||
|
}
|
||||||
|
defer os.Chmod(path, 0o640)
|
||||||
|
if got := New(path).ShutdownP95(321); got != 321 {
|
||||||
|
t.Fatalf("expected fallback default 321, got %d", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestShutdownP95ReturnsDefaultOnNonPositiveQuantile runs one orchestration or CLI step.
|
||||||
|
// Signature: TestShutdownP95ReturnsDefaultOnNonPositiveQuantile(t *testing.T).
|
||||||
|
// Why: covers branch where computed percentile record is non-positive.
|
||||||
|
func TestShutdownP95ReturnsDefaultOnNonPositiveQuantile(t *testing.T) {
|
||||||
|
path := filepath.Join(t.TempDir(), "runs.json")
|
||||||
|
now := time.Now().UTC()
|
||||||
|
records := []RunRecord{
|
||||||
|
{Action: "shutdown", StartedAt: now, EndedAt: now, DurationSeconds: 0, Success: true},
|
||||||
|
{Action: "shutdown", StartedAt: now, EndedAt: now, DurationSeconds: -1, Success: true},
|
||||||
|
}
|
||||||
|
b, err := json.Marshal(records)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("marshal records: %v", err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(path, b, 0o640); err != nil {
|
||||||
|
t.Fatalf("write records: %v", err)
|
||||||
|
}
|
||||||
|
if got := New(path).ShutdownP95WithMinSamples(777, 1); got != 777 {
|
||||||
|
t.Fatalf("expected default 777, got %d", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStaleLockHelpers runs one orchestration or CLI step.
|
||||||
|
// Signature: TestStaleLockHelpers(t *testing.T).
|
||||||
|
// Why: covers stale-lock parser branches directly for reliability.
|
||||||
|
func TestStaleLockHelpers(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
missing := filepath.Join(tmp, "missing.lock")
|
||||||
|
stale, err := staleLock(missing)
|
||||||
|
if err != nil || !stale {
|
||||||
|
t.Fatalf("expected missing lock to be stale=true err=nil, got stale=%v err=%v", stale, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
invalidPID := filepath.Join(tmp, "invalid.lock")
|
||||||
|
if err := os.WriteFile(invalidPID, []byte("pid=notanumber\n"), 0o600); err != nil {
|
||||||
|
t.Fatalf("write invalid pid lock: %v", err)
|
||||||
|
}
|
||||||
|
stale, err = staleLock(invalidPID)
|
||||||
|
if err != nil || !stale {
|
||||||
|
t.Fatalf("expected invalid pid lock to be stale=true err=nil, got stale=%v err=%v", stale, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
active := filepath.Join(tmp, "active.lock")
|
||||||
|
if err := os.WriteFile(active, []byte("pid="+strconv.Itoa(os.Getpid())+"\n"), 0o600); err != nil {
|
||||||
|
t.Fatalf("write active lock: %v", err)
|
||||||
|
}
|
||||||
|
stale, err = staleLock(active)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("active staleLock error: %v", err)
|
||||||
|
}
|
||||||
|
if stale {
|
||||||
|
t.Fatalf("expected active lock to report stale=false")
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -10,6 +10,9 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// TestAcquireLockLifecycle runs one orchestration or CLI step.
|
||||||
|
// Signature: TestAcquireLockLifecycle(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func TestAcquireLockLifecycle(t *testing.T) {
|
func TestAcquireLockLifecycle(t *testing.T) {
|
||||||
lockPath := filepath.Join(t.TempDir(), "ananke.lock")
|
lockPath := filepath.Join(t.TempDir(), "ananke.lock")
|
||||||
unlock, err := AcquireLock(lockPath)
|
unlock, err := AcquireLock(lockPath)
|
||||||
@ -25,6 +28,9 @@ func TestAcquireLockLifecycle(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestAcquireLockReclaimsStaleLock runs one orchestration or CLI step.
|
||||||
|
// Signature: TestAcquireLockReclaimsStaleLock(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func TestAcquireLockReclaimsStaleLock(t *testing.T) {
|
func TestAcquireLockReclaimsStaleLock(t *testing.T) {
|
||||||
lockPath := filepath.Join(t.TempDir(), "ananke.lock")
|
lockPath := filepath.Join(t.TempDir(), "ananke.lock")
|
||||||
if err := os.WriteFile(lockPath, []byte("pid=999999\n"), 0o600); err != nil {
|
if err := os.WriteFile(lockPath, []byte("pid=999999\n"), 0o600); err != nil {
|
||||||
@ -46,6 +52,9 @@ func TestAcquireLockReclaimsStaleLock(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestAcquireLockRejectsActiveLock runs one orchestration or CLI step.
|
||||||
|
// Signature: TestAcquireLockRejectsActiveLock(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func TestAcquireLockRejectsActiveLock(t *testing.T) {
|
func TestAcquireLockRejectsActiveLock(t *testing.T) {
|
||||||
lockPath := filepath.Join(t.TempDir(), "ananke.lock")
|
lockPath := filepath.Join(t.TempDir(), "ananke.lock")
|
||||||
active := "pid=" + strconv.Itoa(os.Getpid()) + "\n"
|
active := "pid=" + strconv.Itoa(os.Getpid()) + "\n"
|
||||||
@ -58,6 +67,9 @@ func TestAcquireLockRejectsActiveLock(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestStoreLoadAutoHealsCorruptJSON runs one orchestration or CLI step.
|
||||||
|
// Signature: TestStoreLoadAutoHealsCorruptJSON(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func TestStoreLoadAutoHealsCorruptJSON(t *testing.T) {
|
func TestStoreLoadAutoHealsCorruptJSON(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
p := filepath.Join(dir, "runs.json")
|
p := filepath.Join(dir, "runs.json")
|
||||||
@ -88,6 +100,9 @@ func TestStoreLoadAutoHealsCorruptJSON(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestShutdownP95WithMinSamplesFallsBackWhenHistorySparse runs one orchestration or CLI step.
|
||||||
|
// Signature: TestShutdownP95WithMinSamplesFallsBackWhenHistorySparse(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func TestShutdownP95WithMinSamplesFallsBackWhenHistorySparse(t *testing.T) {
|
func TestShutdownP95WithMinSamplesFallsBackWhenHistorySparse(t *testing.T) {
|
||||||
p := filepath.Join(t.TempDir(), "runs.json")
|
p := filepath.Join(t.TempDir(), "runs.json")
|
||||||
records := []RunRecord{
|
records := []RunRecord{
|
||||||
@ -115,6 +130,9 @@ func TestShutdownP95WithMinSamplesFallsBackWhenHistorySparse(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestShutdownP95ByReasonPrefixFiltersSamples runs one orchestration or CLI step.
|
||||||
|
// Signature: TestShutdownP95ByReasonPrefixFiltersSamples(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func TestShutdownP95ByReasonPrefixFiltersSamples(t *testing.T) {
|
func TestShutdownP95ByReasonPrefixFiltersSamples(t *testing.T) {
|
||||||
p := filepath.Join(t.TempDir(), "runs.json")
|
p := filepath.Join(t.TempDir(), "runs.json")
|
||||||
now := time.Now().UTC()
|
now := time.Now().UTC()
|
||||||
@ -161,6 +179,9 @@ func TestShutdownP95ByReasonPrefixFiltersSamples(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestShutdownP95IgnoresDryRunSamples runs one orchestration or CLI step.
|
||||||
|
// Signature: TestShutdownP95IgnoresDryRunSamples(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func TestShutdownP95IgnoresDryRunSamples(t *testing.T) {
|
func TestShutdownP95IgnoresDryRunSamples(t *testing.T) {
|
||||||
p := filepath.Join(t.TempDir(), "runs.json")
|
p := filepath.Join(t.TempDir(), "runs.json")
|
||||||
now := time.Now().UTC()
|
now := time.Now().UTC()
|
||||||
|
|||||||
10
internal/state/testhooks.go
Normal file
10
internal/state/testhooks.go
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
package state
|
||||||
|
|
||||||
|
import "os"
|
||||||
|
|
||||||
|
// TestHookQuarantineCorruptFile runs one orchestration or CLI step.
|
||||||
|
// Signature: TestHookQuarantineCorruptFile(path string, payload []byte, replacement []byte, mode os.FileMode) error.
|
||||||
|
// Why: exposes corrupt-file healing internals to the top-level testing module without package-local tests.
|
||||||
|
func TestHookQuarantineCorruptFile(path string, payload []byte, replacement []byte, mode os.FileMode) error {
|
||||||
|
return quarantineCorruptFile(path, payload, replacement, mode)
|
||||||
|
}
|
||||||
@ -28,10 +28,16 @@ type NUTProvider struct {
|
|||||||
Target string
|
Target string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NewNUTProvider runs one orchestration or CLI step.
|
||||||
|
// Signature: NewNUTProvider(target string) *NUTProvider.
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func NewNUTProvider(target string) *NUTProvider {
|
func NewNUTProvider(target string) *NUTProvider {
|
||||||
return &NUTProvider{Target: target}
|
return &NUTProvider{Target: target}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Read runs one orchestration or CLI step.
|
||||||
|
// Signature: (p *NUTProvider) Read(ctx context.Context) (Sample, error).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func (p *NUTProvider) Read(ctx context.Context) (Sample, error) {
|
func (p *NUTProvider) Read(ctx context.Context) (Sample, error) {
|
||||||
if p.Target == "" {
|
if p.Target == "" {
|
||||||
return Sample{}, fmt.Errorf("NUT target must not be empty")
|
return Sample{}, fmt.Errorf("NUT target must not be empty")
|
||||||
@ -44,6 +50,9 @@ func (p *NUTProvider) Read(ctx context.Context) (Sample, error) {
|
|||||||
return parseNUT(string(out))
|
return parseNUT(string(out))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parseNUT runs one orchestration or CLI step.
|
||||||
|
// Signature: parseNUT(raw string) (Sample, error).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func parseNUT(raw string) (Sample, error) {
|
func parseNUT(raw string) (Sample, error) {
|
||||||
kv := map[string]string{}
|
kv := map[string]string{}
|
||||||
s := bufio.NewScanner(strings.NewReader(raw))
|
s := bufio.NewScanner(strings.NewReader(raw))
|
||||||
@ -106,6 +115,9 @@ func parseNUT(raw string) (Sample, error) {
|
|||||||
|
|
||||||
var parseNumberCleaner = regexp.MustCompile(`[^0-9.+-]`)
|
var parseNumberCleaner = regexp.MustCompile(`[^0-9.+-]`)
|
||||||
|
|
||||||
|
// parseNumber runs one orchestration or CLI step.
|
||||||
|
// Signature: parseNumber(raw string) (float64, bool).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func parseNumber(raw string) (float64, bool) {
|
func parseNumber(raw string) (float64, bool) {
|
||||||
cleaned := strings.TrimSpace(parseNumberCleaner.ReplaceAllString(raw, ""))
|
cleaned := strings.TrimSpace(parseNumberCleaner.ReplaceAllString(raw, ""))
|
||||||
if cleaned == "" {
|
if cleaned == "" {
|
||||||
|
|||||||
108
internal/ups/nut_additional_test.go
Normal file
108
internal/ups/nut_additional_test.go
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
package ups
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestParseNUTRejectsMissingStatus runs one orchestration or CLI step.
|
||||||
|
// Signature: TestParseNUTRejectsMissingStatus(t *testing.T).
|
||||||
|
// Why: covers parser error path when mandatory status line is absent.
|
||||||
|
func TestParseNUTRejectsMissingStatus(t *testing.T) {
|
||||||
|
if _, err := parseNUT("battery.charge: 88"); err == nil {
|
||||||
|
t.Fatalf("expected missing status error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestParseNUTParsesOptionalNumbers runs one orchestration or CLI step.
|
||||||
|
// Signature: TestParseNUTParsesOptionalNumbers(t *testing.T).
|
||||||
|
// Why: covers numeric extraction branches for charge/load/nominal fields.
|
||||||
|
func TestParseNUTParsesOptionalNumbers(t *testing.T) {
|
||||||
|
raw := strings.Join([]string{
|
||||||
|
"ups.status: OB LB",
|
||||||
|
"battery.runtime: 1024",
|
||||||
|
"battery.charge: 71.5 Percent",
|
||||||
|
"ups.load: 12.0 Percent",
|
||||||
|
"ups.realpower.nominal: 900 W",
|
||||||
|
"",
|
||||||
|
}, "\n")
|
||||||
|
s, err := parseNUT(raw)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parseNUT failed: %v", err)
|
||||||
|
}
|
||||||
|
if !s.OnBattery || !s.LowBattery || s.RuntimeSeconds != 1024 {
|
||||||
|
t.Fatalf("unexpected status parse: %+v", s)
|
||||||
|
}
|
||||||
|
if s.BatteryCharge != 71.5 || s.LoadPercent != 12 || s.NominalPowerW != 900 {
|
||||||
|
t.Fatalf("unexpected numeric parse: %+v", s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestNUTProviderReadViaPathShim runs one orchestration or CLI step.
|
||||||
|
// Signature: TestNUTProviderReadViaPathShim(t *testing.T).
|
||||||
|
// Why: covers provider command execution success path deterministically.
|
||||||
|
func TestNUTProviderReadViaPathShim(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
upscPath := filepath.Join(tmp, "upsc")
|
||||||
|
script := `#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
echo "ups.status: OL"
|
||||||
|
echo "battery.runtime: 500"
|
||||||
|
`
|
||||||
|
if err := os.WriteFile(upscPath, []byte(script), 0o755); err != nil {
|
||||||
|
t.Fatalf("write fake upsc: %v", err)
|
||||||
|
}
|
||||||
|
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||||
|
|
||||||
|
sample, err := NewNUTProvider("statera@localhost").Read(context.Background())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("provider read failed: %v", err)
|
||||||
|
}
|
||||||
|
if sample.OnBattery {
|
||||||
|
t.Fatalf("expected OL to report not-on-battery")
|
||||||
|
}
|
||||||
|
if sample.RuntimeSeconds != 500 {
|
||||||
|
t.Fatalf("expected runtime 500, got %d", sample.RuntimeSeconds)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestNUTProviderReadRejectsEmptyTarget runs one orchestration or CLI step.
|
||||||
|
// Signature: TestNUTProviderReadRejectsEmptyTarget(t *testing.T).
|
||||||
|
// Why: covers provider guard for empty NUT target values.
|
||||||
|
func TestNUTProviderReadRejectsEmptyTarget(t *testing.T) {
|
||||||
|
if _, err := NewNUTProvider("").Read(context.Background()); err == nil {
|
||||||
|
t.Fatalf("expected empty-target read error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestParseNumberRejectsInvalid runs one orchestration or CLI step.
|
||||||
|
// Signature: TestParseNumberRejectsInvalid(t *testing.T).
|
||||||
|
// Why: covers parseNumber false-return branch for invalid input.
|
||||||
|
func TestParseNumberRejectsInvalid(t *testing.T) {
|
||||||
|
if _, ok := parseNumber("not-a-number"); ok {
|
||||||
|
t.Fatalf("expected parseNumber to reject invalid input")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestNUTProviderReadCommandFailure runs one orchestration or CLI step.
|
||||||
|
// Signature: TestNUTProviderReadCommandFailure(t *testing.T).
|
||||||
|
// Why: covers provider error propagation when upsc exits non-zero.
|
||||||
|
func TestNUTProviderReadCommandFailure(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
upscPath := filepath.Join(tmp, "upsc")
|
||||||
|
script := `#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
echo "upsc failed" >&2
|
||||||
|
exit 2
|
||||||
|
`
|
||||||
|
if err := os.WriteFile(upscPath, []byte(script), 0o755); err != nil {
|
||||||
|
t.Fatalf("write fake upsc: %v", err)
|
||||||
|
}
|
||||||
|
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||||
|
if _, err := NewNUTProvider("pyrphoros@localhost").Read(context.Background()); err == nil {
|
||||||
|
t.Fatalf("expected provider read error on upsc failure")
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -2,6 +2,9 @@ package ups
|
|||||||
|
|
||||||
import "testing"
|
import "testing"
|
||||||
|
|
||||||
|
// TestParseNUT runs one orchestration or CLI step.
|
||||||
|
// Signature: TestParseNUT(t *testing.T).
|
||||||
|
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||||
func TestParseNUT(t *testing.T) {
|
func TestParseNUT(t *testing.T) {
|
||||||
raw := `battery.runtime: 384
|
raw := `battery.runtime: 384
|
||||||
battery.charge: 72
|
battery.charge: 72
|
||||||
|
|||||||
@ -9,7 +9,7 @@ ANANKE_COORDINATOR_RELAY="${ANANKE_COORDINATOR_RELAY:-}"
|
|||||||
LOG_DIR="${ANANKE_DRILL_LOG_DIR:-/tmp/ananke-drills}"
|
LOG_DIR="${ANANKE_DRILL_LOG_DIR:-/tmp/ananke-drills}"
|
||||||
STARTUP_TIMEOUT_SECONDS="${ANANKE_DRILL_STARTUP_TIMEOUT_SECONDS:-1800}"
|
STARTUP_TIMEOUT_SECONDS="${ANANKE_DRILL_STARTUP_TIMEOUT_SECONDS:-1800}"
|
||||||
SHUTDOWN_TIMEOUT_SECONDS="${ANANKE_DRILL_SHUTDOWN_TIMEOUT_SECONDS:-1800}"
|
SHUTDOWN_TIMEOUT_SECONDS="${ANANKE_DRILL_SHUTDOWN_TIMEOUT_SECONDS:-1800}"
|
||||||
SHUTDOWN_CONFIG="${ANANKE_DRILL_SHUTDOWN_CONFIG:-/tmp/ananke-drill-no-poweroff.yaml}"
|
SHUTDOWN_CONFIG="${ANANKE_DRILL_SHUTDOWN_CONFIG:-/tmp/ananke-drill-cluster-only.yaml}"
|
||||||
STARTUP_RETRY_DELAY_SECONDS="${ANANKE_DRILL_STARTUP_RETRY_DELAY_SECONDS:-10}"
|
STARTUP_RETRY_DELAY_SECONDS="${ANANKE_DRILL_STARTUP_RETRY_DELAY_SECONDS:-10}"
|
||||||
STARTUP_RETRY_MAX="${ANANKE_DRILL_STARTUP_RETRY_MAX:-12}"
|
STARTUP_RETRY_MAX="${ANANKE_DRILL_STARTUP_RETRY_MAX:-12}"
|
||||||
EXECUTE=0
|
EXECUTE=0
|
||||||
@ -25,7 +25,7 @@ Drills:
|
|||||||
foundation-recovery Simulate vault/postgres/gitea outage and require layered restore.
|
foundation-recovery Simulate vault/postgres/gitea outage and require layered restore.
|
||||||
reconciliation-resume Simulate global Flux suspend + source-controller down and require resume.
|
reconciliation-resume Simulate global Flux suspend + source-controller down and require resume.
|
||||||
startup-intent-guard Assert startup is blocked when shutdown intent is active.
|
startup-intent-guard Assert startup is blocked when shutdown intent is active.
|
||||||
controlled-cycle Run full shutdown->startup recovery cycle (uses no-poweroff config).
|
controlled-cycle Run full shutdown->startup recovery cycle (uses cluster-only shutdown config).
|
||||||
|
|
||||||
Notes:
|
Notes:
|
||||||
- Drills are intentionally disruptive and are not part of regular `make test`.
|
- Drills are intentionally disruptive and are not part of regular `make test`.
|
||||||
@ -405,7 +405,7 @@ run_drill_controlled_cycle() {
|
|||||||
run_coordinator_bash "[ -s '${SHUTDOWN_CONFIG}' ]" || die "shutdown drill config missing on coordinator: ${SHUTDOWN_CONFIG}"
|
run_coordinator_bash "[ -s '${SHUTDOWN_CONFIG}' ]" || die "shutdown drill config missing on coordinator: ${SHUTDOWN_CONFIG}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
log "running controlled shutdown cycle (poweroff disabled config)"
|
log "running controlled shutdown cycle (cluster-only shutdown config)"
|
||||||
run_ananke_shutdown "drill-controlled-cycle-shutdown"
|
run_ananke_shutdown "drill-controlled-cycle-shutdown"
|
||||||
|
|
||||||
log "running startup recovery cycle"
|
log "running startup recovery cycle"
|
||||||
|
|||||||
@ -9,6 +9,7 @@ fi
|
|||||||
REPO_URL="${ANANKE_REPO_URL:-ssh://git@scm.bstein.dev:2242/bstein/ananke.git}"
|
REPO_URL="${ANANKE_REPO_URL:-ssh://git@scm.bstein.dev:2242/bstein/ananke.git}"
|
||||||
BRANCH="${ANANKE_REPO_BRANCH:-main}"
|
BRANCH="${ANANKE_REPO_BRANCH:-main}"
|
||||||
REPO_DIR="${ANANKE_REPO_DIR:-/opt/ananke}"
|
REPO_DIR="${ANANKE_REPO_DIR:-/opt/ananke}"
|
||||||
|
HOST_SHORT="$(hostname -s 2>/dev/null || hostname)"
|
||||||
|
|
||||||
mkdir -p "$(dirname "${REPO_DIR}")"
|
mkdir -p "$(dirname "${REPO_DIR}")"
|
||||||
if [[ ! -d "${REPO_DIR}/.git" ]]; then
|
if [[ ! -d "${REPO_DIR}/.git" ]]; then
|
||||||
@ -23,4 +24,16 @@ git checkout "${BRANCH}"
|
|||||||
git reset --hard "origin/${BRANCH}"
|
git reset --hard "origin/${BRANCH}"
|
||||||
|
|
||||||
echo "[self-update] running installer"
|
echo "[self-update] running installer"
|
||||||
|
# Keep host configs aligned with tracked templates so startup/shutdown drills
|
||||||
|
# always use the latest checklist and safety logic.
|
||||||
|
if [[ -z "${ANANKE_FORCE_CONFIG_TEMPLATE:-}" ]]; then
|
||||||
|
case "${HOST_SHORT}" in
|
||||||
|
titan-db)
|
||||||
|
export ANANKE_FORCE_CONFIG_TEMPLATE="coordinator"
|
||||||
|
;;
|
||||||
|
titan-24)
|
||||||
|
export ANANKE_FORCE_CONFIG_TEMPLATE="peer"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
"${REPO_DIR}/scripts/install.sh"
|
"${REPO_DIR}/scripts/install.sh"
|
||||||
|
|||||||
@ -22,6 +22,7 @@ NUT_PRODUCT_ID="${ANANKE_NUT_PRODUCT_ID:-0601}"
|
|||||||
NUT_MONITOR_USER="${ANANKE_NUT_MONITOR_USER:-monuser}"
|
NUT_MONITOR_USER="${ANANKE_NUT_MONITOR_USER:-monuser}"
|
||||||
NUT_MONITOR_PASSWORD="${ANANKE_NUT_MONITOR_PASSWORD:-anankeupsmon}"
|
NUT_MONITOR_PASSWORD="${ANANKE_NUT_MONITOR_PASSWORD:-anankeupsmon}"
|
||||||
FORCE_CONFIG_TEMPLATE="${ANANKE_FORCE_CONFIG_TEMPLATE:-}"
|
FORCE_CONFIG_TEMPLATE="${ANANKE_FORCE_CONFIG_TEMPLATE:-}"
|
||||||
|
ENFORCE_QUALITY_GATE="${ANANKE_ENFORCE_QUALITY_GATE:-1}"
|
||||||
|
|
||||||
while [[ $# -gt 0 ]]; do
|
while [[ $# -gt 0 ]]; do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
@ -228,6 +229,28 @@ migrate_ananke_config() {
|
|||||||
echo "[install] added coordination.startup_guard_max_age_seconds=900"
|
echo "[install] added coordination.startup_guard_max_age_seconds=900"
|
||||||
changed=1
|
changed=1
|
||||||
fi
|
fi
|
||||||
|
if grep -Eq '^[[:space:]]*poweroff_enabled:[[:space:]]*(true|false)' "${CONF_DIR}/ananke.yaml"; then
|
||||||
|
sed -Ei \
|
||||||
|
-e '/^[[:space:]]*poweroff_enabled:[[:space:]]*(true|false)/d' \
|
||||||
|
-e '/^[[:space:]]*poweroff_delay_seconds:[[:space:]]*[0-9]+/d' \
|
||||||
|
-e '/^[[:space:]]*poweroff_local_host:[[:space:]]*(true|false)/d' \
|
||||||
|
-e '/^[[:space:]]*extra_poweroff_hosts:[[:space:]]*(\[\])?[[:space:]]*$/d' \
|
||||||
|
"${CONF_DIR}/ananke.yaml"
|
||||||
|
echo "[install] removed deprecated host-poweroff shutdown config keys"
|
||||||
|
changed=1
|
||||||
|
fi
|
||||||
|
if grep -Eq '^ minimum_battery_percent:[[:space:]]*[0-9.]+' "${CONF_DIR}/ananke.yaml" \
|
||||||
|
&& ! grep -Eq '^ require_node_inventory_reachability:[[:space:]]*(true|false)' "${CONF_DIR}/ananke.yaml"; then
|
||||||
|
sed -Ei '/^ minimum_battery_percent:[[:space:]]*[0-9.]+/a\ require_node_inventory_reachability: true\n node_inventory_reachability_wait_seconds: 300\n node_inventory_reachability_poll_seconds: 5' "${CONF_DIR}/ananke.yaml"
|
||||||
|
echo "[install] added startup node inventory reachability gate defaults"
|
||||||
|
changed=1
|
||||||
|
fi
|
||||||
|
if grep -Eq '^state:[[:space:]]*$' "${CONF_DIR}/ananke.yaml" \
|
||||||
|
&& ! grep -Eq '^ reports_dir:[[:space:]]*/var/lib/ananke/reports' "${CONF_DIR}/ananke.yaml"; then
|
||||||
|
sed -Ei '/^ dir:[[:space:]]*\/var\/lib\/ananke$/a\ reports_dir: /var/lib/ananke/reports' "${CONF_DIR}/ananke.yaml"
|
||||||
|
echo "[install] added state.reports_dir default"
|
||||||
|
changed=1
|
||||||
|
fi
|
||||||
if ! grep -Eq '^ peer_hosts:' "${CONF_DIR}/ananke.yaml"; then
|
if ! grep -Eq '^ peer_hosts:' "${CONF_DIR}/ananke.yaml"; then
|
||||||
if [[ "${role_hint}" == "peer" ]] && grep -Eq '^ forward_shutdown_host:[[:space:]]*[A-Za-z0-9._-]+' "${CONF_DIR}/ananke.yaml"; then
|
if [[ "${role_hint}" == "peer" ]] && grep -Eq '^ forward_shutdown_host:[[:space:]]*[A-Za-z0-9._-]+' "${CONF_DIR}/ananke.yaml"; then
|
||||||
local peer_host
|
local peer_host
|
||||||
@ -838,6 +861,13 @@ EOF
|
|||||||
ensure_dependencies
|
ensure_dependencies
|
||||||
migrate_legacy_hecate_install
|
migrate_legacy_hecate_install
|
||||||
|
|
||||||
|
if [[ "${ENFORCE_QUALITY_GATE}" == "1" ]]; then
|
||||||
|
echo "[install] running quality gate"
|
||||||
|
"${REPO_DIR}/scripts/quality_gate.sh"
|
||||||
|
else
|
||||||
|
echo "[install] skipping quality gate (ANANKE_ENFORCE_QUALITY_GATE=${ENFORCE_QUALITY_GATE})"
|
||||||
|
fi
|
||||||
|
|
||||||
echo "[install] building ananke"
|
echo "[install] building ananke"
|
||||||
cd "${REPO_DIR}"
|
cd "${REPO_DIR}"
|
||||||
mkdir -p dist
|
mkdir -p dist
|
||||||
@ -855,6 +885,7 @@ install -m 0755 dist/ananke "${BIN_DIR}/ananke"
|
|||||||
echo "[install] installing config + state dirs"
|
echo "[install] installing config + state dirs"
|
||||||
install -d -m 0750 "${CONF_DIR}"
|
install -d -m 0750 "${CONF_DIR}"
|
||||||
install -d -m 0750 "${STATE_DIR}"
|
install -d -m 0750 "${STATE_DIR}"
|
||||||
|
install -d -m 0750 "${STATE_DIR}/reports"
|
||||||
install -d -m 0755 "${LIB_DIR}"
|
install -d -m 0755 "${LIB_DIR}"
|
||||||
|
|
||||||
if [[ -n "${FORCE_CONFIG_TEMPLATE}" ]]; then
|
if [[ -n "${FORCE_CONFIG_TEMPLATE}" ]]; then
|
||||||
|
|||||||
17
scripts/lint.sh
Executable file
17
scripts/lint.sh
Executable file
@ -0,0 +1,17 @@
|
|||||||
|
#!/usr/bin/env bash
# Lint entry point: runs `go vet` and then `staticcheck` over every package from the
# repository root, installing staticcheck with `go install` when it is not on PATH.
set -euo pipefail

REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "${REPO_DIR}"

# `go install` writes binaries to GOBIN when it is set, otherwise to bin/ under the
# first GOPATH entry. Resolve that directory with plain assignments so a failing
# `go env` aborts the script instead of being masked by `export`.
go_bin_dir="$(go env GOBIN)"
if [[ -z "${go_bin_dir}" ]]; then
  go_path="$(go env GOPATH)"
  go_bin_dir="${go_path%%:*}/bin"
fi
export PATH="${go_bin_dir}:${PATH}"

if ! command -v staticcheck >/dev/null 2>&1; then
  echo "[lint] installing staticcheck"
  go install honnef.co/go/tools/cmd/staticcheck@latest
fi

echo "[lint] go vet"
go vet ./...

echo "[lint] staticcheck (pedantic code-smell pass)"
staticcheck ./...
|
||||||
110
scripts/quality_gate.sh
Executable file
110
scripts/quality_gate.sh
Executable file
@ -0,0 +1,110 @@
|
|||||||
|
#!/usr/bin/env bash
set -euo pipefail

# Repository root: the parent of the directory that holds this script.
script_dir="$(dirname "${BASH_SOURCE[0]}")"
REPO_DIR="$(cd "${script_dir}/.." && pwd)"

# Metrics settings; each one can be overridden through the matching ANANKE_QUALITY_* variable.
QUALITY_METRICS_ENABLED="${ANANKE_QUALITY_METRICS_ENABLED:-1}"
QUALITY_METRICS_FILE="${ANANKE_QUALITY_METRICS_FILE:-/var/lib/ananke/quality-gate.prom}"
QUALITY_STATE_FILE="${ANANKE_QUALITY_STATE_FILE:-/var/lib/ananke/quality-gate.state}"
|
||||||
|
|
||||||
|
# read_quality_counter KEY
# Prints the last value stored for KEY in ${QUALITY_STATE_FILE}, which holds key=value lines.
# Prints 0 when the state file does not exist or the stored value is not made of digits only.
# The value is printed in canonical base-10 form (no leading zeros).
read_quality_counter() {
  local key="$1"
  if [[ ! -f "${QUALITY_STATE_FILE}" ]]; then
    echo 0
    return 0
  fi

  local value
  # When the key appears more than once, the last occurrence wins.
  value="$(awk -F= -v key="${key}" '$1==key {print $2}' "${QUALITY_STATE_FILE}" | tail -n1)"
  if [[ ! "${value}" =~ ^[0-9]+$ ]]; then
    echo 0
    return 0
  fi

  # Force base 10: a zero-padded value such as "08" would otherwise be read as an
  # invalid octal constant by the arithmetic the caller performs on the result.
  echo "$((10#${value}))"
}
|
||||||
|
|
||||||
|
# write_quality_metrics EXIT_CODE
# Updates the running ok/failed counters for this gate run and rewrites two files:
#   ${QUALITY_METRICS_FILE} - metrics in Prometheus text exposition format
#   ${QUALITY_STATE_FILE}   - key=value counters read back by read_quality_counter
# EXIT_CODE 0 counts as a successful run; any other value counts as a failed run.
# Does nothing when QUALITY_METRICS_ENABLED is not "1". Directory or temp-file creation
# failures are treated as best-effort: the function returns 0 without writing anything.
write_quality_metrics() {
  local exit_code="$1"
  if [[ "${QUALITY_METRICS_ENABLED}" != "1" ]]; then
    return 0
  fi

  local metrics_dir state_dir
  metrics_dir="$(dirname "${QUALITY_METRICS_FILE}")"
  state_dir="$(dirname "${QUALITY_STATE_FILE}")"
  mkdir -p "${metrics_dir}" "${state_dir}" >/dev/null 2>&1 || return 0

  local ok failed total last_success now success_percent
  ok="$(read_quality_counter ok)"
  failed="$(read_quality_counter failed)"
  # Normalise to base 10 so zero-padded counters cannot be parsed as octal below.
  ok=$((10#${ok:-0}))
  failed=$((10#${failed:-0}))
  last_success=0
  if [[ "${exit_code}" -eq 0 ]]; then
    ok=$((ok + 1))
    last_success=1
  else
    failed=$((failed + 1))
  fi
  total=$((ok + failed))
  now="$(date +%s)"
  success_percent="$(awk -v ok="${ok}" -v total="${total}" 'BEGIN { if (total <= 0) { print "0.00" } else { printf "%.2f", (ok * 100.0) / total } }')"

  # Temp files are created next to their targets so the final mv is a same-directory rename.
  # A failed mktemp must not fall through to redirecting into an empty path.
  local tmp_metrics tmp_state
  tmp_metrics="$(mktemp "${metrics_dir}/quality-gate.prom.XXXXXX")" || return 0
  if ! tmp_state="$(mktemp "${state_dir}/quality-gate.state.XXXXXX")"; then
    rm -f "${tmp_metrics}"
    return 0
  fi

  cat > "${tmp_metrics}" <<EOF
# HELP ananke_quality_gate_runs_total Total Ananke quality gate runs by status.
# TYPE ananke_quality_gate_runs_total counter
ananke_quality_gate_runs_total{suite="ananke",status="ok"} ${ok}
ananke_quality_gate_runs_total{suite="ananke",status="failed"} ${failed}
# HELP ananke_quality_gate_last_run_success Whether the latest quality gate run succeeded.
# TYPE ananke_quality_gate_last_run_success gauge
ananke_quality_gate_last_run_success{suite="ananke"} ${last_success}
# HELP ananke_quality_gate_last_run_timestamp_seconds Unix timestamp of the latest quality gate run.
# TYPE ananke_quality_gate_last_run_timestamp_seconds gauge
ananke_quality_gate_last_run_timestamp_seconds{suite="ananke"} ${now}
# HELP ananke_quality_gate_success_percent Running quality gate success percentage for Ananke.
# TYPE ananke_quality_gate_success_percent gauge
ananke_quality_gate_success_percent{suite="ananke"} ${success_percent}
EOF

  cat > "${tmp_state}" <<EOF
ok=${ok}
failed=${failed}
last_success=${last_success}
last_run=${now}
EOF

  # Publish both files; remove a temp file whose rename failed so it does not linger.
  mv -f "${tmp_metrics}" "${QUALITY_METRICS_FILE}" || rm -f "${tmp_metrics}"
  mv -f "${tmp_state}" "${QUALITY_STATE_FILE}" || rm -f "${tmp_state}"
}
|
||||||
|
|
||||||
|
# quality_gate_finalize STATUS
# Exit handler: records metrics for the run, then exits with the status it was given.
quality_gate_finalize() {
  local status="$1"
  # Turn errexit off so a metrics problem cannot replace the gate's own exit status.
  set +e
  write_quality_metrics "${status}" || :
  exit "${status}"
}
|
||||||
|
|
||||||
|
# Hand the script's exit status to the finalizer however the script ends.
trap 'quality_gate_finalize $?' EXIT

cd "${REPO_DIR}"

echo "[quality] unit tests"
go test ./...

# Steps that run from testing/ use a subshell, so the working directory of the
# script itself stays at the repository root.
echo "[quality] hygiene: doc contracts"
(cd testing && go test ./hygiene -run TestHygieneContracts/doc_contract -count=1)

echo "[quality] hygiene: naming contracts"
(cd testing && go test ./hygiene -run TestHygieneContracts/naming_contract -count=1)

echo "[quality] hygiene: LOC limits"
(cd testing && go test ./hygiene -run TestHygieneContracts/loc_limit -count=1)

echo "[quality] lint"
./scripts/lint.sh

echo "[quality] per-file coverage gate (95%)"
(cd testing && ANANKE_ENFORCE_COVERAGE=1 ANANKE_PER_FILE_COVERAGE_TARGET=95 go test ./coverage -run TestPerFileCoverageReport -count=1 -v)
|
||||||
238
testing/config/config_quality_matrix_test.go
Normal file
238
testing/config/config_quality_matrix_test.go
Normal file
@ -0,0 +1,238 @@
|
|||||||
|
package config
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
icfg "scm.bstein.dev/bstein/ananke/internal/config"
|
||||||
|
)
|
||||||
|
|
||||||
|
func loadBaselineConfig(t *testing.T) icfg.Config {
|
||||||
|
t.Helper()
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "ananke.yaml")
|
||||||
|
if err := os.WriteFile(path, []byte("ups:\n enabled: false\n"), 0o600); err != nil {
|
||||||
|
t.Fatalf("write baseline config: %v", err)
|
||||||
|
}
|
||||||
|
cfg, err := icfg.Load(path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("load baseline config: %v", err)
|
||||||
|
}
|
||||||
|
return cfg
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookServiceCatalogAndMergeContracts runs one orchestration or CLI step.
|
||||||
|
// Signature: TestHookServiceCatalogAndMergeContracts(t *testing.T).
|
||||||
|
// Why: validates startup checklist defaults and merge semantics so host-level
|
||||||
|
// overrides cannot silently drop required service behavior checks.
|
||||||
|
func TestHookServiceCatalogAndMergeContracts(t *testing.T) {
|
||||||
|
checks := icfg.TestHookDefaultServiceChecklist()
|
||||||
|
if len(checks) < 20 {
|
||||||
|
t.Fatalf("expected substantial default checklist, got %d checks", len(checks))
|
||||||
|
}
|
||||||
|
|
||||||
|
seen := map[string]icfg.ServiceChecklistCheck{}
|
||||||
|
for _, check := range checks {
|
||||||
|
seen[strings.TrimSpace(check.Name)] = check
|
||||||
|
}
|
||||||
|
logging, ok := seen["logging-ui-user-session"]
|
||||||
|
if !ok || !logging.RequireRobotAuth || strings.TrimSpace(logging.FinalURLNotContains) == "" {
|
||||||
|
t.Fatalf("expected logging-ui-user-session to require robot auth + final URL validation")
|
||||||
|
}
|
||||||
|
keycloak, ok := seen["keycloak-admin-user-session"]
|
||||||
|
if !ok || !keycloak.RequireRobotAuth || strings.TrimSpace(keycloak.FinalURLNotContains) == "" {
|
||||||
|
t.Fatalf("expected keycloak-admin-user-session hard auth assertions")
|
||||||
|
}
|
||||||
|
|
||||||
|
critical := icfg.TestHookDefaultCriticalServiceEndpoints()
|
||||||
|
if len(critical) == 0 {
|
||||||
|
t.Fatalf("expected critical endpoint defaults")
|
||||||
|
}
|
||||||
|
foundMonitoring := false
|
||||||
|
for _, entry := range critical {
|
||||||
|
if entry == "monitoring/grafana" {
|
||||||
|
foundMonitoring = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !foundMonitoring {
|
||||||
|
t.Fatalf("expected monitoring/grafana critical endpoint default")
|
||||||
|
}
|
||||||
|
|
||||||
|
mergedChecks := icfg.TestHookMergeServiceChecklistDefaults(
|
||||||
|
[]icfg.ServiceChecklistCheck{
|
||||||
|
{Name: "custom", URL: "https://custom.bstein.dev/", TimeoutSeconds: 5},
|
||||||
|
{Name: "logging-ui-user-session", URL: "https://override.invalid/", TimeoutSeconds: 5},
|
||||||
|
},
|
||||||
|
[]icfg.ServiceChecklistCheck{
|
||||||
|
{Name: "logging-ui-user-session", URL: "https://logs.bstein.dev/", TimeoutSeconds: 5},
|
||||||
|
{Name: "metrics-ui-user-session", URL: "https://metrics.bstein.dev/", TimeoutSeconds: 5},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
if len(mergedChecks) != 3 {
|
||||||
|
t.Fatalf("expected 3 merged checks with dedupe, got %d", len(mergedChecks))
|
||||||
|
}
|
||||||
|
|
||||||
|
mergedStrings := icfg.TestHookMergeStringDefaults(
|
||||||
|
[]string{" one ", "one", "", "two"},
|
||||||
|
[]string{"two", "three", " "},
|
||||||
|
)
|
||||||
|
if strings.Join(mergedStrings, ",") != "one,two,three" {
|
||||||
|
t.Fatalf("unexpected merged string defaults: %v", mergedStrings)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestValidateServiceChecklistAuthContracts runs one orchestration or CLI step.
|
||||||
|
// Signature: TestValidateServiceChecklistAuthContracts(t *testing.T).
|
||||||
|
// Why: covers service-checklist auth and final-url validation branches that are
|
||||||
|
// critical for preventing false-positive startup success.
|
||||||
|
func TestValidateServiceChecklistAuthContracts(t *testing.T) {
|
||||||
|
t.Run("invalid auth mode", func(t *testing.T) {
|
||||||
|
cfg := loadBaselineConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklistAuth.Mode = "bad-mode"
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected invalid mode validation error")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("invalid keycloak base url", func(t *testing.T) {
|
||||||
|
cfg := loadBaselineConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklistAuth.KeycloakBaseURL = "://broken"
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected invalid keycloak base URL validation error")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("missing secret key fields", func(t *testing.T) {
|
||||||
|
cfg := loadBaselineConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklistAuth.AdminSecretPasswordKey = ""
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected missing admin secret password key validation error")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("require robot auth with mode none", func(t *testing.T) {
|
||||||
|
cfg := loadBaselineConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklistAuth.Mode = "none"
|
||||||
|
cfg.Startup.ServiceChecklist = append(cfg.Startup.ServiceChecklist, icfg.ServiceChecklistCheck{
|
||||||
|
Name: "robot-only",
|
||||||
|
URL: "https://logs.bstein.dev/",
|
||||||
|
RequireRobotAuth: true,
|
||||||
|
TimeoutSeconds: 5,
|
||||||
|
})
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected require_robot_auth + mode none validation error")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("final url markers without redirects", func(t *testing.T) {
|
||||||
|
cfg := loadBaselineConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklist = append(cfg.Startup.ServiceChecklist, icfg.ServiceChecklistCheck{
|
||||||
|
Name: "final-url-invalid",
|
||||||
|
URL: "https://logs.bstein.dev/",
|
||||||
|
AcceptedStatuses: []int{200},
|
||||||
|
FinalURLContains: "/app/home",
|
||||||
|
TimeoutSeconds: 5,
|
||||||
|
})
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected final_url marker validation error when redirects disabled")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("invalid accepted status code", func(t *testing.T) {
|
||||||
|
cfg := loadBaselineConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklist[0].AcceptedStatuses = []int{700}
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected invalid accepted status code error")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("required node label map contracts", func(t *testing.T) {
|
||||||
|
cfg := loadBaselineConfig(t)
|
||||||
|
cfg.Startup.RequiredNodeLabels = map[string]map[string]string{" ": {"k": "v"}}
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected empty required-node-label key error")
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg = loadBaselineConfig(t)
|
||||||
|
cfg.Startup.RequiredNodeLabels = map[string]map[string]string{"titan-23": {}}
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected empty required-node-label map error")
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg = loadBaselineConfig(t)
|
||||||
|
cfg.Startup.RequiredNodeLabels = map[string]map[string]string{"titan-23": {"zone": " "}}
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected empty required-node-label value error")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("missing auth fields", func(t *testing.T) {
|
||||||
|
cfg := loadBaselineConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklistAuth.Realm = ""
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected missing realm error")
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg = loadBaselineConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklistAuth.RobotUsername = ""
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected missing robot username error")
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg = loadBaselineConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklistAuth.AdminSecretNamespace = ""
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected missing admin secret namespace error")
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg = loadBaselineConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklistAuth.AdminSecretName = ""
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected missing admin secret name error")
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg = loadBaselineConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklistAuth.AdminSecretUsernameKey = ""
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected missing admin secret username key error")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("service checklist missing url", func(t *testing.T) {
|
||||||
|
cfg := loadBaselineConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklist[0].URL = " "
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected missing checklist URL error")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("coordination and state contracts", func(t *testing.T) {
|
||||||
|
cfg := loadBaselineConfig(t)
|
||||||
|
cfg.Coordination.ForwardShutdownHost = "titan-24"
|
||||||
|
cfg.Coordination.ForwardShutdownConfig = ""
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected forward-shutdown config error")
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg = loadBaselineConfig(t)
|
||||||
|
cfg.Coordination.PeerHosts = []string{"titan-24", " "}
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected peer host empty entry error")
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg = loadBaselineConfig(t)
|
||||||
|
cfg.Coordination.Role = "invalid"
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected invalid coordination role error")
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg = loadBaselineConfig(t)
|
||||||
|
cfg.State.ReportsDir = ""
|
||||||
|
if err := cfg.Validate(); err == nil {
|
||||||
|
t.Fatalf("expected state reports_dir required error")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
@ -101,9 +101,18 @@ func TestPerFileCoverageReport(t *testing.T) {
|
|||||||
root := repoRoot(t)
|
root := repoRoot(t)
|
||||||
tmp := t.TempDir()
|
tmp := t.TempDir()
|
||||||
rootCover := filepath.Join(tmp, "ananke.root.cover.out")
|
rootCover := filepath.Join(tmp, "ananke.root.cover.out")
|
||||||
|
configCover := filepath.Join(tmp, "ananke.testing.config.cover.out")
|
||||||
testingCover := filepath.Join(tmp, "ananke.testing.cover.out")
|
testingCover := filepath.Join(tmp, "ananke.testing.cover.out")
|
||||||
|
|
||||||
runCoverageCommand(t, root, rootCover, "./...")
|
runCoverageCommand(t, root, rootCover, "./...")
|
||||||
|
runCoverageCommand(
|
||||||
|
t,
|
||||||
|
filepath.Join(root, "testing"),
|
||||||
|
configCover,
|
||||||
|
"./config",
|
||||||
|
"-coverpkg=scm.bstein.dev/bstein/ananke/...",
|
||||||
|
)
|
||||||
|
|
||||||
runCoverageCommand(
|
runCoverageCommand(
|
||||||
t,
|
t,
|
||||||
filepath.Join(root, "testing"),
|
filepath.Join(root, "testing"),
|
||||||
@ -118,6 +127,7 @@ func TestPerFileCoverageReport(t *testing.T) {
|
|||||||
|
|
||||||
blocks := map[string]coverageBlock{}
|
blocks := map[string]coverageBlock{}
|
||||||
parseCoverageProfile(t, rootCover, blocks)
|
parseCoverageProfile(t, rootCover, blocks)
|
||||||
|
parseCoverageProfile(t, configCover, blocks)
|
||||||
parseCoverageProfile(t, testingCover, blocks)
|
parseCoverageProfile(t, testingCover, blocks)
|
||||||
|
|
||||||
byFile := map[string]*fileCoverage{}
|
byFile := map[string]*fileCoverage{}
|
||||||
|
|||||||
@ -279,8 +279,8 @@ func TestHookGapMatrixPart11RemainingClosure(t *testing.T) {
|
|||||||
_, _, probeErr := orchBodyErr.TestHookHTTPChecklistProbe(context.Background(), config.ServiceChecklistCheck{
|
_, _, probeErr := orchBodyErr.TestHookHTTPChecklistProbe(context.Background(), config.ServiceChecklistCheck{
|
||||||
URL: "http://" + ln.Addr().String() + "/health",
|
URL: "http://" + ln.Addr().String() + "/health",
|
||||||
})
|
})
|
||||||
if probeErr == nil || !strings.Contains(probeErr.Error(), "read response body") {
|
if probeErr == nil || (!strings.Contains(probeErr.Error(), "read response body") && !strings.Contains(probeErr.Error(), "request failed")) {
|
||||||
t.Fatalf("expected checklist body read-error branch, got %v", probeErr)
|
t.Fatalf("expected checklist probe failure branch, got %v", probeErr)
|
||||||
}
|
}
|
||||||
|
|
||||||
cfgStability := lifecycleConfig(t)
|
cfgStability := lifecycleConfig(t)
|
||||||
|
|||||||
536
testing/orchestrator/hooks_service_auth_matrix_test.go
Normal file
536
testing/orchestrator/hooks_service_auth_matrix_test.go
Normal file
@ -0,0 +1,536 @@
|
|||||||
|
package orchestrator
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/base64"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/cluster"
|
||||||
|
"scm.bstein.dev/bstein/ananke/internal/config"
|
||||||
|
)
|
||||||
|
|
||||||
|
// testSecretJSON renders a JSON object whose "data" member holds the standard-base64 encodings of username and password.
// Signature: testSecretJSON(username, password string) string.
// Why: builds secret payloads for tests from plain-text credentials.
func testSecretJSON(username, password string) string {
	encode := func(plain string) string {
		return base64.StdEncoding.EncodeToString([]byte(plain))
	}
	const layout = `{"data":{"username":"%s","password":"%s"}}`
	return fmt.Sprintf(layout, encode(username), encode(password))
}
|
||||||
|
|
||||||
|
func authSettings(baseURL string) config.ServiceChecklistAuthSettings {
|
||||||
|
return config.ServiceChecklistAuthSettings{
|
||||||
|
Mode: "keycloak_robotuser",
|
||||||
|
KeycloakBaseURL: baseURL,
|
||||||
|
Realm: "atlas",
|
||||||
|
RobotUsername: "robotuser",
|
||||||
|
AdminSecretNamespace: "sso",
|
||||||
|
AdminSecretName: "keycloak-admin",
|
||||||
|
AdminSecretUsernameKey: "username",
|
||||||
|
AdminSecretPasswordKey: "password",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookServiceAuthChecklistSuccess runs one orchestration or CLI step.
|
||||||
|
// Signature: TestHookServiceAuthChecklistSuccess(t *testing.T).
|
||||||
|
// Why: validates full robotuser-authenticated checklist flow with final URL and
|
||||||
|
// body markers so startup gates reflect real post-login user behavior.
|
||||||
|
func TestHookServiceAuthChecklistSuccess(t *testing.T) {
|
||||||
|
var appServer *httptest.Server
|
||||||
|
appMux := http.NewServeMux()
|
||||||
|
appMux.HandleFunc("/session/bootstrap", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
http.SetCookie(w, &http.Cookie{Name: "robot_session", Value: "ok", Path: "/"})
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte("bootstrap ok"))
|
||||||
|
})
|
||||||
|
appMux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.URL.Path == "/" {
|
||||||
|
http.Redirect(w, r, "/app/home", http.StatusFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
cookie, err := r.Cookie("robot_session")
|
||||||
|
if err != nil || strings.TrimSpace(cookie.Value) == "" {
|
||||||
|
http.Redirect(w, r, "/oauth2/sign_in", http.StatusFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if r.URL.Path == "/app/home" {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte("OpenSearch Dashboards"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if r.URL.Path == "/oauth2/sign_in" {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte("sign in"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.WriteHeader(http.StatusNotFound)
|
||||||
|
})
|
||||||
|
appServer = httptest.NewTLSServer(appMux)
|
||||||
|
defer appServer.Close()
|
||||||
|
|
||||||
|
kcMux := http.NewServeMux()
|
||||||
|
kcMux.HandleFunc("/realms/master/protocol/openid-connect/token", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte(`{"access_token":"admin-token"}`))
|
||||||
|
})
|
||||||
|
kcMux.HandleFunc("/admin/realms/atlas/users", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte(`[{"id":"robot-id"}]`))
|
||||||
|
})
|
||||||
|
kcMux.HandleFunc("/admin/realms/atlas/users/robot-id/impersonation", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte(fmt.Sprintf(`{"redirect":"%s/session/bootstrap"}`, appServer.URL)))
|
||||||
|
})
|
||||||
|
kcServer := httptest.NewTLSServer(kcMux)
|
||||||
|
defer kcServer.Close()
|
||||||
|
|
||||||
|
cfg := lifecycleConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklistAuth = authSettings(kcServer.URL)
|
||||||
|
|
||||||
|
recorder := &commandRecorder{}
|
||||||
|
base := lifecycleDispatcher(recorder)
|
||||||
|
run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||||
|
command := name + " " + strings.Join(args, " ")
|
||||||
|
if name == "kubectl" && strings.Contains(command, "-n sso get secret keycloak-admin -o json") {
|
||||||
|
recorder.record(name, args)
|
||||||
|
return testSecretJSON("admin", "password"), nil
|
||||||
|
}
|
||||||
|
return base(ctx, timeout, name, args...)
|
||||||
|
}
|
||||||
|
orch, _ := newHookOrchestrator(t, cfg, run, run)
|
||||||
|
|
||||||
|
check := config.ServiceChecklistCheck{
|
||||||
|
Name: "logs-ui-user-session",
|
||||||
|
URL: appServer.URL + "/",
|
||||||
|
AcceptedStatuses: []int{200},
|
||||||
|
RequireRobotAuth: true,
|
||||||
|
FollowRedirects: true,
|
||||||
|
InsecureSkipTLS: true,
|
||||||
|
FinalURLContains: "/app/home",
|
||||||
|
FinalURLNotContains: "/oauth2/sign_in",
|
||||||
|
BodyContains: "OpenSearch Dashboards",
|
||||||
|
TimeoutSeconds: 5,
|
||||||
|
}
|
||||||
|
ok, detail := orch.TestHookServiceCheckReady(context.Background(), check)
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("expected authenticated checklist success, detail=%q", detail)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookServiceAuthModeAndSecretErrors runs one orchestration or CLI step.
|
||||||
|
// Signature: TestHookServiceAuthModeAndSecretErrors(t *testing.T).
|
||||||
|
// Why: covers auth mode guards and secret decode error branches to keep startup
|
||||||
|
// failures explicit when robot-auth prerequisites are missing.
|
||||||
|
func TestHookServiceAuthModeAndSecretErrors(t *testing.T) {
|
||||||
|
cfg := lifecycleConfig(t)
|
||||||
|
client := &http.Client{Timeout: time.Second}
|
||||||
|
|
||||||
|
cfgNone := lifecycleConfig(t)
|
||||||
|
cfgNone.Startup.ServiceChecklistAuth.Mode = "none"
|
||||||
|
orchNone, _ := newHookOrchestrator(t, cfgNone, nil, nil)
|
||||||
|
if err := orchNone.TestHookAuthenticateRobotChecklistSession(context.Background(), client); err == nil {
|
||||||
|
t.Fatalf("expected auth mode none to fail")
|
||||||
|
}
|
||||||
|
if _, err := orchNone.TestHookChecklistAuthHTTPClient(context.Background(), time.Second, false); err == nil {
|
||||||
|
t.Fatalf("expected checklist auth client init to fail when mode=none")
|
||||||
|
}
|
||||||
|
|
||||||
|
cfgBad := lifecycleConfig(t)
|
||||||
|
cfgBad.Startup.ServiceChecklistAuth.Mode = "bad-mode"
|
||||||
|
orchBad, _ := newHookOrchestrator(t, cfgBad, nil, nil)
|
||||||
|
if err := orchBad.TestHookAuthenticateRobotChecklistSession(context.Background(), client); err == nil {
|
||||||
|
t.Fatalf("expected unsupported auth mode to fail")
|
||||||
|
}
|
||||||
|
|
||||||
|
base := lifecycleDispatcher(&commandRecorder{})
|
||||||
|
runKubectlErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||||
|
if name == "kubectl" {
|
||||||
|
return "", errors.New("kubectl denied")
|
||||||
|
}
|
||||||
|
return base(ctx, timeout, name, args...)
|
||||||
|
}
|
||||||
|
orchKubectlErr, _ := newHookOrchestrator(t, cfg, runKubectlErr, runKubectlErr)
|
||||||
|
if _, err := orchKubectlErr.TestHookKubernetesSecretValue(context.Background(), "sso", "keycloak-admin", "username"); err == nil {
|
||||||
|
t.Fatalf("expected kubectl error branch")
|
||||||
|
}
|
||||||
|
if _, _, err := orchKubectlErr.TestHookKeycloakAdminCredentials(context.Background(), cfg.Startup.ServiceChecklistAuth); err == nil {
|
||||||
|
t.Fatalf("expected keycloakAdminCredentials to fail on username secret lookup")
|
||||||
|
}
|
||||||
|
if err := orchKubectlErr.TestHookAuthenticateRobotChecklistSession(context.Background(), client); err == nil {
|
||||||
|
t.Fatalf("expected auth session failure when secret lookup fails")
|
||||||
|
}
|
||||||
|
|
||||||
|
runBadJSON := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||||
|
if name == "kubectl" {
|
||||||
|
return "{bad", nil
|
||||||
|
}
|
||||||
|
return base(ctx, timeout, name, args...)
|
||||||
|
}
|
||||||
|
orchBadJSON, _ := newHookOrchestrator(t, cfg, runBadJSON, runBadJSON)
|
||||||
|
if _, err := orchBadJSON.TestHookKubernetesSecretValue(context.Background(), "sso", "keycloak-admin", "username"); err == nil {
|
||||||
|
t.Fatalf("expected secret decode error branch")
|
||||||
|
}
|
||||||
|
|
||||||
|
runMissingKey := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||||
|
if name == "kubectl" {
|
||||||
|
return `{"data":{"password":"cGFzcw=="}}`, nil
|
||||||
|
}
|
||||||
|
return base(ctx, timeout, name, args...)
|
||||||
|
}
|
||||||
|
orchMissingKey, _ := newHookOrchestrator(t, cfg, runMissingKey, runMissingKey)
|
||||||
|
if _, err := orchMissingKey.TestHookKubernetesSecretValue(context.Background(), "sso", "keycloak-admin", "username"); err == nil {
|
||||||
|
t.Fatalf("expected missing key branch")
|
||||||
|
}
|
||||||
|
if err := orchMissingKey.TestHookAuthenticateRobotChecklistSession(context.Background(), client); err == nil {
|
||||||
|
t.Fatalf("expected auth session failure when username key is missing")
|
||||||
|
}
|
||||||
|
|
||||||
|
runMissingPassword := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||||
|
if name == "kubectl" {
|
||||||
|
return `{"data":{"username":"YWRtaW4="}}`, nil
|
||||||
|
}
|
||||||
|
return base(ctx, timeout, name, args...)
|
||||||
|
}
|
||||||
|
orchMissingPassword, _ := newHookOrchestrator(t, cfg, runMissingPassword, runMissingPassword)
|
||||||
|
if _, _, err := orchMissingPassword.TestHookKeycloakAdminCredentials(context.Background(), cfg.Startup.ServiceChecklistAuth); err == nil {
|
||||||
|
t.Fatalf("expected keycloakAdminCredentials to fail on password secret lookup")
|
||||||
|
}
|
||||||
|
if err := orchMissingPassword.TestHookAuthenticateRobotChecklistSession(context.Background(), client); err == nil {
|
||||||
|
t.Fatalf("expected auth session failure when password key is missing")
|
||||||
|
}
|
||||||
|
|
||||||
|
runBadB64 := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||||
|
if name == "kubectl" {
|
||||||
|
return `{"data":{"username":"###"}}`, nil
|
||||||
|
}
|
||||||
|
return base(ctx, timeout, name, args...)
|
||||||
|
}
|
||||||
|
orchBadB64, _ := newHookOrchestrator(t, cfg, runBadB64, runBadB64)
|
||||||
|
if _, err := orchBadB64.TestHookKubernetesSecretValue(context.Background(), "sso", "keycloak-admin", "username"); err == nil {
|
||||||
|
t.Fatalf("expected base64 decode branch")
|
||||||
|
}
|
||||||
|
|
||||||
|
runEmptyValue := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||||
|
if name == "kubectl" {
|
||||||
|
return `{"data":{"username":"IA=="}}`, nil
|
||||||
|
}
|
||||||
|
return base(ctx, timeout, name, args...)
|
||||||
|
}
|
||||||
|
orchEmptyValue, _ := newHookOrchestrator(t, cfg, runEmptyValue, runEmptyValue)
|
||||||
|
if _, err := orchEmptyValue.TestHookKubernetesSecretValue(context.Background(), "sso", "keycloak-admin", "username"); err == nil {
|
||||||
|
t.Fatalf("expected empty decoded value branch")
|
||||||
|
}
|
||||||
|
|
||||||
|
if got := cluster.TestHookCompactHTTPBody([]byte(" hello world \n test ")); got != "hello world test" {
|
||||||
|
t.Fatalf("unexpected compact body %q", got)
|
||||||
|
}
|
||||||
|
if got := cluster.TestHookCompactHTTPBody([]byte(" \n\t ")); got != "" {
|
||||||
|
t.Fatalf("expected compact empty body, got %q", got)
|
||||||
|
}
|
||||||
|
if got := cluster.TestHookKeycloakBaseURL(config.ServiceChecklistAuthSettings{KeycloakBaseURL: "https://sso.bstein.dev/"}); got != "https://sso.bstein.dev" {
|
||||||
|
t.Fatalf("unexpected normalized base URL %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookServiceAuthHTTPErrorBranches runs one orchestration or CLI step.
|
||||||
|
// Signature: TestHookServiceAuthHTTPErrorBranches(t *testing.T).
|
||||||
|
// Why: covers token/user/impersonation parser and status branches so startup
|
||||||
|
// diagnostics remain actionable during auth failures.
|
||||||
|
func TestHookServiceAuthHTTPErrorBranches(t *testing.T) {
|
||||||
|
cfg := lifecycleConfig(t)
|
||||||
|
orch, _ := newHookOrchestrator(t, cfg, nil, nil)
|
||||||
|
client := &http.Client{Timeout: 2 * time.Second}
|
||||||
|
|
||||||
|
authBadURL := authSettings("://bad-url")
|
||||||
|
if _, err := orch.TestHookKeycloakAdminToken(context.Background(), client, authBadURL, "admin", "pw"); err == nil {
|
||||||
|
t.Fatalf("expected request-build failure for bad base URL")
|
||||||
|
}
|
||||||
|
if _, err := orch.TestHookKeycloakRobotUserID(context.Background(), client, authBadURL, "token"); err == nil {
|
||||||
|
t.Fatalf("expected robot-user request-build failure for bad base URL")
|
||||||
|
}
|
||||||
|
if _, err := orch.TestHookKeycloakImpersonationRedirect(context.Background(), client, authBadURL, "token", "robot"); err == nil {
|
||||||
|
t.Fatalf("expected impersonation request-build failure for bad base URL")
|
||||||
|
}
|
||||||
|
authRequestErr := authSettings("http://127.0.0.1:1")
|
||||||
|
if _, err := orch.TestHookKeycloakAdminToken(context.Background(), client, authRequestErr, "admin", "pw"); err == nil {
|
||||||
|
t.Fatalf("expected admin token request error branch")
|
||||||
|
}
|
||||||
|
if _, err := orch.TestHookKeycloakRobotUserID(context.Background(), client, authRequestErr, "token"); err == nil {
|
||||||
|
t.Fatalf("expected robot user request error branch")
|
||||||
|
}
|
||||||
|
if _, err := orch.TestHookKeycloakImpersonationRedirect(context.Background(), client, authRequestErr, "token", "robot"); err == nil {
|
||||||
|
t.Fatalf("expected impersonation request error branch")
|
||||||
|
}
|
||||||
|
|
||||||
|
kcError := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
switch {
|
||||||
|
case strings.Contains(r.URL.Path, "/token"):
|
||||||
|
w.WriteHeader(http.StatusUnauthorized)
|
||||||
|
_, _ = w.Write([]byte(`{"error":"unauthorized"}`))
|
||||||
|
case strings.Contains(r.URL.Path, "/users") && strings.Contains(r.URL.RawQuery, "username=robotuser"):
|
||||||
|
w.WriteHeader(http.StatusInternalServerError)
|
||||||
|
_, _ = w.Write([]byte(`{"error":"boom"}`))
|
||||||
|
default:
|
||||||
|
w.WriteHeader(http.StatusBadGateway)
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
defer kcError.Close()
|
||||||
|
authError := authSettings(kcError.URL)
|
||||||
|
if _, err := orch.TestHookKeycloakAdminToken(context.Background(), client, authError, "admin", "pw"); err == nil {
|
||||||
|
t.Fatalf("expected non-2xx token branch")
|
||||||
|
}
|
||||||
|
if _, err := orch.TestHookKeycloakRobotUserID(context.Background(), client, authError, "token"); err == nil {
|
||||||
|
t.Fatalf("expected non-2xx robot user branch")
|
||||||
|
}
|
||||||
|
|
||||||
|
kcDecode := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
switch {
|
||||||
|
case strings.Contains(r.URL.Path, "/token"):
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte("not-json"))
|
||||||
|
case strings.Contains(r.URL.Path, "/users") && strings.Contains(r.URL.RawQuery, "username=robotuser"):
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte("not-json"))
|
||||||
|
case strings.Contains(r.URL.Path, "/impersonation"):
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte("not-json"))
|
||||||
|
default:
|
||||||
|
w.WriteHeader(http.StatusNotFound)
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
defer kcDecode.Close()
|
||||||
|
authDecode := authSettings(kcDecode.URL)
|
||||||
|
if _, err := orch.TestHookKeycloakAdminToken(context.Background(), client, authDecode, "admin", "pw"); err == nil {
|
||||||
|
t.Fatalf("expected token decode error branch")
|
||||||
|
}
|
||||||
|
if _, err := orch.TestHookKeycloakRobotUserID(context.Background(), client, authDecode, "token"); err == nil {
|
||||||
|
t.Fatalf("expected robot user decode error branch")
|
||||||
|
}
|
||||||
|
if _, err := orch.TestHookKeycloakImpersonationRedirect(context.Background(), client, authDecode, "token", "robot"); err == nil {
|
||||||
|
t.Fatalf("expected impersonation decode error branch")
|
||||||
|
}
|
||||||
|
|
||||||
|
kcMissing := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
switch {
|
||||||
|
case strings.Contains(r.URL.Path, "/token"):
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte(`{"access_token":""}`))
|
||||||
|
case strings.Contains(r.URL.Path, "/users") && strings.Contains(r.URL.RawQuery, "username=robotuser"):
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte(`[]`))
|
||||||
|
case strings.Contains(r.URL.Path, "/impersonation"):
|
||||||
|
w.WriteHeader(http.StatusBadRequest)
|
||||||
|
_, _ = w.Write([]byte(`{"error":"bad request"}`))
|
||||||
|
default:
|
||||||
|
w.WriteHeader(http.StatusNotFound)
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
defer kcMissing.Close()
|
||||||
|
authMissing := authSettings(kcMissing.URL)
|
||||||
|
if _, err := orch.TestHookKeycloakAdminToken(context.Background(), client, authMissing, "admin", "pw"); err == nil {
|
||||||
|
t.Fatalf("expected missing access_token branch")
|
||||||
|
}
|
||||||
|
if _, err := orch.TestHookKeycloakRobotUserID(context.Background(), client, authMissing, "token"); err == nil {
|
||||||
|
t.Fatalf("expected missing robot user branch")
|
||||||
|
}
|
||||||
|
if _, err := orch.TestHookKeycloakImpersonationRedirect(context.Background(), client, authMissing, "token", "robot"); err == nil {
|
||||||
|
t.Fatalf("expected impersonation non-2xx branch")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookServiceChecklistProbeBranches runs one orchestration or CLI step.
|
||||||
|
// Signature: TestHookServiceChecklistProbeBranches(t *testing.T).
|
||||||
|
// Why: exercises redirect + final-url probe branches, including robot-auth
|
||||||
|
// initialization failures and redirect suppression behavior.
|
||||||
|
func TestHookServiceChecklistProbeBranches(t *testing.T) {
|
||||||
|
cfg := lifecycleConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklistAuth.Mode = "none"
|
||||||
|
orch, _ := newHookOrchestrator(t, cfg, nil, nil)
|
||||||
|
if _, _, _, _, err := orch.TestHookHTTPChecklistProbeWithLocation(context.Background(), config.ServiceChecklistCheck{
|
||||||
|
URL: "https://example.invalid/",
|
||||||
|
RequireRobotAuth: true,
|
||||||
|
TimeoutSeconds: 1,
|
||||||
|
}); err == nil {
|
||||||
|
t.Fatalf("expected robot auth initialization failure when mode=none")
|
||||||
|
}
|
||||||
|
|
||||||
|
redirectServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
http.Redirect(w, r, "/next", http.StatusFound)
|
||||||
|
}))
|
||||||
|
defer redirectServer.Close()
|
||||||
|
|
||||||
|
orchNoAuth, _ := newHookOrchestrator(t, lifecycleConfig(t), nil, nil)
|
||||||
|
status, _, location, finalURL, err := orchNoAuth.TestHookHTTPChecklistProbeWithLocation(context.Background(), config.ServiceChecklistCheck{
|
||||||
|
URL: redirectServer.URL,
|
||||||
|
FollowRedirects: false,
|
||||||
|
TimeoutSeconds: 2,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected redirect probe error: %v", err)
|
||||||
|
}
|
||||||
|
if status != http.StatusFound {
|
||||||
|
t.Fatalf("expected 302 status when redirects disabled, got %d", status)
|
||||||
|
}
|
||||||
|
if !strings.Contains(location, "/next") {
|
||||||
|
t.Fatalf("expected location header for redirect response, got %q", location)
|
||||||
|
}
|
||||||
|
if !strings.Contains(finalURL, redirectServer.URL) {
|
||||||
|
t.Fatalf("expected final URL to remain original request URL, got %q", finalURL)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookAuthenticateRobotChecklistSessionFailureStages runs one orchestration or CLI step.
|
||||||
|
// Signature: TestHookAuthenticateRobotChecklistSessionFailureStages(t *testing.T).
|
||||||
|
// Why: drives authenticateRobotChecklistSession through downstream error stages
|
||||||
|
// (robot lookup, impersonation, redirect-build, redirect-request) to maintain
|
||||||
|
// resilient startup diagnostics.
|
||||||
|
func TestHookAuthenticateRobotChecklistSessionFailureStages(t *testing.T) {
|
||||||
|
client := &http.Client{Timeout: 3 * time.Second}
|
||||||
|
recorder := &commandRecorder{}
|
||||||
|
base := lifecycleDispatcher(recorder)
|
||||||
|
secretRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||||
|
command := name + " " + strings.Join(args, " ")
|
||||||
|
if name == "kubectl" && strings.Contains(command, "-n sso get secret keycloak-admin -o json") {
|
||||||
|
return testSecretJSON("admin", "password"), nil
|
||||||
|
}
|
||||||
|
return base(ctx, timeout, name, args...)
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Run("robot-user lookup failure", func(t *testing.T) {
|
||||||
|
kc := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
switch {
|
||||||
|
case strings.Contains(r.URL.Path, "/token"):
|
||||||
|
_, _ = w.Write([]byte(`{"access_token":"admin-token"}`))
|
||||||
|
case strings.Contains(r.URL.Path, "/users"):
|
||||||
|
w.WriteHeader(http.StatusBadGateway)
|
||||||
|
_, _ = w.Write([]byte(`{"error":"lookup failed"}`))
|
||||||
|
default:
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
defer kc.Close()
|
||||||
|
cfg := lifecycleConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklistAuth = authSettings(kc.URL)
|
||||||
|
orch, _ := newHookOrchestrator(t, cfg, secretRun, secretRun)
|
||||||
|
if err := orch.TestHookAuthenticateRobotChecklistSession(context.Background(), client); err == nil {
|
||||||
|
t.Fatalf("expected robot-user lookup failure branch")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("impersonation failure", func(t *testing.T) {
|
||||||
|
kc := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
switch {
|
||||||
|
case strings.Contains(r.URL.Path, "/token"):
|
||||||
|
_, _ = w.Write([]byte(`{"access_token":"admin-token"}`))
|
||||||
|
case strings.Contains(r.URL.Path, "/users"):
|
||||||
|
_, _ = w.Write([]byte(`[{"id":"robot-id"}]`))
|
||||||
|
case strings.Contains(r.URL.Path, "/impersonation"):
|
||||||
|
w.WriteHeader(http.StatusBadGateway)
|
||||||
|
_, _ = w.Write([]byte(`{"error":"impersonation failed"}`))
|
||||||
|
default:
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
defer kc.Close()
|
||||||
|
cfg := lifecycleConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklistAuth = authSettings(kc.URL)
|
||||||
|
orch, _ := newHookOrchestrator(t, cfg, secretRun, secretRun)
|
||||||
|
if err := orch.TestHookAuthenticateRobotChecklistSession(context.Background(), client); err == nil {
|
||||||
|
t.Fatalf("expected impersonation failure branch")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("redirect url build failure", func(t *testing.T) {
|
||||||
|
kc := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
switch {
|
||||||
|
case strings.Contains(r.URL.Path, "/token"):
|
||||||
|
_, _ = w.Write([]byte(`{"access_token":"admin-token"}`))
|
||||||
|
case strings.Contains(r.URL.Path, "/users"):
|
||||||
|
_, _ = w.Write([]byte(`[{"id":"robot-id"}]`))
|
||||||
|
case strings.Contains(r.URL.Path, "/impersonation"):
|
||||||
|
_, _ = w.Write([]byte(`{"redirect":"://bad"}`))
|
||||||
|
default:
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
defer kc.Close()
|
||||||
|
cfg := lifecycleConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklistAuth = authSettings(kc.URL)
|
||||||
|
orch, _ := newHookOrchestrator(t, cfg, secretRun, secretRun)
|
||||||
|
if err := orch.TestHookAuthenticateRobotChecklistSession(context.Background(), client); err == nil {
|
||||||
|
t.Fatalf("expected redirect request-build failure branch")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("redirect request failure", func(t *testing.T) {
|
||||||
|
kc := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
switch {
|
||||||
|
case strings.Contains(r.URL.Path, "/token"):
|
||||||
|
_, _ = w.Write([]byte(`{"access_token":"admin-token"}`))
|
||||||
|
case strings.Contains(r.URL.Path, "/users"):
|
||||||
|
_, _ = w.Write([]byte(`[{"id":"robot-id"}]`))
|
||||||
|
case strings.Contains(r.URL.Path, "/impersonation"):
|
||||||
|
_, _ = w.Write([]byte(`{"redirect":"http://127.0.0.1:1/nowhere"}`))
|
||||||
|
default:
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
defer kc.Close()
|
||||||
|
cfg := lifecycleConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklistAuth = authSettings(kc.URL)
|
||||||
|
orch, _ := newHookOrchestrator(t, cfg, secretRun, secretRun)
|
||||||
|
if err := orch.TestHookAuthenticateRobotChecklistSession(context.Background(), client); err == nil {
|
||||||
|
t.Fatalf("expected redirect request failure branch")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookServiceAuthFallbackRedirect runs one orchestration or CLI step.
|
||||||
|
// Signature: TestHookServiceAuthFallbackRedirect(t *testing.T).
|
||||||
|
// Why: covers empty impersonation redirect fallback to realm account URL so
|
||||||
|
// session bootstrap is resilient to Keycloak response shape differences.
|
||||||
|
func TestHookServiceAuthFallbackRedirect(t *testing.T) {
|
||||||
|
kcMux := http.NewServeMux()
|
||||||
|
kcMux.HandleFunc("/realms/master/protocol/openid-connect/token", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte(`{"access_token":"admin-token"}`))
|
||||||
|
})
|
||||||
|
kcMux.HandleFunc("/admin/realms/atlas/users", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte(`[{"id":"robot-id"}]`))
|
||||||
|
})
|
||||||
|
kcMux.HandleFunc("/admin/realms/atlas/users/robot-id/impersonation", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte(`{"redirect":""}`))
|
||||||
|
})
|
||||||
|
kcMux.HandleFunc("/realms/atlas/account/", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte("account ok"))
|
||||||
|
})
|
||||||
|
kcServer := httptest.NewTLSServer(kcMux)
|
||||||
|
defer kcServer.Close()
|
||||||
|
|
||||||
|
cfg := lifecycleConfig(t)
|
||||||
|
cfg.Startup.ServiceChecklistAuth = authSettings(kcServer.URL)
|
||||||
|
recorder := &commandRecorder{}
|
||||||
|
base := lifecycleDispatcher(recorder)
|
||||||
|
run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||||
|
command := name + " " + strings.Join(args, " ")
|
||||||
|
if name == "kubectl" && strings.Contains(command, "-n sso get secret keycloak-admin -o json") {
|
||||||
|
return testSecretJSON("admin", "password"), nil
|
||||||
|
}
|
||||||
|
return base(ctx, timeout, name, args...)
|
||||||
|
}
|
||||||
|
orch, _ := newHookOrchestrator(t, cfg, run, run)
|
||||||
|
if err := orch.TestHookAuthenticateRobotChecklistSession(context.Background(), &http.Client{Timeout: 4 * time.Second, Transport: &http.Transport{}}); err == nil {
|
||||||
|
t.Fatalf("expected auth bootstrap without TLS skip to fail against TLS test server")
|
||||||
|
}
|
||||||
|
if _, err := orch.TestHookChecklistAuthHTTPClient(context.Background(), 4*time.Second, true); err != nil {
|
||||||
|
t.Fatalf("expected checklist auth client fallback redirect path success, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user