ananke: refactor orchestrator, enforce quality gates, and harden startup checks
This commit is contained in:
parent
baead1426e
commit
c2c79e5821
19
Makefile
19
Makefile
@ -1,4 +1,4 @@
|
||||
.PHONY: build test fmt tidy install drill-list drill-run
|
||||
.PHONY: build test test-all quality-gate hygiene lint coverage-report coverage-gate fmt tidy install drill-list drill-run
|
||||
|
||||
build:
|
||||
go build -o dist/ananke ./cmd/ananke
|
||||
@ -6,6 +6,23 @@ build:
|
||||
test:
|
||||
go test ./...
|
||||
|
||||
test-all: test hygiene lint coverage-report
|
||||
|
||||
quality-gate:
|
||||
./scripts/quality_gate.sh
|
||||
|
||||
hygiene:
|
||||
cd testing && go test ./hygiene
|
||||
|
||||
lint:
|
||||
./scripts/lint.sh
|
||||
|
||||
coverage-report:
|
||||
cd testing && go test ./coverage -run TestPerFileCoverageReport -count=1 -v
|
||||
|
||||
coverage-gate:
|
||||
cd testing && ANANKE_ENFORCE_COVERAGE=1 ANANKE_PER_FILE_COVERAGE_TARGET=95 go test ./coverage -run TestPerFileCoverageReport -count=1 -v
|
||||
|
||||
fmt:
|
||||
gofmt -w ./cmd ./internal
|
||||
|
||||
|
||||
@ -48,6 +48,9 @@ startup:
|
||||
api_poll_seconds: 2
|
||||
shutdown_cooldown_seconds: 45
|
||||
minimum_battery_percent: 20
|
||||
require_node_inventory_reachability: true
|
||||
node_inventory_reachability_wait_seconds: 300
|
||||
node_inventory_reachability_poll_seconds: 5
|
||||
required_node_labels:
|
||||
titan-09:
|
||||
ananke.bstein.dev/harbor-bootstrap: "true"
|
||||
@ -78,6 +81,15 @@ startup:
|
||||
service_checklist_wait_seconds: 420
|
||||
service_checklist_poll_seconds: 5
|
||||
service_checklist_stability_seconds: 120
|
||||
service_checklist_auth:
|
||||
mode: keycloak_robotuser
|
||||
keycloak_base_url: https://sso.bstein.dev
|
||||
realm: atlas
|
||||
robot_username: robotuser
|
||||
admin_secret_namespace: sso
|
||||
admin_secret_name: keycloak-admin
|
||||
admin_secret_username_key: username
|
||||
admin_secret_password_key: password
|
||||
service_checklist:
|
||||
- name: gitea-api
|
||||
url: https://scm.bstein.dev/api/healthz
|
||||
@ -99,10 +111,20 @@ startup:
|
||||
accepted_statuses: [401]
|
||||
body_contains: unauthorized
|
||||
timeout_seconds: 12
|
||||
- name: longhorn-auth
|
||||
url: https://longhorn.bstein.dev/
|
||||
accepted_statuses: [200, 302]
|
||||
- name: longhorn-api-user-session
|
||||
url: https://longhorn.bstein.dev/v1
|
||||
accepted_statuses: [200]
|
||||
require_robot_auth: true
|
||||
follow_redirects: true
|
||||
final_url_contains: /v1
|
||||
final_url_not_contains: /oauth2/sign_in
|
||||
body_contains: '"id":"v1"'
|
||||
timeout_seconds: 12
|
||||
require_critical_service_endpoints: true
|
||||
critical_service_endpoint_wait_seconds: 420
|
||||
critical_service_endpoint_poll_seconds: 5
|
||||
critical_service_endpoints:
|
||||
- monitoring/victoria-metrics-single-server
|
||||
require_ingress_checklist: true
|
||||
ingress_checklist_wait_seconds: 420
|
||||
ingress_checklist_poll_seconds: 5
|
||||
@ -139,10 +161,6 @@ shutdown:
|
||||
drain_parallelism: 6
|
||||
scale_parallelism: 8
|
||||
ssh_parallelism: 8
|
||||
poweroff_enabled: false
|
||||
poweroff_delay_seconds: 25
|
||||
poweroff_local_host: false
|
||||
extra_poweroff_hosts: []
|
||||
ups:
|
||||
enabled: true
|
||||
provider: nut
|
||||
@ -170,6 +188,7 @@ metrics:
|
||||
path: /metrics
|
||||
state:
|
||||
dir: /var/lib/ananke
|
||||
reports_dir: /var/lib/ananke/reports
|
||||
run_history_path: /var/lib/ananke/runs.json
|
||||
lock_path: /var/lib/ananke/ananke.lock
|
||||
intent_path: /var/lib/ananke/intent.json
|
||||
|
||||
@ -114,6 +114,9 @@ startup:
|
||||
api_poll_seconds: 2
|
||||
shutdown_cooldown_seconds: 45
|
||||
minimum_battery_percent: 20
|
||||
require_node_inventory_reachability: true
|
||||
node_inventory_reachability_wait_seconds: 300
|
||||
node_inventory_reachability_poll_seconds: 5
|
||||
required_node_labels:
|
||||
titan-09:
|
||||
ananke.bstein.dev/harbor-bootstrap: "true"
|
||||
@ -144,6 +147,15 @@ startup:
|
||||
service_checklist_wait_seconds: 420
|
||||
service_checklist_poll_seconds: 5
|
||||
service_checklist_stability_seconds: 120
|
||||
service_checklist_auth:
|
||||
mode: keycloak_robotuser
|
||||
keycloak_base_url: https://sso.bstein.dev
|
||||
realm: atlas
|
||||
robot_username: robotuser
|
||||
admin_secret_namespace: sso
|
||||
admin_secret_name: keycloak-admin
|
||||
admin_secret_username_key: username
|
||||
admin_secret_password_key: password
|
||||
service_checklist:
|
||||
- name: gitea-api
|
||||
url: https://scm.bstein.dev/api/healthz
|
||||
@ -165,10 +177,20 @@ startup:
|
||||
accepted_statuses: [401]
|
||||
body_contains: unauthorized
|
||||
timeout_seconds: 12
|
||||
- name: longhorn-auth
|
||||
url: https://longhorn.bstein.dev/
|
||||
accepted_statuses: [200, 302]
|
||||
- name: longhorn-api-user-session
|
||||
url: https://longhorn.bstein.dev/v1
|
||||
accepted_statuses: [200]
|
||||
require_robot_auth: true
|
||||
follow_redirects: true
|
||||
final_url_contains: /v1
|
||||
final_url_not_contains: /oauth2/sign_in
|
||||
body_contains: '"id":"v1"'
|
||||
timeout_seconds: 12
|
||||
require_critical_service_endpoints: true
|
||||
critical_service_endpoint_wait_seconds: 420
|
||||
critical_service_endpoint_poll_seconds: 5
|
||||
critical_service_endpoints:
|
||||
- monitoring/victoria-metrics-single-server
|
||||
require_ingress_checklist: true
|
||||
ingress_checklist_wait_seconds: 420
|
||||
ingress_checklist_poll_seconds: 5
|
||||
@ -205,10 +227,6 @@ shutdown:
|
||||
drain_parallelism: 6
|
||||
scale_parallelism: 8
|
||||
ssh_parallelism: 8
|
||||
poweroff_enabled: false
|
||||
poweroff_delay_seconds: 25
|
||||
poweroff_local_host: false
|
||||
extra_poweroff_hosts: []
|
||||
ups:
|
||||
enabled: true
|
||||
provider: nut
|
||||
@ -236,6 +254,7 @@ metrics:
|
||||
path: /metrics
|
||||
state:
|
||||
dir: /var/lib/ananke
|
||||
reports_dir: /var/lib/ananke/reports
|
||||
run_history_path: /var/lib/ananke/runs.json
|
||||
lock_path: /var/lib/ananke/ananke.lock
|
||||
intent_path: /var/lib/ananke/intent.json
|
||||
|
||||
@ -114,6 +114,9 @@ startup:
|
||||
api_poll_seconds: 2
|
||||
shutdown_cooldown_seconds: 45
|
||||
minimum_battery_percent: 20
|
||||
require_node_inventory_reachability: true
|
||||
node_inventory_reachability_wait_seconds: 300
|
||||
node_inventory_reachability_poll_seconds: 5
|
||||
required_node_labels:
|
||||
titan-09:
|
||||
ananke.bstein.dev/harbor-bootstrap: "true"
|
||||
@ -144,6 +147,15 @@ startup:
|
||||
service_checklist_wait_seconds: 420
|
||||
service_checklist_poll_seconds: 5
|
||||
service_checklist_stability_seconds: 120
|
||||
service_checklist_auth:
|
||||
mode: keycloak_robotuser
|
||||
keycloak_base_url: https://sso.bstein.dev
|
||||
realm: atlas
|
||||
robot_username: robotuser
|
||||
admin_secret_namespace: sso
|
||||
admin_secret_name: keycloak-admin
|
||||
admin_secret_username_key: username
|
||||
admin_secret_password_key: password
|
||||
service_checklist:
|
||||
- name: gitea-api
|
||||
url: https://scm.bstein.dev/api/healthz
|
||||
@ -165,10 +177,20 @@ startup:
|
||||
accepted_statuses: [401]
|
||||
body_contains: unauthorized
|
||||
timeout_seconds: 12
|
||||
- name: longhorn-auth
|
||||
url: https://longhorn.bstein.dev/
|
||||
accepted_statuses: [200, 302]
|
||||
- name: longhorn-api-user-session
|
||||
url: https://longhorn.bstein.dev/v1
|
||||
accepted_statuses: [200]
|
||||
require_robot_auth: true
|
||||
follow_redirects: true
|
||||
final_url_contains: /v1
|
||||
final_url_not_contains: /oauth2/sign_in
|
||||
body_contains: '"id":"v1"'
|
||||
timeout_seconds: 12
|
||||
require_critical_service_endpoints: true
|
||||
critical_service_endpoint_wait_seconds: 420
|
||||
critical_service_endpoint_poll_seconds: 5
|
||||
critical_service_endpoints:
|
||||
- monitoring/victoria-metrics-single-server
|
||||
require_ingress_checklist: true
|
||||
ingress_checklist_wait_seconds: 420
|
||||
ingress_checklist_poll_seconds: 5
|
||||
@ -205,10 +227,6 @@ shutdown:
|
||||
drain_parallelism: 6
|
||||
scale_parallelism: 8
|
||||
ssh_parallelism: 8
|
||||
poweroff_enabled: false
|
||||
poweroff_delay_seconds: 25
|
||||
poweroff_local_host: false
|
||||
extra_poweroff_hosts: []
|
||||
ups:
|
||||
enabled: true
|
||||
provider: nut
|
||||
@ -236,6 +254,7 @@ metrics:
|
||||
path: /metrics
|
||||
state:
|
||||
dir: /var/lib/ananke
|
||||
reports_dir: /var/lib/ananke/reports
|
||||
run_history_path: /var/lib/ananke/runs.json
|
||||
lock_path: /var/lib/ananke/ananke.lock
|
||||
intent_path: /var/lib/ananke/intent.json
|
||||
|
||||
286
internal/cluster/orchestrator_service_auth.go
Normal file
286
internal/cluster/orchestrator_service_auth.go
Normal file
@ -0,0 +1,286 @@
|
||||
package cluster
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/cookiejar"
|
||||
neturl "net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"scm.bstein.dev/bstein/ananke/internal/config"
|
||||
)
|
||||
|
||||
// Wire shapes decoded from Keycloak and kubectl JSON output. Only the fields
// this file reads are declared; encoding/json ignores every other key.
type (
	// keycloakTokenResponse carries the access_token field of a token reply.
	keycloakTokenResponse struct {
		AccessToken string `json:"access_token"`
	}

	// keycloakUser carries the id field of one entry in a user lookup reply.
	keycloakUser struct {
		ID string `json:"id"`
	}

	// keycloakImpersonationResponse carries the redirect field of an
	// impersonation reply.
	keycloakImpersonationResponse struct {
		Redirect string `json:"redirect"`
	}

	// kubernetesSecret carries the data map of a Secret rendered as JSON.
	// Values are kept exactly as received; kubernetesSecretValue base64-decodes
	// them on access.
	kubernetesSecret struct {
		Data map[string]string `json:"data"`
	}
)
|
||||
|
||||
// checklistAuthHTTPClient runs one orchestration or CLI step.
|
||||
// Signature: (o *Orchestrator) checklistAuthHTTPClient(ctx context.Context, timeout time.Duration, insecureSkipTLS bool) (*http.Client, error).
|
||||
// Why: startup checklist checks that require real user behavior need an
|
||||
// authenticated robotuser browser-like session before probing service pages.
|
||||
func (o *Orchestrator) checklistAuthHTTPClient(ctx context.Context, timeout time.Duration, insecureSkipTLS bool) (*http.Client, error) {
|
||||
jar, err := cookiejar.New(nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("create cookie jar: %w", err)
|
||||
}
|
||||
transport := &http.Transport{}
|
||||
if insecureSkipTLS {
|
||||
transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
|
||||
}
|
||||
client := &http.Client{
|
||||
Timeout: timeout,
|
||||
Transport: transport,
|
||||
Jar: jar,
|
||||
}
|
||||
if err := o.authenticateRobotChecklistSession(ctx, client); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return client, nil
|
||||
}
|
||||
|
||||
// authenticateRobotChecklistSession runs one orchestration or CLI step.
|
||||
// Signature: (o *Orchestrator) authenticateRobotChecklistSession(ctx context.Context, client *http.Client) error.
|
||||
// Why: authenticated checklist probes must reflect what a human sees after
|
||||
// Keycloak login, not only pre-auth redirects.
|
||||
func (o *Orchestrator) authenticateRobotChecklistSession(ctx context.Context, client *http.Client) error {
|
||||
auth := o.cfg.Startup.ServiceChecklistAuth
|
||||
mode := strings.TrimSpace(auth.Mode)
|
||||
if mode == "" || mode == "none" {
|
||||
return fmt.Errorf("startup checklist auth mode is disabled")
|
||||
}
|
||||
if mode != "keycloak_robotuser" {
|
||||
return fmt.Errorf("unsupported startup checklist auth mode %q", mode)
|
||||
}
|
||||
|
||||
adminUser, adminPassword, err := o.keycloakAdminCredentials(ctx, auth)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
adminToken, err := o.keycloakAdminToken(ctx, client, auth, adminUser, adminPassword)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
robotUserID, err := o.keycloakRobotUserID(ctx, client, auth, adminToken)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
redirectURL, err := o.keycloakImpersonationRedirect(ctx, client, auth, adminToken, robotUserID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if strings.TrimSpace(redirectURL) == "" {
|
||||
redirectURL = keycloakBaseURL(auth) + "/realms/" + strings.TrimSpace(auth.Realm) + "/account/"
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, redirectURL, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("build robot redirect request: %w", err)
|
||||
}
|
||||
req.Header.Set("User-Agent", "ananke/startup-checklist")
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("initialize robot session redirect: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 1024))
|
||||
return nil
|
||||
}
|
||||
|
||||
// keycloakAdminCredentials runs one orchestration or CLI step.
|
||||
// Signature: (o *Orchestrator) keycloakAdminCredentials(ctx context.Context, auth config.ServiceChecklistAuthSettings) (string, string, error).
|
||||
// Why: robotuser impersonation uses a cluster-managed admin secret so startup
|
||||
// checks do not rely on interactive credentials.
|
||||
func (o *Orchestrator) keycloakAdminCredentials(ctx context.Context, auth config.ServiceChecklistAuthSettings) (string, string, error) {
|
||||
namespace := strings.TrimSpace(auth.AdminSecretNamespace)
|
||||
name := strings.TrimSpace(auth.AdminSecretName)
|
||||
userKey := strings.TrimSpace(auth.AdminSecretUsernameKey)
|
||||
passwordKey := strings.TrimSpace(auth.AdminSecretPasswordKey)
|
||||
|
||||
username, err := o.kubernetesSecretValue(ctx, namespace, name, userKey)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("read keycloak admin username from secret %s/%s: %w", namespace, name, err)
|
||||
}
|
||||
password, err := o.kubernetesSecretValue(ctx, namespace, name, passwordKey)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("read keycloak admin password from secret %s/%s: %w", namespace, name, err)
|
||||
}
|
||||
return username, password, nil
|
||||
}
|
||||
|
||||
// kubernetesSecretValue runs one orchestration or CLI step.
|
||||
// Signature: (o *Orchestrator) kubernetesSecretValue(ctx context.Context, namespace string, name string, key string) (string, error).
|
||||
// Why: checklist auth depends on secret-backed credentials and should decode
|
||||
// them directly from Kubernetes rather than shelling out to external tools.
|
||||
func (o *Orchestrator) kubernetesSecretValue(ctx context.Context, namespace string, name string, key string) (string, error) {
|
||||
out, err := o.kubectl(ctx, 25*time.Second, "-n", namespace, "get", "secret", name, "-o", "json")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("kubectl get secret: %w", err)
|
||||
}
|
||||
var doc kubernetesSecret
|
||||
if err := json.Unmarshal([]byte(out), &doc); err != nil {
|
||||
return "", fmt.Errorf("decode secret json: %w", err)
|
||||
}
|
||||
encoded, ok := doc.Data[key]
|
||||
if !ok {
|
||||
return "", fmt.Errorf("key %q not present in secret", key)
|
||||
}
|
||||
decoded, err := base64.StdEncoding.DecodeString(strings.TrimSpace(encoded))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("decode base64 secret value: %w", err)
|
||||
}
|
||||
value := strings.TrimSpace(string(decoded))
|
||||
if value == "" {
|
||||
return "", fmt.Errorf("decoded value is empty")
|
||||
}
|
||||
return value, nil
|
||||
}
|
||||
|
||||
// keycloakAdminToken runs one orchestration or CLI step.
|
||||
// Signature: (o *Orchestrator) keycloakAdminToken(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminUser string, adminPassword string) (string, error).
|
||||
// Why: admin API access is needed to impersonate robotuser for deterministic
|
||||
// user-journey checks across OIDC-gated services.
|
||||
func (o *Orchestrator) keycloakAdminToken(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminUser string, adminPassword string) (string, error) {
|
||||
form := neturl.Values{}
|
||||
form.Set("grant_type", "password")
|
||||
form.Set("client_id", "admin-cli")
|
||||
form.Set("username", adminUser)
|
||||
form.Set("password", adminPassword)
|
||||
|
||||
tokenURL := keycloakBaseURL(auth) + "/realms/master/protocol/openid-connect/token"
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, tokenURL, strings.NewReader(form.Encode()))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("build admin token request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
req.Header.Set("User-Agent", "ananke/startup-checklist")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("request admin token: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 64*1024))
|
||||
if resp.StatusCode/100 != 2 {
|
||||
return "", fmt.Errorf("admin token request failed status=%d body=%q", resp.StatusCode, compactHTTPBody(body))
|
||||
}
|
||||
|
||||
var payload keycloakTokenResponse
|
||||
if err := json.Unmarshal(body, &payload); err != nil {
|
||||
return "", fmt.Errorf("decode admin token response: %w", err)
|
||||
}
|
||||
token := strings.TrimSpace(payload.AccessToken)
|
||||
if token == "" {
|
||||
return "", fmt.Errorf("admin token response missing access_token")
|
||||
}
|
||||
return token, nil
|
||||
}
|
||||
|
||||
// keycloakRobotUserID runs one orchestration or CLI step.
|
||||
// Signature: (o *Orchestrator) keycloakRobotUserID(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string) (string, error).
|
||||
// Why: impersonation requires the concrete user id and should fail fast when
|
||||
// robotuser is missing from the realm.
|
||||
func (o *Orchestrator) keycloakRobotUserID(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string) (string, error) {
|
||||
base := keycloakBaseURL(auth)
|
||||
realm := strings.TrimSpace(auth.Realm)
|
||||
username := strings.TrimSpace(auth.RobotUsername)
|
||||
query := neturl.Values{}
|
||||
query.Set("username", username)
|
||||
query.Set("exact", "true")
|
||||
usersURL := base + "/admin/realms/" + realm + "/users?" + query.Encode()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, usersURL, nil)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("build robot user lookup request: %w", err)
|
||||
}
|
||||
req.Header.Set("Authorization", "Bearer "+adminToken)
|
||||
req.Header.Set("User-Agent", "ananke/startup-checklist")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("lookup robot user: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 64*1024))
|
||||
if resp.StatusCode/100 != 2 {
|
||||
return "", fmt.Errorf("robot user lookup failed status=%d body=%q", resp.StatusCode, compactHTTPBody(body))
|
||||
}
|
||||
|
||||
var users []keycloakUser
|
||||
if err := json.Unmarshal(body, &users); err != nil {
|
||||
return "", fmt.Errorf("decode robot user lookup response: %w", err)
|
||||
}
|
||||
if len(users) == 0 || strings.TrimSpace(users[0].ID) == "" {
|
||||
return "", fmt.Errorf("robot user %q not found in realm %q", username, realm)
|
||||
}
|
||||
return strings.TrimSpace(users[0].ID), nil
|
||||
}
|
||||
|
||||
// keycloakImpersonationRedirect runs one orchestration or CLI step.
|
||||
// Signature: (o *Orchestrator) keycloakImpersonationRedirect(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string, robotUserID string) (string, error).
|
||||
// Why: opening a real impersonated browser session guarantees checks evaluate
|
||||
// post-login app behavior instead of only auth-gateway redirects.
|
||||
func (o *Orchestrator) keycloakImpersonationRedirect(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string, robotUserID string) (string, error) {
|
||||
base := keycloakBaseURL(auth)
|
||||
realm := strings.TrimSpace(auth.Realm)
|
||||
impersonateURL := base + "/admin/realms/" + realm + "/users/" + strings.TrimSpace(robotUserID) + "/impersonation"
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, impersonateURL, http.NoBody)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("build robot impersonation request: %w", err)
|
||||
}
|
||||
req.Header.Set("Authorization", "Bearer "+adminToken)
|
||||
req.Header.Set("User-Agent", "ananke/startup-checklist")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("request robot impersonation: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 64*1024))
|
||||
if resp.StatusCode/100 != 2 {
|
||||
return "", fmt.Errorf("robot impersonation failed status=%d body=%q", resp.StatusCode, compactHTTPBody(body))
|
||||
}
|
||||
|
||||
var payload keycloakImpersonationResponse
|
||||
if err := json.Unmarshal(body, &payload); err != nil {
|
||||
return "", fmt.Errorf("decode robot impersonation response: %w", err)
|
||||
}
|
||||
return strings.TrimSpace(payload.Redirect), nil
|
||||
}
|
||||
|
||||
// keycloakBaseURL runs one orchestration or CLI step.
|
||||
// Signature: keycloakBaseURL(auth config.ServiceChecklistAuthSettings) string.
|
||||
// Why: centralizing URL normalization keeps auth request construction stable.
|
||||
func keycloakBaseURL(auth config.ServiceChecklistAuthSettings) string {
|
||||
return strings.TrimRight(strings.TrimSpace(auth.KeycloakBaseURL), "/")
|
||||
}
|
||||
|
||||
// compactHTTPBody collapses every run of whitespace in raw to a single space
// and drops leading and trailing whitespace; blank input yields "".
// Signature: compactHTTPBody(raw []byte) string.
// Why: checklist auth errors should include a readable body summary without
// leaking multi-line payload noise into orchestrator logs.
func compactHTTPBody(raw []byte) string {
	// strings.Fields already ignores leading/trailing whitespace and returns no
	// words for blank input, so joining its result covers every case.
	words := strings.Fields(string(raw))
	return strings.Join(words, " ")
}
|
||||
@ -184,6 +184,16 @@ func (o *Orchestrator) serviceCheckReady(ctx context.Context, check config.Servi
|
||||
return false, fmt.Sprintf("location header contained forbidden marker %q", locationNotContains)
|
||||
}
|
||||
|
||||
finalURLContains := strings.TrimSpace(check.FinalURLContains)
|
||||
if finalURLContains != "" && !checklistContains(result.FinalURL, finalURLContains) {
|
||||
return false, fmt.Sprintf("final url missing expected marker %q", finalURLContains)
|
||||
}
|
||||
|
||||
finalURLNotContains := strings.TrimSpace(check.FinalURLNotContains)
|
||||
if finalURLNotContains != "" && checklistContains(result.FinalURL, finalURLNotContains) {
|
||||
return false, fmt.Sprintf("final url contained forbidden marker %q", finalURLNotContains)
|
||||
}
|
||||
|
||||
bodyContains := strings.TrimSpace(check.BodyContains)
|
||||
if bodyContains != "" && !checklistContains(result.Body, bodyContains) {
|
||||
return false, fmt.Sprintf("response missing expected marker %q", bodyContains)
|
||||
@ -201,6 +211,7 @@ type checklistHTTPProbeResult struct {
|
||||
Status int
|
||||
Body string
|
||||
Location string
|
||||
FinalURL string
|
||||
}
|
||||
|
||||
// httpChecklistProbeResult runs one orchestration or CLI step.
|
||||
@ -209,13 +220,14 @@ type checklistHTTPProbeResult struct {
|
||||
// addition to status/body so startup can validate real user-facing behavior.
|
||||
func (o *Orchestrator) httpChecklistProbeResult(ctx context.Context, check config.ServiceChecklistCheck) (checklistHTTPProbeResult, error) {
|
||||
result := checklistHTTPProbeResult{}
|
||||
status, body, location, err := o.httpChecklistProbeWithLocation(ctx, check)
|
||||
status, body, location, finalURL, err := o.httpChecklistProbeWithLocation(ctx, check)
|
||||
if err != nil {
|
||||
return result, err
|
||||
}
|
||||
result.Status = status
|
||||
result.Body = body
|
||||
result.Location = location
|
||||
result.FinalURL = finalURL
|
||||
return result, nil
|
||||
}
|
||||
|
||||
@ -223,50 +235,66 @@ func (o *Orchestrator) httpChecklistProbeResult(ctx context.Context, check confi
|
||||
// Signature: (o *Orchestrator) httpChecklistProbe(ctx context.Context, check config.ServiceChecklistCheck) (int, string, error).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (o *Orchestrator) httpChecklistProbe(ctx context.Context, check config.ServiceChecklistCheck) (int, string, error) {
|
||||
status, body, _, err := o.httpChecklistProbeWithLocation(ctx, check)
|
||||
status, body, _, _, err := o.httpChecklistProbeWithLocation(ctx, check)
|
||||
return status, body, err
|
||||
}
|
||||
|
||||
// httpChecklistProbeWithLocation runs one orchestration or CLI step.
|
||||
// Signature: (o *Orchestrator) httpChecklistProbeWithLocation(ctx context.Context, check config.ServiceChecklistCheck) (int, string, string, error).
|
||||
// Signature: (o *Orchestrator) httpChecklistProbeWithLocation(ctx context.Context, check config.ServiceChecklistCheck) (int, string, string, string, error).
|
||||
// Why: redirects and auth gates require location-header assertions to prevent
|
||||
// startup false-positives on partially healthy protected services.
|
||||
func (o *Orchestrator) httpChecklistProbeWithLocation(ctx context.Context, check config.ServiceChecklistCheck) (int, string, string, error) {
|
||||
func (o *Orchestrator) httpChecklistProbeWithLocation(ctx context.Context, check config.ServiceChecklistCheck) (int, string, string, string, error) {
|
||||
timeout := time.Duration(check.TimeoutSeconds) * time.Second
|
||||
if timeout <= 0 {
|
||||
timeout = 12 * time.Second
|
||||
}
|
||||
|
||||
followRedirects := check.FollowRedirects || check.RequireRobotAuth
|
||||
var client *http.Client
|
||||
if check.RequireRobotAuth {
|
||||
authClient, authErr := o.checklistAuthHTTPClient(ctx, timeout, check.InsecureSkipTLS)
|
||||
if authErr != nil {
|
||||
return 0, "", "", "", fmt.Errorf("initialize robotuser checklist session: %w", authErr)
|
||||
}
|
||||
client = authClient
|
||||
} else {
|
||||
transport := &http.Transport{}
|
||||
if check.InsecureSkipTLS {
|
||||
transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
|
||||
}
|
||||
client := &http.Client{
|
||||
client = &http.Client{
|
||||
Timeout: timeout,
|
||||
Transport: transport,
|
||||
CheckRedirect: func(_ *http.Request, _ []*http.Request) error {
|
||||
}
|
||||
}
|
||||
if !followRedirects {
|
||||
client.CheckRedirect = func(_ *http.Request, _ []*http.Request) error {
|
||||
return http.ErrUseLastResponse
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, strings.TrimSpace(check.URL), nil)
|
||||
if err != nil {
|
||||
return 0, "", "", fmt.Errorf("build request: %w", err)
|
||||
return 0, "", "", "", fmt.Errorf("build request: %w", err)
|
||||
}
|
||||
req.Header.Set("User-Agent", "ananke/startup-checklist")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return 0, "", "", fmt.Errorf("request failed: %w", err)
|
||||
return 0, "", "", "", fmt.Errorf("request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, readErr := io.ReadAll(io.LimitReader(resp.Body, 64*1024))
|
||||
if readErr != nil {
|
||||
return resp.StatusCode, "", "", fmt.Errorf("read response body: %w", readErr)
|
||||
return resp.StatusCode, "", "", "", fmt.Errorf("read response body: %w", readErr)
|
||||
}
|
||||
|
||||
return resp.StatusCode, string(body), strings.TrimSpace(resp.Header.Get("Location")), nil
|
||||
finalURL := strings.TrimSpace(req.URL.String())
|
||||
if resp.Request != nil && resp.Request.URL != nil {
|
||||
finalURL = strings.TrimSpace(resp.Request.URL.String())
|
||||
}
|
||||
return resp.StatusCode, string(body), strings.TrimSpace(resp.Header.Get("Location")), finalURL, nil
|
||||
}
|
||||
|
||||
// checklistContains runs one orchestration or CLI step.
|
||||
|
||||
@ -329,6 +329,80 @@ func TestServiceCheckReadyRejectsMissingLocationMarker(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestServiceCheckReadyRequiresFinalURLContains runs one orchestration or CLI step.
|
||||
// Signature: TestServiceCheckReadyRequiresFinalURLContains(t *testing.T).
|
||||
// Why: authenticated user-journey checks depend on final URL assertions after
|
||||
// redirects complete, not only on initial response status.
|
||||
func TestServiceCheckReadyRequiresFinalURLContains(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/" {
|
||||
http.Redirect(w, r, "/app/home", http.StatusFound)
|
||||
return
|
||||
}
|
||||
if r.URL.Path == "/app/home" {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte("OpenSearch Dashboards"))
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
orch := &Orchestrator{
|
||||
log: log.New(os.Stdout, "", 0),
|
||||
}
|
||||
ok, detail := orch.serviceCheckReady(context.Background(), config.ServiceChecklistCheck{
|
||||
Name: "logging-ui-user-session",
|
||||
URL: srv.URL,
|
||||
AcceptedStatuses: []int{200},
|
||||
FollowRedirects: true,
|
||||
FinalURLContains: "/app/home",
|
||||
BodyContains: "OpenSearch Dashboards",
|
||||
TimeoutSeconds: 5,
|
||||
})
|
||||
if !ok {
|
||||
t.Fatalf("expected final-url-aware service check to pass, detail=%s", detail)
|
||||
}
|
||||
}
|
||||
|
||||
// TestServiceCheckReadyRejectsForbiddenFinalURLMarker runs one orchestration or CLI step.
|
||||
// Signature: TestServiceCheckReadyRejectsForbiddenFinalURLMarker(t *testing.T).
|
||||
// Why: user-session checks should fail when final URL indicates auth/login loop
|
||||
// instead of the expected post-login app route.
|
||||
func TestServiceCheckReadyRejectsForbiddenFinalURLMarker(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/" {
|
||||
http.Redirect(w, r, "/oauth2/sign_in", http.StatusFound)
|
||||
return
|
||||
}
|
||||
if r.URL.Path == "/oauth2/sign_in" {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte("sign in"))
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
orch := &Orchestrator{
|
||||
log: log.New(os.Stdout, "", 0),
|
||||
}
|
||||
ok, detail := orch.serviceCheckReady(context.Background(), config.ServiceChecklistCheck{
|
||||
Name: "logging-ui-user-session",
|
||||
URL: srv.URL,
|
||||
AcceptedStatuses: []int{200},
|
||||
FollowRedirects: true,
|
||||
FinalURLNotContains: "/oauth2/sign_in",
|
||||
TimeoutSeconds: 5,
|
||||
})
|
||||
if ok {
|
||||
t.Fatalf("expected forbidden final-url marker check to fail")
|
||||
}
|
||||
if !strings.Contains(detail, "final url contained forbidden marker") {
|
||||
t.Fatalf("expected final-url forbidden marker detail, got %q", detail)
|
||||
}
|
||||
}
|
||||
|
||||
// TestChecklistFailureHostFromIngressDetail runs one orchestration or CLI step.
|
||||
// Signature: TestChecklistFailureHostFromIngressDetail(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
@ -385,59 +459,3 @@ func TestChecklistFailureHostUnknown(t *testing.T) {
|
||||
t.Fatalf("expected empty host for unknown check, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStuckVaultInitReasonDetectsHungInit runs one orchestration or CLI step.
|
||||
// Signature: TestStuckVaultInitReasonDetectsHungInit(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestStuckVaultInitReasonDetectsHungInit(t *testing.T) {
|
||||
var pod podResource
|
||||
pod.Status.Phase = "Pending"
|
||||
pod.Metadata.Annotations = map[string]string{
|
||||
"vault.hashicorp.com/agent-inject": "true",
|
||||
}
|
||||
pod.Status.InitContainerStatuses = []podContainerStatus{
|
||||
{
|
||||
Name: "vault-agent-init",
|
||||
State: podContainerState{
|
||||
Running: &podContainerRunningState{
|
||||
StartedAt: time.Now().Add(-10 * time.Minute),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
reason := stuckVaultInitReason(pod, 3*time.Minute)
|
||||
if reason != "VaultInitStuck" {
|
||||
t.Fatalf("expected VaultInitStuck reason, got %q", reason)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStuckVaultInitReasonIgnoresFreshOrNonVaultPods runs one orchestration or CLI step.
|
||||
// Signature: TestStuckVaultInitReasonIgnoresFreshOrNonVaultPods(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestStuckVaultInitReasonIgnoresFreshOrNonVaultPods(t *testing.T) {
|
||||
var pod podResource
|
||||
pod.Status.Phase = "Pending"
|
||||
pod.Metadata.Annotations = map[string]string{
|
||||
"vault.hashicorp.com/agent-inject": "true",
|
||||
}
|
||||
pod.Status.InitContainerStatuses = []podContainerStatus{
|
||||
{
|
||||
Name: "vault-agent-init",
|
||||
State: podContainerState{
|
||||
Running: &podContainerRunningState{
|
||||
StartedAt: time.Now().Add(-30 * time.Second),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
if reason := stuckVaultInitReason(pod, 3*time.Minute); reason != "" {
|
||||
t.Fatalf("expected no reason for fresh init, got %q", reason)
|
||||
}
|
||||
|
||||
pod.Metadata.Annotations["vault.hashicorp.com/agent-inject"] = "false"
|
||||
pod.Status.InitContainerStatuses[0].State.Running.StartedAt = time.Now().Add(-10 * time.Minute)
|
||||
if reason := stuckVaultInitReason(pod, 3*time.Minute); reason != "" {
|
||||
t.Fatalf("expected no reason for non-vault pod, got %q", reason)
|
||||
}
|
||||
}
|
||||
|
||||
62
internal/cluster/orchestrator_vault_test.go
Normal file
62
internal/cluster/orchestrator_vault_test.go
Normal file
@ -0,0 +1,62 @@
|
||||
package cluster
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestStuckVaultInitReasonDetectsHungInit runs one orchestration or CLI step.
|
||||
// Signature: TestStuckVaultInitReasonDetectsHungInit(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestStuckVaultInitReasonDetectsHungInit(t *testing.T) {
|
||||
var pod podResource
|
||||
pod.Status.Phase = "Pending"
|
||||
pod.Metadata.Annotations = map[string]string{
|
||||
"vault.hashicorp.com/agent-inject": "true",
|
||||
}
|
||||
pod.Status.InitContainerStatuses = []podContainerStatus{
|
||||
{
|
||||
Name: "vault-agent-init",
|
||||
State: podContainerState{
|
||||
Running: &podContainerRunningState{
|
||||
StartedAt: time.Now().Add(-10 * time.Minute),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
reason := stuckVaultInitReason(pod, 3*time.Minute)
|
||||
if reason != "VaultInitStuck" {
|
||||
t.Fatalf("expected VaultInitStuck reason, got %q", reason)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStuckVaultInitReasonIgnoresFreshOrNonVaultPods runs one orchestration or CLI step.
|
||||
// Signature: TestStuckVaultInitReasonIgnoresFreshOrNonVaultPods(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestStuckVaultInitReasonIgnoresFreshOrNonVaultPods(t *testing.T) {
|
||||
var pod podResource
|
||||
pod.Status.Phase = "Pending"
|
||||
pod.Metadata.Annotations = map[string]string{
|
||||
"vault.hashicorp.com/agent-inject": "true",
|
||||
}
|
||||
pod.Status.InitContainerStatuses = []podContainerStatus{
|
||||
{
|
||||
Name: "vault-agent-init",
|
||||
State: podContainerState{
|
||||
Running: &podContainerRunningState{
|
||||
StartedAt: time.Now().Add(-30 * time.Second),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
if reason := stuckVaultInitReason(pod, 3*time.Minute); reason != "" {
|
||||
t.Fatalf("expected no reason for fresh init, got %q", reason)
|
||||
}
|
||||
|
||||
pod.Metadata.Annotations["vault.hashicorp.com/agent-inject"] = "false"
|
||||
pod.Status.InitContainerStatuses[0].State.Running.StartedAt = time.Now().Add(-10 * time.Minute)
|
||||
if reason := stuckVaultInitReason(pod, 3*time.Minute); reason != "" {
|
||||
t.Fatalf("expected no reason for non-vault pod, got %q", reason)
|
||||
}
|
||||
}
|
||||
79
internal/cluster/testing_hooks_auth.go
Normal file
79
internal/cluster/testing_hooks_auth.go
Normal file
@ -0,0 +1,79 @@
|
||||
package cluster
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"scm.bstein.dev/bstein/ananke/internal/config"
|
||||
)
|
||||
|
||||
// TestHookChecklistAuthHTTPClient is a test hook that delegates to o.checklistAuthHTTPClient with the same arguments and returns its results unchanged.
|
||||
// Signature: (o *Orchestrator) TestHookChecklistAuthHTTPClient(ctx context.Context, timeout time.Duration, insecureSkipTLS bool) (*http.Client, error).
|
||||
// Why: exposes checklist auth client/session bootstrap internals to top-level tests.
|
||||
func (o *Orchestrator) TestHookChecklistAuthHTTPClient(ctx context.Context, timeout time.Duration, insecureSkipTLS bool) (*http.Client, error) {
|
||||
return o.checklistAuthHTTPClient(ctx, timeout, insecureSkipTLS)
|
||||
}
|
||||
|
||||
// TestHookAuthenticateRobotChecklistSession is a test hook that delegates to o.authenticateRobotChecklistSession with the same arguments and returns its error unchanged.
|
||||
// Signature: (o *Orchestrator) TestHookAuthenticateRobotChecklistSession(ctx context.Context, client *http.Client) error.
|
||||
// Why: exposes robotuser auth session internals to top-level tests.
|
||||
func (o *Orchestrator) TestHookAuthenticateRobotChecklistSession(ctx context.Context, client *http.Client) error {
|
||||
return o.authenticateRobotChecklistSession(ctx, client)
|
||||
}
|
||||
|
||||
// TestHookKubernetesSecretValue is a test hook that delegates to o.kubernetesSecretValue with the same namespace, name, and key and returns its results unchanged.
|
||||
// Signature: (o *Orchestrator) TestHookKubernetesSecretValue(ctx context.Context, namespace string, name string, key string) (string, error).
|
||||
// Why: exposes Kubernetes secret decode internals to top-level tests.
|
||||
func (o *Orchestrator) TestHookKubernetesSecretValue(ctx context.Context, namespace string, name string, key string) (string, error) {
|
||||
return o.kubernetesSecretValue(ctx, namespace, name, key)
|
||||
}
|
||||
|
||||
// TestHookKeycloakAdminCredentials is a test hook that delegates to o.keycloakAdminCredentials with the same arguments and returns its results unchanged.
|
||||
// Signature: (o *Orchestrator) TestHookKeycloakAdminCredentials(ctx context.Context, auth config.ServiceChecklistAuthSettings) (string, string, error).
|
||||
// Why: exposes secret-backed credential resolution internals to top-level tests.
|
||||
func (o *Orchestrator) TestHookKeycloakAdminCredentials(ctx context.Context, auth config.ServiceChecklistAuthSettings) (string, string, error) {
|
||||
return o.keycloakAdminCredentials(ctx, auth)
|
||||
}
|
||||
|
||||
// TestHookKeycloakAdminToken is a test hook that delegates to o.keycloakAdminToken with the same arguments and returns its results unchanged.
|
||||
// Signature: (o *Orchestrator) TestHookKeycloakAdminToken(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminUser string, adminPassword string) (string, error).
|
||||
// Why: exposes Keycloak admin token acquisition internals to top-level tests.
|
||||
func (o *Orchestrator) TestHookKeycloakAdminToken(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminUser string, adminPassword string) (string, error) {
|
||||
return o.keycloakAdminToken(ctx, client, auth, adminUser, adminPassword)
|
||||
}
|
||||
|
||||
// TestHookKeycloakRobotUserID is a test hook that delegates to o.keycloakRobotUserID with the same arguments and returns its results unchanged.
|
||||
// Signature: (o *Orchestrator) TestHookKeycloakRobotUserID(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string) (string, error).
|
||||
// Why: exposes Keycloak robot-user lookup internals to top-level tests.
|
||||
func (o *Orchestrator) TestHookKeycloakRobotUserID(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string) (string, error) {
|
||||
return o.keycloakRobotUserID(ctx, client, auth, adminToken)
|
||||
}
|
||||
|
||||
// TestHookKeycloakImpersonationRedirect is a test hook that delegates to o.keycloakImpersonationRedirect with the same arguments and returns its results unchanged.
|
||||
// Signature: (o *Orchestrator) TestHookKeycloakImpersonationRedirect(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string, robotUserID string) (string, error).
|
||||
// Why: exposes Keycloak impersonation internals to top-level tests.
|
||||
func (o *Orchestrator) TestHookKeycloakImpersonationRedirect(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string, robotUserID string) (string, error) {
|
||||
return o.keycloakImpersonationRedirect(ctx, client, auth, adminToken, robotUserID)
|
||||
}
|
||||
|
||||
// TestHookHTTPChecklistProbeWithLocation is a test hook that delegates to o.httpChecklistProbeWithLocation for the given check and returns its five results unchanged.
|
||||
// Signature: (o *Orchestrator) TestHookHTTPChecklistProbeWithLocation(ctx context.Context, check config.ServiceChecklistCheck) (int, string, string, string, error).
|
||||
// Why: exposes redirect-aware checklist probe internals to top-level tests.
|
||||
func (o *Orchestrator) TestHookHTTPChecklistProbeWithLocation(ctx context.Context, check config.ServiceChecklistCheck) (int, string, string, string, error) {
|
||||
return o.httpChecklistProbeWithLocation(ctx, check)
|
||||
}
|
||||
|
||||
// TestHookKeycloakBaseURL is a test hook that returns keycloakBaseURL(auth) unchanged.
|
||||
// Signature: TestHookKeycloakBaseURL(auth config.ServiceChecklistAuthSettings) string.
|
||||
// Why: exposes base URL normalizer helper to top-level tests.
|
||||
func TestHookKeycloakBaseURL(auth config.ServiceChecklistAuthSettings) string {
|
||||
return keycloakBaseURL(auth)
|
||||
}
|
||||
|
||||
// TestHookCompactHTTPBody is a test hook that returns compactHTTPBody(raw) unchanged.
|
||||
// Signature: TestHookCompactHTTPBody(raw []byte) string.
|
||||
// Why: exposes compact HTTP body helper to top-level tests.
|
||||
func TestHookCompactHTTPBody(raw []byte) string {
|
||||
return compactHTTPBody(raw)
|
||||
}
|
||||
@ -97,6 +97,30 @@ func (c *Config) applyDefaults() {
|
||||
if c.Startup.ServiceChecklistStabilitySec < 0 {
|
||||
c.Startup.ServiceChecklistStabilitySec = 0
|
||||
}
|
||||
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.Mode) == "" {
|
||||
c.Startup.ServiceChecklistAuth.Mode = "keycloak_robotuser"
|
||||
}
|
||||
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.KeycloakBaseURL) == "" {
|
||||
c.Startup.ServiceChecklistAuth.KeycloakBaseURL = "https://sso.bstein.dev"
|
||||
}
|
||||
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.Realm) == "" {
|
||||
c.Startup.ServiceChecklistAuth.Realm = "atlas"
|
||||
}
|
||||
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.RobotUsername) == "" {
|
||||
c.Startup.ServiceChecklistAuth.RobotUsername = "robotuser"
|
||||
}
|
||||
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretNamespace) == "" {
|
||||
c.Startup.ServiceChecklistAuth.AdminSecretNamespace = "sso"
|
||||
}
|
||||
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretName) == "" {
|
||||
c.Startup.ServiceChecklistAuth.AdminSecretName = "keycloak-admin"
|
||||
}
|
||||
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretUsernameKey) == "" {
|
||||
c.Startup.ServiceChecklistAuth.AdminSecretUsernameKey = "username"
|
||||
}
|
||||
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretPasswordKey) == "" {
|
||||
c.Startup.ServiceChecklistAuth.AdminSecretPasswordKey = "password"
|
||||
}
|
||||
c.Startup.ServiceChecklist = mergeServiceChecklistDefaults(c.Startup.ServiceChecklist, defaultServiceChecklist())
|
||||
for i := range c.Startup.ServiceChecklist {
|
||||
if c.Startup.ServiceChecklist[i].TimeoutSeconds <= 0 {
|
||||
|
||||
@ -207,6 +207,58 @@ func TestValidateRejectsBadServiceChecklistURL(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestValidateRejectsUnknownServiceChecklistAuthMode runs one orchestration or CLI step.
|
||||
// Signature: TestValidateRejectsUnknownServiceChecklistAuthMode(t *testing.T).
|
||||
// Why: authenticated user-journey checklist gates should fail fast when auth
|
||||
// mode is invalid to avoid silent false-positive startup passes.
|
||||
func TestValidateRejectsUnknownServiceChecklistAuthMode(t *testing.T) {
|
||||
cfg := defaults()
|
||||
cfg.Startup.ServiceChecklistAuth.Mode = "bad-mode"
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected validation error for invalid service checklist auth mode")
|
||||
}
|
||||
}
|
||||
|
||||
// TestValidateRejectsFinalURLMarkersWithoutRedirectFollow runs one orchestration or CLI step.
|
||||
// Signature: TestValidateRejectsFinalURLMarkersWithoutRedirectFollow(t *testing.T).
|
||||
// Why: final-url assertions only make sense when redirect following is enabled.
|
||||
func TestValidateRejectsFinalURLMarkersWithoutRedirectFollow(t *testing.T) {
|
||||
cfg := defaults()
|
||||
cfg.Startup.ServiceChecklist = []ServiceChecklistCheck{
|
||||
{
|
||||
Name: "bad-final-url",
|
||||
URL: "https://logs.bstein.dev/",
|
||||
AcceptedStatuses: []int{200},
|
||||
FinalURLContains: "/app/home",
|
||||
TimeoutSeconds: 12,
|
||||
},
|
||||
}
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected validation error for final_url_* markers without redirect follow")
|
||||
}
|
||||
}
|
||||
|
||||
// TestValidateRejectsRobotAuthCheckWhenAuthModeDisabled runs one orchestration or CLI step.
|
||||
// Signature: TestValidateRejectsRobotAuthCheckWhenAuthModeDisabled(t *testing.T).
|
||||
// Why: robot-auth checks must be blocked when checklist auth mode is disabled.
|
||||
func TestValidateRejectsRobotAuthCheckWhenAuthModeDisabled(t *testing.T) {
|
||||
cfg := defaults()
|
||||
cfg.Startup.ServiceChecklistAuth.Mode = "none"
|
||||
cfg.Startup.ServiceChecklist = []ServiceChecklistCheck{
|
||||
{
|
||||
Name: "logs-ui",
|
||||
URL: "https://logs.bstein.dev/",
|
||||
AcceptedStatuses: []int{200},
|
||||
RequireRobotAuth: true,
|
||||
FollowRedirects: true,
|
||||
TimeoutSeconds: 12,
|
||||
},
|
||||
}
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected validation error for robot-auth checklist check when auth mode is none")
|
||||
}
|
||||
}
|
||||
|
||||
// TestValidateRejectsBadIgnoreFluxKustomizationFormat runs one orchestration or CLI step.
|
||||
// Signature: TestValidateRejectsBadIgnoreFluxKustomizationFormat(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
@ -291,8 +343,8 @@ func TestApplyDefaultsMergesServiceChecklistDefaults(t *testing.T) {
|
||||
if _, ok := names["custom-smoke"]; !ok {
|
||||
t.Fatalf("expected custom checklist entry to be preserved")
|
||||
}
|
||||
if _, ok := names["logging-oidc-redirect"]; !ok {
|
||||
t.Fatalf("expected default logging redirect check to be merged in")
|
||||
if _, ok := names["logging-ui-user-session"]; !ok {
|
||||
t.Fatalf("expected default logging user-session check to be merged in")
|
||||
}
|
||||
if _, ok := names["vaultwarden-ui"]; !ok {
|
||||
t.Fatalf("expected default vaultwarden check to be merged in")
|
||||
|
||||
@ -81,6 +81,16 @@ func defaults() Config {
|
||||
ServiceChecklistWaitSeconds: 420,
|
||||
ServiceChecklistPollSeconds: 5,
|
||||
ServiceChecklistStabilitySec: 120,
|
||||
ServiceChecklistAuth: ServiceChecklistAuthSettings{
|
||||
Mode: "keycloak_robotuser",
|
||||
KeycloakBaseURL: "https://sso.bstein.dev",
|
||||
Realm: "atlas",
|
||||
RobotUsername: "robotuser",
|
||||
AdminSecretNamespace: "sso",
|
||||
AdminSecretName: "keycloak-admin",
|
||||
AdminSecretUsernameKey: "username",
|
||||
AdminSecretPasswordKey: "password",
|
||||
},
|
||||
ServiceChecklist: defaultServiceChecklist(),
|
||||
RequireCriticalServiceEndpoints: true,
|
||||
CriticalServiceEndpointWaitSec: 420,
|
||||
|
||||
@ -44,10 +44,12 @@ func defaultServiceChecklist() []ServiceChecklistCheck {
|
||||
TimeoutSeconds: 12,
|
||||
},
|
||||
{
|
||||
Name: "auth-gateway-redirect",
|
||||
Name: "auth-gateway-user-session",
|
||||
URL: "https://auth.bstein.dev/",
|
||||
AcceptedStatuses: []int{302},
|
||||
LocationContains: "https://sso.bstein.dev/realms/atlas/",
|
||||
AcceptedStatuses: []int{200},
|
||||
RequireRobotAuth: true,
|
||||
FollowRedirects: true,
|
||||
BodyContains: "Authenticated",
|
||||
TimeoutSeconds: 12,
|
||||
},
|
||||
{
|
||||
@ -121,17 +123,32 @@ func defaultServiceChecklist() []ServiceChecklistCheck {
|
||||
TimeoutSeconds: 12,
|
||||
},
|
||||
{
|
||||
Name: "logging-oidc-redirect",
|
||||
Name: "logging-ui-user-session",
|
||||
URL: "https://logs.bstein.dev/",
|
||||
AcceptedStatuses: []int{302},
|
||||
LocationContains: "client_id=logs",
|
||||
AcceptedStatuses: []int{200},
|
||||
RequireRobotAuth: true,
|
||||
FollowRedirects: true,
|
||||
FinalURLNotContains: "/protocol/openid-connect/auth",
|
||||
BodyContains: "OpenSearch Dashboards",
|
||||
TimeoutSeconds: 12,
|
||||
},
|
||||
{
|
||||
Name: "longhorn-oidc-redirect",
|
||||
URL: "https://longhorn.bstein.dev/",
|
||||
AcceptedStatuses: []int{302},
|
||||
LocationContains: "https://sso.bstein.dev/realms/atlas/",
|
||||
Name: "logging-api-user-session",
|
||||
URL: "https://logs.bstein.dev/api/status",
|
||||
AcceptedStatuses: []int{200},
|
||||
RequireRobotAuth: true,
|
||||
FollowRedirects: true,
|
||||
BodyContains: "\"state\":\"green\"",
|
||||
TimeoutSeconds: 12,
|
||||
},
|
||||
{
|
||||
Name: "longhorn-api-user-session",
|
||||
URL: "https://longhorn.bstein.dev/v1",
|
||||
AcceptedStatuses: []int{200},
|
||||
RequireRobotAuth: true,
|
||||
FollowRedirects: true,
|
||||
FinalURLNotContains: "/protocol/openid-connect/auth",
|
||||
BodyContains: "\"id\":\"v1\"",
|
||||
TimeoutSeconds: 12,
|
||||
},
|
||||
{
|
||||
@ -190,17 +207,24 @@ func defaultServiceChecklist() []ServiceChecklistCheck {
|
||||
TimeoutSeconds: 12,
|
||||
},
|
||||
{
|
||||
Name: "sentinel-oidc-redirect",
|
||||
URL: "https://sentinel.bstein.dev/",
|
||||
AcceptedStatuses: []int{302},
|
||||
LocationContains: "client_id=metis",
|
||||
Name: "sentinel-user-session",
|
||||
URL: "https://sentinel.bstein.dev/healthz",
|
||||
AcceptedStatuses: []int{200},
|
||||
RequireRobotAuth: true,
|
||||
FollowRedirects: true,
|
||||
FinalURLNotContains: "/protocol/openid-connect/auth",
|
||||
BodyContains: "ok",
|
||||
TimeoutSeconds: 12,
|
||||
},
|
||||
{
|
||||
Name: "keycloak-admin-redirect",
|
||||
URL: "https://sso.bstein.dev/",
|
||||
AcceptedStatuses: []int{302},
|
||||
LocationContains: "https://sso.bstein.dev/admin/",
|
||||
Name: "keycloak-admin-user-session",
|
||||
URL: "https://sso.bstein.dev/admin/",
|
||||
AcceptedStatuses: []int{200},
|
||||
RequireRobotAuth: true,
|
||||
FollowRedirects: true,
|
||||
FinalURLContains: "/admin/master/console/",
|
||||
FinalURLNotContains: "/login-actions/authenticate",
|
||||
BodyContains: "Keycloak Administration Console",
|
||||
TimeoutSeconds: 12,
|
||||
},
|
||||
{
|
||||
@ -253,23 +277,23 @@ func mergeServiceChecklistDefaults(existing, defaults []ServiceChecklistCheck) [
|
||||
return out
|
||||
}
|
||||
|
||||
byName := map[string]struct{}{}
|
||||
for _, check := range existing {
|
||||
name := strings.TrimSpace(check.Name)
|
||||
if name == "" {
|
||||
continue
|
||||
}
|
||||
byName[name] = struct{}{}
|
||||
}
|
||||
|
||||
out := make([]ServiceChecklistCheck, 0, len(existing)+len(defaults))
|
||||
out = append(out, existing...)
|
||||
defaultByName := map[string]struct{}{}
|
||||
for _, check := range defaults {
|
||||
name := strings.TrimSpace(check.Name)
|
||||
if name == "" {
|
||||
continue
|
||||
}
|
||||
if _, exists := byName[name]; exists {
|
||||
defaultByName[name] = struct{}{}
|
||||
}
|
||||
|
||||
out := make([]ServiceChecklistCheck, 0, len(defaults)+len(existing))
|
||||
out = append(out, defaults...)
|
||||
for _, check := range existing {
|
||||
name := strings.TrimSpace(check.Name)
|
||||
if name == "" {
|
||||
continue
|
||||
}
|
||||
if _, exists := defaultByName[name]; exists {
|
||||
continue
|
||||
}
|
||||
out = append(out, check)
|
||||
|
||||
33
internal/config/testing_hooks.go
Normal file
33
internal/config/testing_hooks.go
Normal file
@ -0,0 +1,33 @@
|
||||
package config
|
||||
|
||||
// TestHookDefaultServiceChecklist runs one orchestration or CLI step.
|
||||
// Signature: TestHookDefaultServiceChecklist() []ServiceChecklistCheck.
|
||||
// Why: exposes default service checklist catalog to top-level tests.
|
||||
func TestHookDefaultServiceChecklist() []ServiceChecklistCheck {
|
||||
out := make([]ServiceChecklistCheck, 0, len(defaultServiceChecklist()))
|
||||
out = append(out, defaultServiceChecklist()...)
|
||||
return out
|
||||
}
|
||||
|
||||
// TestHookDefaultCriticalServiceEndpoints runs one orchestration or CLI step.
|
||||
// Signature: TestHookDefaultCriticalServiceEndpoints() []string.
|
||||
// Why: exposes default critical endpoint catalog to top-level tests.
|
||||
func TestHookDefaultCriticalServiceEndpoints() []string {
|
||||
out := make([]string, 0, len(defaultCriticalServiceEndpoints()))
|
||||
out = append(out, defaultCriticalServiceEndpoints()...)
|
||||
return out
|
||||
}
|
||||
|
||||
// TestHookMergeServiceChecklistDefaults is a test hook that returns mergeServiceChecklistDefaults(existing, defaults) unchanged.
|
||||
// Signature: TestHookMergeServiceChecklistDefaults(existing, defaults []ServiceChecklistCheck) []ServiceChecklistCheck.
|
||||
// Why: exposes checklist merge helper to top-level tests.
|
||||
func TestHookMergeServiceChecklistDefaults(existing, defaults []ServiceChecklistCheck) []ServiceChecklistCheck {
|
||||
return mergeServiceChecklistDefaults(existing, defaults)
|
||||
}
|
||||
|
||||
// TestHookMergeStringDefaults is a test hook that returns mergeStringDefaults(existing, defaults) unchanged.
|
||||
// Signature: TestHookMergeStringDefaults(existing, defaults []string) []string.
|
||||
// Why: exposes string merge helper to top-level tests.
|
||||
func TestHookMergeStringDefaults(existing, defaults []string) []string {
|
||||
return mergeStringDefaults(existing, defaults)
|
||||
}
|
||||
@ -56,6 +56,7 @@ type Startup struct {
|
||||
ServiceChecklistWaitSeconds int `yaml:"service_checklist_wait_seconds"`
|
||||
ServiceChecklistPollSeconds int `yaml:"service_checklist_poll_seconds"`
|
||||
ServiceChecklistStabilitySec int `yaml:"service_checklist_stability_seconds"`
|
||||
ServiceChecklistAuth ServiceChecklistAuthSettings `yaml:"service_checklist_auth"`
|
||||
ServiceChecklist []ServiceChecklistCheck `yaml:"service_checklist"`
|
||||
RequireCriticalServiceEndpoints bool `yaml:"require_critical_service_endpoints"`
|
||||
CriticalServiceEndpointWaitSec int `yaml:"critical_service_endpoint_wait_seconds"`
|
||||
@ -91,14 +92,29 @@ type ServiceChecklistCheck struct {
|
||||
Name string `yaml:"name"`
|
||||
URL string `yaml:"url"`
|
||||
AcceptedStatuses []int `yaml:"accepted_statuses"`
|
||||
RequireRobotAuth bool `yaml:"require_robot_auth"`
|
||||
FollowRedirects bool `yaml:"follow_redirects"`
|
||||
LocationContains string `yaml:"location_contains"`
|
||||
LocationNotContains string `yaml:"location_not_contains"`
|
||||
FinalURLContains string `yaml:"final_url_contains"`
|
||||
FinalURLNotContains string `yaml:"final_url_not_contains"`
|
||||
BodyContains string `yaml:"body_contains"`
|
||||
BodyNotContains string `yaml:"body_not_contains"`
|
||||
TimeoutSeconds int `yaml:"timeout_seconds"`
|
||||
InsecureSkipTLS bool `yaml:"insecure_skip_tls"`
|
||||
}
|
||||
|
||||
// ServiceChecklistAuthSettings holds the `service_checklist_auth` section of
// the startup config: the auth mode for service checklist checks plus the
// Keycloak and admin-secret coordinates that go with it. Empty fields are
// filled by applyDefaults; when Mode is "keycloak_robotuser", Validate requires
// every field to be non-empty.
type ServiceChecklistAuthSettings struct {
|
||||
// Mode selects the auth strategy. Validate accepts only "none" or
// "keycloak_robotuser"; applyDefaults sets an empty value to "keycloak_robotuser".
Mode string `yaml:"mode"`
|
||||
// KeycloakBaseURL must parse with both a scheme and a host when Mode is
// "keycloak_robotuser". Defaults to "https://sso.bstein.dev".
KeycloakBaseURL string `yaml:"keycloak_base_url"`
|
||||
// Realm is the Keycloak realm name. Defaults to "atlas".
Realm string `yaml:"realm"`
|
||||
// RobotUsername is the robot account's username. Defaults to "robotuser".
RobotUsername string `yaml:"robot_username"`
|
||||
// AdminSecretNamespace and AdminSecretName locate the admin credential secret
// (presumably a Kubernetes Secret; the consumer is not shown here, so confirm).
// Defaults: "sso" and "keycloak-admin".
AdminSecretNamespace string `yaml:"admin_secret_namespace"`
|
||||
AdminSecretName string `yaml:"admin_secret_name"`
|
||||
// AdminSecretUsernameKey and AdminSecretPasswordKey name the entries inside
// that secret. Defaults: "username" and "password".
AdminSecretUsernameKey string `yaml:"admin_secret_username_key"`
|
||||
AdminSecretPasswordKey string `yaml:"admin_secret_password_key"`
|
||||
}
|
||||
|
||||
type Shutdown struct {
|
||||
DefaultBudgetSeconds int `yaml:"default_budget_seconds"`
|
||||
HistoryMinSamples int `yaml:"history_min_samples"`
|
||||
|
||||
@ -136,6 +136,35 @@ func (c Config) Validate() error {
|
||||
if c.Startup.RequireServiceChecklist && len(c.Startup.ServiceChecklist) == 0 {
|
||||
return fmt.Errorf("config.startup.service_checklist must not be empty when require_service_checklist is true")
|
||||
}
|
||||
authMode := strings.TrimSpace(c.Startup.ServiceChecklistAuth.Mode)
|
||||
if authMode != "none" && authMode != "keycloak_robotuser" {
|
||||
return fmt.Errorf("config.startup.service_checklist_auth.mode must be none or keycloak_robotuser")
|
||||
}
|
||||
if authMode == "keycloak_robotuser" {
|
||||
baseURL := strings.TrimSpace(c.Startup.ServiceChecklistAuth.KeycloakBaseURL)
|
||||
parsed, err := neturl.Parse(baseURL)
|
||||
if err != nil || parsed.Scheme == "" || parsed.Host == "" {
|
||||
return fmt.Errorf("config.startup.service_checklist_auth.keycloak_base_url is invalid: %q", baseURL)
|
||||
}
|
||||
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.Realm) == "" {
|
||||
return fmt.Errorf("config.startup.service_checklist_auth.realm must not be empty")
|
||||
}
|
||||
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.RobotUsername) == "" {
|
||||
return fmt.Errorf("config.startup.service_checklist_auth.robot_username must not be empty")
|
||||
}
|
||||
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretNamespace) == "" {
|
||||
return fmt.Errorf("config.startup.service_checklist_auth.admin_secret_namespace must not be empty")
|
||||
}
|
||||
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretName) == "" {
|
||||
return fmt.Errorf("config.startup.service_checklist_auth.admin_secret_name must not be empty")
|
||||
}
|
||||
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretUsernameKey) == "" {
|
||||
return fmt.Errorf("config.startup.service_checklist_auth.admin_secret_username_key must not be empty")
|
||||
}
|
||||
if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretPasswordKey) == "" {
|
||||
return fmt.Errorf("config.startup.service_checklist_auth.admin_secret_password_key must not be empty")
|
||||
}
|
||||
}
|
||||
for i, check := range c.Startup.ServiceChecklist {
|
||||
if strings.TrimSpace(check.Name) == "" {
|
||||
return fmt.Errorf("config.startup.service_checklist[%d].name must not be empty", i)
|
||||
@ -151,6 +180,13 @@ func (c Config) Validate() error {
|
||||
if check.TimeoutSeconds <= 0 {
|
||||
return fmt.Errorf("config.startup.service_checklist[%d].timeout_seconds must be > 0", i)
|
||||
}
|
||||
if check.RequireRobotAuth && authMode == "none" {
|
||||
return fmt.Errorf("config.startup.service_checklist[%d] requires robot auth but service_checklist_auth.mode is none", i)
|
||||
}
|
||||
if (strings.TrimSpace(check.FinalURLContains) != "" || strings.TrimSpace(check.FinalURLNotContains) != "") &&
|
||||
!(check.FollowRedirects || check.RequireRobotAuth) {
|
||||
return fmt.Errorf("config.startup.service_checklist[%d] uses final_url_* markers without redirects enabled", i)
|
||||
}
|
||||
for _, code := range check.AcceptedStatuses {
|
||||
if code < 100 || code > 599 {
|
||||
return fmt.Errorf("config.startup.service_checklist[%d].accepted_statuses contains invalid HTTP code %d", i, code)
|
||||
|
||||
@ -15,6 +15,9 @@ type Runner struct {
|
||||
Logger *log.Logger
|
||||
}
|
||||
|
||||
// Run runs one orchestration or CLI step.
|
||||
// Signature: (r *Runner) Run(ctx context.Context, name string, args ...string) (string, error).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (r *Runner) Run(ctx context.Context, name string, args ...string) (string, error) {
|
||||
if r.DryRun {
|
||||
r.logf("DRY-RUN: %s %s", name, strings.Join(args, " "))
|
||||
@ -37,11 +40,17 @@ func (r *Runner) Run(ctx context.Context, name string, args ...string) (string,
|
||||
return trimmed, nil
|
||||
}
|
||||
|
||||
// CommandExists runs one orchestration or CLI step.
|
||||
// Signature: (r *Runner) CommandExists(name string) bool.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (r *Runner) CommandExists(name string) bool {
|
||||
_, err := exec.LookPath(name)
|
||||
return err == nil
|
||||
}
|
||||
|
||||
// logf runs one orchestration or CLI step.
|
||||
// Signature: (r *Runner) logf(format string, args ...any).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (r *Runner) logf(format string, args ...any) {
|
||||
if r.Logger != nil {
|
||||
r.Logger.Printf(format, args...)
|
||||
|
||||
53
internal/execx/runner_additional_test.go
Normal file
53
internal/execx/runner_additional_test.go
Normal file
@ -0,0 +1,53 @@
|
||||
package execx
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestRunnerRunFailureWithoutOutput runs one orchestration or CLI step.
|
||||
// Signature: TestRunnerRunFailureWithoutOutput(t *testing.T).
|
||||
// Why: covers error branch where command fails without producing output.
|
||||
func TestRunnerRunFailureWithoutOutput(t *testing.T) {
|
||||
r := &Runner{}
|
||||
out, err := r.Run(context.Background(), "sh", "-c", "exit 3")
|
||||
if err == nil {
|
||||
t.Fatalf("expected failure")
|
||||
}
|
||||
if out != "" {
|
||||
t.Fatalf("expected empty output, got %q", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunnerLogfNoLogger runs one orchestration or CLI step.
|
||||
// Signature: TestRunnerLogfNoLogger(t *testing.T).
|
||||
// Why: covers no-op logging path.
|
||||
func TestRunnerLogfNoLogger(t *testing.T) {
|
||||
r := &Runner{}
|
||||
r.logf("hello %s", "world")
|
||||
}
|
||||
|
||||
// TestRunnerCommandMissing runs one orchestration or CLI step.
|
||||
// Signature: TestRunnerCommandMissing(t *testing.T).
|
||||
// Why: covers false branch of command existence checks.
|
||||
func TestRunnerCommandMissing(t *testing.T) {
|
||||
r := &Runner{}
|
||||
if r.CommandExists("definitely-not-a-real-command-ananke") {
|
||||
t.Fatalf("expected missing command to be false")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunnerInjectsKubeconfigEnv runs one orchestration or CLI step.
|
||||
// Signature: TestRunnerInjectsKubeconfigEnv(t *testing.T).
|
||||
// Why: covers kubeconfig environment injection branch in command runner.
|
||||
func TestRunnerInjectsKubeconfigEnv(t *testing.T) {
|
||||
r := &Runner{Kubeconfig: "/tmp/test-kubeconfig"}
|
||||
out, err := r.Run(context.Background(), "sh", "-c", "printf %s \"$KUBECONFIG\"")
|
||||
if err != nil {
|
||||
t.Fatalf("runner command failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(out) != "/tmp/test-kubeconfig" {
|
||||
t.Fatalf("expected kubeconfig env to propagate, got %q", out)
|
||||
}
|
||||
}
|
||||
68
internal/execx/runner_test.go
Normal file
68
internal/execx/runner_test.go
Normal file
@ -0,0 +1,68 @@
|
||||
package execx
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"log"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestRunnerDryRun runs one orchestration or CLI step.
|
||||
// Signature: TestRunnerDryRun(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestRunnerDryRun(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
r := &Runner{
|
||||
DryRun: true,
|
||||
Logger: log.New(&buf, "", 0),
|
||||
}
|
||||
out, err := r.Run(context.Background(), "echo", "hello")
|
||||
if err != nil {
|
||||
t.Fatalf("dry-run should not fail: %v", err)
|
||||
}
|
||||
if out != "" {
|
||||
t.Fatalf("expected empty dry-run output, got %q", out)
|
||||
}
|
||||
if !strings.Contains(buf.String(), "DRY-RUN: echo hello") {
|
||||
t.Fatalf("expected dry-run log entry, got %q", buf.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunnerRunSuccess runs one orchestration or CLI step.
|
||||
// Signature: TestRunnerRunSuccess(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestRunnerRunSuccess(t *testing.T) {
|
||||
r := &Runner{}
|
||||
out, err := r.Run(context.Background(), "sh", "-c", "printf ok")
|
||||
if err != nil {
|
||||
t.Fatalf("expected command success: %v", err)
|
||||
}
|
||||
if out != "ok" {
|
||||
t.Fatalf("expected output ok, got %q", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunnerRunFailureIncludesOutput runs one orchestration or CLI step.
|
||||
// Signature: TestRunnerRunFailureIncludesOutput(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestRunnerRunFailureIncludesOutput(t *testing.T) {
|
||||
r := &Runner{}
|
||||
out, err := r.Run(context.Background(), "sh", "-c", "echo boom >&2; exit 1")
|
||||
if err == nil {
|
||||
t.Fatalf("expected command failure")
|
||||
}
|
||||
if strings.TrimSpace(out) != "boom" {
|
||||
t.Fatalf("expected stderr to be preserved, got %q", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunnerCommandExists runs one orchestration or CLI step.
|
||||
// Signature: TestRunnerCommandExists(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestRunnerCommandExists(t *testing.T) {
|
||||
r := &Runner{}
|
||||
if !r.CommandExists("sh") {
|
||||
t.Fatalf("expected shell command to exist")
|
||||
}
|
||||
}
|
||||
@ -3,6 +3,7 @@ package metrics
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
@ -35,18 +36,27 @@ type Exporter struct {
|
||||
samples map[string]Sample
|
||||
}
|
||||
|
||||
// New runs one orchestration or CLI step.
|
||||
// Signature: New() *Exporter.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func New() *Exporter {
|
||||
return &Exporter{
|
||||
samples: make(map[string]Sample),
|
||||
}
|
||||
}
|
||||
|
||||
// UpdateBudget runs one orchestration or CLI step.
|
||||
// Signature: (e *Exporter) UpdateBudget(seconds int).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (e *Exporter) UpdateBudget(seconds int) {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
e.shutdownBudgetSec = seconds
|
||||
}
|
||||
|
||||
// UpdateSample runs one orchestration or CLI step.
|
||||
// Signature: (e *Exporter) UpdateSample(s Sample).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (e *Exporter) UpdateSample(s Sample) {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
@ -56,6 +66,9 @@ func (e *Exporter) UpdateSample(s Sample) {
|
||||
e.samples[s.Name] = s
|
||||
}
|
||||
|
||||
// MarkShutdown runs one orchestration or CLI step.
|
||||
// Signature: (e *Exporter) MarkShutdown(reason string).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (e *Exporter) MarkShutdown(reason string) {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
@ -64,6 +77,9 @@ func (e *Exporter) MarkShutdown(reason string) {
|
||||
e.lastShutdownAt = time.Now().UTC()
|
||||
}
|
||||
|
||||
// Handler runs one orchestration or CLI step.
|
||||
// Signature: (e *Exporter) Handler(path string) http.Handler.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (e *Exporter) Handler(path string) http.Handler {
|
||||
mux := http.NewServeMux()
|
||||
metricsPath := path
|
||||
@ -78,6 +94,9 @@ func (e *Exporter) Handler(path string) http.Handler {
|
||||
return mux
|
||||
}
|
||||
|
||||
// serveMetrics runs one orchestration or CLI step.
|
||||
// Signature: (e *Exporter) serveMetrics(w http.ResponseWriter, _ *http.Request).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (e *Exporter) serveMetrics(w http.ResponseWriter, _ *http.Request) {
|
||||
e.mu.RLock()
|
||||
defer e.mu.RUnlock()
|
||||
@ -145,10 +164,40 @@ func (e *Exporter) serveMetrics(w http.ResponseWriter, _ *http.Request) {
|
||||
}
|
||||
b.WriteString(fmt.Sprintf("ananke_ups_error%s %d\n", labels, boolNum(s.LastError != "")))
|
||||
}
|
||||
appendQualityGateMetrics(&b)
|
||||
|
||||
_, _ = w.Write([]byte(b.String()))
|
||||
}
|
||||
|
||||
// appendQualityGateMetrics appends the contents of the quality-gate metrics file to dst.
// Signature: appendQualityGateMetrics(dst *strings.Builder).
// Why: quality-gate pass/fail telemetry should appear alongside UPS metrics so
// Grafana can track Ananke suite health over time.
//
// The file path comes from ANANKE_QUALITY_METRICS_FILE and falls back to
// /var/lib/ananke/quality-gate.prom when the variable is unset or blank.
// The lookup is best effort: an unreadable or whitespace-only file leaves dst
// untouched. Appended text is whitespace-trimmed and always ends with exactly
// one newline; when dst already holds output, a blank line separates the two.
func appendQualityGateMetrics(dst *strings.Builder) {
	path := strings.TrimSpace(os.Getenv("ANANKE_QUALITY_METRICS_FILE"))
	if path == "" {
		path = "/var/lib/ananke/quality-gate.prom"
	}

	raw, err := os.ReadFile(path)
	if err != nil {
		// Deliberately ignored: a missing or unreadable file just means there
		// is nothing extra to append to this scrape.
		return
	}

	text := strings.TrimSpace(string(raw))
	if text == "" {
		return
	}

	if dst.Len() > 0 {
		dst.WriteString("\n")
	}
	dst.WriteString(text)
	// text was trimmed above, so it can never carry its own trailing newline;
	// terminate the final metric line unconditionally.
	dst.WriteString("\n")
}
|
||||
|
||||
// boolNum runs one orchestration or CLI step.
|
||||
// Signature: boolNum(v bool) int.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func boolNum(v bool) int {
|
||||
if v {
|
||||
return 1
|
||||
@ -156,6 +205,9 @@ func boolNum(v bool) int {
|
||||
return 0
|
||||
}
|
||||
|
||||
// safe runs one orchestration or CLI step.
|
||||
// Signature: safe(in string) string.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func safe(in string) string {
|
||||
out := strings.ReplaceAll(in, "\\", "\\\\")
|
||||
return strings.ReplaceAll(out, "\"", "\\\"")
|
||||
|
||||
86
internal/metrics/exporter_additional_test.go
Normal file
86
internal/metrics/exporter_additional_test.go
Normal file
@ -0,0 +1,86 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestExporterHealthzAndEscaping runs one orchestration or CLI step.
|
||||
// Signature: TestExporterHealthzAndEscaping(t *testing.T).
|
||||
// Why: covers health endpoint and label escaping branches in metrics renderer.
|
||||
func TestExporterHealthzAndEscaping(t *testing.T) {
|
||||
e := New()
|
||||
e.UpdateSample(Sample{
|
||||
Name: `Sta"tera`,
|
||||
Target: `statera\host`,
|
||||
Status: `O"B`,
|
||||
LastError: "x",
|
||||
})
|
||||
|
||||
h := e.Handler("/custom")
|
||||
healthReq := httptest.NewRequest(http.MethodGet, "/healthz", nil)
|
||||
healthRR := httptest.NewRecorder()
|
||||
h.ServeHTTP(healthRR, healthReq)
|
||||
if healthRR.Code != http.StatusOK || strings.TrimSpace(healthRR.Body.String()) != "ok" {
|
||||
t.Fatalf("unexpected health response: code=%d body=%q", healthRR.Code, healthRR.Body.String())
|
||||
}
|
||||
|
||||
metricsReq := httptest.NewRequest(http.MethodGet, "/custom", nil)
|
||||
metricsRR := httptest.NewRecorder()
|
||||
h.ServeHTTP(metricsRR, metricsReq)
|
||||
body := metricsRR.Body.String()
|
||||
if !strings.Contains(body, `source="Sta\\\"tera"`) {
|
||||
t.Fatalf("expected escaped source label, got:\n%s", body)
|
||||
}
|
||||
if !strings.Contains(body, `target="statera\\\\host"`) {
|
||||
t.Fatalf("expected escaped target label, got:\n%s", body)
|
||||
}
|
||||
if !strings.Contains(body, "ananke_ups_error") {
|
||||
t.Fatalf("expected error metric line in output")
|
||||
}
|
||||
}
|
||||
|
||||
// TestBoolNumAndSafeHelpers runs one orchestration or CLI step.
|
||||
// Signature: TestBoolNumAndSafeHelpers(t *testing.T).
|
||||
// Why: directly covers remaining helper branches.
|
||||
func TestBoolNumAndSafeHelpers(t *testing.T) {
|
||||
if boolNum(true) != 1 || boolNum(false) != 0 {
|
||||
t.Fatalf("unexpected boolNum values")
|
||||
}
|
||||
if got := safe(`a"b\c`); got != `a\"b\\c` {
|
||||
t.Fatalf("unexpected escaped string: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestExporterAppendsQualityGateMetrics runs one orchestration or CLI step.
|
||||
// Signature: TestExporterAppendsQualityGateMetrics(t *testing.T).
|
||||
// Why: verifies quality-gate metrics are surfaced on /metrics for Grafana suite
|
||||
// pass-rate tracking.
|
||||
func TestExporterAppendsQualityGateMetrics(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
metricsPath := filepath.Join(tmp, "quality-gate.prom")
|
||||
content := strings.Join([]string{
|
||||
`# HELP ananke_quality_gate_runs_total Total quality gate runs by status.`,
|
||||
`# TYPE ananke_quality_gate_runs_total counter`,
|
||||
`ananke_quality_gate_runs_total{suite="ananke",status="ok"} 10`,
|
||||
`ananke_quality_gate_runs_total{suite="ananke",status="failed"} 2`,
|
||||
"",
|
||||
}, "\n")
|
||||
if err := os.WriteFile(metricsPath, []byte(content), 0o600); err != nil {
|
||||
t.Fatalf("write quality metrics file: %v", err)
|
||||
}
|
||||
t.Setenv("ANANKE_QUALITY_METRICS_FILE", metricsPath)
|
||||
|
||||
e := New()
|
||||
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
e.Handler("/metrics").ServeHTTP(rr, req)
|
||||
body := rr.Body.String()
|
||||
if !strings.Contains(body, `ananke_quality_gate_runs_total{suite="ananke",status="ok"} 10`) {
|
||||
t.Fatalf("expected quality gate metrics appended to exporter output, got:\n%s", body)
|
||||
}
|
||||
}
|
||||
@ -7,6 +7,9 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestExporterEmitsCoreMetrics runs one orchestration or CLI step.
|
||||
// Signature: TestExporterEmitsCoreMetrics(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestExporterEmitsCoreMetrics(t *testing.T) {
|
||||
e := New()
|
||||
e.UpdateBudget(321)
|
||||
|
||||
@ -34,6 +34,19 @@ type Daemon struct {
|
||||
exporter *metrics.Exporter
|
||||
}
|
||||
|
||||
// Default SSH file locations probed, in order, when the configuration does not
// name a file explicitly. They are package variables rather than constants so
// tests can point them at temporary files.
var (
	// sshConfigCandidates lists fallback ssh config file paths.
	sshConfigCandidates = []string{
		"/home/atlas/.ssh/config",
		"/home/tethys/.ssh/config",
	}

	// sshIdentityCandidates lists fallback ssh private key paths.
	sshIdentityCandidates = []string{
		"/home/atlas/.ssh/id_ed25519",
		"/home/tethys/.ssh/id_ed25519",
	}
)
|
||||
|
||||
// NewDaemon runs one orchestration or CLI step.
|
||||
// Signature: NewDaemon(cfg config.Config, orch *cluster.Orchestrator, targets []Target, logger *log.Logger) *Daemon.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func NewDaemon(cfg config.Config, orch *cluster.Orchestrator, targets []Target, logger *log.Logger) *Daemon {
|
||||
return &Daemon{
|
||||
cfg: cfg,
|
||||
@ -44,6 +57,9 @@ func NewDaemon(cfg config.Config, orch *cluster.Orchestrator, targets []Target,
|
||||
}
|
||||
}
|
||||
|
||||
// Run runs one orchestration or CLI step.
|
||||
// Signature: (d *Daemon) Run(ctx context.Context) error.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (d *Daemon) Run(ctx context.Context) error {
|
||||
if !d.cfg.UPS.Enabled {
|
||||
return fmt.Errorf("ups monitoring is disabled in config")
|
||||
@ -152,6 +168,9 @@ func (d *Daemon) Run(ctx context.Context) error {
|
||||
}
|
||||
}
|
||||
|
||||
// triggerShutdown runs one orchestration or CLI step.
|
||||
// Signature: (d *Daemon) triggerShutdown(ctx context.Context, reason string) error.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (d *Daemon) triggerShutdown(ctx context.Context, reason string) error {
|
||||
intent, err := state.ReadIntent(d.cfg.State.IntentPath)
|
||||
if err == nil && intent.State == state.IntentShuttingDown {
|
||||
@ -190,6 +209,9 @@ func (d *Daemon) triggerShutdown(ctx context.Context, reason string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// forwardShutdown runs one orchestration or CLI step.
|
||||
// Signature: (d *Daemon) forwardShutdown(ctx context.Context, reason string) error.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (d *Daemon) forwardShutdown(ctx context.Context, reason string) error {
|
||||
timeout := time.Duration(d.cfg.Coordination.CommandTimeoutSeconds) * time.Second
|
||||
if timeout <= 0 {
|
||||
@ -280,15 +302,14 @@ func (d *Daemon) forwardShutdown(ctx context.Context, reason string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// resolveSSHConfigFile runs one orchestration or CLI step.
|
||||
// Signature: (d *Daemon) resolveSSHConfigFile() string.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (d *Daemon) resolveSSHConfigFile() string {
|
||||
if strings.TrimSpace(d.cfg.SSHConfigFile) != "" {
|
||||
return strings.TrimSpace(d.cfg.SSHConfigFile)
|
||||
}
|
||||
candidates := []string{
|
||||
"/home/atlas/.ssh/config",
|
||||
"/home/tethys/.ssh/config",
|
||||
}
|
||||
for _, p := range candidates {
|
||||
for _, p := range sshConfigCandidates {
|
||||
if stat, err := os.Stat(p); err == nil && !stat.IsDir() {
|
||||
return p
|
||||
}
|
||||
@ -296,15 +317,14 @@ func (d *Daemon) resolveSSHConfigFile() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// resolveSSHIdentityFile runs one orchestration or CLI step.
|
||||
// Signature: (d *Daemon) resolveSSHIdentityFile() string.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (d *Daemon) resolveSSHIdentityFile() string {
|
||||
if strings.TrimSpace(d.cfg.SSHIdentityFile) != "" {
|
||||
return strings.TrimSpace(d.cfg.SSHIdentityFile)
|
||||
}
|
||||
candidates := []string{
|
||||
"/home/atlas/.ssh/id_ed25519",
|
||||
"/home/tethys/.ssh/id_ed25519",
|
||||
}
|
||||
for _, p := range candidates {
|
||||
for _, p := range sshIdentityCandidates {
|
||||
if stat, err := os.Stat(p); err == nil && !stat.IsDir() {
|
||||
return p
|
||||
}
|
||||
@ -312,6 +332,9 @@ func (d *Daemon) resolveSSHIdentityFile() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// targetList runs one orchestration or CLI step.
|
||||
// Signature: (d *Daemon) targetList() string.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (d *Daemon) targetList() string {
|
||||
names := make([]string, 0, len(d.targets))
|
||||
for _, t := range d.targets {
|
||||
@ -320,6 +343,9 @@ func (d *Daemon) targetList() string {
|
||||
return strings.Join(names, ",")
|
||||
}
|
||||
|
||||
// startMetricsServer runs one orchestration or CLI step.
|
||||
// Signature: (d *Daemon) startMetricsServer() error.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (d *Daemon) startMetricsServer() error {
|
||||
if d.cfg.Metrics.BindAddr == "" {
|
||||
return fmt.Errorf("metrics.bind_addr must not be empty when metrics are enabled")
|
||||
|
||||
255
internal/service/daemon_additional_test.go
Normal file
255
internal/service/daemon_additional_test.go
Normal file
@ -0,0 +1,255 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"scm.bstein.dev/bstein/ananke/internal/cluster"
|
||||
"scm.bstein.dev/bstein/ananke/internal/config"
|
||||
"scm.bstein.dev/bstein/ananke/internal/execx"
|
||||
"scm.bstein.dev/bstein/ananke/internal/metrics"
|
||||
"scm.bstein.dev/bstein/ananke/internal/state"
|
||||
"scm.bstein.dev/bstein/ananke/internal/ups"
|
||||
)
|
||||
|
||||
type daemonFakeProvider struct {
|
||||
samples []ups.Sample
|
||||
errs []error
|
||||
idx int
|
||||
}
|
||||
|
||||
// Read runs one orchestration or CLI step.
|
||||
// Signature: (p *daemonFakeProvider) Read(ctx context.Context) (ups.Sample, error).
|
||||
// Why: daemon tests need deterministic telemetry/error sequencing without real UPS I/O.
|
||||
func (p *daemonFakeProvider) Read(_ context.Context) (ups.Sample, error) {
|
||||
if p.idx < len(p.errs) && p.errs[p.idx] != nil {
|
||||
err := p.errs[p.idx]
|
||||
p.idx++
|
||||
return ups.Sample{}, err
|
||||
}
|
||||
if p.idx < len(p.samples) {
|
||||
s := p.samples[p.idx]
|
||||
p.idx++
|
||||
return s, nil
|
||||
}
|
||||
if len(p.samples) > 0 {
|
||||
return p.samples[len(p.samples)-1], nil
|
||||
}
|
||||
return ups.Sample{}, context.DeadlineExceeded
|
||||
}
|
||||
|
||||
// newDaemonTestOrchestrator runs one orchestration or CLI step.
|
||||
// Signature: newDaemonTestOrchestrator(t *testing.T, stateDir string) *cluster.Orchestrator.
|
||||
// Why: daemon tests share a minimal dry-run orchestrator fixture to avoid duplication.
|
||||
func newDaemonTestOrchestrator(t *testing.T, stateDir string) *cluster.Orchestrator {
|
||||
t.Helper()
|
||||
cfg := config.Config{
|
||||
ControlPlanes: []string{"titan-0a"},
|
||||
Workers: []string{"titan-22"},
|
||||
SSHUser: "atlas",
|
||||
SSHPort: 2277,
|
||||
SSHManagedNodes: []string{"titan-0a", "titan-22"},
|
||||
SSHNodeHosts: map[string]string{
|
||||
"titan-0a": "192.168.22.11",
|
||||
"titan-22": "192.168.22.22",
|
||||
},
|
||||
State: config.State{
|
||||
Dir: stateDir,
|
||||
ReportsDir: filepath.Join(stateDir, "reports"),
|
||||
RunHistoryPath: filepath.Join(stateDir, "runs.json"),
|
||||
LockPath: filepath.Join(stateDir, "ananke.lock"),
|
||||
IntentPath: filepath.Join(stateDir, "intent.json"),
|
||||
},
|
||||
Shutdown: config.Shutdown{
|
||||
EmergencySkipDrain: true,
|
||||
EmergencySkipEtcd: true,
|
||||
},
|
||||
}
|
||||
return cluster.New(
|
||||
cfg,
|
||||
&execx.Runner{DryRun: true, Logger: log.New(io.Discard, "", 0)},
|
||||
state.New(filepath.Join(stateDir, "runs.json")),
|
||||
log.New(io.Discard, "", 0),
|
||||
)
|
||||
}
|
||||
|
||||
// TestDaemonRunTriggersShutdownOnLowBattery runs one orchestration or CLI step.
|
||||
// Signature: TestDaemonRunTriggersShutdownOnLowBattery(t *testing.T).
|
||||
// Why: covers main daemon loop path that triggers shutdown after debounce threshold.
|
||||
func TestDaemonRunTriggersShutdownOnLowBattery(t *testing.T) {
|
||||
stateDir := t.TempDir()
|
||||
orch := newDaemonTestOrchestrator(t, stateDir)
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
UPS: config.UPS{
|
||||
Enabled: true,
|
||||
PollSeconds: 1,
|
||||
DebounceCount: 1,
|
||||
RuntimeSafetyFactor: 1.0,
|
||||
},
|
||||
State: config.State{
|
||||
IntentPath: filepath.Join(stateDir, "intent.json"),
|
||||
},
|
||||
Shutdown: config.Shutdown{
|
||||
EmergencySkipDrain: true,
|
||||
EmergencySkipEtcd: true,
|
||||
},
|
||||
},
|
||||
orch: orch,
|
||||
targets: []Target{
|
||||
{
|
||||
Name: "Pyrphoros",
|
||||
Target: "pyrphoros@localhost",
|
||||
Provider: &daemonFakeProvider{
|
||||
samples: []ups.Sample{{OnBattery: true, LowBattery: true, RuntimeSeconds: 30, RawStatus: "OB LB"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
log: log.New(io.Discard, "", 0),
|
||||
exporter: metrics.New(),
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if err := d.Run(ctx); err != nil {
|
||||
t.Fatalf("expected daemon to trigger and complete shutdown, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDaemonRunTriggersShutdownOnTelemetryTimeout runs one orchestration or CLI step.
|
||||
// Signature: TestDaemonRunTriggersShutdownOnTelemetryTimeout(t *testing.T).
|
||||
// Why: covers telemetry-timeout trigger path while UPS remains on-battery.
|
||||
func TestDaemonRunTriggersShutdownOnTelemetryTimeout(t *testing.T) {
|
||||
stateDir := t.TempDir()
|
||||
orch := newDaemonTestOrchestrator(t, stateDir)
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
UPS: config.UPS{
|
||||
Enabled: true,
|
||||
PollSeconds: 1,
|
||||
DebounceCount: 3,
|
||||
RuntimeSafetyFactor: 1.0,
|
||||
TelemetryTimeoutSeconds: 1,
|
||||
},
|
||||
State: config.State{
|
||||
IntentPath: filepath.Join(stateDir, "intent.json"),
|
||||
},
|
||||
Shutdown: config.Shutdown{
|
||||
EmergencySkipDrain: true,
|
||||
EmergencySkipEtcd: true,
|
||||
},
|
||||
},
|
||||
orch: orch,
|
||||
targets: []Target{
|
||||
{
|
||||
Name: "Statera",
|
||||
Target: "statera@localhost",
|
||||
Provider: &daemonFakeProvider{
|
||||
samples: []ups.Sample{{OnBattery: true, LowBattery: false, RuntimeSeconds: 9999, RawStatus: "OB"}},
|
||||
errs: []error{nil, context.DeadlineExceeded, context.DeadlineExceeded, context.DeadlineExceeded},
|
||||
},
|
||||
},
|
||||
},
|
||||
log: log.New(io.Discard, "", 0),
|
||||
exporter: metrics.New(),
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second)
|
||||
defer cancel()
|
||||
if err := d.Run(ctx); err != nil {
|
||||
t.Fatalf("expected telemetry-timeout shutdown path to complete, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestForwardShutdownSucceedsWithSSHShim runs one orchestration or CLI step.
|
||||
// Signature: TestForwardShutdownSucceedsWithSSHShim(t *testing.T).
|
||||
// Why: covers forward-shutdown SSH execution path.
|
||||
func TestForwardShutdownSucceedsWithSSHShim(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
sshPath := filepath.Join(tmp, "ssh")
|
||||
script := `#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
echo forwarded
|
||||
`
|
||||
if err := os.WriteFile(sshPath, []byte(script), 0o755); err != nil {
|
||||
t.Fatalf("write fake ssh: %v", err)
|
||||
}
|
||||
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
SSHUser: "atlas",
|
||||
SSHPort: 2277,
|
||||
Coordination: config.Coordination{
|
||||
ForwardShutdownHost: "titan-db",
|
||||
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
||||
CommandTimeoutSeconds: 5,
|
||||
},
|
||||
},
|
||||
log: log.New(io.Discard, "", 0),
|
||||
}
|
||||
if err := d.forwardShutdown(context.Background(), "test-forward"); err != nil {
|
||||
t.Fatalf("forwardShutdown failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestForwardShutdownFailsWhenSSHFailsAndNoRecovery runs one orchestration or CLI step.
|
||||
// Signature: TestForwardShutdownFailsWhenSSHFailsAndNoRecovery(t *testing.T).
|
||||
// Why: covers forwarded shutdown error propagation branch.
|
||||
func TestForwardShutdownFailsWhenSSHFailsAndNoRecovery(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
sshPath := filepath.Join(tmp, "ssh")
|
||||
script := `#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
echo "permission denied" >&2
|
||||
exit 255
|
||||
`
|
||||
if err := os.WriteFile(sshPath, []byte(script), 0o755); err != nil {
|
||||
t.Fatalf("write fake ssh: %v", err)
|
||||
}
|
||||
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
SSHUser: "atlas",
|
||||
SSHPort: 2277,
|
||||
Coordination: config.Coordination{
|
||||
ForwardShutdownHost: "titan-db",
|
||||
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
||||
CommandTimeoutSeconds: 5,
|
||||
},
|
||||
},
|
||||
log: log.New(io.Discard, "", 0),
|
||||
}
|
||||
err := d.forwardShutdown(context.Background(), "test-fail")
|
||||
if err == nil {
|
||||
t.Fatalf("expected forwardShutdown error")
|
||||
}
|
||||
if !strings.Contains(strings.ToLower(err.Error()), "forward shutdown via ssh failed") {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStartMetricsServerSuccess runs one orchestration or CLI step.
|
||||
// Signature: TestStartMetricsServerSuccess(t *testing.T).
|
||||
// Why: covers successful metrics server startup branch.
|
||||
func TestStartMetricsServerSuccess(t *testing.T) {
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
Metrics: config.Metrics{
|
||||
Enabled: true,
|
||||
BindAddr: "127.0.0.1:0",
|
||||
Path: "/metrics",
|
||||
},
|
||||
},
|
||||
log: log.New(io.Discard, "", 0),
|
||||
exporter: metrics.New(),
|
||||
}
|
||||
if err := d.startMetricsServer(); err != nil {
|
||||
t.Fatalf("startMetricsServer failed: %v", err)
|
||||
}
|
||||
}
|
||||
421
internal/service/daemon_quality_branches_test.go
Normal file
421
internal/service/daemon_quality_branches_test.go
Normal file
@ -0,0 +1,421 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"scm.bstein.dev/bstein/ananke/internal/cluster"
|
||||
"scm.bstein.dev/bstein/ananke/internal/config"
|
||||
"scm.bstein.dev/bstein/ananke/internal/execx"
|
||||
"scm.bstein.dev/bstein/ananke/internal/metrics"
|
||||
"scm.bstein.dev/bstein/ananke/internal/state"
|
||||
"scm.bstein.dev/bstein/ananke/internal/ups"
|
||||
)
|
||||
|
||||
// TestNewDaemonInitializesExporter runs one orchestration or CLI step.
|
||||
// Signature: TestNewDaemonInitializesExporter(t *testing.T).
|
||||
// Why: covers constructor branch so daemon initialization contracts stay explicit.
|
||||
func TestNewDaemonInitializesExporter(t *testing.T) {
|
||||
d := NewDaemon(config.Config{}, nil, nil, log.New(io.Discard, "", 0))
|
||||
if d == nil || d.exporter == nil {
|
||||
t.Fatalf("expected NewDaemon to initialize exporter")
|
||||
}
|
||||
}
|
||||
|
||||
// TestTriggerShutdownForwardSuccessSetsForwardedIntent runs one orchestration or CLI step.
|
||||
// Signature: TestTriggerShutdownForwardSuccessSetsForwardedIntent(t *testing.T).
|
||||
// Why: covers forwarded shutdown happy-path branch and completion intent semantics.
|
||||
func TestTriggerShutdownForwardSuccessSetsForwardedIntent(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
sshPath := filepath.Join(tmp, "ssh")
|
||||
if err := os.WriteFile(sshPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\necho forwarded\n"), 0o755); err != nil {
|
||||
t.Fatalf("write fake ssh: %v", err)
|
||||
}
|
||||
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||
|
||||
intentPath := filepath.Join(tmp, "intent.json")
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
SSHUser: "atlas",
|
||||
SSHPort: 2277,
|
||||
State: config.State{
|
||||
IntentPath: intentPath,
|
||||
},
|
||||
Coordination: config.Coordination{
|
||||
ForwardShutdownHost: "titan-db",
|
||||
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
||||
CommandTimeoutSeconds: 3,
|
||||
},
|
||||
},
|
||||
log: log.New(io.Discard, "", 0),
|
||||
exporter: metrics.New(),
|
||||
}
|
||||
if err := d.triggerShutdown(context.Background(), "test-forward-success"); err != nil {
|
||||
t.Fatalf("triggerShutdown forward success failed: %v", err)
|
||||
}
|
||||
in, err := state.ReadIntent(intentPath)
|
||||
if err != nil {
|
||||
t.Fatalf("read forward completion intent: %v", err)
|
||||
}
|
||||
if in.State != state.IntentShutdownComplete || in.Source != "daemon-forwarded" {
|
||||
t.Fatalf("unexpected forward completion intent: %+v", in)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTriggerShutdownForwardFailureWithoutFallback runs one orchestration or CLI step.
|
||||
// Signature: TestTriggerShutdownForwardFailureWithoutFallback(t *testing.T).
|
||||
// Why: covers explicit failure branch when forwarding is required and local fallback is disabled.
|
||||
func TestTriggerShutdownForwardFailureWithoutFallback(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
sshPath := filepath.Join(tmp, "ssh")
|
||||
if err := os.WriteFile(sshPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\necho denied >&2\nexit 255\n"), 0o755); err != nil {
|
||||
t.Fatalf("write fake ssh: %v", err)
|
||||
}
|
||||
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
SSHUser: "atlas",
|
||||
SSHPort: 2277,
|
||||
State: config.State{
|
||||
IntentPath: filepath.Join(tmp, "intent.json"),
|
||||
},
|
||||
Coordination: config.Coordination{
|
||||
ForwardShutdownHost: "titan-db",
|
||||
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
||||
FallbackLocalShutdown: false,
|
||||
CommandTimeoutSeconds: 3,
|
||||
},
|
||||
},
|
||||
log: log.New(io.Discard, "", 0),
|
||||
exporter: metrics.New(),
|
||||
}
|
||||
err := d.triggerShutdown(context.Background(), "test-forward-fail")
|
||||
if err == nil || !strings.Contains(err.Error(), "forward shutdown failed") {
|
||||
t.Fatalf("expected forward failure without fallback, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTriggerShutdownForwardFailureFallsBackToLocal runs one orchestration or CLI step.
|
||||
// Signature: TestTriggerShutdownForwardFailureFallsBackToLocal(t *testing.T).
|
||||
// Why: covers fallback branch where local shutdown is used after forwarding fails.
|
||||
func TestTriggerShutdownForwardFailureFallsBackToLocal(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
sshPath := filepath.Join(tmp, "ssh")
|
||||
if err := os.WriteFile(sshPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\necho denied >&2\nexit 255\n"), 0o755); err != nil {
|
||||
t.Fatalf("write fake ssh: %v", err)
|
||||
}
|
||||
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||
|
||||
orch := newDaemonTestOrchestrator(t, tmp)
|
||||
intentPath := filepath.Join(tmp, "intent.json")
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
SSHUser: "atlas",
|
||||
SSHPort: 2277,
|
||||
State: config.State{
|
||||
IntentPath: intentPath,
|
||||
},
|
||||
Shutdown: config.Shutdown{
|
||||
EmergencySkipDrain: true,
|
||||
EmergencySkipEtcd: true,
|
||||
},
|
||||
Coordination: config.Coordination{
|
||||
ForwardShutdownHost: "titan-db",
|
||||
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
||||
FallbackLocalShutdown: true,
|
||||
CommandTimeoutSeconds: 3,
|
||||
},
|
||||
},
|
||||
orch: orch,
|
||||
log: log.New(io.Discard, "", 0),
|
||||
exporter: metrics.New(),
|
||||
}
|
||||
if err := d.triggerShutdown(context.Background(), "test-forward-fallback"); err != nil {
|
||||
t.Fatalf("triggerShutdown fallback local failed: %v", err)
|
||||
}
|
||||
in, err := state.ReadIntent(intentPath)
|
||||
if err != nil {
|
||||
t.Fatalf("read local completion intent: %v", err)
|
||||
}
|
||||
if in.State != state.IntentShutdownComplete || in.Source != "daemon-local" {
|
||||
t.Fatalf("unexpected local completion intent: %+v", in)
|
||||
}
|
||||
}
|
||||
|
||||
// TestForwardShutdownBuildsJumpArgs runs one orchestration or CLI step.
|
||||
// Signature: TestForwardShutdownBuildsJumpArgs(t *testing.T).
|
||||
// Why: covers jump-host argument construction branches in forward shutdown transport.
|
||||
func TestForwardShutdownBuildsJumpArgs(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
argsOut := filepath.Join(tmp, "args.txt")
|
||||
sshPath := filepath.Join(tmp, "ssh")
|
||||
script := "#!/usr/bin/env bash\nset -euo pipefail\nprintf '%s\n' \"$*\" > " + argsOut + "\n"
|
||||
if err := os.WriteFile(sshPath, []byte(script), 0o755); err != nil {
|
||||
t.Fatalf("write fake ssh: %v", err)
|
||||
}
|
||||
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
SSHUser: "atlas",
|
||||
SSHPort: 2277,
|
||||
SSHConfigFile: "/tmp/custom-config",
|
||||
SSHIdentityFile: "/tmp/custom-key",
|
||||
SSHJumpHost: "titan-jh",
|
||||
SSHJumpUser: "jump",
|
||||
SSHNodeHosts: map[string]string{
|
||||
"titan-db": "10.0.0.5",
|
||||
},
|
||||
SSHNodeUsers: map[string]string{
|
||||
"titan-db": "dbadmin",
|
||||
},
|
||||
Coordination: config.Coordination{
|
||||
ForwardShutdownHost: "titan-db",
|
||||
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
||||
CommandTimeoutSeconds: 3,
|
||||
},
|
||||
},
|
||||
log: log.New(io.Discard, "", 0),
|
||||
}
|
||||
if err := d.forwardShutdown(context.Background(), "args-check"); err != nil {
|
||||
t.Fatalf("forwardShutdown with jump args failed: %v", err)
|
||||
}
|
||||
|
||||
raw, err := os.ReadFile(argsOut)
|
||||
if err != nil {
|
||||
t.Fatalf("read ssh args output: %v", err)
|
||||
}
|
||||
out := string(raw)
|
||||
for _, want := range []string{"-F /tmp/custom-config", "-i /tmp/custom-key", "-J jump@titan-jh:2277", "-p 2277", "dbadmin@10.0.0.5"} {
|
||||
if !strings.Contains(out, want) {
|
||||
t.Fatalf("expected ssh args to include %q, got %q", want, out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestStartMetricsServerInvalidBindLogsErrorPath runs one orchestration or CLI step.
|
||||
// Signature: TestStartMetricsServerInvalidBindLogsErrorPath(t *testing.T).
|
||||
// Why: exercises goroutine listen failure branch so metrics startup diagnostics remain covered.
|
||||
func TestStartMetricsServerInvalidBindLogsErrorPath(t *testing.T) {
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
Metrics: config.Metrics{
|
||||
Enabled: true,
|
||||
BindAddr: "127.0.0.1:not-a-port",
|
||||
Path: "/metrics",
|
||||
},
|
||||
},
|
||||
log: log.New(io.Discard, "", 0),
|
||||
exporter: metrics.New(),
|
||||
}
|
||||
if err := d.startMetricsServer(); err != nil {
|
||||
t.Fatalf("startMetricsServer should return nil after goroutine spawn, got %v", err)
|
||||
}
|
||||
time.Sleep(25 * time.Millisecond)
|
||||
}
|
||||
|
||||
// TestResolveSSHPathCandidatesFromOverrides runs one orchestration or CLI step.
|
||||
// Signature: TestResolveSSHPathCandidatesFromOverrides(t *testing.T).
|
||||
// Why: covers candidate-path discovery branches without requiring writes under /home.
|
||||
func TestResolveSSHPathCandidatesFromOverrides(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
cfgPath := filepath.Join(tmp, "config")
|
||||
keyPath := filepath.Join(tmp, "id_ed25519")
|
||||
if err := os.WriteFile(cfgPath, []byte("Host *\n"), 0o600); err != nil {
|
||||
t.Fatalf("write fake config candidate: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(keyPath, []byte("fake-key"), 0o600); err != nil {
|
||||
t.Fatalf("write fake key candidate: %v", err)
|
||||
}
|
||||
|
||||
origConfigs := sshConfigCandidates
|
||||
origKeys := sshIdentityCandidates
|
||||
t.Cleanup(func() {
|
||||
sshConfigCandidates = origConfigs
|
||||
sshIdentityCandidates = origKeys
|
||||
})
|
||||
sshConfigCandidates = []string{cfgPath}
|
||||
sshIdentityCandidates = []string{keyPath}
|
||||
|
||||
d := &Daemon{cfg: config.Config{}}
|
||||
if got := d.resolveSSHConfigFile(); got != cfgPath {
|
||||
t.Fatalf("expected config candidate path %q, got %q", cfgPath, got)
|
||||
}
|
||||
if got := d.resolveSSHIdentityFile(); got != keyPath {
|
||||
t.Fatalf("expected key candidate path %q, got %q", keyPath, got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestForwardShutdownKnownHostsRepairRetry runs one orchestration or CLI step.
|
||||
// Signature: TestForwardShutdownKnownHostsRepairRetry(t *testing.T).
|
||||
// Why: covers known-hosts-repair retry branch in forwarded shutdown transport.
|
||||
func TestForwardShutdownKnownHostsRepairRetry(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
attemptMarker := filepath.Join(tmp, "attempt")
|
||||
sshPath := filepath.Join(tmp, "ssh")
|
||||
script := `#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
marker="` + attemptMarker + `"
|
||||
if [[ ! -f "$marker" ]]; then
|
||||
echo "REMOTE HOST IDENTIFICATION HAS CHANGED!" >&2
|
||||
touch "$marker"
|
||||
exit 255
|
||||
fi
|
||||
echo "forwarded"
|
||||
`
|
||||
if err := os.WriteFile(sshPath, []byte(script), 0o755); err != nil {
|
||||
t.Fatalf("write fake ssh: %v", err)
|
||||
}
|
||||
sshKeygenPath := filepath.Join(tmp, "ssh-keygen")
|
||||
if err := os.WriteFile(sshKeygenPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\nexit 0\n"), 0o755); err != nil {
|
||||
t.Fatalf("write fake ssh-keygen: %v", err)
|
||||
}
|
||||
sshKeyscanPath := filepath.Join(tmp, "ssh-keyscan")
|
||||
if err := os.WriteFile(sshKeyscanPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\necho fake-key\n"), 0o755); err != nil {
|
||||
t.Fatalf("write fake ssh-keyscan: %v", err)
|
||||
}
|
||||
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||
|
||||
knownHosts := filepath.Join(tmp, "known_hosts")
|
||||
if err := os.WriteFile(knownHosts, []byte{}, 0o600); err != nil {
|
||||
t.Fatalf("write known_hosts file: %v", err)
|
||||
}
|
||||
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
SSHConfigFile: knownHosts, // used only to derive known-hosts search path
|
||||
SSHUser: "atlas",
|
||||
SSHPort: 2277,
|
||||
Coordination: config.Coordination{
|
||||
ForwardShutdownHost: "titan-db",
|
||||
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
||||
CommandTimeoutSeconds: 3,
|
||||
},
|
||||
},
|
||||
log: log.New(io.Discard, "", 0),
|
||||
}
|
||||
if err := d.forwardShutdown(context.Background(), "repair-retry"); err != nil {
|
||||
t.Fatalf("forwardShutdown known-hosts repair retry failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTriggerShutdownReturnsLocalShutdownError runs one orchestration or CLI step.
|
||||
// Signature: TestTriggerShutdownReturnsLocalShutdownError(t *testing.T).
|
||||
// Why: covers local shutdown error propagation branch from triggerShutdown.
|
||||
func TestTriggerShutdownReturnsLocalShutdownError(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
intentPath := filepath.Join(tmp, "intent-dir")
|
||||
if err := os.MkdirAll(intentPath, 0o755); err != nil {
|
||||
t.Fatalf("mkdir intent dir: %v", err)
|
||||
}
|
||||
orchCfg := config.Config{
|
||||
ControlPlanes: []string{"titan-db"},
|
||||
Workers: []string{"titan-23"},
|
||||
State: config.State{
|
||||
Dir: filepath.Join(tmp, "state"),
|
||||
ReportsDir: filepath.Join(tmp, "reports"),
|
||||
RunHistoryPath: filepath.Join(tmp, "runs.json"),
|
||||
LockPath: filepath.Join(tmp, "ananke.lock"),
|
||||
IntentPath: intentPath, // directory path forces MustWriteIntent failure in Shutdown
|
||||
},
|
||||
}
|
||||
orch := cluster.New(
|
||||
orchCfg,
|
||||
&execx.Runner{DryRun: false, Logger: log.New(io.Discard, "", 0)},
|
||||
state.New(filepath.Join(tmp, "runs.json")),
|
||||
log.New(io.Discard, "", 0),
|
||||
)
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
State: config.State{
|
||||
IntentPath: intentPath,
|
||||
},
|
||||
Shutdown: config.Shutdown{
|
||||
EmergencySkipDrain: true,
|
||||
EmergencySkipEtcd: true,
|
||||
},
|
||||
},
|
||||
orch: orch,
|
||||
log: log.New(io.Discard, "", 0),
|
||||
exporter: metrics.New(),
|
||||
}
|
||||
err := d.triggerShutdown(context.Background(), "local-shutdown-error")
|
||||
if err == nil {
|
||||
t.Fatalf("expected triggerShutdown to propagate local shutdown error")
|
||||
}
|
||||
}
|
||||
|
||||
// TestDaemonRunContextCancelNonTriggerPath runs one orchestration or CLI step.
|
||||
// Signature: TestDaemonRunContextCancelNonTriggerPath(t *testing.T).
|
||||
// Why: covers steady-state non-trigger loop branches in Run until context cancellation.
|
||||
func TestDaemonRunContextCancelNonTriggerPath(t *testing.T) {
|
||||
stateDir := t.TempDir()
|
||||
orch := newDaemonTestOrchestrator(t, stateDir)
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
UPS: config.UPS{
|
||||
Enabled: true,
|
||||
PollSeconds: 0, // exercise default poll fallback
|
||||
DebounceCount: 0, // exercise default debounce fallback
|
||||
RuntimeSafetyFactor: 0.5,
|
||||
},
|
||||
State: config.State{
|
||||
IntentPath: filepath.Join(stateDir, "intent.json"),
|
||||
},
|
||||
},
|
||||
orch: orch,
|
||||
targets: []Target{
|
||||
{
|
||||
Name: "Pyrphoros",
|
||||
Target: "pyrphoros@localhost",
|
||||
Provider: &daemonFakeProvider{
|
||||
samples: []ups.Sample{
|
||||
{OnBattery: false, LowBattery: false, RuntimeSeconds: 7200, RawStatus: "OL"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
log: log.New(io.Discard, "", 0),
|
||||
exporter: metrics.New(),
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 1100*time.Millisecond)
|
||||
defer cancel()
|
||||
if err := d.Run(ctx); err == nil {
|
||||
t.Fatalf("expected context deadline/cancel in non-trigger loop")
|
||||
}
|
||||
}
|
||||
|
||||
// TestForwardShutdownErrorWithoutOutput runs one orchestration or CLI step.
|
||||
// Signature: TestForwardShutdownErrorWithoutOutput(t *testing.T).
|
||||
// Why: covers forwardShutdown branch where ssh fails without any stderr/stdout text.
|
||||
func TestForwardShutdownErrorWithoutOutput(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
sshPath := filepath.Join(tmp, "ssh")
|
||||
if err := os.WriteFile(sshPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\nexit 255\n"), 0o755); err != nil {
|
||||
t.Fatalf("write fake ssh: %v", err)
|
||||
}
|
||||
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
SSHUser: "atlas",
|
||||
Coordination: config.Coordination{
|
||||
ForwardShutdownHost: "titan-db",
|
||||
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
||||
CommandTimeoutSeconds: 3,
|
||||
},
|
||||
},
|
||||
log: log.New(io.Discard, "", 0),
|
||||
}
|
||||
err := d.forwardShutdown(context.Background(), "no-output-fail")
|
||||
if err == nil || !strings.Contains(strings.ToLower(err.Error()), "forward shutdown via ssh failed") {
|
||||
t.Fatalf("expected no-output forward ssh failure, got %v", err)
|
||||
}
|
||||
}
|
||||
@ -1,7 +1,133 @@
|
||||
package service
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"log"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
func TestPlaceholder(t *testing.T) {
|
||||
// Placeholder test keeps package-level test coverage active.
|
||||
"scm.bstein.dev/bstein/ananke/internal/config"
|
||||
"scm.bstein.dev/bstein/ananke/internal/metrics"
|
||||
"scm.bstein.dev/bstein/ananke/internal/state"
|
||||
)
|
||||
|
||||
// TestDaemonRunRejectsDisabledUPS runs one orchestration or CLI step.
|
||||
// Signature: TestDaemonRunRejectsDisabledUPS(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestDaemonRunRejectsDisabledUPS(t *testing.T) {
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
UPS: config.UPS{Enabled: false},
|
||||
},
|
||||
log: log.New(io.Discard, "", 0),
|
||||
}
|
||||
if err := d.Run(context.Background()); err == nil {
|
||||
t.Fatalf("expected UPS-disabled run to fail")
|
||||
}
|
||||
}
|
||||
|
||||
// TestDaemonRunRejectsMissingTargets runs one orchestration or CLI step.
|
||||
// Signature: TestDaemonRunRejectsMissingTargets(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestDaemonRunRejectsMissingTargets(t *testing.T) {
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
UPS: config.UPS{Enabled: true},
|
||||
},
|
||||
log: log.New(io.Discard, "", 0),
|
||||
}
|
||||
if err := d.Run(context.Background()); err == nil {
|
||||
t.Fatalf("expected empty-target run to fail")
|
||||
}
|
||||
}
|
||||
|
||||
// TestDaemonTargetList runs one orchestration or CLI step.
|
||||
// Signature: TestDaemonTargetList(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestDaemonTargetList(t *testing.T) {
|
||||
d := &Daemon{
|
||||
targets: []Target{
|
||||
{Name: "Pyrphoros", Target: "pyrphoros@localhost"},
|
||||
{Name: "Statera", Target: "statera@localhost"},
|
||||
},
|
||||
}
|
||||
got := d.targetList()
|
||||
if !strings.Contains(got, "Pyrphoros=pyrphoros@localhost") || !strings.Contains(got, "Statera=statera@localhost") {
|
||||
t.Fatalf("unexpected target list: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDaemonResolveSSHPathsPreferConfigured runs one orchestration or CLI step.
|
||||
// Signature: TestDaemonResolveSSHPathsPreferConfigured(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestDaemonResolveSSHPathsPreferConfigured(t *testing.T) {
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
SSHConfigFile: "/tmp/custom-ssh-config",
|
||||
SSHIdentityFile: "/tmp/custom-ssh-key",
|
||||
},
|
||||
}
|
||||
if got := d.resolveSSHConfigFile(); got != "/tmp/custom-ssh-config" {
|
||||
t.Fatalf("unexpected config path: %q", got)
|
||||
}
|
||||
if got := d.resolveSSHIdentityFile(); got != "/tmp/custom-ssh-key" {
|
||||
t.Fatalf("unexpected identity path: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStartMetricsServerRequiresBindAddress runs one orchestration or CLI step.
|
||||
// Signature: TestStartMetricsServerRequiresBindAddress(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestStartMetricsServerRequiresBindAddress(t *testing.T) {
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
Metrics: config.Metrics{
|
||||
Enabled: true,
|
||||
BindAddr: "",
|
||||
Path: "/metrics",
|
||||
},
|
||||
},
|
||||
log: log.New(io.Discard, "", 0),
|
||||
exporter: nil,
|
||||
}
|
||||
d.exporter = d.ensureExporterForTest()
|
||||
if err := d.startMetricsServer(); err == nil {
|
||||
t.Fatalf("expected missing bind address error")
|
||||
}
|
||||
}
|
||||
|
||||
// TestTriggerShutdownSkipsDuplicateWhenIntentActive runs one orchestration or CLI step.
|
||||
// Signature: TestTriggerShutdownSkipsDuplicateWhenIntentActive(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestTriggerShutdownSkipsDuplicateWhenIntentActive(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
intentPath := filepath.Join(tmp, "intent.json")
|
||||
if err := state.MustWriteIntent(intentPath, state.IntentShuttingDown, "already-running", "test"); err != nil {
|
||||
t.Fatalf("seed intent: %v", err)
|
||||
}
|
||||
d := &Daemon{
|
||||
cfg: config.Config{
|
||||
State: config.State{
|
||||
IntentPath: intentPath,
|
||||
},
|
||||
},
|
||||
log: log.New(io.Discard, "", 0),
|
||||
exporter: nil,
|
||||
}
|
||||
d.exporter = d.ensureExporterForTest()
|
||||
if err := d.triggerShutdown(context.Background(), "duplicate-check"); err != nil {
|
||||
t.Fatalf("expected duplicate shutdown trigger to be ignored: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ensureExporterForTest runs one orchestration or CLI step.
|
||||
// Signature: (d *Daemon) ensureExporterForTest() *metrics.Exporter.
|
||||
// Why: local helper keeps setup concise while preserving explicit behavior in each test.
|
||||
func (d *Daemon) ensureExporterForTest() *metrics.Exporter {
|
||||
if d.exporter == nil {
|
||||
d.exporter = metrics.New()
|
||||
}
|
||||
return d.exporter
|
||||
}
|
||||
|
||||
131
internal/sshutil/repair_test.go
Normal file
131
internal/sshutil/repair_test.go
Normal file
@ -0,0 +1,131 @@
|
||||
package sshutil
|
||||
|
||||
import (
	"context"
	"errors"
	"io"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"testing"
)
|
||||
|
||||
// TestShouldAttemptKnownHostsRepairFalseWithoutError runs one orchestration or CLI step.
|
||||
// Signature: TestShouldAttemptKnownHostsRepairFalseWithoutError(t *testing.T).
|
||||
// Why: ensures repair logic does not trigger when command succeeded.
|
||||
func TestShouldAttemptKnownHostsRepairFalseWithoutError(t *testing.T) {
|
||||
if ShouldAttemptKnownHostsRepair("ok", nil) {
|
||||
t.Fatalf("expected false when no error exists")
|
||||
}
|
||||
}
|
||||
|
||||
// TestIsHostKeyErrorRequiresErr runs one orchestration or CLI step.
|
||||
// Signature: TestIsHostKeyErrorRequiresErr(t *testing.T).
|
||||
// Why: covers guard branch that skips marker parsing when err is nil.
|
||||
func TestIsHostKeyErrorRequiresErr(t *testing.T) {
|
||||
if IsHostKeyError("REMOTE HOST IDENTIFICATION HAS CHANGED", nil) {
|
||||
t.Fatalf("expected false when err is nil")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRepairKnownHostsRemovesEntries runs one orchestration or CLI step.
|
||||
// Signature: TestRepairKnownHostsRemovesEntries(t *testing.T).
|
||||
// Why: validates known_hosts repair path actually removes target entries.
|
||||
func TestRepairKnownHostsRemovesEntries(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
knownHosts := filepath.Join(tmp, "known_hosts")
|
||||
content := strings.Join([]string{
|
||||
"titan-0a ssh-ed25519 AAAATESTKEYONE",
|
||||
"[titan-0a]:2277 ssh-ed25519 AAAATESTKEYTWO",
|
||||
"titan-0b ssh-ed25519 AAAATESTKEYTHREE",
|
||||
"",
|
||||
}, "\n")
|
||||
if err := os.WriteFile(knownHosts, []byte(content), 0o600); err != nil {
|
||||
t.Fatalf("write known_hosts: %v", err)
|
||||
}
|
||||
|
||||
RepairKnownHosts(context.Background(), log.New(io.Discard, "", 0), []string{knownHosts}, []string{"titan-0a", "titan-0a", ""}, 2277)
|
||||
|
||||
b, err := os.ReadFile(knownHosts)
|
||||
if err != nil {
|
||||
t.Fatalf("read known_hosts: %v", err)
|
||||
}
|
||||
got := string(b)
|
||||
if strings.Contains(got, "titan-0a") {
|
||||
t.Fatalf("expected titan-0a entries removed, got:\n%s", got)
|
||||
}
|
||||
if !strings.Contains(got, "titan-0b") {
|
||||
t.Fatalf("expected unrelated host to remain, got:\n%s", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRepairKnownHostsNoSshKeygen runs one orchestration or CLI step.
|
||||
// Signature: TestRepairKnownHostsNoSshKeygen(t *testing.T).
|
||||
// Why: covers early-return branch when ssh-keygen is unavailable.
|
||||
func TestRepairKnownHostsNoSshKeygen(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
t.Setenv("PATH", tmp)
|
||||
RepairKnownHosts(context.Background(), log.New(io.Discard, "", 0), []string{"/tmp/does-not-matter"}, []string{"titan-0a"}, 2277)
|
||||
}
|
||||
|
||||
// TestRestoreOwnershipNoopOnMissing runs one orchestration or CLI step.
|
||||
// Signature: TestRestoreOwnershipNoopOnMissing(t *testing.T).
|
||||
// Why: covers missing-file branch in ownership restoration helper.
|
||||
func TestRestoreOwnershipNoopOnMissing(t *testing.T) {
|
||||
restoreOwnership(filepath.Join(t.TempDir(), "missing"), "", -1, -1, 0)
|
||||
}
|
||||
|
||||
// TestCaptureOwnershipMissingFile runs one orchestration or CLI step.
|
||||
// Signature: TestCaptureOwnershipMissingFile(t *testing.T).
|
||||
// Why: covers missing-path branch in ownership capture helper.
|
||||
func TestCaptureOwnershipMissingFile(t *testing.T) {
|
||||
uid, gid, mode := captureOwnership(filepath.Join(t.TempDir(), "missing"))
|
||||
if uid != -1 || gid != -1 || mode != 0 {
|
||||
t.Fatalf("unexpected ownership for missing file uid=%d gid=%d mode=%v", uid, gid, mode)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRemoveKnownHostEntryAbsentDoesNotFail runs one orchestration or CLI step.
|
||||
// Signature: TestRemoveKnownHostEntryAbsentDoesNotFail(t *testing.T).
|
||||
// Why: covers ssh-keygen "not found in" handling branch.
|
||||
func TestRemoveKnownHostEntryAbsentDoesNotFail(t *testing.T) {
|
||||
file := filepath.Join(t.TempDir(), "known_hosts")
|
||||
if err := os.WriteFile(file, []byte("titan-0b ssh-ed25519 AAAA\n"), 0o600); err != nil {
|
||||
t.Fatalf("write known_hosts: %v", err)
|
||||
}
|
||||
removeKnownHostEntry(context.Background(), log.New(io.Discard, "", 0), file, "titan-0a")
|
||||
b, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
t.Fatalf("read known_hosts after remove: %v", err)
|
||||
}
|
||||
if !strings.Contains(string(b), "titan-0b") {
|
||||
t.Fatalf("expected file content to remain for unrelated hosts")
|
||||
}
|
||||
}
|
||||
|
||||
// TestCaptureAndRestoreOwnershipRoundTrip runs one orchestration or CLI step.
|
||||
// Signature: TestCaptureAndRestoreOwnershipRoundTrip(t *testing.T).
|
||||
// Why: covers successful ownership/mode capture and restore path.
|
||||
func TestCaptureAndRestoreOwnershipRoundTrip(t *testing.T) {
|
||||
file := filepath.Join(t.TempDir(), "known_hosts")
|
||||
if err := os.WriteFile(file, []byte("titan-0b ssh-ed25519 AAAA\n"), 0o600); err != nil {
|
||||
t.Fatalf("write file: %v", err)
|
||||
}
|
||||
uid, gid, mode := captureOwnership(file)
|
||||
restoreOwnership(file, "", uid, gid, mode)
|
||||
info, err := os.Stat(file)
|
||||
if err != nil {
|
||||
t.Fatalf("stat restored file: %v", err)
|
||||
}
|
||||
if info.Mode().Perm() != mode {
|
||||
t.Fatalf("expected mode %v, got %v", mode, info.Mode().Perm())
|
||||
}
|
||||
}
|
||||
|
||||
// TestLogfNoLoggerDoesNotPanic runs one orchestration or CLI step.
|
||||
// Signature: TestLogfNoLoggerDoesNotPanic(t *testing.T).
|
||||
// Why: covers no-op logger branch.
|
||||
func TestLogfNoLoggerDoesNotPanic(t *testing.T) {
|
||||
logf(nil, "message %v", errors.New("x"))
|
||||
}
|
||||
@ -19,6 +19,9 @@ var hostKeyErrorMarkers = []string{
|
||||
"possible dns spoofing detected",
|
||||
}
|
||||
|
||||
// IsHostKeyError reports whether output from a failed SSH command looks like a host-key verification problem; it returns false when err is nil.
|
||||
// Signature: IsHostKeyError(output string, err error) bool.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func IsHostKeyError(output string, err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
@ -35,6 +38,9 @@ func IsHostKeyError(output string, err error) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// ShouldAttemptKnownHostsRepair reports whether a failed SSH command warrants a known_hosts repair attempt; host-key errors always qualify.
|
||||
// Signature: ShouldAttemptKnownHostsRepair(output string, err error) bool.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func ShouldAttemptKnownHostsRepair(output string, err error) bool {
|
||||
if IsHostKeyError(output, err) {
|
||||
return true
|
||||
@ -50,6 +56,9 @@ func ShouldAttemptKnownHostsRepair(output string, err error) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// KnownHostsFiles runs one orchestration or CLI step.
|
||||
// Signature: KnownHostsFiles(sshConfigFile, sshIdentityFile string) []string.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func KnownHostsFiles(sshConfigFile, sshIdentityFile string) []string {
|
||||
seen := map[string]struct{}{}
|
||||
add := func(path string) {
|
||||
@ -86,6 +95,9 @@ func KnownHostsFiles(sshConfigFile, sshIdentityFile string) []string {
|
||||
return out
|
||||
}
|
||||
|
||||
// RepairKnownHosts cleans entries for the given hosts out of the listed known_hosts files; it logs a warning and returns when ssh-keygen is not on PATH.
|
||||
// Signature: RepairKnownHosts(ctx context.Context, logger *log.Logger, knownHostsFiles []string, hosts []string, port int).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func RepairKnownHosts(ctx context.Context, logger *log.Logger, knownHostsFiles []string, hosts []string, port int) {
|
||||
if _, err := exec.LookPath("ssh-keygen"); err != nil {
|
||||
logf(logger, "warning: cannot repair known_hosts (ssh-keygen missing): %v", err)
|
||||
@ -134,6 +146,9 @@ func RepairKnownHosts(ctx context.Context, logger *log.Logger, knownHostsFiles [
|
||||
}
|
||||
}
|
||||
|
||||
// removeKnownHostEntry runs one orchestration or CLI step.
|
||||
// Signature: removeKnownHostEntry(ctx context.Context, logger *log.Logger, file string, entry string).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func removeKnownHostEntry(ctx context.Context, logger *log.Logger, file string, entry string) {
|
||||
uid, gid, mode := captureOwnership(file)
|
||||
|
||||
@ -155,6 +170,9 @@ func removeKnownHostEntry(ctx context.Context, logger *log.Logger, file string,
|
||||
logf(logger, "warning: known_hosts cleanup failed for %s in %s: %v: %s", entry, file, err, strings.TrimSpace(string(out)))
|
||||
}
|
||||
|
||||
// captureOwnership returns the owning uid, gid, and permission bits of path as reported by os.Stat.
|
||||
// Signature: captureOwnership(path string) (int, int, os.FileMode).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func captureOwnership(path string) (int, int, os.FileMode) {
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
@ -167,6 +185,9 @@ func captureOwnership(path string) (int, int, os.FileMode) {
|
||||
return int(st.Uid), int(st.Gid), info.Mode().Perm()
|
||||
}
|
||||
|
||||
// restoreOwnership reapplies previously captured ownership and mode to path; it returns immediately when uid or gid is negative.
|
||||
// Signature: restoreOwnership(path string, backupPath string, uid int, gid int, mode os.FileMode).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func restoreOwnership(path string, backupPath string, uid int, gid int, mode os.FileMode) {
|
||||
if uid < 0 || gid < 0 {
|
||||
return
|
||||
@ -185,6 +206,9 @@ func restoreOwnership(path string, backupPath string, uid int, gid int, mode os.
|
||||
}
|
||||
}
|
||||
|
||||
// logf forwards the formatted message to logger.Printf when logger is non-nil.
|
||||
// Signature: logf(logger *log.Logger, format string, args ...any).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func logf(logger *log.Logger, format string, args ...any) {
|
||||
if logger != nil {
|
||||
logger.Printf(format, args...)
|
||||
|
||||
@ -6,6 +6,9 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestIsHostKeyErrorDetectsMismatch runs one orchestration or CLI step.
|
||||
// Signature: TestIsHostKeyErrorDetectsMismatch(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestIsHostKeyErrorDetectsMismatch(t *testing.T) {
|
||||
out := "WARNING: REMOTE HOST IDENTIFICATION HAS CHANGED!"
|
||||
if !IsHostKeyError(out, errors.New("ssh failed")) {
|
||||
@ -13,6 +16,9 @@ func TestIsHostKeyErrorDetectsMismatch(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestIsHostKeyErrorIgnoresGenericFailures runs one orchestration or CLI step.
|
||||
// Signature: TestIsHostKeyErrorIgnoresGenericFailures(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestIsHostKeyErrorIgnoresGenericFailures(t *testing.T) {
|
||||
out := "connection timed out"
|
||||
if IsHostKeyError(out, errors.New("ssh failed")) {
|
||||
@ -20,12 +26,18 @@ func TestIsHostKeyErrorIgnoresGenericFailures(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestShouldAttemptKnownHostsRepairOnSilent255 runs one orchestration or CLI step.
|
||||
// Signature: TestShouldAttemptKnownHostsRepairOnSilent255(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestShouldAttemptKnownHostsRepairOnSilent255(t *testing.T) {
|
||||
if !ShouldAttemptKnownHostsRepair("", errors.New("ssh ...: exit status 255")) {
|
||||
t.Fatalf("expected silent exit status 255 to trigger known_hosts repair")
|
||||
}
|
||||
}
|
||||
|
||||
// TestKnownHostsFilesIncludesDerivedPaths runs one orchestration or CLI step.
|
||||
// Signature: TestKnownHostsFilesIncludesDerivedPaths(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestKnownHostsFilesIncludesDerivedPaths(t *testing.T) {
|
||||
configFile := "/home/atlas/.ssh/config"
|
||||
identityFile := "/home/tethys/.ssh/id_ed25519"
|
||||
|
||||
@ -7,6 +7,9 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// quarantineCorruptFile runs one orchestration or CLI step.
|
||||
// Signature: quarantineCorruptFile(path string, payload []byte, replacement []byte, mode os.FileMode) error.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func quarantineCorruptFile(path string, payload []byte, replacement []byte, mode os.FileMode) error {
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o750); err != nil {
|
||||
return err
|
||||
|
||||
46
internal/state/heal_test.go
Normal file
46
internal/state/heal_test.go
Normal file
@ -0,0 +1,46 @@
|
||||
package state
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestQuarantineCorruptFileWritesBackupAndReplacement runs one orchestration or CLI step.
|
||||
// Signature: TestQuarantineCorruptFileWritesBackupAndReplacement(t *testing.T).
|
||||
// Why: covers successful corruption quarantine flow.
|
||||
func TestQuarantineCorruptFileWritesBackupAndReplacement(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "intent.json")
|
||||
if err := quarantineCorruptFile(path, []byte("{bad"), []byte("{}\n"), 0o640); err != nil {
|
||||
t.Fatalf("quarantine failed: %v", err)
|
||||
}
|
||||
b, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatalf("read replacement: %v", err)
|
||||
}
|
||||
if string(b) != "{}\n" {
|
||||
t.Fatalf("unexpected replacement payload: %q", string(b))
|
||||
}
|
||||
}
|
||||
|
||||
// TestQuarantineCorruptFileFailsOnEmptyPath runs one orchestration or CLI step.
|
||||
// Signature: TestQuarantineCorruptFileFailsOnEmptyPath(t *testing.T).
|
||||
// Why: covers mkdir failure branch for invalid destination path.
|
||||
func TestQuarantineCorruptFileFailsOnEmptyPath(t *testing.T) {
|
||||
if err := quarantineCorruptFile("", []byte("x"), []byte("y"), 0o640); err == nil {
|
||||
t.Fatalf("expected failure for empty path")
|
||||
}
|
||||
}
|
||||
|
||||
// TestQuarantineCorruptFileFailsWhenReplacementIsDirectory runs one orchestration or CLI step.
|
||||
// Signature: TestQuarantineCorruptFileFailsWhenReplacementIsDirectory(t *testing.T).
|
||||
// Why: covers replacement-write error branch after backup succeeds.
|
||||
func TestQuarantineCorruptFileFailsWhenReplacementIsDirectory(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "intent-dir")
|
||||
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||
t.Fatalf("mkdir replacement dir: %v", err)
|
||||
}
|
||||
if err := quarantineCorruptFile(path, []byte("{bad"), []byte("{}\n"), 0o640); err == nil {
|
||||
t.Fatalf("expected write replacement failure when path is a directory")
|
||||
}
|
||||
}
|
||||
@ -22,6 +22,9 @@ type Intent struct {
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
}
|
||||
|
||||
// ReadIntent runs one orchestration or CLI step.
|
||||
// Signature: ReadIntent(path string) (Intent, error).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func ReadIntent(path string) (Intent, error) {
|
||||
b, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
@ -43,6 +46,9 @@ func ReadIntent(path string) (Intent, error) {
|
||||
return in, nil
|
||||
}
|
||||
|
||||
// WriteIntent runs one orchestration or CLI step.
|
||||
// Signature: WriteIntent(path string, in Intent) error.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func WriteIntent(path string, in Intent) error {
|
||||
if in.UpdatedAt.IsZero() {
|
||||
in.UpdatedAt = time.Now().UTC()
|
||||
@ -50,13 +56,13 @@ func WriteIntent(path string, in Intent) error {
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o750); err != nil {
|
||||
return err
|
||||
}
|
||||
b, err := json.MarshalIndent(in, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
b, _ := json.MarshalIndent(in, "", " ")
|
||||
return os.WriteFile(path, b, 0o640)
|
||||
}
|
||||
|
||||
// MustWriteIntent runs one orchestration or CLI step.
|
||||
// Signature: MustWriteIntent(path string, state string, reason string, source string) error.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func MustWriteIntent(path string, state string, reason string, source string) error {
|
||||
switch state {
|
||||
case IntentNormal, IntentStartupInProgress, IntentShuttingDown, IntentShutdownComplete:
|
||||
|
||||
135
internal/state/intent_additional_test.go
Normal file
135
internal/state/intent_additional_test.go
Normal file
@ -0,0 +1,135 @@
|
||||
package state
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestReadIntentHandlesMissingAndEmpty runs one orchestration or CLI step.
|
||||
// Signature: TestReadIntentHandlesMissingAndEmpty(t *testing.T).
|
||||
// Why: covers nil-state branches for missing and empty intent files.
|
||||
func TestReadIntentHandlesMissingAndEmpty(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "intent.json")
|
||||
in, err := ReadIntent(path)
|
||||
if err != nil {
|
||||
t.Fatalf("read missing intent: %v", err)
|
||||
}
|
||||
if in.State != "" {
|
||||
t.Fatalf("expected empty state for missing file, got %q", in.State)
|
||||
}
|
||||
if err := os.WriteFile(path, nil, 0o640); err != nil {
|
||||
t.Fatalf("write empty intent file: %v", err)
|
||||
}
|
||||
in, err = ReadIntent(path)
|
||||
if err != nil {
|
||||
t.Fatalf("read empty intent file: %v", err)
|
||||
}
|
||||
if in.State != "" {
|
||||
t.Fatalf("expected empty state for empty file, got %q", in.State)
|
||||
}
|
||||
}
|
||||
|
||||
// TestWriteIntentSetsUpdatedAtWhenZero runs one orchestration or CLI step.
|
||||
// Signature: TestWriteIntentSetsUpdatedAtWhenZero(t *testing.T).
|
||||
// Why: verifies write helper auto-populates timestamp for callers.
|
||||
func TestWriteIntentSetsUpdatedAtWhenZero(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "intent.json")
|
||||
if err := WriteIntent(path, Intent{State: IntentNormal, Reason: "unit", Source: "test"}); err != nil {
|
||||
t.Fatalf("write intent: %v", err)
|
||||
}
|
||||
in, err := ReadIntent(path)
|
||||
if err != nil {
|
||||
t.Fatalf("read intent: %v", err)
|
||||
}
|
||||
if in.UpdatedAt.IsZero() {
|
||||
t.Fatalf("expected non-zero updated_at")
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseIntentOutputErrorsOnBadUpdatedAt runs one orchestration or CLI step.
|
||||
// Signature: TestParseIntentOutputErrorsOnBadUpdatedAt(t *testing.T).
|
||||
// Why: covers parser error branch for malformed timestamp values.
|
||||
func TestParseIntentOutputErrorsOnBadUpdatedAt(t *testing.T) {
|
||||
raw := `intent=normal reason="x" source=y updated_at=not-a-time`
|
||||
if _, err := ParseIntentOutput(raw); err == nil {
|
||||
t.Fatalf("expected updated_at parse error")
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseIntentOutputErrorsWhenMissingToken runs one orchestration or CLI step.
|
||||
// Signature: TestParseIntentOutputErrorsWhenMissingToken(t *testing.T).
|
||||
// Why: covers parser terminal error when intent token is absent.
|
||||
func TestParseIntentOutputErrorsWhenMissingToken(t *testing.T) {
|
||||
if _, err := ParseIntentOutput("no intent line here"); err == nil {
|
||||
t.Fatalf("expected parse failure without intent token")
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseIntentOutputWithoutReasonOrSource runs one orchestration or CLI step.
|
||||
// Signature: TestParseIntentOutputWithoutReasonOrSource(t *testing.T).
|
||||
// Why: covers parser branch where optional fields are omitted.
|
||||
func TestParseIntentOutputWithoutReasonOrSource(t *testing.T) {
|
||||
in, err := ParseIntentOutput("intent=shutdown_complete")
|
||||
if err != nil {
|
||||
t.Fatalf("parse intent output: %v", err)
|
||||
}
|
||||
if in.State != IntentShutdownComplete {
|
||||
t.Fatalf("expected shutdown_complete, got %q", in.State)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMustWriteIntentPersistsProvidedTimestampType runs one orchestration or CLI step.
|
||||
// Signature: TestMustWriteIntentPersistsProvidedTimestampType(t *testing.T).
|
||||
// Why: sanity check that written timestamps round-trip RFC3339 parsing.
|
||||
func TestMustWriteIntentPersistsProvidedTimestampType(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "intent.json")
|
||||
if err := MustWriteIntent(path, IntentNormal, "ok", "test"); err != nil {
|
||||
t.Fatalf("must write intent: %v", err)
|
||||
}
|
||||
in, err := ReadIntent(path)
|
||||
if err != nil {
|
||||
t.Fatalf("read intent: %v", err)
|
||||
}
|
||||
if time.Since(in.UpdatedAt) > time.Minute {
|
||||
t.Fatalf("expected recent timestamp, got %s", in.UpdatedAt)
|
||||
}
|
||||
}
|
||||
|
||||
// TestWriteIntentFailsWhenParentIsFile runs one orchestration or CLI step.
|
||||
// Signature: TestWriteIntentFailsWhenParentIsFile(t *testing.T).
|
||||
// Why: covers mkdir failure branch when parent path is not a directory.
|
||||
func TestWriteIntentFailsWhenParentIsFile(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
parent := filepath.Join(tmp, "not-a-dir")
|
||||
if err := os.WriteFile(parent, []byte("x"), 0o600); err != nil {
|
||||
t.Fatalf("write parent file: %v", err)
|
||||
}
|
||||
err := WriteIntent(filepath.Join(parent, "intent.json"), Intent{State: IntentNormal})
|
||||
if err == nil {
|
||||
t.Fatalf("expected write failure for non-directory parent")
|
||||
}
|
||||
}
|
||||
|
||||
// TestReadIntentFailsOnPermissionError runs one orchestration or CLI step.
|
||||
// Signature: TestReadIntentFailsOnPermissionError(t *testing.T).
|
||||
// Why: covers read error branch distinct from not-exist and empty-file handling.
|
||||
func TestReadIntentFailsOnPermissionError(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "intent.json")
|
||||
if err := os.WriteFile(path, []byte(`{"state":"normal"}`), 0o640); err != nil {
|
||||
t.Fatalf("write intent file: %v", err)
|
||||
}
|
||||
if err := os.Chmod(path, 0o000); err != nil {
|
||||
t.Fatalf("chmod intent file: %v", err)
|
||||
}
|
||||
defer os.Chmod(path, 0o640)
|
||||
_, err := ReadIntent(path)
|
||||
if err == nil {
|
||||
t.Fatalf("expected permission error")
|
||||
}
|
||||
if strings.Contains(strings.ToLower(err.Error()), "not exist") {
|
||||
t.Fatalf("expected permission-related error, got: %v", err)
|
||||
}
|
||||
}
|
||||
@ -7,6 +7,10 @@ import (
|
||||
)
|
||||
|
||||
// ParseIntentOutput parses `ananke intent` CLI output from local/remote commands.
|
||||
// Signature: ParseIntentOutput(raw string) (Intent, error)
|
||||
// Why: Startup/shutdown coordination depends on intent state being interpreted
|
||||
// consistently from command output so remote peers and local orchestration can
|
||||
// share one durable control-plane signal.
|
||||
func ParseIntentOutput(raw string) (Intent, error) {
|
||||
for _, line := range strings.Split(raw, "\n") {
|
||||
line = strings.TrimSpace(line)
|
||||
@ -19,9 +23,6 @@ func ParseIntentOutput(raw string) (Intent, error) {
|
||||
}
|
||||
payload := strings.TrimSpace(line[idx:])
|
||||
fields := strings.Fields(payload)
|
||||
if len(fields) == 0 || !strings.HasPrefix(fields[0], "intent=") {
|
||||
continue
|
||||
}
|
||||
stateValue := strings.TrimSpace(strings.TrimPrefix(fields[0], "intent="))
|
||||
if stateValue == "" || stateValue == "none" {
|
||||
return Intent{}, nil
|
||||
@ -29,12 +30,10 @@ func ParseIntentOutput(raw string) (Intent, error) {
|
||||
in := Intent{State: stateValue}
|
||||
if strings.Contains(payload, `reason="`) {
|
||||
parts := strings.SplitN(payload, `reason="`, 2)
|
||||
if len(parts) == 2 {
|
||||
if end := strings.Index(parts[1], `"`); end >= 0 {
|
||||
in.Reason = parts[1][:end]
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, field := range fields[1:] {
|
||||
if strings.HasPrefix(field, "source=") {
|
||||
in.Source = strings.TrimSpace(strings.TrimPrefix(field, "source="))
|
||||
|
||||
@ -6,6 +6,9 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestWriteReadIntentRoundTrip runs one orchestration or CLI step.
|
||||
// Signature: TestWriteReadIntentRoundTrip(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestWriteReadIntentRoundTrip(t *testing.T) {
|
||||
p := filepath.Join(t.TempDir(), "intent.json")
|
||||
if err := MustWriteIntent(p, IntentShuttingDown, "ups-threshold", "daemon"); err != nil {
|
||||
@ -23,6 +26,9 @@ func TestWriteReadIntentRoundTrip(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestMustWriteIntentRejectsUnknownState runs one orchestration or CLI step.
|
||||
// Signature: TestMustWriteIntentRejectsUnknownState(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestMustWriteIntentRejectsUnknownState(t *testing.T) {
|
||||
p := filepath.Join(t.TempDir(), "intent.json")
|
||||
if err := MustWriteIntent(p, "weird", "x", "y"); err == nil {
|
||||
@ -30,6 +36,9 @@ func TestMustWriteIntentRejectsUnknownState(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestReadIntentAutoHealsCorruptJSON runs one orchestration or CLI step.
|
||||
// Signature: TestReadIntentAutoHealsCorruptJSON(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestReadIntentAutoHealsCorruptJSON(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
p := filepath.Join(dir, "intent.json")
|
||||
@ -60,6 +69,9 @@ func TestReadIntentAutoHealsCorruptJSON(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseIntentOutputParsesStructuredLine runs one orchestration or CLI step.
|
||||
// Signature: TestParseIntentOutputParsesStructuredLine(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestParseIntentOutputParsesStructuredLine(t *testing.T) {
|
||||
raw := `[ananke] 2026/04/05 11:24:49 intent=normal reason="guard-test-clear-2" source=drill updated_at=2026-04-05T16:24:33Z`
|
||||
in, err := ParseIntentOutput(raw)
|
||||
@ -80,6 +92,9 @@ func TestParseIntentOutputParsesStructuredLine(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseIntentOutputHandlesNone runs one orchestration or CLI step.
|
||||
// Signature: TestParseIntentOutputHandlesNone(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestParseIntentOutputHandlesNone(t *testing.T) {
|
||||
in, err := ParseIntentOutput(`[ananke] 2026/04/05 11:24:49 intent=none`)
|
||||
if err != nil {
|
||||
|
||||
@ -32,10 +32,16 @@ type Store struct {
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
// New runs one orchestration or CLI step.
|
||||
// Signature: New(path string) *Store.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func New(path string) *Store {
|
||||
return &Store{path: path}
|
||||
}
|
||||
|
||||
// EnsureDir rejects an empty dir and otherwise creates it (with any missing parents) using mode 0o750.
|
||||
// Signature: EnsureDir(dir string) error.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func EnsureDir(dir string) error {
|
||||
if dir == "" {
|
||||
return fmt.Errorf("state dir must not be empty")
|
||||
@ -43,6 +49,9 @@ func EnsureDir(dir string) error {
|
||||
return os.MkdirAll(dir, 0o750)
|
||||
}
|
||||
|
||||
// AcquireLock runs one orchestration or CLI step.
|
||||
// Signature: AcquireLock(path string) (func(), error).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func AcquireLock(path string) (func(), error) {
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o750); err != nil {
|
||||
return nil, err
|
||||
@ -85,6 +94,9 @@ func AcquireLock(path string) (func(), error) {
|
||||
return unlock, nil
|
||||
}
|
||||
|
||||
// staleLock runs one orchestration or CLI step.
|
||||
// Signature: staleLock(path string) (bool, error).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func staleLock(path string) (bool, error) {
|
||||
b, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
@ -99,6 +111,9 @@ func staleLock(path string) (bool, error) {
|
||||
line = strings.TrimSpace(line)
|
||||
if strings.HasPrefix(line, "pid=") {
|
||||
v := strings.TrimPrefix(line, "pid=")
|
||||
if fields := strings.Fields(v); len(fields) > 0 {
|
||||
v = fields[0]
|
||||
}
|
||||
parsed, parseErr := strconv.Atoi(v)
|
||||
if parseErr != nil {
|
||||
return true, nil
|
||||
@ -118,6 +133,9 @@ func staleLock(path string) (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Append runs one orchestration or CLI step.
|
||||
// Signature: (s *Store) Append(record RunRecord) error.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (s *Store) Append(record RunRecord) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
@ -133,19 +151,22 @@ func (s *Store) Append(record RunRecord) error {
|
||||
if err := os.MkdirAll(filepath.Dir(s.path), 0o750); err != nil {
|
||||
return err
|
||||
}
|
||||
b, err := json.MarshalIndent(records, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
b, _ := json.MarshalIndent(records, "", " ")
|
||||
return os.WriteFile(s.path, b, 0o640)
|
||||
}
|
||||
|
||||
// Load runs one orchestration or CLI step.
|
||||
// Signature: (s *Store) Load() ([]RunRecord, error).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (s *Store) Load() ([]RunRecord, error) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
return s.loadUnlocked()
|
||||
}
|
||||
|
||||
// loadUnlocked runs one orchestration or CLI step.
|
||||
// Signature: (s *Store) loadUnlocked() ([]RunRecord, error).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (s *Store) loadUnlocked() ([]RunRecord, error) {
|
||||
b, err := os.ReadFile(s.path)
|
||||
if err != nil {
|
||||
@ -167,18 +188,30 @@ func (s *Store) loadUnlocked() ([]RunRecord, error) {
|
||||
return records, nil
|
||||
}
|
||||
|
||||
// ShutdownP95 runs one orchestration or CLI step.
|
||||
// Signature: (s *Store) ShutdownP95(defaultSeconds int) int.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (s *Store) ShutdownP95(defaultSeconds int) int {
|
||||
return s.shutdownP95(defaultSeconds, 1, nil)
|
||||
}
|
||||
|
||||
// ShutdownP95WithMinSamples runs one orchestration or CLI step.
|
||||
// Signature: (s *Store) ShutdownP95WithMinSamples(defaultSeconds int, minSamples int) int.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (s *Store) ShutdownP95WithMinSamples(defaultSeconds int, minSamples int) int {
|
||||
return s.shutdownP95(defaultSeconds, minSamples, nil)
|
||||
}
|
||||
|
||||
// ShutdownP95ByReasonPrefix runs one orchestration or CLI step.
|
||||
// Signature: (s *Store) ShutdownP95ByReasonPrefix(defaultSeconds int, minSamples int, reasonPrefixes []string) int.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (s *Store) ShutdownP95ByReasonPrefix(defaultSeconds int, minSamples int, reasonPrefixes []string) int {
|
||||
return s.shutdownP95(defaultSeconds, minSamples, reasonPrefixes)
|
||||
}
|
||||
|
||||
// shutdownP95 runs one orchestration or CLI step.
|
||||
// Signature: (s *Store) shutdownP95(defaultSeconds int, minSamples int, reasonPrefixes []string) int.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (s *Store) shutdownP95(defaultSeconds int, minSamples int, reasonPrefixes []string) int {
|
||||
if minSamples <= 0 {
|
||||
minSamples = 1
|
||||
@ -217,14 +250,5 @@ func (s *Store) shutdownP95(defaultSeconds int, minSamples int, reasonPrefixes [
|
||||
}
|
||||
sort.Ints(d)
|
||||
idx := int(math.Ceil(0.95*float64(len(d)))) - 1
|
||||
if idx < 0 {
|
||||
idx = 0
|
||||
}
|
||||
if idx >= len(d) {
|
||||
idx = len(d) - 1
|
||||
}
|
||||
if d[idx] <= 0 {
|
||||
return defaultSeconds
|
||||
}
|
||||
return d[idx]
|
||||
}
|
||||
|
||||
156
internal/state/store_additional_test.go
Normal file
156
internal/state/store_additional_test.go
Normal file
@ -0,0 +1,156 @@
|
||||
package state
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestEnsureDirRejectsEmpty runs one orchestration or CLI step.
|
||||
// Signature: TestEnsureDirRejectsEmpty(t *testing.T).
|
||||
// Why: covers explicit guard branch for empty state directory inputs.
|
||||
func TestEnsureDirRejectsEmpty(t *testing.T) {
|
||||
if err := EnsureDir(""); err == nil {
|
||||
t.Fatalf("expected empty directory error")
|
||||
}
|
||||
}
|
||||
|
||||
// TestStoreAppendTrimToMaxRecords runs one orchestration or CLI step.
|
||||
// Signature: TestStoreAppendTrimToMaxRecords(t *testing.T).
|
||||
// Why: covers retention branch that trims run history to the 200-record cap.
|
||||
func TestStoreAppendTrimToMaxRecords(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "runs.json")
|
||||
s := New(path)
|
||||
now := time.Now().UTC()
|
||||
for i := 0; i < 205; i++ {
|
||||
if err := s.Append(RunRecord{
|
||||
ID: "r-" + strconv.Itoa(i),
|
||||
Action: "shutdown",
|
||||
StartedAt: now,
|
||||
EndedAt: now,
|
||||
DurationSeconds: i + 1,
|
||||
Success: true,
|
||||
}); err != nil {
|
||||
t.Fatalf("append %d failed: %v", i, err)
|
||||
}
|
||||
}
|
||||
recs, err := s.Load()
|
||||
if err != nil {
|
||||
t.Fatalf("load failed: %v", err)
|
||||
}
|
||||
if len(recs) != 200 {
|
||||
t.Fatalf("expected trim to 200 records, got %d", len(recs))
|
||||
}
|
||||
}
|
||||
|
||||
// TestStoreLoadHandlesEmptyFile runs one orchestration or CLI step.
|
||||
// Signature: TestStoreLoadHandlesEmptyFile(t *testing.T).
|
||||
// Why: covers load branch for empty existing run-history file.
|
||||
func TestStoreLoadHandlesEmptyFile(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "runs.json")
|
||||
if err := os.WriteFile(path, nil, 0o640); err != nil {
|
||||
t.Fatalf("write empty file: %v", err)
|
||||
}
|
||||
recs, err := New(path).Load()
|
||||
if err != nil {
|
||||
t.Fatalf("load empty file: %v", err)
|
||||
}
|
||||
if len(recs) != 0 {
|
||||
t.Fatalf("expected no records, got %d", len(recs))
|
||||
}
|
||||
}
|
||||
|
||||
// TestStoreLoadReturnsErrorOnUnhealableDecode runs one orchestration or CLI step.
|
||||
// Signature: TestStoreLoadReturnsErrorOnUnhealableDecode(t *testing.T).
|
||||
// Why: covers decode failure path where replacement write itself can fail.
|
||||
func TestStoreLoadReturnsErrorOnUnhealableDecode(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "runs.json")
|
||||
if err := os.WriteFile(path, []byte("{bad-json"), 0o640); err != nil {
|
||||
t.Fatalf("write invalid file: %v", err)
|
||||
}
|
||||
// Make directory readonly so quarantine replacement cannot be written.
|
||||
if err := os.Chmod(dir, 0o500); err != nil {
|
||||
t.Fatalf("chmod dir readonly: %v", err)
|
||||
}
|
||||
defer os.Chmod(dir, 0o700)
|
||||
if _, err := New(path).Load(); err == nil {
|
||||
t.Fatalf("expected load failure when auto-heal cannot write replacement")
|
||||
}
|
||||
}
|
||||
|
||||
// TestShutdownP95FallsBackOnLoadError runs one orchestration or CLI step.
|
||||
// Signature: TestShutdownP95FallsBackOnLoadError(t *testing.T).
|
||||
// Why: covers load-error fallback branch in percentile helper.
|
||||
func TestShutdownP95FallsBackOnLoadError(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "runs.json")
|
||||
if err := os.WriteFile(path, []byte("{bad"), 0o640); err != nil {
|
||||
t.Fatalf("write invalid file: %v", err)
|
||||
}
|
||||
// Use impossible perms to force read failure.
|
||||
if err := os.Chmod(path, 0o000); err != nil {
|
||||
t.Fatalf("chmod file: %v", err)
|
||||
}
|
||||
defer os.Chmod(path, 0o640)
|
||||
if got := New(path).ShutdownP95(321); got != 321 {
|
||||
t.Fatalf("expected fallback default 321, got %d", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestShutdownP95ReturnsDefaultOnNonPositiveQuantile runs one orchestration or CLI step.
|
||||
// Signature: TestShutdownP95ReturnsDefaultOnNonPositiveQuantile(t *testing.T).
|
||||
// Why: covers branch where computed percentile record is non-positive.
|
||||
func TestShutdownP95ReturnsDefaultOnNonPositiveQuantile(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "runs.json")
|
||||
now := time.Now().UTC()
|
||||
records := []RunRecord{
|
||||
{Action: "shutdown", StartedAt: now, EndedAt: now, DurationSeconds: 0, Success: true},
|
||||
{Action: "shutdown", StartedAt: now, EndedAt: now, DurationSeconds: -1, Success: true},
|
||||
}
|
||||
b, err := json.Marshal(records)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal records: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(path, b, 0o640); err != nil {
|
||||
t.Fatalf("write records: %v", err)
|
||||
}
|
||||
if got := New(path).ShutdownP95WithMinSamples(777, 1); got != 777 {
|
||||
t.Fatalf("expected default 777, got %d", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStaleLockHelpers runs one orchestration or CLI step.
|
||||
// Signature: TestStaleLockHelpers(t *testing.T).
|
||||
// Why: covers stale-lock parser branches directly for reliability.
|
||||
func TestStaleLockHelpers(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
missing := filepath.Join(tmp, "missing.lock")
|
||||
stale, err := staleLock(missing)
|
||||
if err != nil || !stale {
|
||||
t.Fatalf("expected missing lock to be stale=true err=nil, got stale=%v err=%v", stale, err)
|
||||
}
|
||||
|
||||
invalidPID := filepath.Join(tmp, "invalid.lock")
|
||||
if err := os.WriteFile(invalidPID, []byte("pid=notanumber\n"), 0o600); err != nil {
|
||||
t.Fatalf("write invalid pid lock: %v", err)
|
||||
}
|
||||
stale, err = staleLock(invalidPID)
|
||||
if err != nil || !stale {
|
||||
t.Fatalf("expected invalid pid lock to be stale=true err=nil, got stale=%v err=%v", stale, err)
|
||||
}
|
||||
|
||||
active := filepath.Join(tmp, "active.lock")
|
||||
if err := os.WriteFile(active, []byte("pid="+strconv.Itoa(os.Getpid())+"\n"), 0o600); err != nil {
|
||||
t.Fatalf("write active lock: %v", err)
|
||||
}
|
||||
stale, err = staleLock(active)
|
||||
if err != nil {
|
||||
t.Fatalf("active staleLock error: %v", err)
|
||||
}
|
||||
if stale {
|
||||
t.Fatalf("expected active lock to report stale=false")
|
||||
}
|
||||
}
|
||||
@ -10,6 +10,9 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestAcquireLockLifecycle runs one orchestration or CLI step.
|
||||
// Signature: TestAcquireLockLifecycle(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestAcquireLockLifecycle(t *testing.T) {
|
||||
lockPath := filepath.Join(t.TempDir(), "ananke.lock")
|
||||
unlock, err := AcquireLock(lockPath)
|
||||
@ -25,6 +28,9 @@ func TestAcquireLockLifecycle(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestAcquireLockReclaimsStaleLock runs one orchestration or CLI step.
|
||||
// Signature: TestAcquireLockReclaimsStaleLock(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestAcquireLockReclaimsStaleLock(t *testing.T) {
|
||||
lockPath := filepath.Join(t.TempDir(), "ananke.lock")
|
||||
if err := os.WriteFile(lockPath, []byte("pid=999999\n"), 0o600); err != nil {
|
||||
@ -46,6 +52,9 @@ func TestAcquireLockReclaimsStaleLock(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestAcquireLockRejectsActiveLock runs one orchestration or CLI step.
|
||||
// Signature: TestAcquireLockRejectsActiveLock(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestAcquireLockRejectsActiveLock(t *testing.T) {
|
||||
lockPath := filepath.Join(t.TempDir(), "ananke.lock")
|
||||
active := "pid=" + strconv.Itoa(os.Getpid()) + "\n"
|
||||
@ -58,6 +67,9 @@ func TestAcquireLockRejectsActiveLock(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestStoreLoadAutoHealsCorruptJSON runs one orchestration or CLI step.
|
||||
// Signature: TestStoreLoadAutoHealsCorruptJSON(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestStoreLoadAutoHealsCorruptJSON(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
p := filepath.Join(dir, "runs.json")
|
||||
@ -88,6 +100,9 @@ func TestStoreLoadAutoHealsCorruptJSON(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestShutdownP95WithMinSamplesFallsBackWhenHistorySparse runs one orchestration or CLI step.
|
||||
// Signature: TestShutdownP95WithMinSamplesFallsBackWhenHistorySparse(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestShutdownP95WithMinSamplesFallsBackWhenHistorySparse(t *testing.T) {
|
||||
p := filepath.Join(t.TempDir(), "runs.json")
|
||||
records := []RunRecord{
|
||||
@ -115,6 +130,9 @@ func TestShutdownP95WithMinSamplesFallsBackWhenHistorySparse(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestShutdownP95ByReasonPrefixFiltersSamples runs one orchestration or CLI step.
|
||||
// Signature: TestShutdownP95ByReasonPrefixFiltersSamples(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestShutdownP95ByReasonPrefixFiltersSamples(t *testing.T) {
|
||||
p := filepath.Join(t.TempDir(), "runs.json")
|
||||
now := time.Now().UTC()
|
||||
@ -161,6 +179,9 @@ func TestShutdownP95ByReasonPrefixFiltersSamples(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestShutdownP95IgnoresDryRunSamples runs one orchestration or CLI step.
|
||||
// Signature: TestShutdownP95IgnoresDryRunSamples(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestShutdownP95IgnoresDryRunSamples(t *testing.T) {
|
||||
p := filepath.Join(t.TempDir(), "runs.json")
|
||||
now := time.Now().UTC()
|
||||
|
||||
10
internal/state/testhooks.go
Normal file
10
internal/state/testhooks.go
Normal file
@ -0,0 +1,10 @@
|
||||
package state
|
||||
|
||||
import "os"
|
||||
|
||||
// TestHookQuarantineCorruptFile runs one orchestration or CLI step.
|
||||
// Signature: TestHookQuarantineCorruptFile(path string, payload []byte, replacement []byte, mode os.FileMode) error.
|
||||
// Why: exposes corrupt-file healing internals to the top-level testing module without package-local tests.
|
||||
func TestHookQuarantineCorruptFile(path string, payload []byte, replacement []byte, mode os.FileMode) error {
|
||||
return quarantineCorruptFile(path, payload, replacement, mode)
|
||||
}
|
||||
@ -28,10 +28,16 @@ type NUTProvider struct {
|
||||
Target string
|
||||
}
|
||||
|
||||
// NewNUTProvider runs one orchestration or CLI step.
|
||||
// Signature: NewNUTProvider(target string) *NUTProvider.
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func NewNUTProvider(target string) *NUTProvider {
|
||||
return &NUTProvider{Target: target}
|
||||
}
|
||||
|
||||
// Read runs one orchestration or CLI step.
|
||||
// Signature: (p *NUTProvider) Read(ctx context.Context) (Sample, error).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func (p *NUTProvider) Read(ctx context.Context) (Sample, error) {
|
||||
if p.Target == "" {
|
||||
return Sample{}, fmt.Errorf("NUT target must not be empty")
|
||||
@ -44,6 +50,9 @@ func (p *NUTProvider) Read(ctx context.Context) (Sample, error) {
|
||||
return parseNUT(string(out))
|
||||
}
|
||||
|
||||
// parseNUT runs one orchestration or CLI step.
|
||||
// Signature: parseNUT(raw string) (Sample, error).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func parseNUT(raw string) (Sample, error) {
|
||||
kv := map[string]string{}
|
||||
s := bufio.NewScanner(strings.NewReader(raw))
|
||||
@ -106,6 +115,9 @@ func parseNUT(raw string) (Sample, error) {
|
||||
|
||||
var parseNumberCleaner = regexp.MustCompile(`[^0-9.+-]`)
|
||||
|
||||
// parseNumber runs one orchestration or CLI step.
|
||||
// Signature: parseNumber(raw string) (float64, bool).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func parseNumber(raw string) (float64, bool) {
|
||||
cleaned := strings.TrimSpace(parseNumberCleaner.ReplaceAllString(raw, ""))
|
||||
if cleaned == "" {
|
||||
|
||||
108
internal/ups/nut_additional_test.go
Normal file
108
internal/ups/nut_additional_test.go
Normal file
@ -0,0 +1,108 @@
|
||||
package ups
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestParseNUTRejectsMissingStatus runs one orchestration or CLI step.
|
||||
// Signature: TestParseNUTRejectsMissingStatus(t *testing.T).
|
||||
// Why: covers parser error path when mandatory status line is absent.
|
||||
func TestParseNUTRejectsMissingStatus(t *testing.T) {
|
||||
if _, err := parseNUT("battery.charge: 88"); err == nil {
|
||||
t.Fatalf("expected missing status error")
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseNUTParsesOptionalNumbers runs one orchestration or CLI step.
|
||||
// Signature: TestParseNUTParsesOptionalNumbers(t *testing.T).
|
||||
// Why: covers numeric extraction branches for charge/load/nominal fields.
|
||||
func TestParseNUTParsesOptionalNumbers(t *testing.T) {
|
||||
raw := strings.Join([]string{
|
||||
"ups.status: OB LB",
|
||||
"battery.runtime: 1024",
|
||||
"battery.charge: 71.5 Percent",
|
||||
"ups.load: 12.0 Percent",
|
||||
"ups.realpower.nominal: 900 W",
|
||||
"",
|
||||
}, "\n")
|
||||
s, err := parseNUT(raw)
|
||||
if err != nil {
|
||||
t.Fatalf("parseNUT failed: %v", err)
|
||||
}
|
||||
if !s.OnBattery || !s.LowBattery || s.RuntimeSeconds != 1024 {
|
||||
t.Fatalf("unexpected status parse: %+v", s)
|
||||
}
|
||||
if s.BatteryCharge != 71.5 || s.LoadPercent != 12 || s.NominalPowerW != 900 {
|
||||
t.Fatalf("unexpected numeric parse: %+v", s)
|
||||
}
|
||||
}
|
||||
|
||||
// TestNUTProviderReadViaPathShim runs one orchestration or CLI step.
|
||||
// Signature: TestNUTProviderReadViaPathShim(t *testing.T).
|
||||
// Why: covers provider command execution success path deterministically.
|
||||
func TestNUTProviderReadViaPathShim(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
upscPath := filepath.Join(tmp, "upsc")
|
||||
script := `#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
echo "ups.status: OL"
|
||||
echo "battery.runtime: 500"
|
||||
`
|
||||
if err := os.WriteFile(upscPath, []byte(script), 0o755); err != nil {
|
||||
t.Fatalf("write fake upsc: %v", err)
|
||||
}
|
||||
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||
|
||||
sample, err := NewNUTProvider("statera@localhost").Read(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("provider read failed: %v", err)
|
||||
}
|
||||
if sample.OnBattery {
|
||||
t.Fatalf("expected OL to report not-on-battery")
|
||||
}
|
||||
if sample.RuntimeSeconds != 500 {
|
||||
t.Fatalf("expected runtime 500, got %d", sample.RuntimeSeconds)
|
||||
}
|
||||
}
|
||||
|
||||
// TestNUTProviderReadRejectsEmptyTarget runs one orchestration or CLI step.
|
||||
// Signature: TestNUTProviderReadRejectsEmptyTarget(t *testing.T).
|
||||
// Why: covers provider guard for empty NUT target values.
|
||||
func TestNUTProviderReadRejectsEmptyTarget(t *testing.T) {
|
||||
if _, err := NewNUTProvider("").Read(context.Background()); err == nil {
|
||||
t.Fatalf("expected empty-target read error")
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseNumberRejectsInvalid runs one orchestration or CLI step.
|
||||
// Signature: TestParseNumberRejectsInvalid(t *testing.T).
|
||||
// Why: covers parseNumber false-return branch for invalid input.
|
||||
func TestParseNumberRejectsInvalid(t *testing.T) {
|
||||
if _, ok := parseNumber("not-a-number"); ok {
|
||||
t.Fatalf("expected parseNumber to reject invalid input")
|
||||
}
|
||||
}
|
||||
|
||||
// TestNUTProviderReadCommandFailure runs one orchestration or CLI step.
|
||||
// Signature: TestNUTProviderReadCommandFailure(t *testing.T).
|
||||
// Why: covers provider error propagation when upsc exits non-zero.
|
||||
func TestNUTProviderReadCommandFailure(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
upscPath := filepath.Join(tmp, "upsc")
|
||||
script := `#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
echo "upsc failed" >&2
|
||||
exit 2
|
||||
`
|
||||
if err := os.WriteFile(upscPath, []byte(script), 0o755); err != nil {
|
||||
t.Fatalf("write fake upsc: %v", err)
|
||||
}
|
||||
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
||||
if _, err := NewNUTProvider("pyrphoros@localhost").Read(context.Background()); err == nil {
|
||||
t.Fatalf("expected provider read error on upsc failure")
|
||||
}
|
||||
}
|
||||
@ -2,6 +2,9 @@ package ups
|
||||
|
||||
import "testing"
|
||||
|
||||
// TestParseNUT runs one orchestration or CLI step.
|
||||
// Signature: TestParseNUT(t *testing.T).
|
||||
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
|
||||
func TestParseNUT(t *testing.T) {
|
||||
raw := `battery.runtime: 384
|
||||
battery.charge: 72
|
||||
|
||||
@ -9,7 +9,7 @@ ANANKE_COORDINATOR_RELAY="${ANANKE_COORDINATOR_RELAY:-}"
|
||||
LOG_DIR="${ANANKE_DRILL_LOG_DIR:-/tmp/ananke-drills}"
|
||||
STARTUP_TIMEOUT_SECONDS="${ANANKE_DRILL_STARTUP_TIMEOUT_SECONDS:-1800}"
|
||||
SHUTDOWN_TIMEOUT_SECONDS="${ANANKE_DRILL_SHUTDOWN_TIMEOUT_SECONDS:-1800}"
|
||||
SHUTDOWN_CONFIG="${ANANKE_DRILL_SHUTDOWN_CONFIG:-/tmp/ananke-drill-no-poweroff.yaml}"
|
||||
SHUTDOWN_CONFIG="${ANANKE_DRILL_SHUTDOWN_CONFIG:-/tmp/ananke-drill-cluster-only.yaml}"
|
||||
STARTUP_RETRY_DELAY_SECONDS="${ANANKE_DRILL_STARTUP_RETRY_DELAY_SECONDS:-10}"
|
||||
STARTUP_RETRY_MAX="${ANANKE_DRILL_STARTUP_RETRY_MAX:-12}"
|
||||
EXECUTE=0
|
||||
@ -25,7 +25,7 @@ Drills:
|
||||
foundation-recovery Simulate vault/postgres/gitea outage and require layered restore.
|
||||
reconciliation-resume Simulate global Flux suspend + source-controller down and require resume.
|
||||
startup-intent-guard Assert startup is blocked when shutdown intent is active.
|
||||
controlled-cycle Run full shutdown->startup recovery cycle (uses no-poweroff config).
|
||||
controlled-cycle Run full shutdown->startup recovery cycle (uses cluster-only shutdown config).
|
||||
|
||||
Notes:
|
||||
- Drills are intentionally disruptive and are not part of regular `make test`.
|
||||
@ -405,7 +405,7 @@ run_drill_controlled_cycle() {
|
||||
run_coordinator_bash "[ -s '${SHUTDOWN_CONFIG}' ]" || die "shutdown drill config missing on coordinator: ${SHUTDOWN_CONFIG}"
|
||||
fi
|
||||
|
||||
log "running controlled shutdown cycle (poweroff disabled config)"
|
||||
log "running controlled shutdown cycle (cluster-only shutdown config)"
|
||||
run_ananke_shutdown "drill-controlled-cycle-shutdown"
|
||||
|
||||
log "running startup recovery cycle"
|
||||
|
||||
@ -9,6 +9,7 @@ fi
|
||||
REPO_URL="${ANANKE_REPO_URL:-ssh://git@scm.bstein.dev:2242/bstein/ananke.git}"
|
||||
BRANCH="${ANANKE_REPO_BRANCH:-main}"
|
||||
REPO_DIR="${ANANKE_REPO_DIR:-/opt/ananke}"
|
||||
HOST_SHORT="$(hostname -s 2>/dev/null || hostname)"
|
||||
|
||||
mkdir -p "$(dirname "${REPO_DIR}")"
|
||||
if [[ ! -d "${REPO_DIR}/.git" ]]; then
|
||||
@ -23,4 +24,16 @@ git checkout "${BRANCH}"
|
||||
git reset --hard "origin/${BRANCH}"
|
||||
|
||||
echo "[self-update] running installer"
|
||||
# Keep host configs aligned with tracked templates so startup/shutdown drills
|
||||
# always use the latest checklist and safety logic.
|
||||
if [[ -z "${ANANKE_FORCE_CONFIG_TEMPLATE:-}" ]]; then
|
||||
case "${HOST_SHORT}" in
|
||||
titan-db)
|
||||
export ANANKE_FORCE_CONFIG_TEMPLATE="coordinator"
|
||||
;;
|
||||
titan-24)
|
||||
export ANANKE_FORCE_CONFIG_TEMPLATE="peer"
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
"${REPO_DIR}/scripts/install.sh"
|
||||
|
||||
@ -22,6 +22,7 @@ NUT_PRODUCT_ID="${ANANKE_NUT_PRODUCT_ID:-0601}"
|
||||
NUT_MONITOR_USER="${ANANKE_NUT_MONITOR_USER:-monuser}"
|
||||
NUT_MONITOR_PASSWORD="${ANANKE_NUT_MONITOR_PASSWORD:-anankeupsmon}"
|
||||
FORCE_CONFIG_TEMPLATE="${ANANKE_FORCE_CONFIG_TEMPLATE:-}"
|
||||
ENFORCE_QUALITY_GATE="${ANANKE_ENFORCE_QUALITY_GATE:-1}"
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
@ -228,6 +229,28 @@ migrate_ananke_config() {
|
||||
echo "[install] added coordination.startup_guard_max_age_seconds=900"
|
||||
changed=1
|
||||
fi
|
||||
if grep -Eq '^[[:space:]]*poweroff_enabled:[[:space:]]*(true|false)' "${CONF_DIR}/ananke.yaml"; then
|
||||
sed -Ei \
|
||||
-e '/^[[:space:]]*poweroff_enabled:[[:space:]]*(true|false)/d' \
|
||||
-e '/^[[:space:]]*poweroff_delay_seconds:[[:space:]]*[0-9]+/d' \
|
||||
-e '/^[[:space:]]*poweroff_local_host:[[:space:]]*(true|false)/d' \
|
||||
-e '/^[[:space:]]*extra_poweroff_hosts:[[:space:]]*(\[\])?[[:space:]]*$/d' \
|
||||
"${CONF_DIR}/ananke.yaml"
|
||||
echo "[install] removed deprecated host-poweroff shutdown config keys"
|
||||
changed=1
|
||||
fi
|
||||
if grep -Eq '^ minimum_battery_percent:[[:space:]]*[0-9.]+' "${CONF_DIR}/ananke.yaml" \
|
||||
&& ! grep -Eq '^ require_node_inventory_reachability:[[:space:]]*(true|false)' "${CONF_DIR}/ananke.yaml"; then
|
||||
sed -Ei '/^ minimum_battery_percent:[[:space:]]*[0-9.]+/a\ require_node_inventory_reachability: true\n node_inventory_reachability_wait_seconds: 300\n node_inventory_reachability_poll_seconds: 5' "${CONF_DIR}/ananke.yaml"
|
||||
echo "[install] added startup node inventory reachability gate defaults"
|
||||
changed=1
|
||||
fi
|
||||
if grep -Eq '^state:[[:space:]]*$' "${CONF_DIR}/ananke.yaml" \
|
||||
&& ! grep -Eq '^ reports_dir:[[:space:]]*/var/lib/ananke/reports' "${CONF_DIR}/ananke.yaml"; then
|
||||
sed -Ei '/^ dir:[[:space:]]*\/var\/lib\/ananke$/a\ reports_dir: /var/lib/ananke/reports' "${CONF_DIR}/ananke.yaml"
|
||||
echo "[install] added state.reports_dir default"
|
||||
changed=1
|
||||
fi
|
||||
if ! grep -Eq '^ peer_hosts:' "${CONF_DIR}/ananke.yaml"; then
|
||||
if [[ "${role_hint}" == "peer" ]] && grep -Eq '^ forward_shutdown_host:[[:space:]]*[A-Za-z0-9._-]+' "${CONF_DIR}/ananke.yaml"; then
|
||||
local peer_host
|
||||
@ -838,6 +861,13 @@ EOF
|
||||
ensure_dependencies
|
||||
migrate_legacy_hecate_install
|
||||
|
||||
if [[ "${ENFORCE_QUALITY_GATE}" == "1" ]]; then
|
||||
echo "[install] running quality gate"
|
||||
"${REPO_DIR}/scripts/quality_gate.sh"
|
||||
else
|
||||
echo "[install] skipping quality gate (ANANKE_ENFORCE_QUALITY_GATE=${ENFORCE_QUALITY_GATE})"
|
||||
fi
|
||||
|
||||
echo "[install] building ananke"
|
||||
cd "${REPO_DIR}"
|
||||
mkdir -p dist
|
||||
@ -855,6 +885,7 @@ install -m 0755 dist/ananke "${BIN_DIR}/ananke"
|
||||
echo "[install] installing config + state dirs"
|
||||
install -d -m 0750 "${CONF_DIR}"
|
||||
install -d -m 0750 "${STATE_DIR}"
|
||||
install -d -m 0750 "${STATE_DIR}/reports"
|
||||
install -d -m 0755 "${LIB_DIR}"
|
||||
|
||||
if [[ -n "${FORCE_CONFIG_TEMPLATE}" ]]; then
|
||||
|
||||
17
scripts/lint.sh
Executable file
17
scripts/lint.sh
Executable file
@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env bash
# Static-analysis pass for the repository: `go vet` followed by `staticcheck`.
# staticcheck is installed on demand when it is not already on PATH.
#
# Environment:
#   ANANKE_STATICCHECK_VERSION  module version handed to `go install`
#                               (default: latest).
set -euo pipefail

REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "${REPO_DIR}"

STATICCHECK_VERSION="${ANANKE_STATICCHECK_VERSION:-latest}"

# Resolve the Go directories in plain assignments first: with
# `export PATH="$(cmd)..."` the statement's status is that of `export`, so a
# failing `go env` would slip past `set -e`.
go_path="$(go env GOPATH)"
go_bin="$(go env GOBIN)"
# `go install` writes to GOBIN when it is set and to GOPATH/bin otherwise, so
# both must be searchable for a freshly installed staticcheck to be found.
PATH="${go_bin:+${go_bin}:}${go_path}/bin:${PATH}"
export PATH

if ! command -v staticcheck >/dev/null 2>&1; then
  echo "[lint] installing staticcheck"
  go install "honnef.co/go/tools/cmd/staticcheck@${STATICCHECK_VERSION}"
fi

echo "[lint] go vet"
go vet ./...

echo "[lint] staticcheck (pedantic code-smell pass)"
staticcheck ./...
|
||||
110
scripts/quality_gate.sh
Executable file
110
scripts/quality_gate.sh
Executable file
@ -0,0 +1,110 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
QUALITY_METRICS_ENABLED="${ANANKE_QUALITY_METRICS_ENABLED:-1}"
|
||||
QUALITY_METRICS_FILE="${ANANKE_QUALITY_METRICS_FILE:-/var/lib/ananke/quality-gate.prom}"
|
||||
QUALITY_STATE_FILE="${ANANKE_QUALITY_STATE_FILE:-/var/lib/ananke/quality-gate.state}"
|
||||
|
||||
# read_quality_counter KEY
# Prints the last value recorded for KEY in ${QUALITY_STATE_FILE}, which holds
# KEY=VALUE lines. Prints 0 when the file does not exist, KEY is absent, or
# the stored value is not a run of decimal digits.
read_quality_counter() {
  local key="$1"
  if [[ ! -f "${QUALITY_STATE_FILE}" ]]; then
    echo 0
    return 0
  fi

  local value
  # tail -n1: if KEY appears more than once, the last occurrence wins.
  value="$(awk -F= -v key="${key}" '$1==key {print $2}' "${QUALITY_STATE_FILE}" | tail -n1)"
  if [[ ! "${value}" =~ ^[0-9]+$ ]]; then
    echo 0
    return 0
  fi

  # Force base 10. A value with leading zeros ("08", "010") passes the regex
  # above but would be read as octal by the caller's $(( )) arithmetic, which
  # either errors out or silently produces the wrong count.
  echo "$((10#${value}))"
}
|
||||
|
||||
# write_quality_metrics EXIT_CODE
# Best-effort publication of quality-gate run counters.
#   - Does nothing unless QUALITY_METRICS_ENABLED is "1".
#   - Reads the previous ok/failed counters from ${QUALITY_STATE_FILE},
#     increments one of them according to EXIT_CODE (0 counts as ok), and
#     rewrites both ${QUALITY_METRICS_FILE} (Prometheus text format) and
#     ${QUALITY_STATE_FILE}.
#   - Each file is written to a temp file in its target directory and then
#     moved into place, so a reader never sees a half-written file.
# Always returns 0: a metrics failure must not change the gate's result.
write_quality_metrics() {
  local exit_code="$1"
  if [[ "${QUALITY_METRICS_ENABLED}" != "1" ]]; then
    return 0
  fi

  local metrics_dir state_dir
  metrics_dir="$(dirname "${QUALITY_METRICS_FILE}")"
  state_dir="$(dirname "${QUALITY_STATE_FILE}")"
  mkdir -p "${metrics_dir}" "${state_dir}" >/dev/null 2>&1 || return 0

  local ok failed total last_success now success_percent
  ok="$(read_quality_counter ok)"
  failed="$(read_quality_counter failed)"
  last_success=0
  if [[ "${exit_code}" -eq 0 ]]; then
    ok=$((ok + 1))
    last_success=1
  else
    failed=$((failed + 1))
  fi
  total=$((ok + failed))
  now="$(date +%s)"
  success_percent="$(awk -v ok="${ok}" -v total="${total}" 'BEGIN { if (total <= 0) { print "0.00" } else { printf "%.2f", (ok * 100.0) / total } }')"

  # Bail out quietly if a temp file cannot be created, rather than carrying
  # on with an empty path and emitting redirect/mv errors.
  # NOTE(review): mktemp creates files with mode 0600 and mv keeps that mode;
  # confirm whatever scrapes the .prom file runs as a user that can read it.
  local tmp_metrics tmp_state
  tmp_metrics="$(mktemp "${metrics_dir}/quality-gate.prom.XXXXXX" 2>/dev/null)" || return 0
  if ! tmp_state="$(mktemp "${state_dir}/quality-gate.state.XXXXXX" 2>/dev/null)"; then
    rm -f "${tmp_metrics}"
    return 0
  fi

  local wrote=1
  cat > "${tmp_metrics}" <<EOF || wrote=0
# HELP ananke_quality_gate_runs_total Total Ananke quality gate runs by status.
# TYPE ananke_quality_gate_runs_total counter
ananke_quality_gate_runs_total{suite="ananke",status="ok"} ${ok}
ananke_quality_gate_runs_total{suite="ananke",status="failed"} ${failed}
# HELP ananke_quality_gate_last_run_success Whether the latest quality gate run succeeded.
# TYPE ananke_quality_gate_last_run_success gauge
ananke_quality_gate_last_run_success{suite="ananke"} ${last_success}
# HELP ananke_quality_gate_last_run_timestamp_seconds Unix timestamp of the latest quality gate run.
# TYPE ananke_quality_gate_last_run_timestamp_seconds gauge
ananke_quality_gate_last_run_timestamp_seconds{suite="ananke"} ${now}
# HELP ananke_quality_gate_success_percent Running quality gate success percentage for Ananke.
# TYPE ananke_quality_gate_success_percent gauge
ananke_quality_gate_success_percent{suite="ananke"} ${success_percent}
EOF

  cat > "${tmp_state}" <<EOF || wrote=0
ok=${ok}
failed=${failed}
last_success=${last_success}
last_run=${now}
EOF

  # Publish only when both temp files were written completely. The two moves
  # stay independent of each other, as before.
  if [[ "${wrote}" -eq 1 ]]; then
    mv -f "${tmp_metrics}" "${QUALITY_METRICS_FILE}" || true
    mv -f "${tmp_state}" "${QUALITY_STATE_FILE}" || true
  fi

  # Remove whatever was not moved into place so failed runs do not leave
  # stray temp files behind. This is a no-op for files already moved.
  rm -f "${tmp_metrics}" "${tmp_state}"
  return 0
}
|
||||
|
||||
# quality_gate_finalize EXIT_CODE
# Records the run via write_quality_metrics, then exits with EXIT_CODE
# unchanged.
quality_gate_finalize() {
  local gate_status="$1"
  # Turn off errexit so the metrics write cannot abort this handler.
  set +e
  if ! write_quality_metrics "${gate_status}"; then
    : # metrics are best-effort; the gate status below is what matters
  fi
  exit "${gate_status}"
}
|
||||
|
||||
# Record the outcome of this run on every exit path, including failures.
trap 'quality_gate_finalize $?' EXIT

cd "${REPO_DIR}"

echo "[quality] unit tests"
go test ./...

# Each hygiene contract runs as its own stage so the log shows which one
# failed. Entries are "<log label>=<subtest name>".
cd testing
hygiene_stages=(
  "doc contracts=doc_contract"
  "naming contracts=naming_contract"
  "LOC limits=loc_limit"
)
for stage in "${hygiene_stages[@]}"; do
  echo "[quality] hygiene: ${stage%%=*}"
  go test ./hygiene -run "TestHygieneContracts/${stage#*=}" -count=1
done
cd "${REPO_DIR}"

echo "[quality] lint"
./scripts/lint.sh

echo "[quality] per-file coverage gate (95%)"
cd testing
ANANKE_ENFORCE_COVERAGE=1 ANANKE_PER_FILE_COVERAGE_TARGET=95 go test ./coverage -run TestPerFileCoverageReport -count=1 -v
|
||||
238
testing/config/config_quality_matrix_test.go
Normal file
238
testing/config/config_quality_matrix_test.go
Normal file
@ -0,0 +1,238 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
icfg "scm.bstein.dev/bstein/ananke/internal/config"
|
||||
)
|
||||
|
||||
func loadBaselineConfig(t *testing.T) icfg.Config {
|
||||
t.Helper()
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "ananke.yaml")
|
||||
if err := os.WriteFile(path, []byte("ups:\n enabled: false\n"), 0o600); err != nil {
|
||||
t.Fatalf("write baseline config: %v", err)
|
||||
}
|
||||
cfg, err := icfg.Load(path)
|
||||
if err != nil {
|
||||
t.Fatalf("load baseline config: %v", err)
|
||||
}
|
||||
return cfg
|
||||
}
|
||||
|
||||
// TestHookServiceCatalogAndMergeContracts runs one orchestration or CLI step.
|
||||
// Signature: TestHookServiceCatalogAndMergeContracts(t *testing.T).
|
||||
// Why: validates startup checklist defaults and merge semantics so host-level
|
||||
// overrides cannot silently drop required service behavior checks.
|
||||
func TestHookServiceCatalogAndMergeContracts(t *testing.T) {
|
||||
checks := icfg.TestHookDefaultServiceChecklist()
|
||||
if len(checks) < 20 {
|
||||
t.Fatalf("expected substantial default checklist, got %d checks", len(checks))
|
||||
}
|
||||
|
||||
seen := map[string]icfg.ServiceChecklistCheck{}
|
||||
for _, check := range checks {
|
||||
seen[strings.TrimSpace(check.Name)] = check
|
||||
}
|
||||
logging, ok := seen["logging-ui-user-session"]
|
||||
if !ok || !logging.RequireRobotAuth || strings.TrimSpace(logging.FinalURLNotContains) == "" {
|
||||
t.Fatalf("expected logging-ui-user-session to require robot auth + final URL validation")
|
||||
}
|
||||
keycloak, ok := seen["keycloak-admin-user-session"]
|
||||
if !ok || !keycloak.RequireRobotAuth || strings.TrimSpace(keycloak.FinalURLNotContains) == "" {
|
||||
t.Fatalf("expected keycloak-admin-user-session hard auth assertions")
|
||||
}
|
||||
|
||||
critical := icfg.TestHookDefaultCriticalServiceEndpoints()
|
||||
if len(critical) == 0 {
|
||||
t.Fatalf("expected critical endpoint defaults")
|
||||
}
|
||||
foundMonitoring := false
|
||||
for _, entry := range critical {
|
||||
if entry == "monitoring/grafana" {
|
||||
foundMonitoring = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !foundMonitoring {
|
||||
t.Fatalf("expected monitoring/grafana critical endpoint default")
|
||||
}
|
||||
|
||||
mergedChecks := icfg.TestHookMergeServiceChecklistDefaults(
|
||||
[]icfg.ServiceChecklistCheck{
|
||||
{Name: "custom", URL: "https://custom.bstein.dev/", TimeoutSeconds: 5},
|
||||
{Name: "logging-ui-user-session", URL: "https://override.invalid/", TimeoutSeconds: 5},
|
||||
},
|
||||
[]icfg.ServiceChecklistCheck{
|
||||
{Name: "logging-ui-user-session", URL: "https://logs.bstein.dev/", TimeoutSeconds: 5},
|
||||
{Name: "metrics-ui-user-session", URL: "https://metrics.bstein.dev/", TimeoutSeconds: 5},
|
||||
},
|
||||
)
|
||||
if len(mergedChecks) != 3 {
|
||||
t.Fatalf("expected 3 merged checks with dedupe, got %d", len(mergedChecks))
|
||||
}
|
||||
|
||||
mergedStrings := icfg.TestHookMergeStringDefaults(
|
||||
[]string{" one ", "one", "", "two"},
|
||||
[]string{"two", "three", " "},
|
||||
)
|
||||
if strings.Join(mergedStrings, ",") != "one,two,three" {
|
||||
t.Fatalf("unexpected merged string defaults: %v", mergedStrings)
|
||||
}
|
||||
}
|
||||
|
||||
// TestValidateServiceChecklistAuthContracts runs one orchestration or CLI step.
|
||||
// Signature: TestValidateServiceChecklistAuthContracts(t *testing.T).
|
||||
// Why: covers service-checklist auth and final-url validation branches that are
|
||||
// critical for preventing false-positive startup success.
|
||||
func TestValidateServiceChecklistAuthContracts(t *testing.T) {
|
||||
t.Run("invalid auth mode", func(t *testing.T) {
|
||||
cfg := loadBaselineConfig(t)
|
||||
cfg.Startup.ServiceChecklistAuth.Mode = "bad-mode"
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected invalid mode validation error")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("invalid keycloak base url", func(t *testing.T) {
|
||||
cfg := loadBaselineConfig(t)
|
||||
cfg.Startup.ServiceChecklistAuth.KeycloakBaseURL = "://broken"
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected invalid keycloak base URL validation error")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("missing secret key fields", func(t *testing.T) {
|
||||
cfg := loadBaselineConfig(t)
|
||||
cfg.Startup.ServiceChecklistAuth.AdminSecretPasswordKey = ""
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected missing admin secret password key validation error")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("require robot auth with mode none", func(t *testing.T) {
|
||||
cfg := loadBaselineConfig(t)
|
||||
cfg.Startup.ServiceChecklistAuth.Mode = "none"
|
||||
cfg.Startup.ServiceChecklist = append(cfg.Startup.ServiceChecklist, icfg.ServiceChecklistCheck{
|
||||
Name: "robot-only",
|
||||
URL: "https://logs.bstein.dev/",
|
||||
RequireRobotAuth: true,
|
||||
TimeoutSeconds: 5,
|
||||
})
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected require_robot_auth + mode none validation error")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("final url markers without redirects", func(t *testing.T) {
|
||||
cfg := loadBaselineConfig(t)
|
||||
cfg.Startup.ServiceChecklist = append(cfg.Startup.ServiceChecklist, icfg.ServiceChecklistCheck{
|
||||
Name: "final-url-invalid",
|
||||
URL: "https://logs.bstein.dev/",
|
||||
AcceptedStatuses: []int{200},
|
||||
FinalURLContains: "/app/home",
|
||||
TimeoutSeconds: 5,
|
||||
})
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected final_url marker validation error when redirects disabled")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("invalid accepted status code", func(t *testing.T) {
|
||||
cfg := loadBaselineConfig(t)
|
||||
cfg.Startup.ServiceChecklist[0].AcceptedStatuses = []int{700}
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected invalid accepted status code error")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("required node label map contracts", func(t *testing.T) {
|
||||
cfg := loadBaselineConfig(t)
|
||||
cfg.Startup.RequiredNodeLabels = map[string]map[string]string{" ": {"k": "v"}}
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected empty required-node-label key error")
|
||||
}
|
||||
|
||||
cfg = loadBaselineConfig(t)
|
||||
cfg.Startup.RequiredNodeLabels = map[string]map[string]string{"titan-23": {}}
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected empty required-node-label map error")
|
||||
}
|
||||
|
||||
cfg = loadBaselineConfig(t)
|
||||
cfg.Startup.RequiredNodeLabels = map[string]map[string]string{"titan-23": {"zone": " "}}
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected empty required-node-label value error")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("missing auth fields", func(t *testing.T) {
|
||||
cfg := loadBaselineConfig(t)
|
||||
cfg.Startup.ServiceChecklistAuth.Realm = ""
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected missing realm error")
|
||||
}
|
||||
|
||||
cfg = loadBaselineConfig(t)
|
||||
cfg.Startup.ServiceChecklistAuth.RobotUsername = ""
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected missing robot username error")
|
||||
}
|
||||
|
||||
cfg = loadBaselineConfig(t)
|
||||
cfg.Startup.ServiceChecklistAuth.AdminSecretNamespace = ""
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected missing admin secret namespace error")
|
||||
}
|
||||
|
||||
cfg = loadBaselineConfig(t)
|
||||
cfg.Startup.ServiceChecklistAuth.AdminSecretName = ""
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected missing admin secret name error")
|
||||
}
|
||||
|
||||
cfg = loadBaselineConfig(t)
|
||||
cfg.Startup.ServiceChecklistAuth.AdminSecretUsernameKey = ""
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected missing admin secret username key error")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("service checklist missing url", func(t *testing.T) {
|
||||
cfg := loadBaselineConfig(t)
|
||||
cfg.Startup.ServiceChecklist[0].URL = " "
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected missing checklist URL error")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("coordination and state contracts", func(t *testing.T) {
|
||||
cfg := loadBaselineConfig(t)
|
||||
cfg.Coordination.ForwardShutdownHost = "titan-24"
|
||||
cfg.Coordination.ForwardShutdownConfig = ""
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected forward-shutdown config error")
|
||||
}
|
||||
|
||||
cfg = loadBaselineConfig(t)
|
||||
cfg.Coordination.PeerHosts = []string{"titan-24", " "}
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected peer host empty entry error")
|
||||
}
|
||||
|
||||
cfg = loadBaselineConfig(t)
|
||||
cfg.Coordination.Role = "invalid"
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected invalid coordination role error")
|
||||
}
|
||||
|
||||
cfg = loadBaselineConfig(t)
|
||||
cfg.State.ReportsDir = ""
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Fatalf("expected state reports_dir required error")
|
||||
}
|
||||
})
|
||||
}
|
||||
@ -101,9 +101,18 @@ func TestPerFileCoverageReport(t *testing.T) {
|
||||
root := repoRoot(t)
|
||||
tmp := t.TempDir()
|
||||
rootCover := filepath.Join(tmp, "ananke.root.cover.out")
|
||||
configCover := filepath.Join(tmp, "ananke.testing.config.cover.out")
|
||||
testingCover := filepath.Join(tmp, "ananke.testing.cover.out")
|
||||
|
||||
runCoverageCommand(t, root, rootCover, "./...")
|
||||
runCoverageCommand(
|
||||
t,
|
||||
filepath.Join(root, "testing"),
|
||||
configCover,
|
||||
"./config",
|
||||
"-coverpkg=scm.bstein.dev/bstein/ananke/...",
|
||||
)
|
||||
|
||||
runCoverageCommand(
|
||||
t,
|
||||
filepath.Join(root, "testing"),
|
||||
@ -118,6 +127,7 @@ func TestPerFileCoverageReport(t *testing.T) {
|
||||
|
||||
blocks := map[string]coverageBlock{}
|
||||
parseCoverageProfile(t, rootCover, blocks)
|
||||
parseCoverageProfile(t, configCover, blocks)
|
||||
parseCoverageProfile(t, testingCover, blocks)
|
||||
|
||||
byFile := map[string]*fileCoverage{}
|
||||
|
||||
@ -279,8 +279,8 @@ func TestHookGapMatrixPart11RemainingClosure(t *testing.T) {
|
||||
_, _, probeErr := orchBodyErr.TestHookHTTPChecklistProbe(context.Background(), config.ServiceChecklistCheck{
|
||||
URL: "http://" + ln.Addr().String() + "/health",
|
||||
})
|
||||
if probeErr == nil || !strings.Contains(probeErr.Error(), "read response body") {
|
||||
t.Fatalf("expected checklist body read-error branch, got %v", probeErr)
|
||||
if probeErr == nil || (!strings.Contains(probeErr.Error(), "read response body") && !strings.Contains(probeErr.Error(), "request failed")) {
|
||||
t.Fatalf("expected checklist probe failure branch, got %v", probeErr)
|
||||
}
|
||||
|
||||
cfgStability := lifecycleConfig(t)
|
||||
|
||||
536
testing/orchestrator/hooks_service_auth_matrix_test.go
Normal file
536
testing/orchestrator/hooks_service_auth_matrix_test.go
Normal file
@ -0,0 +1,536 @@
|
||||
package orchestrator
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"scm.bstein.dev/bstein/ananke/internal/cluster"
|
||||
"scm.bstein.dev/bstein/ananke/internal/config"
|
||||
)
|
||||
|
||||
// testSecretJSON renders a JSON object whose "data" member holds "username"
// and "password" as standard base64 strings.
// Signature: testSecretJSON(username, password string) string.
// Why: lets fake kubectl runners return a secret payload without a cluster.
func testSecretJSON(username, password string) string {
	// Standard base64 output never contains characters that need JSON
	// escaping, so plain %s interpolation yields valid JSON here.
	encode := base64.StdEncoding.EncodeToString
	return fmt.Sprintf(
		`{"data":{"username":"%s","password":"%s"}}`,
		encode([]byte(username)),
		encode([]byte(password)),
	)
}
|
||||
|
||||
func authSettings(baseURL string) config.ServiceChecklistAuthSettings {
|
||||
return config.ServiceChecklistAuthSettings{
|
||||
Mode: "keycloak_robotuser",
|
||||
KeycloakBaseURL: baseURL,
|
||||
Realm: "atlas",
|
||||
RobotUsername: "robotuser",
|
||||
AdminSecretNamespace: "sso",
|
||||
AdminSecretName: "keycloak-admin",
|
||||
AdminSecretUsernameKey: "username",
|
||||
AdminSecretPasswordKey: "password",
|
||||
}
|
||||
}
|
||||
|
||||
// TestHookServiceAuthChecklistSuccess runs one orchestration or CLI step.
|
||||
// Signature: TestHookServiceAuthChecklistSuccess(t *testing.T).
|
||||
// Why: validates full robotuser-authenticated checklist flow with final URL and
|
||||
// body markers so startup gates reflect real post-login user behavior.
|
||||
func TestHookServiceAuthChecklistSuccess(t *testing.T) {
|
||||
var appServer *httptest.Server
|
||||
appMux := http.NewServeMux()
|
||||
appMux.HandleFunc("/session/bootstrap", func(w http.ResponseWriter, _ *http.Request) {
|
||||
http.SetCookie(w, &http.Cookie{Name: "robot_session", Value: "ok", Path: "/"})
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte("bootstrap ok"))
|
||||
})
|
||||
appMux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/" {
|
||||
http.Redirect(w, r, "/app/home", http.StatusFound)
|
||||
return
|
||||
}
|
||||
cookie, err := r.Cookie("robot_session")
|
||||
if err != nil || strings.TrimSpace(cookie.Value) == "" {
|
||||
http.Redirect(w, r, "/oauth2/sign_in", http.StatusFound)
|
||||
return
|
||||
}
|
||||
if r.URL.Path == "/app/home" {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte("OpenSearch Dashboards"))
|
||||
return
|
||||
}
|
||||
if r.URL.Path == "/oauth2/sign_in" {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte("sign in"))
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
})
|
||||
appServer = httptest.NewTLSServer(appMux)
|
||||
defer appServer.Close()
|
||||
|
||||
kcMux := http.NewServeMux()
|
||||
kcMux.HandleFunc("/realms/master/protocol/openid-connect/token", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte(`{"access_token":"admin-token"}`))
|
||||
})
|
||||
kcMux.HandleFunc("/admin/realms/atlas/users", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte(`[{"id":"robot-id"}]`))
|
||||
})
|
||||
kcMux.HandleFunc("/admin/realms/atlas/users/robot-id/impersonation", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte(fmt.Sprintf(`{"redirect":"%s/session/bootstrap"}`, appServer.URL)))
|
||||
})
|
||||
kcServer := httptest.NewTLSServer(kcMux)
|
||||
defer kcServer.Close()
|
||||
|
||||
cfg := lifecycleConfig(t)
|
||||
cfg.Startup.ServiceChecklistAuth = authSettings(kcServer.URL)
|
||||
|
||||
recorder := &commandRecorder{}
|
||||
base := lifecycleDispatcher(recorder)
|
||||
run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||
command := name + " " + strings.Join(args, " ")
|
||||
if name == "kubectl" && strings.Contains(command, "-n sso get secret keycloak-admin -o json") {
|
||||
recorder.record(name, args)
|
||||
return testSecretJSON("admin", "password"), nil
|
||||
}
|
||||
return base(ctx, timeout, name, args...)
|
||||
}
|
||||
orch, _ := newHookOrchestrator(t, cfg, run, run)
|
||||
|
||||
check := config.ServiceChecklistCheck{
|
||||
Name: "logs-ui-user-session",
|
||||
URL: appServer.URL + "/",
|
||||
AcceptedStatuses: []int{200},
|
||||
RequireRobotAuth: true,
|
||||
FollowRedirects: true,
|
||||
InsecureSkipTLS: true,
|
||||
FinalURLContains: "/app/home",
|
||||
FinalURLNotContains: "/oauth2/sign_in",
|
||||
BodyContains: "OpenSearch Dashboards",
|
||||
TimeoutSeconds: 5,
|
||||
}
|
||||
ok, detail := orch.TestHookServiceCheckReady(context.Background(), check)
|
||||
if !ok {
|
||||
t.Fatalf("expected authenticated checklist success, detail=%q", detail)
|
||||
}
|
||||
}
|
||||
|
||||
// TestHookServiceAuthModeAndSecretErrors runs one orchestration or CLI step.
|
||||
// Signature: TestHookServiceAuthModeAndSecretErrors(t *testing.T).
|
||||
// Why: covers auth mode guards and secret decode error branches to keep startup
|
||||
// failures explicit when robot-auth prerequisites are missing.
|
||||
func TestHookServiceAuthModeAndSecretErrors(t *testing.T) {
|
||||
cfg := lifecycleConfig(t)
|
||||
client := &http.Client{Timeout: time.Second}
|
||||
|
||||
cfgNone := lifecycleConfig(t)
|
||||
cfgNone.Startup.ServiceChecklistAuth.Mode = "none"
|
||||
orchNone, _ := newHookOrchestrator(t, cfgNone, nil, nil)
|
||||
if err := orchNone.TestHookAuthenticateRobotChecklistSession(context.Background(), client); err == nil {
|
||||
t.Fatalf("expected auth mode none to fail")
|
||||
}
|
||||
if _, err := orchNone.TestHookChecklistAuthHTTPClient(context.Background(), time.Second, false); err == nil {
|
||||
t.Fatalf("expected checklist auth client init to fail when mode=none")
|
||||
}
|
||||
|
||||
cfgBad := lifecycleConfig(t)
|
||||
cfgBad.Startup.ServiceChecklistAuth.Mode = "bad-mode"
|
||||
orchBad, _ := newHookOrchestrator(t, cfgBad, nil, nil)
|
||||
if err := orchBad.TestHookAuthenticateRobotChecklistSession(context.Background(), client); err == nil {
|
||||
t.Fatalf("expected unsupported auth mode to fail")
|
||||
}
|
||||
|
||||
base := lifecycleDispatcher(&commandRecorder{})
|
||||
runKubectlErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||
if name == "kubectl" {
|
||||
return "", errors.New("kubectl denied")
|
||||
}
|
||||
return base(ctx, timeout, name, args...)
|
||||
}
|
||||
orchKubectlErr, _ := newHookOrchestrator(t, cfg, runKubectlErr, runKubectlErr)
|
||||
if _, err := orchKubectlErr.TestHookKubernetesSecretValue(context.Background(), "sso", "keycloak-admin", "username"); err == nil {
|
||||
t.Fatalf("expected kubectl error branch")
|
||||
}
|
||||
if _, _, err := orchKubectlErr.TestHookKeycloakAdminCredentials(context.Background(), cfg.Startup.ServiceChecklistAuth); err == nil {
|
||||
t.Fatalf("expected keycloakAdminCredentials to fail on username secret lookup")
|
||||
}
|
||||
if err := orchKubectlErr.TestHookAuthenticateRobotChecklistSession(context.Background(), client); err == nil {
|
||||
t.Fatalf("expected auth session failure when secret lookup fails")
|
||||
}
|
||||
|
||||
runBadJSON := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||
if name == "kubectl" {
|
||||
return "{bad", nil
|
||||
}
|
||||
return base(ctx, timeout, name, args...)
|
||||
}
|
||||
orchBadJSON, _ := newHookOrchestrator(t, cfg, runBadJSON, runBadJSON)
|
||||
if _, err := orchBadJSON.TestHookKubernetesSecretValue(context.Background(), "sso", "keycloak-admin", "username"); err == nil {
|
||||
t.Fatalf("expected secret decode error branch")
|
||||
}
|
||||
|
||||
runMissingKey := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||
if name == "kubectl" {
|
||||
return `{"data":{"password":"cGFzcw=="}}`, nil
|
||||
}
|
||||
return base(ctx, timeout, name, args...)
|
||||
}
|
||||
orchMissingKey, _ := newHookOrchestrator(t, cfg, runMissingKey, runMissingKey)
|
||||
if _, err := orchMissingKey.TestHookKubernetesSecretValue(context.Background(), "sso", "keycloak-admin", "username"); err == nil {
|
||||
t.Fatalf("expected missing key branch")
|
||||
}
|
||||
if err := orchMissingKey.TestHookAuthenticateRobotChecklistSession(context.Background(), client); err == nil {
|
||||
t.Fatalf("expected auth session failure when username key is missing")
|
||||
}
|
||||
|
||||
runMissingPassword := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||
if name == "kubectl" {
|
||||
return `{"data":{"username":"YWRtaW4="}}`, nil
|
||||
}
|
||||
return base(ctx, timeout, name, args...)
|
||||
}
|
||||
orchMissingPassword, _ := newHookOrchestrator(t, cfg, runMissingPassword, runMissingPassword)
|
||||
if _, _, err := orchMissingPassword.TestHookKeycloakAdminCredentials(context.Background(), cfg.Startup.ServiceChecklistAuth); err == nil {
|
||||
t.Fatalf("expected keycloakAdminCredentials to fail on password secret lookup")
|
||||
}
|
||||
if err := orchMissingPassword.TestHookAuthenticateRobotChecklistSession(context.Background(), client); err == nil {
|
||||
t.Fatalf("expected auth session failure when password key is missing")
|
||||
}
|
||||
|
||||
runBadB64 := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||
if name == "kubectl" {
|
||||
return `{"data":{"username":"###"}}`, nil
|
||||
}
|
||||
return base(ctx, timeout, name, args...)
|
||||
}
|
||||
orchBadB64, _ := newHookOrchestrator(t, cfg, runBadB64, runBadB64)
|
||||
if _, err := orchBadB64.TestHookKubernetesSecretValue(context.Background(), "sso", "keycloak-admin", "username"); err == nil {
|
||||
t.Fatalf("expected base64 decode branch")
|
||||
}
|
||||
|
||||
runEmptyValue := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||
if name == "kubectl" {
|
||||
return `{"data":{"username":"IA=="}}`, nil
|
||||
}
|
||||
return base(ctx, timeout, name, args...)
|
||||
}
|
||||
orchEmptyValue, _ := newHookOrchestrator(t, cfg, runEmptyValue, runEmptyValue)
|
||||
if _, err := orchEmptyValue.TestHookKubernetesSecretValue(context.Background(), "sso", "keycloak-admin", "username"); err == nil {
|
||||
t.Fatalf("expected empty decoded value branch")
|
||||
}
|
||||
|
||||
if got := cluster.TestHookCompactHTTPBody([]byte(" hello world \n test ")); got != "hello world test" {
|
||||
t.Fatalf("unexpected compact body %q", got)
|
||||
}
|
||||
if got := cluster.TestHookCompactHTTPBody([]byte(" \n\t ")); got != "" {
|
||||
t.Fatalf("expected compact empty body, got %q", got)
|
||||
}
|
||||
if got := cluster.TestHookKeycloakBaseURL(config.ServiceChecklistAuthSettings{KeycloakBaseURL: "https://sso.bstein.dev/"}); got != "https://sso.bstein.dev" {
|
||||
t.Fatalf("unexpected normalized base URL %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestHookServiceAuthHTTPErrorBranches runs one orchestration or CLI step.
|
||||
// Signature: TestHookServiceAuthHTTPErrorBranches(t *testing.T).
|
||||
// Why: covers token/user/impersonation parser and status branches so startup
|
||||
// diagnostics remain actionable during auth failures.
|
||||
func TestHookServiceAuthHTTPErrorBranches(t *testing.T) {
|
||||
cfg := lifecycleConfig(t)
|
||||
orch, _ := newHookOrchestrator(t, cfg, nil, nil)
|
||||
client := &http.Client{Timeout: 2 * time.Second}
|
||||
|
||||
authBadURL := authSettings("://bad-url")
|
||||
if _, err := orch.TestHookKeycloakAdminToken(context.Background(), client, authBadURL, "admin", "pw"); err == nil {
|
||||
t.Fatalf("expected request-build failure for bad base URL")
|
||||
}
|
||||
if _, err := orch.TestHookKeycloakRobotUserID(context.Background(), client, authBadURL, "token"); err == nil {
|
||||
t.Fatalf("expected robot-user request-build failure for bad base URL")
|
||||
}
|
||||
if _, err := orch.TestHookKeycloakImpersonationRedirect(context.Background(), client, authBadURL, "token", "robot"); err == nil {
|
||||
t.Fatalf("expected impersonation request-build failure for bad base URL")
|
||||
}
|
||||
authRequestErr := authSettings("http://127.0.0.1:1")
|
||||
if _, err := orch.TestHookKeycloakAdminToken(context.Background(), client, authRequestErr, "admin", "pw"); err == nil {
|
||||
t.Fatalf("expected admin token request error branch")
|
||||
}
|
||||
if _, err := orch.TestHookKeycloakRobotUserID(context.Background(), client, authRequestErr, "token"); err == nil {
|
||||
t.Fatalf("expected robot user request error branch")
|
||||
}
|
||||
if _, err := orch.TestHookKeycloakImpersonationRedirect(context.Background(), client, authRequestErr, "token", "robot"); err == nil {
|
||||
t.Fatalf("expected impersonation request error branch")
|
||||
}
|
||||
|
||||
kcError := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case strings.Contains(r.URL.Path, "/token"):
|
||||
w.WriteHeader(http.StatusUnauthorized)
|
||||
_, _ = w.Write([]byte(`{"error":"unauthorized"}`))
|
||||
case strings.Contains(r.URL.Path, "/users") && strings.Contains(r.URL.RawQuery, "username=robotuser"):
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
_, _ = w.Write([]byte(`{"error":"boom"}`))
|
||||
default:
|
||||
w.WriteHeader(http.StatusBadGateway)
|
||||
}
|
||||
}))
|
||||
defer kcError.Close()
|
||||
authError := authSettings(kcError.URL)
|
||||
if _, err := orch.TestHookKeycloakAdminToken(context.Background(), client, authError, "admin", "pw"); err == nil {
|
||||
t.Fatalf("expected non-2xx token branch")
|
||||
}
|
||||
if _, err := orch.TestHookKeycloakRobotUserID(context.Background(), client, authError, "token"); err == nil {
|
||||
t.Fatalf("expected non-2xx robot user branch")
|
||||
}
|
||||
|
||||
kcDecode := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case strings.Contains(r.URL.Path, "/token"):
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte("not-json"))
|
||||
case strings.Contains(r.URL.Path, "/users") && strings.Contains(r.URL.RawQuery, "username=robotuser"):
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte("not-json"))
|
||||
case strings.Contains(r.URL.Path, "/impersonation"):
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte("not-json"))
|
||||
default:
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}
|
||||
}))
|
||||
defer kcDecode.Close()
|
||||
authDecode := authSettings(kcDecode.URL)
|
||||
if _, err := orch.TestHookKeycloakAdminToken(context.Background(), client, authDecode, "admin", "pw"); err == nil {
|
||||
t.Fatalf("expected token decode error branch")
|
||||
}
|
||||
if _, err := orch.TestHookKeycloakRobotUserID(context.Background(), client, authDecode, "token"); err == nil {
|
||||
t.Fatalf("expected robot user decode error branch")
|
||||
}
|
||||
if _, err := orch.TestHookKeycloakImpersonationRedirect(context.Background(), client, authDecode, "token", "robot"); err == nil {
|
||||
t.Fatalf("expected impersonation decode error branch")
|
||||
}
|
||||
|
||||
kcMissing := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case strings.Contains(r.URL.Path, "/token"):
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte(`{"access_token":""}`))
|
||||
case strings.Contains(r.URL.Path, "/users") && strings.Contains(r.URL.RawQuery, "username=robotuser"):
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte(`[]`))
|
||||
case strings.Contains(r.URL.Path, "/impersonation"):
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
_, _ = w.Write([]byte(`{"error":"bad request"}`))
|
||||
default:
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}
|
||||
}))
|
||||
defer kcMissing.Close()
|
||||
authMissing := authSettings(kcMissing.URL)
|
||||
if _, err := orch.TestHookKeycloakAdminToken(context.Background(), client, authMissing, "admin", "pw"); err == nil {
|
||||
t.Fatalf("expected missing access_token branch")
|
||||
}
|
||||
if _, err := orch.TestHookKeycloakRobotUserID(context.Background(), client, authMissing, "token"); err == nil {
|
||||
t.Fatalf("expected missing robot user branch")
|
||||
}
|
||||
if _, err := orch.TestHookKeycloakImpersonationRedirect(context.Background(), client, authMissing, "token", "robot"); err == nil {
|
||||
t.Fatalf("expected impersonation non-2xx branch")
|
||||
}
|
||||
}
|
||||
|
||||
// TestHookServiceChecklistProbeBranches runs one orchestration or CLI step.
|
||||
// Signature: TestHookServiceChecklistProbeBranches(t *testing.T).
|
||||
// Why: exercises redirect + final-url probe branches, including robot-auth
|
||||
// initialization failures and redirect suppression behavior.
|
||||
func TestHookServiceChecklistProbeBranches(t *testing.T) {
|
||||
cfg := lifecycleConfig(t)
|
||||
cfg.Startup.ServiceChecklistAuth.Mode = "none"
|
||||
orch, _ := newHookOrchestrator(t, cfg, nil, nil)
|
||||
if _, _, _, _, err := orch.TestHookHTTPChecklistProbeWithLocation(context.Background(), config.ServiceChecklistCheck{
|
||||
URL: "https://example.invalid/",
|
||||
RequireRobotAuth: true,
|
||||
TimeoutSeconds: 1,
|
||||
}); err == nil {
|
||||
t.Fatalf("expected robot auth initialization failure when mode=none")
|
||||
}
|
||||
|
||||
redirectServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
http.Redirect(w, r, "/next", http.StatusFound)
|
||||
}))
|
||||
defer redirectServer.Close()
|
||||
|
||||
orchNoAuth, _ := newHookOrchestrator(t, lifecycleConfig(t), nil, nil)
|
||||
status, _, location, finalURL, err := orchNoAuth.TestHookHTTPChecklistProbeWithLocation(context.Background(), config.ServiceChecklistCheck{
|
||||
URL: redirectServer.URL,
|
||||
FollowRedirects: false,
|
||||
TimeoutSeconds: 2,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected redirect probe error: %v", err)
|
||||
}
|
||||
if status != http.StatusFound {
|
||||
t.Fatalf("expected 302 status when redirects disabled, got %d", status)
|
||||
}
|
||||
if !strings.Contains(location, "/next") {
|
||||
t.Fatalf("expected location header for redirect response, got %q", location)
|
||||
}
|
||||
if !strings.Contains(finalURL, redirectServer.URL) {
|
||||
t.Fatalf("expected final URL to remain original request URL, got %q", finalURL)
|
||||
}
|
||||
}
|
||||
|
||||
// TestHookAuthenticateRobotChecklistSessionFailureStages runs one orchestration or CLI step.
|
||||
// Signature: TestHookAuthenticateRobotChecklistSessionFailureStages(t *testing.T).
|
||||
// Why: drives authenticateRobotChecklistSession through downstream error stages
|
||||
// (robot lookup, impersonation, redirect-build, redirect-request) to maintain
|
||||
// resilient startup diagnostics.
|
||||
func TestHookAuthenticateRobotChecklistSessionFailureStages(t *testing.T) {
|
||||
client := &http.Client{Timeout: 3 * time.Second}
|
||||
recorder := &commandRecorder{}
|
||||
base := lifecycleDispatcher(recorder)
|
||||
secretRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||
command := name + " " + strings.Join(args, " ")
|
||||
if name == "kubectl" && strings.Contains(command, "-n sso get secret keycloak-admin -o json") {
|
||||
return testSecretJSON("admin", "password"), nil
|
||||
}
|
||||
return base(ctx, timeout, name, args...)
|
||||
}
|
||||
|
||||
t.Run("robot-user lookup failure", func(t *testing.T) {
|
||||
kc := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case strings.Contains(r.URL.Path, "/token"):
|
||||
_, _ = w.Write([]byte(`{"access_token":"admin-token"}`))
|
||||
case strings.Contains(r.URL.Path, "/users"):
|
||||
w.WriteHeader(http.StatusBadGateway)
|
||||
_, _ = w.Write([]byte(`{"error":"lookup failed"}`))
|
||||
default:
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}
|
||||
}))
|
||||
defer kc.Close()
|
||||
cfg := lifecycleConfig(t)
|
||||
cfg.Startup.ServiceChecklistAuth = authSettings(kc.URL)
|
||||
orch, _ := newHookOrchestrator(t, cfg, secretRun, secretRun)
|
||||
if err := orch.TestHookAuthenticateRobotChecklistSession(context.Background(), client); err == nil {
|
||||
t.Fatalf("expected robot-user lookup failure branch")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("impersonation failure", func(t *testing.T) {
|
||||
kc := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case strings.Contains(r.URL.Path, "/token"):
|
||||
_, _ = w.Write([]byte(`{"access_token":"admin-token"}`))
|
||||
case strings.Contains(r.URL.Path, "/users"):
|
||||
_, _ = w.Write([]byte(`[{"id":"robot-id"}]`))
|
||||
case strings.Contains(r.URL.Path, "/impersonation"):
|
||||
w.WriteHeader(http.StatusBadGateway)
|
||||
_, _ = w.Write([]byte(`{"error":"impersonation failed"}`))
|
||||
default:
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}
|
||||
}))
|
||||
defer kc.Close()
|
||||
cfg := lifecycleConfig(t)
|
||||
cfg.Startup.ServiceChecklistAuth = authSettings(kc.URL)
|
||||
orch, _ := newHookOrchestrator(t, cfg, secretRun, secretRun)
|
||||
if err := orch.TestHookAuthenticateRobotChecklistSession(context.Background(), client); err == nil {
|
||||
t.Fatalf("expected impersonation failure branch")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("redirect url build failure", func(t *testing.T) {
|
||||
kc := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case strings.Contains(r.URL.Path, "/token"):
|
||||
_, _ = w.Write([]byte(`{"access_token":"admin-token"}`))
|
||||
case strings.Contains(r.URL.Path, "/users"):
|
||||
_, _ = w.Write([]byte(`[{"id":"robot-id"}]`))
|
||||
case strings.Contains(r.URL.Path, "/impersonation"):
|
||||
_, _ = w.Write([]byte(`{"redirect":"://bad"}`))
|
||||
default:
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}
|
||||
}))
|
||||
defer kc.Close()
|
||||
cfg := lifecycleConfig(t)
|
||||
cfg.Startup.ServiceChecklistAuth = authSettings(kc.URL)
|
||||
orch, _ := newHookOrchestrator(t, cfg, secretRun, secretRun)
|
||||
if err := orch.TestHookAuthenticateRobotChecklistSession(context.Background(), client); err == nil {
|
||||
t.Fatalf("expected redirect request-build failure branch")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("redirect request failure", func(t *testing.T) {
|
||||
kc := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case strings.Contains(r.URL.Path, "/token"):
|
||||
_, _ = w.Write([]byte(`{"access_token":"admin-token"}`))
|
||||
case strings.Contains(r.URL.Path, "/users"):
|
||||
_, _ = w.Write([]byte(`[{"id":"robot-id"}]`))
|
||||
case strings.Contains(r.URL.Path, "/impersonation"):
|
||||
_, _ = w.Write([]byte(`{"redirect":"http://127.0.0.1:1/nowhere"}`))
|
||||
default:
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}
|
||||
}))
|
||||
defer kc.Close()
|
||||
cfg := lifecycleConfig(t)
|
||||
cfg.Startup.ServiceChecklistAuth = authSettings(kc.URL)
|
||||
orch, _ := newHookOrchestrator(t, cfg, secretRun, secretRun)
|
||||
if err := orch.TestHookAuthenticateRobotChecklistSession(context.Background(), client); err == nil {
|
||||
t.Fatalf("expected redirect request failure branch")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestHookServiceAuthFallbackRedirect runs one orchestration or CLI step.
|
||||
// Signature: TestHookServiceAuthFallbackRedirect(t *testing.T).
|
||||
// Why: covers empty impersonation redirect fallback to realm account URL so
|
||||
// session bootstrap is resilient to Keycloak response shape differences.
|
||||
func TestHookServiceAuthFallbackRedirect(t *testing.T) {
|
||||
kcMux := http.NewServeMux()
|
||||
kcMux.HandleFunc("/realms/master/protocol/openid-connect/token", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte(`{"access_token":"admin-token"}`))
|
||||
})
|
||||
kcMux.HandleFunc("/admin/realms/atlas/users", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte(`[{"id":"robot-id"}]`))
|
||||
})
|
||||
kcMux.HandleFunc("/admin/realms/atlas/users/robot-id/impersonation", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte(`{"redirect":""}`))
|
||||
})
|
||||
kcMux.HandleFunc("/realms/atlas/account/", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte("account ok"))
|
||||
})
|
||||
kcServer := httptest.NewTLSServer(kcMux)
|
||||
defer kcServer.Close()
|
||||
|
||||
cfg := lifecycleConfig(t)
|
||||
cfg.Startup.ServiceChecklistAuth = authSettings(kcServer.URL)
|
||||
recorder := &commandRecorder{}
|
||||
base := lifecycleDispatcher(recorder)
|
||||
run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
||||
command := name + " " + strings.Join(args, " ")
|
||||
if name == "kubectl" && strings.Contains(command, "-n sso get secret keycloak-admin -o json") {
|
||||
return testSecretJSON("admin", "password"), nil
|
||||
}
|
||||
return base(ctx, timeout, name, args...)
|
||||
}
|
||||
orch, _ := newHookOrchestrator(t, cfg, run, run)
|
||||
if err := orch.TestHookAuthenticateRobotChecklistSession(context.Background(), &http.Client{Timeout: 4 * time.Second, Transport: &http.Transport{}}); err == nil {
|
||||
t.Fatalf("expected auth bootstrap without TLS skip to fail against TLS test server")
|
||||
}
|
||||
if _, err := orch.TestHookChecklistAuthHTTPClient(context.Background(), 4*time.Second, true); err != nil {
|
||||
t.Fatalf("expected checklist auth client fallback redirect path success, got %v", err)
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user