package cluster import ( "context" "crypto/tls" "encoding/base64" "encoding/json" "fmt" "io" "net/http" "net/http/cookiejar" neturl "net/url" "strings" "time" "scm.bstein.dev/bstein/ananke/internal/config" ) type keycloakTokenResponse struct { AccessToken string `json:"access_token"` } type keycloakUser struct { ID string `json:"id"` } type keycloakImpersonationResponse struct { Redirect string `json:"redirect"` } type kubernetesSecret struct { Data map[string]string `json:"data"` } // checklistAuthHTTPClient runs one orchestration or CLI step. // Signature: (o *Orchestrator) checklistAuthHTTPClient(ctx context.Context, timeout time.Duration, insecureSkipTLS bool) (*http.Client, error). // Why: startup checklist checks that require real user behavior need an // authenticated robotuser browser-like session before probing service pages. func (o *Orchestrator) checklistAuthHTTPClient(ctx context.Context, timeout time.Duration, insecureSkipTLS bool) (*http.Client, error) { jar, err := cookiejar.New(nil) if err != nil { return nil, fmt.Errorf("create cookie jar: %w", err) } transport := &http.Transport{} if insecureSkipTLS { transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true} } client := &http.Client{ Timeout: timeout, Transport: transport, Jar: jar, } if err := o.authenticateRobotChecklistSession(ctx, client); err != nil { return nil, err } return client, nil } // authenticateRobotChecklistSession runs one orchestration or CLI step. // Signature: (o *Orchestrator) authenticateRobotChecklistSession(ctx context.Context, client *http.Client) error. // Why: authenticated checklist probes must reflect what a human sees after // Keycloak login, not only pre-auth redirects. func (o *Orchestrator) authenticateRobotChecklistSession(ctx context.Context, client *http.Client) error { auth := o.cfg.Startup.ServiceChecklistAuth mode := strings.TrimSpace(auth.Mode) if mode == "" || mode == "none" { return fmt.Errorf("startup checklist auth mode is disabled") } if mode != "keycloak_robotuser" { return fmt.Errorf("unsupported startup checklist auth mode %q", mode) } adminUser, adminPassword, err := o.keycloakAdminCredentials(ctx, auth) if err != nil { return err } adminToken, err := o.keycloakAdminToken(ctx, client, auth, adminUser, adminPassword) if err != nil { return err } robotUserID, err := o.keycloakRobotUserID(ctx, client, auth, adminToken) if err != nil { return err } redirectURL, err := o.keycloakImpersonationRedirect(ctx, client, auth, adminToken, robotUserID) if err != nil { return err } if strings.TrimSpace(redirectURL) == "" { redirectURL = keycloakBaseURL(auth) + "/realms/" + strings.TrimSpace(auth.Realm) + "/account/" } req, err := http.NewRequestWithContext(ctx, http.MethodGet, redirectURL, nil) if err != nil { return fmt.Errorf("build robot redirect request: %w", err) } req.Header.Set("User-Agent", "ananke/startup-checklist") resp, err := client.Do(req) if err != nil { return fmt.Errorf("initialize robot session redirect: %w", err) } defer resp.Body.Close() _, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 1024)) return nil } // keycloakAdminCredentials runs one orchestration or CLI step. // Signature: (o *Orchestrator) keycloakAdminCredentials(ctx context.Context, auth config.ServiceChecklistAuthSettings) (string, string, error). // Why: robotuser impersonation uses a cluster-managed admin secret so startup // checks do not rely on interactive credentials. func (o *Orchestrator) keycloakAdminCredentials(ctx context.Context, auth config.ServiceChecklistAuthSettings) (string, string, error) { namespace := strings.TrimSpace(auth.AdminSecretNamespace) name := strings.TrimSpace(auth.AdminSecretName) userKey := strings.TrimSpace(auth.AdminSecretUsernameKey) passwordKey := strings.TrimSpace(auth.AdminSecretPasswordKey) username, err := o.kubernetesSecretValue(ctx, namespace, name, userKey) if err != nil { return "", "", fmt.Errorf("read keycloak admin username from secret %s/%s: %w", namespace, name, err) } password, err := o.kubernetesSecretValue(ctx, namespace, name, passwordKey) if err != nil { return "", "", fmt.Errorf("read keycloak admin password from secret %s/%s: %w", namespace, name, err) } return username, password, nil } // kubernetesSecretValue runs one orchestration or CLI step. // Signature: (o *Orchestrator) kubernetesSecretValue(ctx context.Context, namespace string, name string, key string) (string, error). // Why: checklist auth depends on secret-backed credentials and should decode // them directly from Kubernetes rather than shelling out to external tools. func (o *Orchestrator) kubernetesSecretValue(ctx context.Context, namespace string, name string, key string) (string, error) { out, err := o.kubectl(ctx, 25*time.Second, "-n", namespace, "get", "secret", name, "-o", "json") if err != nil { return "", fmt.Errorf("kubectl get secret: %w", err) } var doc kubernetesSecret if err := json.Unmarshal([]byte(out), &doc); err != nil { return "", fmt.Errorf("decode secret json: %w", err) } encoded, ok := doc.Data[key] if !ok { return "", fmt.Errorf("key %q not present in secret", key) } decoded, err := base64.StdEncoding.DecodeString(strings.TrimSpace(encoded)) if err != nil { return "", fmt.Errorf("decode base64 secret value: %w", err) } value := strings.TrimSpace(string(decoded)) if value == "" { return "", fmt.Errorf("decoded value is empty") } return value, nil } // keycloakAdminToken runs one orchestration or CLI step. // Signature: (o *Orchestrator) keycloakAdminToken(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminUser string, adminPassword string) (string, error). // Why: admin API access is needed to impersonate robotuser for deterministic // user-journey checks across OIDC-gated services. func (o *Orchestrator) keycloakAdminToken(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminUser string, adminPassword string) (string, error) { form := neturl.Values{} form.Set("grant_type", "password") form.Set("client_id", "admin-cli") form.Set("username", adminUser) form.Set("password", adminPassword) tokenURL := keycloakBaseURL(auth) + "/realms/master/protocol/openid-connect/token" req, err := http.NewRequestWithContext(ctx, http.MethodPost, tokenURL, strings.NewReader(form.Encode())) if err != nil { return "", fmt.Errorf("build admin token request: %w", err) } req.Header.Set("Content-Type", "application/x-www-form-urlencoded") req.Header.Set("User-Agent", "ananke/startup-checklist") resp, err := client.Do(req) if err != nil { return "", fmt.Errorf("request admin token: %w", err) } defer resp.Body.Close() body, _ := io.ReadAll(io.LimitReader(resp.Body, 64*1024)) if resp.StatusCode/100 != 2 { return "", fmt.Errorf("admin token request failed status=%d body=%q", resp.StatusCode, compactHTTPBody(body)) } var payload keycloakTokenResponse if err := json.Unmarshal(body, &payload); err != nil { return "", fmt.Errorf("decode admin token response: %w", err) } token := strings.TrimSpace(payload.AccessToken) if token == "" { return "", fmt.Errorf("admin token response missing access_token") } return token, nil } // keycloakRobotUserID runs one orchestration or CLI step. // Signature: (o *Orchestrator) keycloakRobotUserID(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string) (string, error). // Why: impersonation requires the concrete user id and should fail fast when // robotuser is missing from the realm. func (o *Orchestrator) keycloakRobotUserID(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string) (string, error) { base := keycloakBaseURL(auth) realm := strings.TrimSpace(auth.Realm) username := strings.TrimSpace(auth.RobotUsername) query := neturl.Values{} query.Set("username", username) query.Set("exact", "true") usersURL := base + "/admin/realms/" + realm + "/users?" + query.Encode() req, err := http.NewRequestWithContext(ctx, http.MethodGet, usersURL, nil) if err != nil { return "", fmt.Errorf("build robot user lookup request: %w", err) } req.Header.Set("Authorization", "Bearer "+adminToken) req.Header.Set("User-Agent", "ananke/startup-checklist") resp, err := client.Do(req) if err != nil { return "", fmt.Errorf("lookup robot user: %w", err) } defer resp.Body.Close() body, _ := io.ReadAll(io.LimitReader(resp.Body, 64*1024)) if resp.StatusCode/100 != 2 { return "", fmt.Errorf("robot user lookup failed status=%d body=%q", resp.StatusCode, compactHTTPBody(body)) } var users []keycloakUser if err := json.Unmarshal(body, &users); err != nil { return "", fmt.Errorf("decode robot user lookup response: %w", err) } if len(users) == 0 || strings.TrimSpace(users[0].ID) == "" { return "", fmt.Errorf("robot user %q not found in realm %q", username, realm) } return strings.TrimSpace(users[0].ID), nil } // keycloakImpersonationRedirect runs one orchestration or CLI step. // Signature: (o *Orchestrator) keycloakImpersonationRedirect(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string, robotUserID string) (string, error). // Why: opening a real impersonated browser session guarantees checks evaluate // post-login app behavior instead of only auth-gateway redirects. func (o *Orchestrator) keycloakImpersonationRedirect(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string, robotUserID string) (string, error) { base := keycloakBaseURL(auth) realm := strings.TrimSpace(auth.Realm) impersonateURL := base + "/admin/realms/" + realm + "/users/" + strings.TrimSpace(robotUserID) + "/impersonation" req, err := http.NewRequestWithContext(ctx, http.MethodPost, impersonateURL, http.NoBody) if err != nil { return "", fmt.Errorf("build robot impersonation request: %w", err) } req.Header.Set("Authorization", "Bearer "+adminToken) req.Header.Set("User-Agent", "ananke/startup-checklist") resp, err := client.Do(req) if err != nil { return "", fmt.Errorf("request robot impersonation: %w", err) } defer resp.Body.Close() body, _ := io.ReadAll(io.LimitReader(resp.Body, 64*1024)) if resp.StatusCode/100 != 2 { return "", fmt.Errorf("robot impersonation failed status=%d body=%q", resp.StatusCode, compactHTTPBody(body)) } var payload keycloakImpersonationResponse if err := json.Unmarshal(body, &payload); err != nil { return "", fmt.Errorf("decode robot impersonation response: %w", err) } return strings.TrimSpace(payload.Redirect), nil } // keycloakBaseURL runs one orchestration or CLI step. // Signature: keycloakBaseURL(auth config.ServiceChecklistAuthSettings) string. // Why: centralizing URL normalization keeps auth request construction stable. func keycloakBaseURL(auth config.ServiceChecklistAuthSettings) string { return strings.TrimRight(strings.TrimSpace(auth.KeycloakBaseURL), "/") } // compactHTTPBody runs one orchestration or CLI step. // Signature: compactHTTPBody(raw []byte) string. // Why: checklist auth errors should include a readable body summary without // leaking multi-line payload noise into orchestrator logs. func compactHTTPBody(raw []byte) string { text := strings.TrimSpace(string(raw)) if text == "" { return "" } return strings.Join(strings.Fields(text), " ") }