package cluster import ( "context" "crypto/tls" "encoding/base64" "encoding/json" "fmt" "html" "io" "net/http" "net/http/cookiejar" neturl "net/url" "regexp" "strings" "time" "scm.bstein.dev/bstein/ananke/internal/config" ) type keycloakTokenResponse struct { AccessToken string `json:"access_token"` } type keycloakUser struct { ID string `json:"id"` } type keycloakImpersonationResponse struct { Redirect string `json:"redirect"` } type kubernetesSecret struct { Data map[string]string `json:"data"` } var keycloakLoginFormActionPattern = regexp.MustCompile(`(?is)]*id=["']kc-form-login["'][^>]*action=["']([^"']+)["']`) // checklistAuthHTTPClient runs one orchestration or CLI step. // Signature: (o *Orchestrator) checklistAuthHTTPClient(ctx context.Context, timeout time.Duration, insecureSkipTLS bool) (*http.Client, error). // Why: startup checklist checks that require real user behavior need an // authenticated robotuser browser-like session before probing service pages. func (o *Orchestrator) checklistAuthHTTPClient(ctx context.Context, timeout time.Duration, insecureSkipTLS bool) (*http.Client, error) { jar, _ := cookiejar.New(nil) transport := &http.Transport{} if insecureSkipTLS { transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true} } client := &http.Client{ Timeout: timeout, Transport: transport, Jar: jar, } if err := o.authenticateRobotChecklistSession(ctx, client); err != nil { return nil, err } return client, nil } // authenticateRobotChecklistSession runs one orchestration or CLI step. // Signature: (o *Orchestrator) authenticateRobotChecklistSession(ctx context.Context, client *http.Client) error. // Why: authenticated checklist probes must reflect what a human sees after // Keycloak login, not only pre-auth redirects. func (o *Orchestrator) authenticateRobotChecklistSession(ctx context.Context, client *http.Client) error { auth := o.cfg.Startup.ServiceChecklistAuth mode := strings.TrimSpace(auth.Mode) if mode == "" || mode == "none" { return fmt.Errorf("startup checklist auth mode is disabled") } if mode != "keycloak_robotuser" { return fmt.Errorf("unsupported startup checklist auth mode %q", mode) } adminUser, adminPassword, err := o.keycloakAdminCredentials(ctx, auth) if err != nil { return err } if err := o.keycloakAdminBrowserLogin(ctx, client, auth, adminUser, adminPassword); err != nil { return fmt.Errorf("initialize keycloak admin browser session: %w", err) } adminToken, err := o.keycloakAdminToken(ctx, client, auth, adminUser, adminPassword) if err != nil { return err } robotUserID, err := o.keycloakRobotUserID(ctx, client, auth, adminToken) if err != nil { return err } redirectURL, err := o.keycloakImpersonationRedirect(ctx, client, auth, adminToken, robotUserID) if err != nil { return err } if strings.TrimSpace(redirectURL) == "" { redirectURL = keycloakBaseURL(auth) + "/realms/" + strings.TrimSpace(auth.Realm) + "/account/" } req, err := http.NewRequestWithContext(ctx, http.MethodGet, redirectURL, nil) if err != nil { return fmt.Errorf("build robot redirect request: %w", err) } req.Header.Set("User-Agent", "ananke/startup-checklist") resp, err := client.Do(req) if err != nil { return fmt.Errorf("initialize robot session redirect: %w", err) } defer resp.Body.Close() _, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 1024)) finalURL := "" if resp.Request != nil && resp.Request.URL != nil { finalURL = strings.TrimSpace(resp.Request.URL.String()) } if strings.Contains(finalURL, "/protocol/openid-connect/auth") || strings.Contains(finalURL, "/login-actions/authenticate") { return fmt.Errorf("robot session bootstrap ended on keycloak login flow: %s", finalURL) } return nil } // keycloakAdminBrowserLogin runs one orchestration or CLI step. // Signature: (o *Orchestrator) keycloakAdminBrowserLogin(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminUser string, adminPassword string) error. // Why: Keycloak impersonation only yields a usable robot session cookie when the // client already has a real admin browser session; token-only API calls are not // sufficient for downstream OIDC-gated service checks. func (o *Orchestrator) keycloakAdminBrowserLogin(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminUser string, adminPassword string) error { baseURL := keycloakBaseURL(auth) authURL := baseURL + "/realms/master/protocol/openid-connect/auth?" + keycloakAdminConsoleAuthQuery(baseURL).Encode() req, err := http.NewRequestWithContext(ctx, http.MethodGet, authURL, nil) if err != nil { return fmt.Errorf("build keycloak admin auth request: %w", err) } req.Header.Set("User-Agent", "ananke/startup-checklist") resp, err := client.Do(req) if err != nil { return fmt.Errorf("request keycloak admin auth page: %w", err) } defer resp.Body.Close() body, _ := io.ReadAll(io.LimitReader(resp.Body, 512*1024)) if resp.StatusCode/100 != 2 { return fmt.Errorf("keycloak admin auth page request failed status=%d body=%q", resp.StatusCode, compactHTTPBody(body)) } actionURL, err := keycloakLoginFormAction(string(body), baseURL) if err != nil { return err } form := neturl.Values{} form.Set("username", adminUser) form.Set("password", adminPassword) form.Set("credentialId", "") loginReq, err := http.NewRequestWithContext(ctx, http.MethodPost, actionURL, strings.NewReader(form.Encode())) if err != nil { return fmt.Errorf("build keycloak admin login request: %w", err) } loginReq.Header.Set("Content-Type", "application/x-www-form-urlencoded") loginReq.Header.Set("User-Agent", "ananke/startup-checklist") loginResp, err := client.Do(loginReq) if err != nil { return fmt.Errorf("request keycloak admin login submit: %w", err) } defer loginResp.Body.Close() loginBody, _ := io.ReadAll(io.LimitReader(loginResp.Body, 512*1024)) finalURL := "" if loginResp.Request != nil && loginResp.Request.URL != nil { finalURL = strings.TrimSpace(loginResp.Request.URL.String()) } if loginResp.StatusCode >= 500 { return fmt.Errorf("keycloak admin login failed status=%d body=%q", loginResp.StatusCode, compactHTTPBody(loginBody)) } if strings.Contains(finalURL, "/login-actions/authenticate") || strings.Contains(finalURL, "/protocol/openid-connect/auth") { return fmt.Errorf("keycloak admin login did not complete (final_url=%q)", finalURL) } if strings.Contains(strings.ToLower(string(loginBody)), "kc-form-login") { return fmt.Errorf("keycloak admin login form still present after submit") } return nil } // keycloakAdminCredentials runs one orchestration or CLI step. // Signature: (o *Orchestrator) keycloakAdminCredentials(ctx context.Context, auth config.ServiceChecklistAuthSettings) (string, string, error). // Why: robotuser impersonation uses a cluster-managed admin secret so startup // checks do not rely on interactive credentials. func (o *Orchestrator) keycloakAdminCredentials(ctx context.Context, auth config.ServiceChecklistAuthSettings) (string, string, error) { namespace := strings.TrimSpace(auth.AdminSecretNamespace) name := strings.TrimSpace(auth.AdminSecretName) userKey := strings.TrimSpace(auth.AdminSecretUsernameKey) passwordKey := strings.TrimSpace(auth.AdminSecretPasswordKey) username, err := o.kubernetesSecretValue(ctx, namespace, name, userKey) if err != nil { return "", "", fmt.Errorf("read keycloak admin username from secret %s/%s: %w", namespace, name, err) } password, err := o.kubernetesSecretValue(ctx, namespace, name, passwordKey) if err != nil { return "", "", fmt.Errorf("read keycloak admin password from secret %s/%s: %w", namespace, name, err) } return username, password, nil } // kubernetesSecretValue runs one orchestration or CLI step. // Signature: (o *Orchestrator) kubernetesSecretValue(ctx context.Context, namespace string, name string, key string) (string, error). // Why: checklist auth depends on secret-backed credentials and should decode // them directly from Kubernetes rather than shelling out to external tools. func (o *Orchestrator) kubernetesSecretValue(ctx context.Context, namespace string, name string, key string) (string, error) { out, err := o.kubectl(ctx, 25*time.Second, "-n", namespace, "get", "secret", name, "-o", "json") if err != nil { return "", fmt.Errorf("kubectl get secret: %w", err) } var doc kubernetesSecret if err := json.Unmarshal([]byte(out), &doc); err != nil { return "", fmt.Errorf("decode secret json: %w", err) } encoded, ok := doc.Data[key] if !ok { return "", fmt.Errorf("key %q not present in secret", key) } decoded, err := base64.StdEncoding.DecodeString(strings.TrimSpace(encoded)) if err != nil { return "", fmt.Errorf("decode base64 secret value: %w", err) } value := strings.TrimSpace(string(decoded)) if value == "" { return "", fmt.Errorf("decoded value is empty") } return value, nil } // keycloakAdminToken runs one orchestration or CLI step. // Signature: (o *Orchestrator) keycloakAdminToken(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminUser string, adminPassword string) (string, error). // Why: admin API access is needed to impersonate robotuser for deterministic // user-journey checks across OIDC-gated services. func (o *Orchestrator) keycloakAdminToken(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminUser string, adminPassword string) (string, error) { form := neturl.Values{} form.Set("grant_type", "password") form.Set("client_id", "admin-cli") form.Set("username", adminUser) form.Set("password", adminPassword) tokenURL := keycloakBaseURL(auth) + "/realms/master/protocol/openid-connect/token" req, err := http.NewRequestWithContext(ctx, http.MethodPost, tokenURL, strings.NewReader(form.Encode())) if err != nil { return "", fmt.Errorf("build admin token request: %w", err) } req.Header.Set("Content-Type", "application/x-www-form-urlencoded") req.Header.Set("User-Agent", "ananke/startup-checklist") resp, err := client.Do(req) if err != nil { return "", fmt.Errorf("request admin token: %w", err) } defer resp.Body.Close() body, _ := io.ReadAll(io.LimitReader(resp.Body, 64*1024)) if resp.StatusCode/100 != 2 { return "", fmt.Errorf("admin token request failed status=%d body=%q", resp.StatusCode, compactHTTPBody(body)) } var payload keycloakTokenResponse if err := json.Unmarshal(body, &payload); err != nil { return "", fmt.Errorf("decode admin token response: %w", err) } token := strings.TrimSpace(payload.AccessToken) if token == "" { return "", fmt.Errorf("admin token response missing access_token") } return token, nil } // keycloakRobotUserID runs one orchestration or CLI step. // Signature: (o *Orchestrator) keycloakRobotUserID(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string) (string, error). // Why: impersonation requires the concrete user id and should fail fast when // robotuser is missing from the realm. func (o *Orchestrator) keycloakRobotUserID(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string) (string, error) { base := keycloakBaseURL(auth) realm := strings.TrimSpace(auth.Realm) username := strings.TrimSpace(auth.RobotUsername) query := neturl.Values{} query.Set("username", username) query.Set("exact", "true") usersURL := base + "/admin/realms/" + realm + "/users?" + query.Encode() req, err := http.NewRequestWithContext(ctx, http.MethodGet, usersURL, nil) if err != nil { return "", fmt.Errorf("build robot user lookup request: %w", err) } req.Header.Set("Authorization", "Bearer "+adminToken) req.Header.Set("User-Agent", "ananke/startup-checklist") resp, err := client.Do(req) if err != nil { return "", fmt.Errorf("lookup robot user: %w", err) } defer resp.Body.Close() body, _ := io.ReadAll(io.LimitReader(resp.Body, 64*1024)) if resp.StatusCode/100 != 2 { return "", fmt.Errorf("robot user lookup failed status=%d body=%q", resp.StatusCode, compactHTTPBody(body)) } var users []keycloakUser if err := json.Unmarshal(body, &users); err != nil { return "", fmt.Errorf("decode robot user lookup response: %w", err) } if len(users) == 0 || strings.TrimSpace(users[0].ID) == "" { return "", fmt.Errorf("robot user %q not found in realm %q", username, realm) } return strings.TrimSpace(users[0].ID), nil } // keycloakImpersonationRedirect runs one orchestration or CLI step. // Signature: (o *Orchestrator) keycloakImpersonationRedirect(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string, robotUserID string) (string, error). // Why: opening a real impersonated browser session guarantees checks evaluate // post-login app behavior instead of only auth-gateway redirects. func (o *Orchestrator) keycloakImpersonationRedirect(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string, robotUserID string) (string, error) { base := keycloakBaseURL(auth) realm := strings.TrimSpace(auth.Realm) impersonateURL := base + "/admin/realms/" + realm + "/users/" + strings.TrimSpace(robotUserID) + "/impersonation" req, err := http.NewRequestWithContext(ctx, http.MethodPost, impersonateURL, http.NoBody) if err != nil { return "", fmt.Errorf("build robot impersonation request: %w", err) } req.Header.Set("Authorization", "Bearer "+adminToken) req.Header.Set("User-Agent", "ananke/startup-checklist") resp, err := client.Do(req) if err != nil { return "", fmt.Errorf("request robot impersonation: %w", err) } defer resp.Body.Close() body, _ := io.ReadAll(io.LimitReader(resp.Body, 64*1024)) if resp.StatusCode/100 != 2 { return "", fmt.Errorf("robot impersonation failed status=%d body=%q", resp.StatusCode, compactHTTPBody(body)) } var payload keycloakImpersonationResponse if err := json.Unmarshal(body, &payload); err != nil { return "", fmt.Errorf("decode robot impersonation response: %w", err) } return strings.TrimSpace(payload.Redirect), nil } // keycloakBaseURL runs one orchestration or CLI step. // Signature: keycloakBaseURL(auth config.ServiceChecklistAuthSettings) string. // Why: centralizing URL normalization keeps auth request construction stable. func keycloakBaseURL(auth config.ServiceChecklistAuthSettings) string { return strings.TrimRight(strings.TrimSpace(auth.KeycloakBaseURL), "/") } // keycloakAdminConsoleAuthQuery runs one orchestration or CLI step. // Signature: keycloakAdminConsoleAuthQuery() neturl.Values. // Why: centralizes required Keycloak admin-console auth parameters, including // PKCE fields required by current Keycloak defaults. func keycloakAdminConsoleAuthQuery(baseURL string) neturl.Values { query := neturl.Values{} query.Set("client_id", "security-admin-console") query.Set("redirect_uri", strings.TrimRight(strings.TrimSpace(baseURL), "/")+"/admin/master/console/") query.Set("response_type", "code") query.Set("scope", "openid") query.Set("state", "ananke-startup-checklist") query.Set("nonce", "ananke-startup-checklist") query.Set("code_challenge_method", "S256") query.Set("code_challenge", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") return query } // keycloakLoginFormAction runs one orchestration or CLI step. // Signature: keycloakLoginFormAction(page string, baseURL string) (string, error). // Why: Keycloak's login form action carries session-bound query params and must // be parsed from the rendered page before posting credentials. func keycloakLoginFormAction(page string, baseURL string) (string, error) { matches := keycloakLoginFormActionPattern.FindStringSubmatch(page) if len(matches) < 2 || strings.TrimSpace(matches[1]) == "" { return "", fmt.Errorf("keycloak login page missing kc-form-login action") } action := html.UnescapeString(strings.TrimSpace(matches[1])) if strings.HasPrefix(action, "/") { return strings.TrimRight(strings.TrimSpace(baseURL), "/") + action, nil } if strings.HasPrefix(action, "http://") || strings.HasPrefix(action, "https://") { return action, nil } return "", fmt.Errorf("keycloak login action uses unsupported format %q", action) } // compactHTTPBody runs one orchestration or CLI step. // Signature: compactHTTPBody(raw []byte) string. // Why: checklist auth errors should include a readable body summary without // leaking multi-line payload noise into orchestrator logs. func compactHTTPBody(raw []byte) string { text := strings.TrimSpace(string(raw)) if text == "" { return "" } return strings.Join(strings.Fields(text), " ") }