ananke/internal/cluster/orchestrator_service_auth.go

395 lines
16 KiB
Go

package cluster
import (
"context"
"crypto/tls"
"encoding/base64"
"encoding/json"
"fmt"
"html"
"io"
"net/http"
"net/http/cookiejar"
neturl "net/url"
"regexp"
"strings"
"time"
"scm.bstein.dev/bstein/ananke/internal/config"
)
// JSON payload shapes decoded by the checklist auth helpers. Each type declares
// only the single field this file reads from the corresponding response.
type (
	// keycloakTokenResponse holds the "access_token" field of a token reply.
	keycloakTokenResponse struct {
		AccessToken string `json:"access_token"`
	}

	// keycloakUser holds the "id" field of one entry in a user lookup reply.
	keycloakUser struct {
		ID string `json:"id"`
	}

	// keycloakImpersonationResponse holds the "redirect" field of an
	// impersonation reply.
	keycloakImpersonationResponse struct {
		Redirect string `json:"redirect"`
	}

	// kubernetesSecret holds the "data" map of a secret document; the values
	// are base64 text that kubernetesSecretValue decodes.
	kubernetesSecret struct {
		Data map[string]string `json:"data"`
	}
)
// keycloakLoginFormActionPattern captures the action attribute value of the
// <form> tag whose id is "kc-form-login". The (?is) flags make the match
// case-insensitive and let it span newlines. The id attribute must appear
// before action inside the same tag, and either quote style is accepted.
var keycloakLoginFormActionPattern = regexp.MustCompile(`(?is)<form[^>]*id=["']kc-form-login["'][^>]*action=["']([^"']+)["']`)
// checklistAuthHTTPClient builds a cookie-aware HTTP client and authenticates
// it via authenticateRobotChecklistSession before handing it back.
//
// Parameters:
//   - ctx is passed to the authentication requests.
//   - timeout becomes the http.Client timeout.
//   - insecureSkipTLS disables TLS certificate verification when true.
//
// Returns the authenticated client, or an error when the cookie jar cannot be
// created or authentication fails. On failure no client is returned and its
// pooled connections are released.
//
// Why: startup checklist checks that require real user behavior need an
// authenticated robotuser browser-like session before probing service pages.
func (o *Orchestrator) checklistAuthHTTPClient(ctx context.Context, timeout time.Duration, insecureSkipTLS bool) (*http.Client, error) {
	jar, err := cookiejar.New(nil)
	if err != nil {
		return nil, fmt.Errorf("create checklist cookie jar: %w", err)
	}

	// A dedicated transport is created on every call. With a zero
	// IdleConnTimeout its keep-alive connections would never be reaped, so
	// bound their lifetime explicitly.
	transport := &http.Transport{IdleConnTimeout: 90 * time.Second}
	if insecureSkipTLS {
		transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
	}

	client := &http.Client{
		Timeout:   timeout,
		Transport: transport,
		Jar:       jar,
	}
	if err := o.authenticateRobotChecklistSession(ctx, client); err != nil {
		// The client is discarded on failure; drop its pooled connections now
		// instead of leaving them open behind an unreachable transport.
		client.CloseIdleConnections()
		return nil, err
	}
	return client, nil
}
// authenticateRobotChecklistSession walks client through the full robot-user
// login sequence configured under Startup.ServiceChecklistAuth.
//
// Sequence: validate the auth mode, read the admin credentials, perform the
// admin browser login, obtain an admin token, look up the robot user id,
// request impersonation, and finally GET the impersonation redirect (or the
// realm account page when no redirect was returned).
//
// Returns an error when the mode is empty, "none", or anything other than
// "keycloak_robotuser", when any step fails, or when the final request ends on
// a Keycloak login endpoint.
//
// Why: authenticated checklist probes must reflect what a human sees after
// Keycloak login, not only pre-auth redirects.
func (o *Orchestrator) authenticateRobotChecklistSession(ctx context.Context, client *http.Client) error {
	settings := o.cfg.Startup.ServiceChecklistAuth

	switch mode := strings.TrimSpace(settings.Mode); mode {
	case "keycloak_robotuser":
		// The only supported mode; continue with the login sequence.
	case "", "none":
		return fmt.Errorf("startup checklist auth mode is disabled")
	default:
		return fmt.Errorf("unsupported startup checklist auth mode %q", mode)
	}

	user, password, err := o.keycloakAdminCredentials(ctx, settings)
	if err != nil {
		return err
	}
	if loginErr := o.keycloakAdminBrowserLogin(ctx, client, settings, user, password); loginErr != nil {
		return fmt.Errorf("initialize keycloak admin browser session: %w", loginErr)
	}
	token, err := o.keycloakAdminToken(ctx, client, settings, user, password)
	if err != nil {
		return err
	}
	userID, err := o.keycloakRobotUserID(ctx, client, settings, token)
	if err != nil {
		return err
	}
	target, err := o.keycloakImpersonationRedirect(ctx, client, settings, token, userID)
	if err != nil {
		return err
	}
	if strings.TrimSpace(target) == "" {
		// No redirect supplied: fall back to the realm account page.
		target = keycloakBaseURL(settings) + "/realms/" + strings.TrimSpace(settings.Realm) + "/account/"
	}

	followReq, err := http.NewRequestWithContext(ctx, http.MethodGet, target, nil)
	if err != nil {
		return fmt.Errorf("build robot redirect request: %w", err)
	}
	followReq.Header.Set("User-Agent", "ananke/startup-checklist")
	followResp, err := client.Do(followReq)
	if err != nil {
		return fmt.Errorf("initialize robot session redirect: %w", err)
	}
	defer followResp.Body.Close()
	// Best-effort drain of at most 1 KiB; the body content itself is unused.
	_, _ = io.Copy(io.Discard, io.LimitReader(followResp.Body, 1024))

	// Response.Request is the request that produced this response, so its URL
	// is where the client ended up.
	landed := ""
	if last := followResp.Request; last != nil && last.URL != nil {
		landed = strings.TrimSpace(last.URL.String())
	}
	for _, loginPath := range []string{"/protocol/openid-connect/auth", "/login-actions/authenticate"} {
		if strings.Contains(landed, loginPath) {
			return fmt.Errorf("robot session bootstrap ended on keycloak login flow: %s", landed)
		}
	}
	return nil
}
// keycloakAdminBrowserLogin drives the Keycloak master-realm login form through
// client, the way a browser would.
//
// Steps: GET the master-realm OIDC auth endpoint with the admin-console query,
// extract the kc-form-login action URL from the returned page, then POST the
// admin username and password to that action.
//
// Returns an error when a request cannot be built or sent, when the auth page
// is not 2xx, when no form action can be parsed, when the submit answers with a
// 5xx status, when the final URL is still a Keycloak login endpoint, or when
// the response body still contains the login form.
//
// Why: Keycloak impersonation only yields a usable robot session cookie when the
// client already has a real admin browser session; token-only API calls are not
// sufficient for downstream OIDC-gated service checks.
func (o *Orchestrator) keycloakAdminBrowserLogin(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminUser string, adminPassword string) error {
	root := keycloakBaseURL(auth)

	// Step 1: fetch the rendered login page.
	pageURL := root + "/realms/master/protocol/openid-connect/auth?" + keycloakAdminConsoleAuthQuery(root).Encode()
	pageReq, err := http.NewRequestWithContext(ctx, http.MethodGet, pageURL, nil)
	if err != nil {
		return fmt.Errorf("build keycloak admin auth request: %w", err)
	}
	pageReq.Header.Set("User-Agent", "ananke/startup-checklist")
	pageResp, err := client.Do(pageReq)
	if err != nil {
		return fmt.Errorf("request keycloak admin auth page: %w", err)
	}
	defer pageResp.Body.Close()
	// Read errors are deliberately ignored: a short page simply fails the
	// form-action parse below.
	page, _ := io.ReadAll(io.LimitReader(pageResp.Body, 512*1024))
	if code := pageResp.StatusCode; code < 200 || code >= 300 {
		return fmt.Errorf("keycloak admin auth page request failed status=%d body=%q", code, compactHTTPBody(page))
	}

	// Step 2: locate the form action carried by the page.
	submitURL, err := keycloakLoginFormAction(string(page), root)
	if err != nil {
		return err
	}

	// Step 3: submit the credentials as a URL-encoded form.
	fields := neturl.Values{
		"username":     {adminUser},
		"password":     {adminPassword},
		"credentialId": {""},
	}
	submitReq, err := http.NewRequestWithContext(ctx, http.MethodPost, submitURL, strings.NewReader(fields.Encode()))
	if err != nil {
		return fmt.Errorf("build keycloak admin login request: %w", err)
	}
	submitReq.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	submitReq.Header.Set("User-Agent", "ananke/startup-checklist")
	submitResp, err := client.Do(submitReq)
	if err != nil {
		return fmt.Errorf("request keycloak admin login submit: %w", err)
	}
	defer submitResp.Body.Close()
	// Best-effort read; the body is only inspected for diagnostics and for the
	// leftover-form check.
	submitBody, _ := io.ReadAll(io.LimitReader(submitResp.Body, 512*1024))

	landed := ""
	if last := submitResp.Request; last != nil && last.URL != nil {
		landed = strings.TrimSpace(last.URL.String())
	}

	// Step 4: decide whether the login actually completed.
	if code := submitResp.StatusCode; code >= 500 {
		return fmt.Errorf("keycloak admin login failed status=%d body=%q", code, compactHTTPBody(submitBody))
	}
	for _, loginPath := range []string{"/login-actions/authenticate", "/protocol/openid-connect/auth"} {
		if strings.Contains(landed, loginPath) {
			return fmt.Errorf("keycloak admin login did not complete (final_url=%q)", landed)
		}
	}
	if lowered := strings.ToLower(string(submitBody)); strings.Contains(lowered, "kc-form-login") {
		return fmt.Errorf("keycloak admin login form still present after submit")
	}
	return nil
}
// keycloakAdminCredentials reads the Keycloak admin username and password from
// the Kubernetes secret named in auth.
//
// The secret namespace, secret name, and both key names are whitespace-trimmed
// before use. Each value is fetched with its own kubernetesSecretValue call.
//
// Returns (username, password, nil) on success, or two empty strings and a
// wrapped error naming the secret when either lookup fails.
//
// Why: robotuser impersonation uses a cluster-managed admin secret so startup
// checks do not rely on interactive credentials.
func (o *Orchestrator) keycloakAdminCredentials(ctx context.Context, auth config.ServiceChecklistAuthSettings) (string, string, error) {
	var (
		ns         = strings.TrimSpace(auth.AdminSecretNamespace)
		secretName = strings.TrimSpace(auth.AdminSecretName)
		userField  = strings.TrimSpace(auth.AdminSecretUsernameKey)
		passField  = strings.TrimSpace(auth.AdminSecretPasswordKey)
	)

	user, userErr := o.kubernetesSecretValue(ctx, ns, secretName, userField)
	if userErr != nil {
		return "", "", fmt.Errorf("read keycloak admin username from secret %s/%s: %w", ns, secretName, userErr)
	}

	pass, passErr := o.kubernetesSecretValue(ctx, ns, secretName, passField)
	if passErr != nil {
		return "", "", fmt.Errorf("read keycloak admin password from secret %s/%s: %w", ns, secretName, passErr)
	}

	return user, pass, nil
}
// kubernetesSecretValue fetches one secret as JSON through o.kubectl and
// returns the base64-decoded, whitespace-trimmed value stored under key.
//
// Returns an error when the kubectl call fails, the output is not valid JSON,
// the key is absent from the "data" map, the value is not valid standard
// base64, or the decoded value is empty after trimming.
//
// Why: checklist auth depends on secret-backed credentials and should decode
// them directly from Kubernetes rather than shelling out to external tools.
func (o *Orchestrator) kubernetesSecretValue(ctx context.Context, namespace string, name string, key string) (string, error) {
	rawJSON, err := o.kubectl(ctx, 25*time.Second, "-n", namespace, "get", "secret", name, "-o", "json")
	if err != nil {
		return "", fmt.Errorf("kubectl get secret: %w", err)
	}

	var secret kubernetesSecret
	if decodeErr := json.Unmarshal([]byte(rawJSON), &secret); decodeErr != nil {
		return "", fmt.Errorf("decode secret json: %w", decodeErr)
	}

	b64, present := secret.Data[key]
	if !present {
		return "", fmt.Errorf("key %q not present in secret", key)
	}

	plain, err := base64.StdEncoding.DecodeString(strings.TrimSpace(b64))
	if err != nil {
		return "", fmt.Errorf("decode base64 secret value: %w", err)
	}

	// Surrounding whitespace (such as a trailing newline) is not part of the
	// returned value.
	if trimmed := strings.TrimSpace(string(plain)); trimmed != "" {
		return trimmed, nil
	}
	return "", fmt.Errorf("decoded value is empty")
}
// keycloakAdminToken requests an access token from the master-realm token
// endpoint using the password grant and the "admin-cli" client id.
//
// Returns the whitespace-trimmed access_token, or an error when the request
// cannot be built or sent, the status is not 2xx, the body is not valid JSON,
// or access_token is empty.
//
// Why: admin API access is needed to impersonate robotuser for deterministic
// user-journey checks across OIDC-gated services.
func (o *Orchestrator) keycloakAdminToken(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminUser string, adminPassword string) (string, error) {
	grant := neturl.Values{
		"grant_type": {"password"},
		"client_id":  {"admin-cli"},
		"username":   {adminUser},
		"password":   {adminPassword},
	}
	endpoint := keycloakBaseURL(auth) + "/realms/master/protocol/openid-connect/token"

	tokenReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, strings.NewReader(grant.Encode()))
	if err != nil {
		return "", fmt.Errorf("build admin token request: %w", err)
	}
	tokenReq.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	tokenReq.Header.Set("User-Agent", "ananke/startup-checklist")

	tokenResp, err := client.Do(tokenReq)
	if err != nil {
		return "", fmt.Errorf("request admin token: %w", err)
	}
	defer tokenResp.Body.Close()

	// Read at most 64 KiB; a read error surfaces below as a decode failure.
	raw, _ := io.ReadAll(io.LimitReader(tokenResp.Body, 64*1024))
	if code := tokenResp.StatusCode; code < 200 || code >= 300 {
		return "", fmt.Errorf("admin token request failed status=%d body=%q", code, compactHTTPBody(raw))
	}

	var parsed keycloakTokenResponse
	if decodeErr := json.Unmarshal(raw, &parsed); decodeErr != nil {
		return "", fmt.Errorf("decode admin token response: %w", decodeErr)
	}
	if accessToken := strings.TrimSpace(parsed.AccessToken); accessToken != "" {
		return accessToken, nil
	}
	return "", fmt.Errorf("admin token response missing access_token")
}
// keycloakRobotUserID looks up auth.RobotUsername in auth.Realm through the
// admin users endpoint (username=<name>&exact=true) and returns the id of the
// first entry in the reply.
//
// Returns the whitespace-trimmed id, or an error when the request cannot be
// built or sent, the status is not 2xx, the body is not a JSON array of users,
// or the first entry is missing or has an empty id.
//
// Why: impersonation requires the concrete user id and should fail fast when
// robotuser is missing from the realm.
func (o *Orchestrator) keycloakRobotUserID(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string) (string, error) {
	realmName := strings.TrimSpace(auth.Realm)
	robotName := strings.TrimSpace(auth.RobotUsername)

	filter := neturl.Values{
		"username": {robotName},
		"exact":    {"true"},
	}
	lookupURL := keycloakBaseURL(auth) + "/admin/realms/" + realmName + "/users?" + filter.Encode()

	lookupReq, err := http.NewRequestWithContext(ctx, http.MethodGet, lookupURL, nil)
	if err != nil {
		return "", fmt.Errorf("build robot user lookup request: %w", err)
	}
	lookupReq.Header.Set("Authorization", "Bearer "+adminToken)
	lookupReq.Header.Set("User-Agent", "ananke/startup-checklist")

	lookupResp, err := client.Do(lookupReq)
	if err != nil {
		return "", fmt.Errorf("lookup robot user: %w", err)
	}
	defer lookupResp.Body.Close()

	// Read at most 64 KiB; a read error surfaces below as a decode failure.
	raw, _ := io.ReadAll(io.LimitReader(lookupResp.Body, 64*1024))
	if code := lookupResp.StatusCode; code < 200 || code >= 300 {
		return "", fmt.Errorf("robot user lookup failed status=%d body=%q", code, compactHTTPBody(raw))
	}

	var found []keycloakUser
	if decodeErr := json.Unmarshal(raw, &found); decodeErr != nil {
		return "", fmt.Errorf("decode robot user lookup response: %w", decodeErr)
	}

	// Only the first entry is considered.
	id := ""
	if len(found) > 0 {
		id = strings.TrimSpace(found[0].ID)
	}
	if id == "" {
		return "", fmt.Errorf("robot user %q not found in realm %q", robotName, realmName)
	}
	return id, nil
}
// keycloakImpersonationRedirect POSTs an empty body to the admin impersonation
// endpoint for robotUserID in auth.Realm and returns the "redirect" field of
// the JSON reply.
//
// Returns the whitespace-trimmed redirect (which may be empty), or an error
// when the request cannot be built or sent, the status is not 2xx, or the body
// is not valid JSON.
//
// Why: opening a real impersonated browser session guarantees checks evaluate
// post-login app behavior instead of only auth-gateway redirects.
func (o *Orchestrator) keycloakImpersonationRedirect(ctx context.Context, client *http.Client, auth config.ServiceChecklistAuthSettings, adminToken string, robotUserID string) (string, error) {
	endpoint := keycloakBaseURL(auth) +
		"/admin/realms/" + strings.TrimSpace(auth.Realm) +
		"/users/" + strings.TrimSpace(robotUserID) +
		"/impersonation"

	impReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, http.NoBody)
	if err != nil {
		return "", fmt.Errorf("build robot impersonation request: %w", err)
	}
	impReq.Header.Set("Authorization", "Bearer "+adminToken)
	impReq.Header.Set("User-Agent", "ananke/startup-checklist")

	impResp, err := client.Do(impReq)
	if err != nil {
		return "", fmt.Errorf("request robot impersonation: %w", err)
	}
	defer impResp.Body.Close()

	// Read at most 64 KiB; a read error surfaces below as a decode failure.
	raw, _ := io.ReadAll(io.LimitReader(impResp.Body, 64*1024))
	if code := impResp.StatusCode; code < 200 || code >= 300 {
		return "", fmt.Errorf("robot impersonation failed status=%d body=%q", code, compactHTTPBody(raw))
	}

	var parsed keycloakImpersonationResponse
	if decodeErr := json.Unmarshal(raw, &parsed); decodeErr != nil {
		return "", fmt.Errorf("decode robot impersonation response: %w", decodeErr)
	}
	redirect := strings.TrimSpace(parsed.Redirect)
	return redirect, nil
}
// keycloakBaseURL returns auth.KeycloakBaseURL with surrounding whitespace
// removed and every trailing "/" stripped, so callers can append paths that
// start with "/".
//
// Why: centralizing URL normalization keeps auth request construction stable.
func keycloakBaseURL(auth config.ServiceChecklistAuthSettings) string {
	configured := strings.TrimSpace(auth.KeycloakBaseURL)
	return strings.TrimRight(configured, "/")
}
// keycloakAdminConsoleAuthQuery returns the query parameters sent to the
// master-realm OIDC auth endpoint for the "security-admin-console" client.
//
// baseURL is whitespace-trimmed and stripped of trailing "/" before
// "/admin/master/console/" is appended to form redirect_uri. Every other
// parameter, including state, nonce, and the S256 code_challenge, is a fixed
// constant.
//
// Why: centralizes required Keycloak admin-console auth parameters, including
// PKCE fields required by current Keycloak defaults.
func keycloakAdminConsoleAuthQuery(baseURL string) neturl.Values {
	consoleURL := strings.TrimRight(strings.TrimSpace(baseURL), "/") + "/admin/master/console/"
	return neturl.Values{
		"client_id":             {"security-admin-console"},
		"redirect_uri":          {consoleURL},
		"response_type":         {"code"},
		"scope":                 {"openid"},
		"state":                 {"ananke-startup-checklist"},
		"nonce":                 {"ananke-startup-checklist"},
		"code_challenge_method": {"S256"},
		"code_challenge":        {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"},
	}
}
// keycloakLoginFormAction extracts the kc-form-login action URL from page.
//
// The captured attribute is whitespace-trimmed and HTML-unescaped. An action
// starting with "/" is joined onto baseURL (itself trimmed of whitespace and
// trailing "/"); an action starting with "http://" or "https://" is returned
// unchanged.
//
// Returns an error when the form action is absent or blank, or when the action
// has any other shape.
//
// Why: Keycloak's login form action carries session-bound query params and must
// be parsed from the rendered page before posting credentials.
func keycloakLoginFormAction(page string, baseURL string) (string, error) {
	captured := ""
	if groups := keycloakLoginFormActionPattern.FindStringSubmatch(page); len(groups) >= 2 {
		captured = strings.TrimSpace(groups[1])
	}
	if captured == "" {
		return "", fmt.Errorf("keycloak login page missing kc-form-login action")
	}

	action := html.UnescapeString(captured)
	switch {
	case strings.HasPrefix(action, "/"):
		// Root-relative action: anchor it on the configured base URL.
		return strings.TrimRight(strings.TrimSpace(baseURL), "/") + action, nil
	case strings.HasPrefix(action, "http://"), strings.HasPrefix(action, "https://"):
		return action, nil
	default:
		return "", fmt.Errorf("keycloak login action uses unsupported format %q", action)
	}
}
// compactHTTPBody turns a raw HTTP body into a bounded single-line summary.
//
// Every run of whitespace collapses to one space and leading/trailing
// whitespace is dropped. A summary longer than 2048 bytes is cut at the last
// rune boundary at or before that limit and suffixed with "...(truncated)".
// Empty or whitespace-only input yields "".
//
// Why: checklist auth errors should include a readable body summary without
// leaking multi-line payload noise into orchestrator logs. Callers pass bodies
// of up to 512 KiB, so the summary is also length-capped to keep error strings
// small.
func compactHTTPBody(raw []byte) string {
	const (
		maxSummaryBytes = 2048
		truncatedMarker = "...(truncated)"
	)

	summary := strings.Join(strings.Fields(string(raw)), " ")
	if len(summary) <= maxSummaryBytes {
		return summary
	}

	// Ranging over a string yields the byte offset of each rune start, so the
	// last offset within the limit is a safe place to cut without splitting a
	// multi-byte character.
	cut := 0
	for offset := range summary {
		if offset > maxSummaryBytes {
			break
		}
		cut = offset
	}
	return summary[:cut] + truncatedMarker
}