Compare commits
42 Commits
codex/atla
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cf1e311e14 | ||
|
|
48c639e651 | ||
|
|
7908019fc7 | ||
|
|
f95f938cf1 | ||
|
|
c2dbeadc96 | ||
|
|
4d9c205ff2 | ||
|
|
7734b93180 | ||
|
|
0b10dcd897 | ||
|
|
51b9fd20e9 | ||
|
|
017cce884e | ||
|
|
111038a571 | ||
|
|
a1db462420 | ||
|
|
3944e7f0d8 | ||
|
|
8fa49bb34b | ||
|
|
6f955ad5a3 | ||
|
|
cd8d5397cb | ||
|
|
130fdc7eea | ||
|
|
ed2ebf266d | ||
|
|
3ba75ee234 | ||
|
|
603d2dcec5 | ||
|
|
ecd768818a | ||
|
|
dd077b0f92 | ||
|
|
b7543d7e57 | ||
|
|
f07373247a | ||
|
|
d750f21e80 | ||
|
|
b9970d3847 | ||
|
|
9e15badcb1 | ||
|
|
6ecf531bac | ||
|
|
4fa280ca8d | ||
|
|
8f3d3c2550 | ||
|
|
4f8c77e7b5 | ||
|
|
abcb38a3f7 | ||
|
|
2f42c176ba | ||
|
|
882d3a5986 | ||
|
|
6a5f9fb1ed | ||
| 5ae40c2116 | |||
| 12b3e14512 | |||
| 7f0d27288f | |||
| 6218c4593d | |||
| 5d7679f183 | |||
| e4014aba1c | |||
| 1713dd07c7 |
17
Dockerfile
17
Dockerfile
@ -1,19 +1,28 @@
|
||||
FROM python:3.12-slim AS base
|
||||
FROM registry.bstein.dev/bstein/python:3.12-slim AS base
|
||||
|
||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PIP_DISABLE_PIP_VERSION_CHECK=1 \
|
||||
PIP_DEFAULT_TIMEOUT=60
|
||||
|
||||
WORKDIR /app
|
||||
COPY requirements.txt /app/requirements.txt
|
||||
COPY requirements-dev.txt /app/requirements-dev.txt
|
||||
RUN pip install --no-cache-dir -r /app/requirements.txt -r /app/requirements-dev.txt
|
||||
COPY pyproject.toml /app/pyproject.toml
|
||||
RUN pip install --no-cache-dir --retries 10 -r /app/requirements.txt
|
||||
|
||||
COPY atlasbot /app/atlasbot
|
||||
RUN addgroup --system atlasbot && \
|
||||
adduser --system --ingroup atlasbot --home /app atlasbot && \
|
||||
chown -R atlasbot:atlasbot /app
|
||||
|
||||
FROM base AS test
|
||||
COPY requirements-dev.txt /app/requirements-dev.txt
|
||||
RUN pip install --no-cache-dir --retries 10 -r /app/requirements-dev.txt
|
||||
COPY testing /app/testing
|
||||
COPY tests /app/tests
|
||||
COPY scripts /app/scripts
|
||||
|
||||
FROM base AS runtime
|
||||
EXPOSE 8090
|
||||
USER atlasbot
|
||||
CMD ["python", "-m", "atlasbot.main"]
|
||||
|
||||
@ -6,4 +6,9 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
WORKDIR /app
|
||||
COPY requirements.txt /app/requirements.txt
|
||||
COPY requirements-dev.txt /app/requirements-dev.txt
|
||||
RUN pip install --no-cache-dir -r /app/requirements.txt -r /app/requirements-dev.txt
|
||||
RUN pip install --no-cache-dir -r /app/requirements.txt -r /app/requirements-dev.txt && \
|
||||
addgroup --system atlasbot && \
|
||||
adduser --system --ingroup atlasbot --home /app atlasbot && \
|
||||
chown -R atlasbot:atlasbot /app
|
||||
|
||||
USER atlasbot
|
||||
|
||||
227
Jenkinsfile
vendored
227
Jenkinsfile
vendored
@ -11,7 +11,7 @@ spec:
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
containers:
|
||||
- name: dind
|
||||
image: docker:27-dind
|
||||
image: registry.bstein.dev/bstein/docker:27-dind
|
||||
securityContext:
|
||||
privileged: true
|
||||
env:
|
||||
@ -21,13 +21,14 @@ spec:
|
||||
- "--mtu=1400"
|
||||
- "--host=unix:///var/run/docker.sock"
|
||||
- "--host=tcp://0.0.0.0:2375"
|
||||
- "--tls=false"
|
||||
volumeMounts:
|
||||
- name: dind-storage
|
||||
mountPath: /var/lib/docker
|
||||
- name: workspace-volume
|
||||
mountPath: /home/jenkins/agent
|
||||
- name: builder
|
||||
image: docker:27
|
||||
image: registry.bstein.dev/bstein/docker:27
|
||||
command:
|
||||
- cat
|
||||
tty: true
|
||||
@ -44,7 +45,15 @@ spec:
|
||||
- name: harbor-config
|
||||
mountPath: /docker-config
|
||||
- name: tester
|
||||
image: python:3.12-slim
|
||||
image: registry.bstein.dev/bstein/python:3.12-slim
|
||||
command:
|
||||
- cat
|
||||
tty: true
|
||||
volumeMounts:
|
||||
- name: workspace-volume
|
||||
mountPath: /home/jenkins/agent
|
||||
- name: quality-tools
|
||||
image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
|
||||
command:
|
||||
- cat
|
||||
tty: true
|
||||
@ -72,6 +81,15 @@ spec:
|
||||
PYTHONUNBUFFERED = '1'
|
||||
SUITE_NAME = 'atlasbot'
|
||||
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
|
||||
SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
|
||||
SONARQUBE_PROJECT_KEY = 'atlasbot'
|
||||
SONARQUBE_TOKEN = credentials('sonarqube-token')
|
||||
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
|
||||
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
|
||||
}
|
||||
options {
|
||||
disableConcurrentBuilds()
|
||||
buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
|
||||
}
|
||||
stages {
|
||||
stage('Checkout') {
|
||||
@ -79,6 +97,120 @@ spec:
|
||||
checkout scm
|
||||
}
|
||||
}
|
||||
stage('Collect SonarQube evidence') {
|
||||
steps {
|
||||
container('quality-tools') {
|
||||
sh '''#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
args=(
|
||||
"-Dsonar.host.url=${SONARQUBE_HOST_URL}"
|
||||
"-Dsonar.login=${SONARQUBE_TOKEN}"
|
||||
"-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
|
||||
"-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
|
||||
"-Dsonar.sources=."
|
||||
"-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**"
|
||||
"-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
|
||||
)
|
||||
[ -f build/coverage.xml ] && args+=("-Dsonar.python.coverage.reportPaths=build/coverage.xml")
|
||||
set +e
|
||||
sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
|
||||
rc=${PIPESTATUS[0]}
|
||||
set -e
|
||||
printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
|
||||
'''
|
||||
}
|
||||
container('tester') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
python3 - <<'PY'
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
|
||||
host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
|
||||
project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
|
||||
token = os.getenv('SONARQUBE_TOKEN', '').strip()
|
||||
report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
|
||||
payload = {"status": "ERROR", "note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY"}
|
||||
if host and project_key:
|
||||
query = urllib.parse.urlencode({"projectKey": project_key})
|
||||
request = urllib.request.Request(f"{host}/api/qualitygates/project_status?{query}", method="GET")
|
||||
if token:
|
||||
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
|
||||
request.add_header("Authorization", f"Basic {encoded}")
|
||||
try:
|
||||
with urllib.request.urlopen(request, timeout=12) as response:
|
||||
payload = json.loads(response.read().decode("utf-8"))
|
||||
except Exception as exc: # noqa: BLE001
|
||||
payload = {"status": "ERROR", "error": str(exc)}
|
||||
with open(report_path, "w", encoding="utf-8") as handle:
|
||||
json.dump(payload, handle, indent=2, sort_keys=True)
|
||||
handle.write("\\n")
|
||||
PY
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Collect Supply Chain evidence') {
|
||||
steps {
|
||||
container('quality-tools') {
|
||||
sh '''#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
set +e
|
||||
trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL .
|
||||
trivy_rc=$?
|
||||
set -e
|
||||
if [ ! -s build/trivy-fs.json ]; then
|
||||
cat > build/ironbank-compliance.json <<EOF
|
||||
{"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
|
||||
EOF
|
||||
exit 0
|
||||
fi
|
||||
critical="$(jq '[.Results[]? | .Vulnerabilities[]? | select(.Severity=="CRITICAL")] | length' build/trivy-fs.json)"
|
||||
high="$(jq '[.Results[]? | .Vulnerabilities[]? | select(.Severity=="HIGH")] | length' build/trivy-fs.json)"
|
||||
secrets="$(jq '[.Results[]? | .Secrets[]?] | length' build/trivy-fs.json)"
|
||||
misconfigs="$(jq '[.Results[]? | .Misconfigurations[]? | select(.Status=="FAIL" and (.Severity=="CRITICAL" or .Severity=="HIGH"))] | length' build/trivy-fs.json)"
|
||||
status=ok
|
||||
compliant=true
|
||||
if [ "${critical}" -gt 0 ] || [ "${secrets}" -gt 0 ] || [ "${misconfigs}" -gt 0 ]; then
|
||||
status=failed
|
||||
compliant=false
|
||||
fi
|
||||
jq -n --arg status "${status}" --argjson compliant "${compliant}" --argjson critical "${critical}" --argjson high "${high}" --argjson secrets "${secrets}" --argjson misconfigs "${misconfigs}" --argjson trivy_rc "${trivy_rc}" \
|
||||
'{status:$status, compliant:$compliant, category:"artifact_security", scan_type:"filesystem", scanner:"trivy", critical_vulnerabilities:$critical, high_vulnerabilities:$high, secrets:$secrets, high_or_critical_misconfigurations:$misconfigs, trivy_rc:$trivy_rc, high_vulnerability_policy:"observe"}' > build/ironbank-compliance.json
|
||||
'''
|
||||
}
|
||||
container('tester') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
python3 - <<'PY'
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
|
||||
if report_path.exists():
|
||||
raise SystemExit(0)
|
||||
status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
|
||||
compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
|
||||
payload = {"status": status or "unknown", "compliant": compliant in {"1", "true", "yes", "on"} if compliant else None}
|
||||
payload = {k: v for k, v in payload.items() if v is not None}
|
||||
if "status" not in payload:
|
||||
payload["status"] = "unknown"
|
||||
payload["note"] = "Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT or write build/ironbank-compliance.json in image-building repos."
|
||||
report_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
|
||||
PY
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Prep toolchain') {
|
||||
steps {
|
||||
container('builder') {
|
||||
@ -113,6 +245,7 @@ spec:
|
||||
container('builder') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
ready=0
|
||||
for _ in $(seq 1 10); do
|
||||
if docker info >/dev/null 2>&1; then
|
||||
@ -124,40 +257,91 @@ spec:
|
||||
if [ "${ready}" -ne 1 ]; then
|
||||
echo "docker daemon did not become ready on ${DOCKER_HOST}" >&2
|
||||
docker version || true
|
||||
exit 1
|
||||
printf '%s\n' 1 > build/buildx.rc
|
||||
exit 0
|
||||
fi
|
||||
BUILDER_NAME="atlasbot-${BUILD_NUMBER}"
|
||||
docker buildx rm "${BUILDER_NAME}" >/dev/null 2>&1 || true
|
||||
docker buildx create --name "${BUILDER_NAME}" --driver docker-container --bootstrap --use
|
||||
rc=1
|
||||
for attempt in 1 2 3; do
|
||||
if docker buildx create --name "${BUILDER_NAME}" --driver docker-container --driver-opt image=registry.bstein.dev/bstein/buildkit:buildx-stable-1 --bootstrap --use; then
|
||||
rc=0
|
||||
break
|
||||
fi
|
||||
docker buildx rm "${BUILDER_NAME}" >/dev/null 2>&1 || true
|
||||
sleep $((attempt * 10))
|
||||
done
|
||||
printf '%s\n' "${rc}" > build/buildx.rc
|
||||
if [ "${rc}" -ne 0 ]; then
|
||||
echo "docker buildx bootstrap failed after retries; quality metrics will record the setup failure" >&2
|
||||
fi
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Unit tests') {
|
||||
stage('Run quality gate') {
|
||||
steps {
|
||||
container('builder') {
|
||||
container('tester') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
docker buildx build --platform linux/arm64 --target test --load -t atlasbot-test .
|
||||
docker run --rm -v "$PWD/build:/out" atlasbot-test \
|
||||
python -m ruff check atlasbot --select E9,F63,F7,F82
|
||||
docker run --rm -v "$PWD/build:/out" atlasbot-test \
|
||||
python -m slipcover --json --out /out/coverage.json --source atlasbot --fail-under 90 \
|
||||
-m pytest -q --junitxml /out/junit.xml
|
||||
python3 -m pip install --no-cache-dir -r requirements.txt -r requirements-dev.txt
|
||||
set +e
|
||||
docs_rc=1
|
||||
loc_rc=1
|
||||
tests_rc=1
|
||||
coverage_contract_rc=1
|
||||
gate_rc=1
|
||||
python -m ruff check atlasbot scripts --select E,F,W,B,C90,I,RUF,ARG --ignore E501
|
||||
ruff_rc=$?
|
||||
if [ "${ruff_rc}" -eq 0 ]; then
|
||||
python scripts/check_docstrings.py --root atlasbot
|
||||
docs_rc=$?
|
||||
else
|
||||
docs_rc=${ruff_rc}
|
||||
fi
|
||||
python scripts/check_file_sizes.py --root atlasbot --max-lines 500
|
||||
loc_rc=$?
|
||||
python -m slipcover --json --out build/coverage.json --source atlasbot --fail-under 95 \
|
||||
-m pytest -q --junitxml build/junit.xml
|
||||
tests_rc=$?
|
||||
python scripts/check_coverage.py build/coverage.json --root atlasbot --threshold 95
|
||||
coverage_contract_rc=$?
|
||||
printf '%s\n' "${docs_rc}" > build/docs-naming.rc
|
||||
gate_rc=0
|
||||
[ "${docs_rc}" -eq 0 ] || gate_rc=1
|
||||
[ "${loc_rc}" -eq 0 ] || gate_rc=1
|
||||
[ "${tests_rc}" -eq 0 ] || gate_rc=1
|
||||
[ "${coverage_contract_rc}" -eq 0 ] || gate_rc=1
|
||||
set -e
|
||||
printf '%s\n' "${gate_rc}" > build/quality-gate.rc
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('Publish test metrics') {
|
||||
steps {
|
||||
container('builder') {
|
||||
container('tester') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
docker run --rm -v "$PWD/build:/out" \
|
||||
-e JUNIT_PATH=/out/junit.xml \
|
||||
-e COVERAGE_PATH=/out/coverage.json \
|
||||
atlasbot-test python scripts/publish_test_metrics.py
|
||||
export JUNIT_PATH='build/junit.xml'
|
||||
export COVERAGE_PATH='build/coverage.json'
|
||||
export SOURCE_ROOT='atlasbot'
|
||||
export QUALITY_GATE_RC_PATH='build/quality-gate.rc'
|
||||
export QUALITY_GATE_DOCS_RC_PATH='build/docs-naming.rc'
|
||||
python scripts/publish_test_metrics.py || true
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('Enforce quality gate') {
|
||||
steps {
|
||||
container('tester') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
test "$(cat build/quality-gate.rc 2>/dev/null || echo 1)" -eq 0
|
||||
'''
|
||||
}
|
||||
}
|
||||
@ -167,6 +351,7 @@ spec:
|
||||
container('builder') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
test "$(cat build/buildx.rc 2>/dev/null || echo 1)" -eq 0
|
||||
VERSION_TAG=$(cut -d= -f2 build.env)
|
||||
docker buildx build --platform linux/arm64 \
|
||||
--target runtime \
|
||||
@ -182,11 +367,11 @@ spec:
|
||||
always {
|
||||
script {
|
||||
if (fileExists('build.env')) {
|
||||
def env = readProperties file: 'build.env'
|
||||
echo "Build complete for ${env.SEMVER}"
|
||||
def envFile = readProperties file: 'build.env'
|
||||
echo "Build complete for ${envFile.SEMVER}"
|
||||
}
|
||||
}
|
||||
archiveArtifacts artifacts: 'build/*', allowEmptyArchive: true
|
||||
archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Any
|
||||
|
||||
from fastapi import FastAPI, Header, HTTPException
|
||||
from pydantic import BaseModel
|
||||
@ -29,6 +28,16 @@ class AnswerResponse(BaseModel):
|
||||
|
||||
|
||||
class Api:
|
||||
"""Expose the answer API and enforce the shared internal token.
|
||||
|
||||
Input:
|
||||
- `settings`: runtime configuration, including the optional internal token;
|
||||
- `answer_handler`: async adapter that answers a normalized question.
|
||||
|
||||
Output:
|
||||
- registers the HTTP routes on `self.app`.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
settings: Settings,
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
def _env_bool(name: str, default: str = "false") -> bool:
|
||||
value = os.getenv(name, default).strip().lower()
|
||||
return value in {"1", "true", "yes", "y", "on"}
|
||||
@ -121,6 +122,12 @@ def _load_matrix_bots(bot_mentions: tuple[str, ...]) -> tuple[MatrixBotConfig, .
|
||||
|
||||
|
||||
def load_settings() -> Settings:
|
||||
"""Load process settings from environment variables.
|
||||
|
||||
Output:
|
||||
- a fully populated `Settings` instance with defaults for missing values.
|
||||
"""
|
||||
|
||||
bot_mentions = tuple(
|
||||
[
|
||||
item.strip()
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
12
atlasbot/engine/answerer/__init__.py
Normal file
12
atlasbot/engine/answerer/__init__.py
Normal file
@ -0,0 +1,12 @@
|
||||
"""Answer engine package."""
|
||||
|
||||
from ._base import *
|
||||
from .common import *
|
||||
from .engine import *
|
||||
from .factsheet import *
|
||||
from .post import *
|
||||
from .post_ext import *
|
||||
from .retrieval import *
|
||||
from .retrieval_ext import *
|
||||
from .spine import *
|
||||
from .workflow import *
|
||||
116
atlasbot/engine/answerer/_base.py
Normal file
116
atlasbot/engine/answerer/_base.py
Normal file
@ -0,0 +1,116 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from collections.abc import Awaitable, Callable
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
FOLLOWUP_SHORT_WORDS = 6
|
||||
TOKEN_MIN_LEN = 3
|
||||
GENERIC_METRIC_TOKENS = {"atlas", "cluster", "kubernetes", "k8s", "titan", "lab"}
|
||||
NS_ENTRY_MIN_LEN = 2
|
||||
DEDUP_MIN_SENTENCES = 3
|
||||
RUNBOOK_SIMILARITY_THRESHOLD = 0.4
|
||||
BYTES_KB = 1024
|
||||
BYTES_MB = 1024 * 1024
|
||||
|
||||
|
||||
class LLMLimitReached(RuntimeError):
|
||||
pass
|
||||
|
||||
|
||||
class LLMTimeBudgetExceeded(RuntimeError):
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
|
||||
class AnswerScores:
|
||||
confidence: int
|
||||
relevance: int
|
||||
satisfaction: int
|
||||
hallucination_risk: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class AnswerResult:
|
||||
reply: str
|
||||
scores: AnswerScores
|
||||
meta: dict[str, Any]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class InsightGuardInput:
|
||||
question: str
|
||||
reply: str
|
||||
classify: dict[str, Any]
|
||||
context: str
|
||||
plan: ModePlan
|
||||
call_llm: Callable[..., Awaitable[str]]
|
||||
facts: list[str]
|
||||
|
||||
|
||||
@dataclass
|
||||
class ContradictionContext:
|
||||
call_llm: Callable[..., Awaitable[str]]
|
||||
question: str
|
||||
reply: str
|
||||
facts: list[str]
|
||||
plan: ModePlan
|
||||
|
||||
|
||||
@dataclass
|
||||
class EvidenceItem:
|
||||
path: str
|
||||
reason: str
|
||||
value: Any | None = None
|
||||
value_at_claim: Any | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class ClaimItem:
|
||||
id: str
|
||||
claim: str
|
||||
evidence: list[EvidenceItem]
|
||||
|
||||
|
||||
@dataclass
|
||||
class ConversationState:
|
||||
updated_at: float
|
||||
claims: list[ClaimItem]
|
||||
snapshot_id: str | None = None
|
||||
snapshot: dict[str, Any] | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class ModePlan:
|
||||
model: str
|
||||
fast_model: str
|
||||
max_subquestions: int
|
||||
chunk_lines: int
|
||||
chunk_top: int
|
||||
chunk_group: int
|
||||
kb_max_chars: int
|
||||
kb_max_files: int
|
||||
use_raw_snapshot: bool
|
||||
parallelism: int
|
||||
score_retries: int
|
||||
use_deep_retrieval: bool
|
||||
use_tool: bool
|
||||
use_critic: bool
|
||||
use_gap: bool
|
||||
use_scores: bool
|
||||
drafts: int
|
||||
metric_retries: int
|
||||
subanswer_retries: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class ScoreContext:
|
||||
question: str
|
||||
sub_questions: list[str]
|
||||
retries: int
|
||||
parallelism: int
|
||||
select_best: bool
|
||||
fast_model: str
|
||||
395
atlasbot/engine/answerer/common.py
Normal file
395
atlasbot/engine/answerer/common.py
Normal file
@ -0,0 +1,395 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Any
|
||||
|
||||
from atlasbot.config import Settings
|
||||
from atlasbot.llm import prompts
|
||||
from atlasbot.llm.client import parse_json
|
||||
|
||||
from ._base import *
|
||||
from .factsheet import *
|
||||
from .post import *
|
||||
from .post_ext import *
|
||||
from .retrieval import _gather_limited
|
||||
from .retrieval_ext import *
|
||||
from .spine import *
|
||||
|
||||
|
||||
def _strip_followup_meta(reply: str) -> str:
|
||||
cleaned = reply.strip()
|
||||
if not cleaned:
|
||||
return cleaned
|
||||
prefixes = [
|
||||
"The draft is correct based on the provided context.",
|
||||
"The draft is correct based on the context.",
|
||||
"The draft is correct based on the provided evidence.",
|
||||
"The draft is correct.",
|
||||
"Based on the provided context,",
|
||||
"Based on the context,",
|
||||
"Based on the provided evidence,",
|
||||
]
|
||||
for prefix in prefixes:
|
||||
if cleaned.lower().startswith(prefix.lower()):
|
||||
cleaned = cleaned[len(prefix) :].lstrip(" .")
|
||||
break
|
||||
return cleaned
|
||||
|
||||
|
||||
def _build_meta(mode: str, call_count: int, call_cap: int, limit_hit: bool, time_budget_hit: bool, time_budget_sec: float, classify: dict[str, Any], tool_hint: dict[str, Any] | None, started: float) -> dict[str, Any]:
|
||||
return {
|
||||
"mode": mode,
|
||||
"llm_calls": call_count,
|
||||
"llm_limit": call_cap,
|
||||
"llm_limit_hit": limit_hit,
|
||||
"time_budget_sec": time_budget_sec,
|
||||
"time_budget_hit": time_budget_hit,
|
||||
"classify": classify,
|
||||
"tool_hint": tool_hint,
|
||||
"elapsed_sec": round(time.monotonic() - started, 2),
|
||||
}
|
||||
|
||||
|
||||
def _debug_pipeline_log(settings: Settings, name: str, payload: Any) -> None:
|
||||
"""Write a structured debug event when pipeline tracing is enabled."""
|
||||
|
||||
if not settings.debug_pipeline:
|
||||
return
|
||||
log.info("atlasbot_debug", extra={"extra": {"name": name, "payload": payload}})
|
||||
|
||||
|
||||
def _mode_plan(settings: Settings, mode: str) -> ModePlan:
|
||||
if mode == "genius":
|
||||
return ModePlan(
|
||||
model=settings.ollama_model_genius,
|
||||
fast_model=settings.ollama_model_fast,
|
||||
max_subquestions=6,
|
||||
chunk_lines=6,
|
||||
chunk_top=10,
|
||||
chunk_group=4,
|
||||
kb_max_chars=200000,
|
||||
kb_max_files=200,
|
||||
use_raw_snapshot=True,
|
||||
parallelism=4,
|
||||
score_retries=3,
|
||||
use_deep_retrieval=True,
|
||||
use_tool=True,
|
||||
use_critic=True,
|
||||
use_gap=True,
|
||||
use_scores=True,
|
||||
drafts=2,
|
||||
metric_retries=3,
|
||||
subanswer_retries=3,
|
||||
)
|
||||
if mode == "smart":
|
||||
return ModePlan(
|
||||
model=settings.ollama_model_smart,
|
||||
fast_model=settings.ollama_model_fast,
|
||||
max_subquestions=4,
|
||||
chunk_lines=8,
|
||||
chunk_top=8,
|
||||
chunk_group=4,
|
||||
kb_max_chars=3000,
|
||||
kb_max_files=12,
|
||||
use_raw_snapshot=False,
|
||||
parallelism=2,
|
||||
score_retries=2,
|
||||
use_deep_retrieval=True,
|
||||
use_tool=True,
|
||||
use_critic=True,
|
||||
use_gap=True,
|
||||
use_scores=True,
|
||||
drafts=1,
|
||||
metric_retries=2,
|
||||
subanswer_retries=2,
|
||||
)
|
||||
return ModePlan(
|
||||
model=settings.ollama_model_fast,
|
||||
fast_model=settings.ollama_model_fast,
|
||||
max_subquestions=1,
|
||||
chunk_lines=16,
|
||||
chunk_top=3,
|
||||
chunk_group=5,
|
||||
kb_max_chars=800,
|
||||
kb_max_files=4,
|
||||
use_raw_snapshot=False,
|
||||
parallelism=1,
|
||||
score_retries=1,
|
||||
use_deep_retrieval=False,
|
||||
use_tool=False,
|
||||
use_critic=False,
|
||||
use_gap=False,
|
||||
use_scores=False,
|
||||
drafts=1,
|
||||
metric_retries=1,
|
||||
subanswer_retries=1,
|
||||
)
|
||||
|
||||
|
||||
def _llm_call_limit(settings: Settings, mode: str) -> int:
|
||||
if mode == "genius":
|
||||
return settings.genius_llm_calls_max
|
||||
if mode == "smart":
|
||||
return settings.smart_llm_calls_max
|
||||
return settings.fast_llm_calls_max
|
||||
|
||||
|
||||
def _mode_time_budget(settings: Settings, mode: str) -> float:
|
||||
if mode == "genius":
|
||||
return max(0.0, settings.genius_time_budget_sec)
|
||||
if mode == "smart":
|
||||
return max(0.0, settings.smart_time_budget_sec)
|
||||
return max(0.0, settings.quick_time_budget_sec)
|
||||
|
||||
|
||||
def _select_subquestions(parts: list[dict[str, Any]], fallback: str, limit: int) -> list[str]:
|
||||
if not parts:
|
||||
return [fallback]
|
||||
ranked = []
|
||||
for entry in parts:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
question = str(entry.get("question") or "").strip()
|
||||
if not question:
|
||||
continue
|
||||
priority = entry.get("priority")
|
||||
try:
|
||||
weight = float(priority)
|
||||
except (TypeError, ValueError):
|
||||
weight = 1.0
|
||||
ranked.append((weight, question))
|
||||
ranked.sort(key=lambda item: item[0], reverse=True)
|
||||
questions = [item[1] for item in ranked][:limit]
|
||||
return questions or [fallback]
|
||||
|
||||
|
||||
def _chunk_lines(lines: list[str], lines_per_chunk: int) -> list[dict[str, Any]]:
|
||||
chunks: list[dict[str, Any]] = []
|
||||
if not lines:
|
||||
return chunks
|
||||
for idx in range(0, len(lines), lines_per_chunk):
|
||||
chunk_lines = lines[idx : idx + lines_per_chunk]
|
||||
text = "\n".join(chunk_lines)
|
||||
summary = " | ".join(chunk_lines[:4])
|
||||
chunks.append({"id": f"c{idx//lines_per_chunk}", "text": text, "summary": summary})
|
||||
return chunks
|
||||
|
||||
|
||||
def _raw_snapshot_chunks(snapshot: dict[str, Any] | None) -> list[dict[str, Any]]:
|
||||
if not isinstance(snapshot, dict) or not snapshot:
|
||||
return []
|
||||
chunks: list[dict[str, Any]] = []
|
||||
for key, value in snapshot.items():
|
||||
try:
|
||||
payload = json.dumps({key: value}, indent=2)
|
||||
except Exception:
|
||||
continue
|
||||
summary = f"raw:{key}"
|
||||
chunks.append({"id": f"r{key}", "text": payload, "summary": summary})
|
||||
return chunks
|
||||
|
||||
|
||||
def _build_chunk_groups(chunks: list[dict[str, Any]], group_size: int) -> list[list[dict[str, Any]]]:
|
||||
groups: list[list[dict[str, Any]]] = []
|
||||
group: list[dict[str, Any]] = []
|
||||
for chunk in chunks:
|
||||
group.append({"id": chunk["id"], "summary": chunk["summary"]})
|
||||
if len(group) >= group_size:
|
||||
groups.append(group)
|
||||
group = []
|
||||
if group:
|
||||
groups.append(group)
|
||||
return groups
|
||||
|
||||
|
||||
async def _score_chunks(call_llm: Callable[..., Any], chunks: list[dict[str, Any]], question: str, sub_questions: list[str], plan: ModePlan) -> dict[str, float]:
|
||||
scores: dict[str, float] = {chunk["id"]: 0.0 for chunk in chunks}
|
||||
if not chunks:
|
||||
return scores
|
||||
groups = _build_chunk_groups(chunks, plan.chunk_group)
|
||||
ctx = ScoreContext(
|
||||
question=question,
|
||||
sub_questions=sub_questions,
|
||||
retries=max(1, plan.score_retries),
|
||||
parallelism=plan.parallelism,
|
||||
select_best=plan.score_retries > 1,
|
||||
fast_model=plan.fast_model,
|
||||
)
|
||||
if ctx.parallelism <= 1 or len(groups) * ctx.retries <= 1:
|
||||
return await _score_groups_serial(call_llm, groups, ctx)
|
||||
return await _score_groups_parallel(call_llm, groups, ctx)
|
||||
|
||||
|
||||
async def _score_groups_serial(call_llm: Callable[..., Any], groups: list[list[dict[str, Any]]], ctx: ScoreContext) -> dict[str, float]:
|
||||
scores: dict[str, float] = {}
|
||||
for grp in groups:
|
||||
runs = [await _score_chunk_group(call_llm, grp, ctx.question, ctx.sub_questions) for _ in range(ctx.retries)]
|
||||
if ctx.select_best and len(runs) > 1:
|
||||
best = await _select_best_score_run(call_llm, grp, runs, ctx)
|
||||
scores.update(best)
|
||||
else:
|
||||
scores.update(_merge_score_runs(runs))
|
||||
return scores
|
||||
|
||||
|
||||
async def _score_groups_parallel(call_llm: Callable[..., Any], groups: list[list[dict[str, Any]]], ctx: ScoreContext) -> dict[str, float]:
|
||||
coros: list[Awaitable[tuple[int, dict[str, float]]]] = []
|
||||
for idx, grp in enumerate(groups):
|
||||
for _ in range(ctx.retries):
|
||||
coros.append(_score_chunk_group_run(call_llm, idx, grp, ctx.question, ctx.sub_questions))
|
||||
results = await _gather_limited(coros, ctx.parallelism)
|
||||
grouped: dict[int, list[dict[str, float]]] = {}
|
||||
for idx, result in results:
|
||||
grouped.setdefault(idx, []).append(result)
|
||||
scores: dict[str, float] = {}
|
||||
for idx, runs in grouped.items():
|
||||
if ctx.select_best and len(runs) > 1:
|
||||
group = groups[idx]
|
||||
best = await _select_best_score_run(call_llm, group, runs, ctx)
|
||||
scores.update(best)
|
||||
else:
|
||||
scores.update(_merge_score_runs(runs))
|
||||
return scores
|
||||
|
||||
|
||||
async def _score_chunk_group(call_llm: Callable[..., Any], group: list[dict[str, Any]], question: str, sub_questions: list[str]) -> dict[str, float]:
|
||||
prompt = (
|
||||
prompts.CHUNK_SCORE_PROMPT
|
||||
+ "\nQuestion: "
|
||||
+ question
|
||||
+ "\nSubQuestions: "
|
||||
+ json.dumps(sub_questions)
|
||||
+ "\nChunks: "
|
||||
+ json.dumps(group)
|
||||
)
|
||||
raw = await call_llm(prompts.RETRIEVER_SYSTEM, prompt, model=None, tag="chunk_score")
|
||||
data = _parse_json_list(raw)
|
||||
scored: dict[str, float] = {}
|
||||
for entry in data:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
cid = str(entry.get("id") or "").strip()
|
||||
if not cid:
|
||||
continue
|
||||
try:
|
||||
score = float(entry.get("score") or 0)
|
||||
except (TypeError, ValueError):
|
||||
score = 0.0
|
||||
scored[cid] = score
|
||||
return scored
|
||||
|
||||
|
||||
async def _score_chunk_group_run(call_llm: Callable[..., Any], idx: int, group: list[dict[str, Any]], question: str, sub_questions: list[str]) -> tuple[int, dict[str, float]]:
|
||||
return idx, await _score_chunk_group(call_llm, group, question, sub_questions)
|
||||
|
||||
|
||||
def _merge_score_runs(runs: list[dict[str, float]]) -> dict[str, float]:
|
||||
if not runs:
|
||||
return {}
|
||||
totals: dict[str, float] = {}
|
||||
counts: dict[str, int] = {}
|
||||
for run in runs:
|
||||
for key, value in run.items():
|
||||
totals[key] = totals.get(key, 0.0) + float(value)
|
||||
counts[key] = counts.get(key, 0) + 1
|
||||
return {key: totals[key] / counts[key] for key in totals}
|
||||
|
||||
|
||||
async def _select_best_score_run(call_llm: Callable[..., Any], group: list[dict[str, Any]], runs: list[dict[str, float]], ctx: ScoreContext) -> dict[str, float]:
|
||||
if not runs:
|
||||
return {}
|
||||
prompt = (
|
||||
prompts.RETRIEVER_SELECT_PROMPT
|
||||
+ "\nQuestion: "
|
||||
+ ctx.question
|
||||
+ "\nSubQuestions: "
|
||||
+ json.dumps(ctx.sub_questions)
|
||||
+ "\nChunks: "
|
||||
+ json.dumps(group)
|
||||
+ "\nRuns: "
|
||||
+ json.dumps(runs)
|
||||
)
|
||||
raw = await call_llm(prompts.RETRIEVER_SELECT_SYSTEM, prompt, model=ctx.fast_model, tag="chunk_select")
|
||||
data = parse_json(raw)
|
||||
idx = 0
|
||||
if isinstance(data, dict):
|
||||
try:
|
||||
idx = int(data.get("selected_index") or 0)
|
||||
except (TypeError, ValueError):
|
||||
idx = 0
|
||||
if idx < 0 or idx >= len(runs):
|
||||
idx = 0
|
||||
return runs[idx]
|
||||
|
||||
|
||||
def _keyword_hits(ranked: list[dict[str, Any]], head: dict[str, Any], keywords: list[str] | None) -> list[dict[str, Any]]:
|
||||
if not keywords:
|
||||
return []
|
||||
lowered = [kw.lower() for kw in keywords if isinstance(kw, str) and kw.strip()]
|
||||
if not lowered:
|
||||
return []
|
||||
hits: list[dict[str, Any]] = []
|
||||
for item in ranked:
|
||||
if item is head:
|
||||
continue
|
||||
text = str(item.get("text") or "").lower()
|
||||
if any(kw in text for kw in lowered):
|
||||
hits.append(item)
|
||||
return hits
|
||||
|
||||
|
||||
def _select_chunks(chunks: list[dict[str, Any]], scores: dict[str, float], plan: ModePlan, keywords: list[str] | None = None, must_ids: list[str] | None = None) -> list[dict[str, Any]]:
    """Pick up to plan.chunk_top chunks: seed, must-have ids, keyword hits, then score order."""
    if not chunks:
        return []
    by_score = sorted(chunks, key=lambda entry: scores.get(entry["id"], 0.0), reverse=True)
    # NOTE(review): the seed is chunks[0] (original retrieval order), not the
    # top-scored by_score[0] — confirm that is intended.
    picked: list[dict[str, Any]] = [chunks[0]]
    if _append_must_chunks(chunks, picked, must_ids, plan.chunk_top):
        return picked
    if _append_keyword_chunks(by_score, picked, keywords, plan.chunk_top):
        return picked
    _append_ranked_chunks(by_score, picked, plan.chunk_top)
    return picked
|
||||
|
||||
|
||||
def _append_must_chunks(chunks: list[dict[str, Any]], selected: list[dict[str, Any]], must_ids: list[str] | None, limit: int) -> bool:
|
||||
if not must_ids:
|
||||
return False
|
||||
id_map = {item["id"]: item for item in chunks}
|
||||
for cid in must_ids:
|
||||
item = id_map.get(cid)
|
||||
if item and item not in selected:
|
||||
selected.append(item)
|
||||
if len(selected) >= limit:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _append_keyword_chunks(ranked: list[dict[str, Any]], selected: list[dict[str, Any]], keywords: list[str] | None, limit: int) -> bool:
    """Append keyword-matching chunks (excluding the ranked head).

    Returns True once ``selected`` reaches ``limit``; mutates ``selected``.
    """
    if not ranked:
        return False
    for candidate in _keyword_hits(ranked, ranked[0], keywords):
        if candidate in selected:
            continue
        selected.append(candidate)
        if len(selected) >= limit:
            return True
    return False
|
||||
|
||||
|
||||
def _append_ranked_chunks(ranked: list[dict[str, Any]], selected: list[dict[str, Any]], limit: int) -> None:
|
||||
for item in ranked:
|
||||
if len(selected) >= limit:
|
||||
break
|
||||
if item not in selected:
|
||||
selected.append(item)
|
||||
|
||||
|
||||
def _format_runbooks(runbooks: list[str]) -> str:
|
||||
if not runbooks:
|
||||
return ""
|
||||
return "Relevant runbooks:\n" + "\n".join([f"- {item}" for item in runbooks])
|
||||
|
||||
|
||||
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||
267
atlasbot/engine/answerer/engine.py
Normal file
267
atlasbot/engine/answerer/engine.py
Normal file
@ -0,0 +1,267 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
import json
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from atlasbot.config import Settings
|
||||
from atlasbot.knowledge.loader import KnowledgeBase
|
||||
from atlasbot.llm import prompts
|
||||
from atlasbot.llm.client import LLMClient, build_messages
|
||||
from atlasbot.snapshot.builder import SnapshotProvider
|
||||
from atlasbot.state.store import ClaimStore
|
||||
|
||||
from ._base import *
|
||||
from .common import *
|
||||
from .factsheet import *
|
||||
from .post import *
|
||||
from .post_ext import *
|
||||
from .retrieval import *
|
||||
from .retrieval_ext import *
|
||||
from .spine import *
|
||||
from .workflow import run_answer
|
||||
|
||||
|
||||
class AnswerEngine:
    """Coordinate Atlas question answering across snapshots, KB, and LLMs.

    Why:
    - keep the public answer surface in one place while the retrieval and
    post-processing helpers stay split across smaller modules.
    """

    def __init__(self, settings: Settings, llm: LLMClient, kb: KnowledgeBase, snapshot: SnapshotProvider) -> None:
        # Wire collaborators; ClaimStore persists per-conversation claims
        # with a TTL so follow-ups can reuse earlier evidence.
        self._settings = settings
        self._llm = llm
        self._kb = kb
        self._snapshot = snapshot
        self._store = ClaimStore(settings.state_db_path, settings.conversation_ttl_sec)

    async def answer(self, question: str, *, mode: str, history: list[dict[str, str]] | None = None, observer: Callable[[str, str], None] | None = None, conversation_id: str | None = None, snapshot_pin: bool | None = None) -> AnswerResult:
        """Answer a question by delegating to the staged workflow."""
        return await run_answer(
            self,
            question,
            mode=mode,
            history=history,
            observer=observer,
            conversation_id=conversation_id,
            snapshot_pin=snapshot_pin,
        )

    async def _answer_stock(self, question: str) -> AnswerResult:
        """Answer with no cluster context: a plain chat using the stock prompt."""
        messages = build_messages(prompts.STOCK_SYSTEM, question)
        reply = await self._llm.chat(messages, model=self._settings.ollama_model)
        return AnswerResult(reply, _default_scores(), {"mode": "stock"})

    async def _synthesize_answer(self, question: str, subanswers: list[str], context: str, classify: dict[str, Any], plan: ModePlan, call_llm: Callable[..., Any]) -> str:
        """Compose the final answer; multi-draft with LLM selection when planned."""
        style_hint = _style_hint(classify)
        if not subanswers:
            # No sub-answers available: a single synthesis pass from context.
            prompt = (
                prompts.SYNTHESIZE_PROMPT
                + "\nQuestion: "
                + question
                + "\nStyle: "
                + style_hint
                + "\nQuestionType: "
                + (classify.get("question_type") or "unknown")
            )
            return await call_llm(prompts.SYNTHESIZE_SYSTEM, prompt, context=context, model=plan.model, tag="synth")
        draft_prompts = []
        for idx in range(plan.drafts):
            # DraftIndex varies per prompt to encourage distinct drafts.
            draft_prompts.append(
                prompts.SYNTHESIZE_PROMPT
                + "\nQuestion: "
                + question
                + "\nStyle: "
                + style_hint
                + "\nQuestionType: "
                + (classify.get("question_type") or "unknown")
                + "\nSubanswers:\n"
                + "\n".join([f"- {item}" for item in subanswers])
                + f"\nDraftIndex: {idx + 1}"
            )
        drafts: list[str] = []
        if plan.parallelism > 1 and len(draft_prompts) > 1:
            # Fan out draft generation with bounded concurrency.
            drafts = await _gather_limited(
                [
                    call_llm(
                        prompts.SYNTHESIZE_SYSTEM,
                        prompt,
                        context=context,
                        model=plan.model,
                        tag="synth",
                    )
                    for prompt in draft_prompts
                ],
                plan.parallelism,
            )
        else:
            for prompt in draft_prompts:
                drafts.append(
                    await call_llm(
                        prompts.SYNTHESIZE_SYSTEM,
                        prompt,
                        context=context,
                        model=plan.model,
                        tag="synth",
                    )
                )
        if len(drafts) == 1:
            return drafts[0]
        # Ask the fast model to pick the best draft; fall back to the first
        # when the selection is missing or out of range.
        select_prompt = (
            prompts.DRAFT_SELECT_PROMPT
            + "\nQuestion: "
            + question
            + "\nDrafts:\n"
            + "\n\n".join([f"Draft {idx + 1}: {text}" for idx, text in enumerate(drafts)])
        )
        select_raw = await call_llm(prompts.CRITIC_SYSTEM, select_prompt, context=context, model=plan.fast_model, tag="draft_select")
        selection = _parse_json_block(select_raw, fallback={})
        # NOTE(review): int() raises ValueError on a non-numeric "best" —
        # presumably _parse_json_block's fallback keeps this safe; confirm.
        idx = int(selection.get("best", 1)) - 1
        if 0 <= idx < len(drafts):
            return drafts[idx]
        return drafts[0]

    async def _score_answer(self, question: str, reply: str, plan: ModePlan, call_llm: Callable[..., Any]) -> AnswerScores:
        """Self-score the reply via the fast model; neutral defaults when disabled."""
        if not plan.use_scores:
            return _default_scores()
        prompt = prompts.SCORE_PROMPT + "\nQuestion: " + question + "\nAnswer: " + reply
        raw = await call_llm(prompts.SCORE_SYSTEM, prompt, model=plan.fast_model, tag="score")
        data = _parse_json_block(raw, fallback={})
        return _scores_from_json(data)

    async def _extract_claims(self, question: str, reply: str, summary: dict[str, Any], facts_used: list[str], call_llm: Callable[..., Any]) -> list[ClaimItem]:
        """Map reply claims to snapshot evidence paths for later follow-ups.

        Only claims with non-empty text and at least one evidence path are kept.
        """
        if not reply or not summary:
            return []
        summary_json = _json_excerpt(summary)
        facts_used = [line.strip() for line in (facts_used or []) if line and line.strip()]
        facts_block = ""
        if facts_used:
            # Cap at 12 fact lines to keep the prompt bounded.
            facts_block = "\nFactsUsed:\n" + "\n".join([f"- {line}" for line in facts_used[:12]])
        prompt = prompts.CLAIM_MAP_PROMPT + "\nQuestion: " + question + "\nAnswer: " + reply + facts_block
        raw = await call_llm(
            prompts.CLAIM_SYSTEM,
            prompt,
            context=f"SnapshotSummaryJson:{summary_json}",
            model=self._settings.ollama_model_fast,
            tag="claim_map",
        )
        data = _parse_json_block(raw, fallback={})
        claims_raw = data.get("claims") if isinstance(data, dict) else None
        claims: list[ClaimItem] = []
        if isinstance(claims_raw, list):
            for entry in claims_raw:
                if not isinstance(entry, dict):
                    continue
                claim_text = str(entry.get("claim") or "").strip()
                # Synthesize an id (c1, c2, ...) when the model omits one.
                claim_id = str(entry.get("id") or "").strip() or f"c{len(claims)+1}"
                evidence_items: list[EvidenceItem] = []
                for ev in entry.get("evidence") or []:
                    if not isinstance(ev, dict):
                        continue
                    path = str(ev.get("path") or "").strip()
                    if not path:
                        continue
                    reason = str(ev.get("reason") or "").strip()
                    # Record the snapshot value at claim time so follow-ups can diff.
                    value = _resolve_path(summary, path)
                    evidence_items.append(EvidenceItem(path=path, reason=reason, value=value, value_at_claim=value))
                if claim_text and evidence_items:
                    claims.append(ClaimItem(id=claim_id, claim=claim_text, evidence=evidence_items))
        return claims

    async def _dedup_reply(self, reply: str, plan: ModePlan, call_llm: Callable[..., Any], tag: str) -> str:
        """Run the dedup prompt only when the reply looks repetitive."""
        if not _needs_dedup(reply):
            return reply
        dedup_prompt = prompts.DEDUP_PROMPT + "\nDraft: " + reply
        return await call_llm(prompts.DEDUP_SYSTEM, dedup_prompt, model=plan.fast_model, tag=tag)

    async def _answer_followup(self, question: str, state: ConversationState, summary: dict[str, Any], classify: dict[str, Any], plan: ModePlan, call_llm: Callable[..., Any]) -> str:  # noqa: C901, ARG002
        """Answer a follow-up from stored claims, refreshing evidence values.

        Builds an evidence block from the selected claims, drafts a reply,
        then re-prompts once when the draft mentions nodes/namespaces missing
        from the snapshot.
        """
        claim_ids = await self._select_claims(question, state.claims, plan, call_llm)
        # Fall back to the first two stored claims when selection yields nothing.
        selected = [claim for claim in state.claims if claim.id in claim_ids] if claim_ids else state.claims[:2]
        evidence_lines = []
        lowered = question.lower()
        for claim in selected:
            evidence_lines.append(f"Claim: {claim.claim}")
            for ev in claim.evidence:
                # Refresh the live value; annotate when it drifted since claim time.
                current = _resolve_path(summary, ev.path)
                ev.value = current
                delta_note = ""
                if ev.value_at_claim is not None and current is not None and current != ev.value_at_claim:
                    delta_note = f" (now {current})"
                evidence_lines.append(f"- {ev.path}: {ev.value_at_claim}{delta_note}")
        if any(term in lowered for term in ("hotspot", "hot spot", "hottest", "jetson", "rpi", "amd64", "arm64", "hardware", "class")):
            hotspot_lines = _hotspot_evidence(summary)
            if hotspot_lines:
                evidence_lines.append("HotspotSummary:")
                evidence_lines.extend(hotspot_lines)
        evidence_ctx = "\n".join(evidence_lines)
        prompt = prompts.FOLLOWUP_PROMPT + "\nFollow-up: " + question + "\nEvidence:\n" + evidence_ctx
        reply = await call_llm(prompts.FOLLOWUP_SYSTEM, prompt, model=plan.model, tag="followup")
        allowed_nodes = _allowed_nodes(summary)
        allowed_namespaces = _allowed_namespaces(summary)
        unknown_nodes = _find_unknown_nodes(reply, allowed_nodes)
        unknown_namespaces = _find_unknown_namespaces(reply, allowed_namespaces)
        extra_bits = []
        if unknown_nodes:
            extra_bits.append("UnknownNodes: " + ", ".join(sorted(unknown_nodes)))
        if unknown_namespaces:
            extra_bits.append("UnknownNamespaces: " + ", ".join(sorted(unknown_namespaces)))
        if allowed_nodes:
            extra_bits.append("AllowedNodes: " + ", ".join(allowed_nodes))
        if allowed_namespaces:
            extra_bits.append("AllowedNamespaces: " + ", ".join(allowed_namespaces))
        if extra_bits:
            # One corrective pass constrained to the allowed entity lists.
            fix_prompt = (
                prompts.EVIDENCE_FIX_PROMPT
                + "\nQuestion: "
                + question
                + "\nDraft: "
                + reply
                + "\n"
                + "\n".join(extra_bits)
            )
            reply = await call_llm(
                prompts.EVIDENCE_FIX_SYSTEM,
                fix_prompt,
                context="Evidence:\n" + evidence_ctx,
                model=plan.model,
                tag="followup_fix",
            )
        reply = await self._dedup_reply(reply, plan, call_llm, tag="dedup_followup")
        reply = _strip_followup_meta(reply)
        return reply

    async def _select_claims(self, question: str, claims: list[ClaimItem], plan: ModePlan, call_llm: Callable[..., Any]) -> list[str]:
        """Ask the fast model which stored claims the follow-up refers to."""
        if not claims:
            return []
        claims_brief = [{"id": claim.id, "claim": claim.claim} for claim in claims]
        prompt = prompts.SELECT_CLAIMS_PROMPT + "\nFollow-up: " + question + "\nClaims: " + json.dumps(claims_brief)
        raw = await call_llm(prompts.FOLLOWUP_SYSTEM, prompt, model=plan.fast_model, tag="select_claims")
        data = _parse_json_block(raw, fallback={})
        ids = data.get("claim_ids") if isinstance(data, dict) else []
        if isinstance(ids, list):
            return [str(item) for item in ids if item]
        return []

    def _get_state(self, conversation_id: str | None) -> ConversationState | None:
        """Load stored conversation state, or None when absent/unidentified."""
        if not conversation_id:
            return None
        state_payload = self._store.get(conversation_id)
        return _state_from_payload(state_payload) if state_payload else None

    def _store_state(self, conversation_id: str, claims: list[ClaimItem], summary: dict[str, Any], snapshot: dict[str, Any] | None, pin_snapshot: bool) -> None:
        """Persist claims (and optionally the pinned snapshot) for follow-ups."""
        snapshot_id = _snapshot_id(summary)
        pinned_snapshot = snapshot if pin_snapshot else None
        payload = {
            # Monotonic clock: used for TTL math, not for display.
            "updated_at": time.monotonic(),
            "claims": _claims_to_payload(claims),
            "snapshot_id": snapshot_id,
            "snapshot": pinned_snapshot,
        }
        self._store.set(conversation_id, payload)

    def _cleanup_state(self) -> None:
        """Evict expired conversation state from the store."""
        self._store.cleanup()
|
||||
189
atlasbot/engine/answerer/factsheet.py
Normal file
189
atlasbot/engine/answerer/factsheet.py
Normal file
@ -0,0 +1,189 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from ._base import *
|
||||
|
||||
MAX_FACT_LINE_CHARS = 180
|
||||
MAX_KB_LINE_CHARS = 220
|
||||
|
||||
|
||||
def _factsheet_kb_chars(mode: str, default_chars: int) -> int:
|
||||
if mode == "genius":
|
||||
return min(max(default_chars, 4000), 6000)
|
||||
if mode == "smart":
|
||||
return min(max(default_chars, 3000), 4500)
|
||||
return max(1200, default_chars)
|
||||
|
||||
|
||||
def _factsheet_line_limit(mode: str) -> int:
|
||||
if mode == "genius":
|
||||
return 30
|
||||
if mode == "smart":
|
||||
return 22
|
||||
return 14
|
||||
|
||||
|
||||
def _factsheet_instruction(mode: str) -> str:
|
||||
if mode == "genius":
|
||||
return (
|
||||
"Start with a direct conclusion, then include the strongest supporting facts and one caveat. "
|
||||
"Keep it to 4-8 sentences. If data is missing, name the missing metric explicitly."
|
||||
)
|
||||
if mode == "smart":
|
||||
return (
|
||||
"Start with a direct conclusion and support it with key facts. Keep it to 2-5 sentences. "
|
||||
"If data is missing, say exactly what is missing and suggest atlas-genius."
|
||||
)
|
||||
return "Keep it to 1-3 sentences. If key data is missing, say what is missing and suggest atlas-smart."
|
||||
|
||||
|
||||
def _factsheet_model(mode: str, plan: ModePlan) -> str:
|
||||
if mode in {"quick", "fast"}:
|
||||
return plan.fast_model
|
||||
return plan.model
|
||||
|
||||
|
||||
def _is_plain_math_question(question: str) -> bool:
|
||||
lowered = question.lower().strip()
|
||||
if not lowered:
|
||||
return False
|
||||
cluster_markers = (
|
||||
"titan",
|
||||
"atlas",
|
||||
"cluster",
|
||||
"node",
|
||||
"pod",
|
||||
"namespace",
|
||||
"workload",
|
||||
"grafana",
|
||||
"alert",
|
||||
"k8s",
|
||||
"kubernetes",
|
||||
"rpi",
|
||||
"longhorn",
|
||||
"postgres",
|
||||
"victoria",
|
||||
"ollama",
|
||||
)
|
||||
if any(token in lowered for token in cluster_markers):
|
||||
return False
|
||||
return bool(
|
||||
re.fullmatch(r"[0-9\s+\-*/().=]+", lowered)
|
||||
or re.search(r"\bwhat(?:'s| is)\s+\d+\s*[-+*/]\s*\d+\b", lowered)
|
||||
)
|
||||
|
||||
|
||||
def _quick_fact_sheet_lines(question: str, summary_lines: list[str], kb_lines: list[str], *, limit: int) -> list[str]:  # noqa: C901
    """Select up to ``limit`` fact lines relevant to ``question``.

    Summary lines are scored by priority markers and question-token overlap;
    matching KB lines are appended (capped to roughly a third of the budget),
    then the combined list is deduped in order.
    """
    # Question tokens of 3+ chars, excluding generic metric vocabulary.
    tokens = {
        token
        for token in re.findall(r"[a-z0-9][a-z0-9_-]{2,}", question.lower())
        if token not in GENERIC_METRIC_TOKENS
    }
    # Substrings that make a summary line relevant regardless of token overlap.
    priority_markers = (
        "snapshot:",
        "nodes_total",
        "nodes_ready",
        "nodes_not_ready",
        "workers_ready",
        "workers_not_ready",
        "control_plane",
        "worker_nodes",
        "hottest",
        "postgres",
        "pods",
        "longhorn",
        "titan-",
        "rpi5",
        "rpi4",
        "jetson",
        "amd64",
    )
    scored: list[tuple[int, str]] = []
    for raw in summary_lines:
        line = raw.strip()
        if not line:
            continue
        lowered = line.lower()
        score = 0
        if any(marker in lowered for marker in priority_markers):
            score += 4
        overlap = sum(1 for token in tokens if token in lowered)
        score += overlap * 3
        # Small bonus for lines short enough to quote verbatim.
        if len(line) <= MAX_FACT_LINE_CHARS:
            score += 1
        if score > 0:
            scored.append((score, line))

    scored.sort(key=lambda item: item[0], reverse=True)
    selected = [line for _, line in scored[:limit]]
    if not selected:
        # Nothing matched: fall back to the first non-empty summary lines.
        selected = [line.strip() for line in summary_lines if line.strip()][:limit]

    kb_selected: list[str] = []
    for raw in kb_lines:
        line = raw.strip()
        if not line or len(line) > MAX_KB_LINE_CHARS:
            continue
        lowered = line.lower()
        # Skip KB bookkeeping/header lines.
        if "kb file:" in lowered or "kb: atlas.json" in lowered:
            continue
        overlap = sum(1 for token in tokens if token in lowered)
        if overlap > 0 or any(marker in lowered for marker in ("runbook", "titan-", "rpi5", "rpi4", "amd64", "jetson")):
            kb_selected.append(line)
        # KB lines are capped to roughly a third of the budget (min 4).
        if len(kb_selected) >= max(4, limit // 3):
            break

    # Merge summary and KB picks, deduping while preserving order.
    merged = []
    seen: set[str] = set()
    for line in selected + kb_selected:
        if line not in seen:
            seen.add(line)
            merged.append(line)
        if len(merged) >= limit:
            break
    return merged
|
||||
|
||||
|
||||
def _quick_fact_sheet_text(lines: list[str]) -> str:
|
||||
if not lines:
|
||||
return "Fact Sheet:\n- No snapshot facts available."
|
||||
body = "\n".join([f"- {line}" for line in lines])
|
||||
return "Fact Sheet:\n" + body
|
||||
|
||||
|
||||
def _quick_fact_sheet_heuristic_answer(question: str, fact_lines: list[str]) -> str:
|
||||
lowered = question.lower()
|
||||
if (
|
||||
any(token in lowered for token in ("placement", "schedule", "last resort", "last-resort"))
|
||||
and any(token in lowered for token in ("node", "workload", "worker", "titan"))
|
||||
):
|
||||
return (
|
||||
"General workload placement is: prefer rpi5 workers first, then rpi4 workers. "
|
||||
"titan-22 is the last-resort general compute node, and titan-24 is the absolute last resort "
|
||||
"reserved for heavy one-offs."
|
||||
)
|
||||
|
||||
for line in fact_lines:
|
||||
compact = line.replace(" ", "")
|
||||
match = re.search(r"nodes_total[:=](\d+),ready[:=](\d+),not_ready[:=](\d+)", compact)
|
||||
if not match:
|
||||
continue
|
||||
total = match.group(1)
|
||||
ready = match.group(2)
|
||||
not_ready = match.group(3)
|
||||
if "how many" in lowered and "ready" in lowered and "node" in lowered:
|
||||
return f"The latest snapshot shows {ready} ready nodes out of {total} total ({not_ready} not ready)."
|
||||
if ("not ready" in lowered or "unready" in lowered) and "node" in lowered:
|
||||
return f"The latest snapshot shows {not_ready} not-ready nodes ({ready} ready out of {total} total)."
|
||||
return ""
|
||||
|
||||
|
||||
def _json_excerpt(summary: dict[str, Any], max_chars: int = 12000) -> str:
|
||||
raw = json.dumps(summary, ensure_ascii=False)
|
||||
return raw[:max_chars]
|
||||
|
||||
|
||||
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||
459
atlasbot/engine/answerer/post.py
Normal file
459
atlasbot/engine/answerer/post.py
Normal file
@ -0,0 +1,459 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from atlasbot.llm import prompts
|
||||
from atlasbot.llm.client import parse_json
|
||||
|
||||
from ._base import *
|
||||
from .retrieval_ext import _dedupe_lines
|
||||
|
||||
|
||||
def _merge_fact_lines(primary: list[str], fallback: list[str]) -> list[str]:
|
||||
merged: list[str] = []
|
||||
for line in primary + fallback:
|
||||
value = (line or "").strip()
|
||||
if value and value not in merged:
|
||||
merged.append(value)
|
||||
return merged
|
||||
|
||||
|
||||
def _strip_unknown_entities(reply: str, unknown_nodes: list[str], unknown_namespaces: list[str]) -> str:
|
||||
if not reply:
|
||||
return reply
|
||||
if not unknown_nodes and not unknown_namespaces:
|
||||
return reply
|
||||
sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", reply) if s.strip()]
|
||||
if not sentences:
|
||||
return reply
|
||||
lowered_nodes = [node.lower() for node in unknown_nodes]
|
||||
lowered_namespaces = [ns.lower() for ns in unknown_namespaces]
|
||||
kept: list[str] = []
|
||||
for sent in sentences:
|
||||
lower = sent.lower()
|
||||
if lowered_nodes and any(node in lower for node in lowered_nodes):
|
||||
continue
|
||||
if lowered_namespaces and any(f"namespace {ns}" in lower for ns in lowered_namespaces):
|
||||
continue
|
||||
kept.append(sent)
|
||||
cleaned = " ".join(kept).strip()
|
||||
return cleaned or reply
|
||||
|
||||
|
||||
def _needs_evidence_guard(reply: str, facts: list[str]) -> bool:
|
||||
if not reply or not facts:
|
||||
return False
|
||||
lower_reply = reply.lower()
|
||||
fact_text = " ".join(facts).lower()
|
||||
node_pattern = re.compile(r"\b(titan-[0-9a-z]+|node-?\d+)\b", re.IGNORECASE)
|
||||
nodes = {m.group(1).lower() for m in node_pattern.finditer(reply)}
|
||||
if nodes:
|
||||
missing = [node for node in nodes if node not in fact_text]
|
||||
if missing:
|
||||
return True
|
||||
pressure_terms = ("pressure", "diskpressure", "memorypressure", "pidpressure", "headroom")
|
||||
if any(term in lower_reply for term in pressure_terms) and not any(term in fact_text for term in pressure_terms):
|
||||
return True
|
||||
arch_terms = ("amd64", "arm64", "rpi", "rpi4", "rpi5", "jetson")
|
||||
return any(term in lower_reply for term in arch_terms) and not any(term in fact_text for term in arch_terms)
|
||||
|
||||
|
||||
async def _contradiction_decision(ctx: ContradictionContext, attempts: int = 1) -> dict[str, Any]:
    """Ask the fast model whether the facts should override the draft.

    Runs up to ``attempts`` prompt variants and keeps the verdict with the
    highest reported confidence; defaults to trusting facts at confidence 50.
    """
    best = {"use_facts": True, "confidence": 50}
    facts_block = "\n".join(ctx.facts[:12])
    for idx in range(max(1, attempts)):
        variant = f"Variant: {idx + 1}" if attempts > 1 else ""
        prompt = (
            prompts.CONTRADICTION_PROMPT.format(question=ctx.question, draft=ctx.reply, facts=facts_block)
            + ("\n" + variant if variant else "")
        )
        raw = await ctx.call_llm(
            prompts.CONTRADICTION_SYSTEM,
            prompt,
            model=ctx.plan.fast_model,
            tag="contradiction",
        )
        data = _parse_json_block(raw, fallback={})
        try:
            confidence = int(data.get("confidence", 50))
        except (TypeError, ValueError):
            # Narrowed from bare `except Exception`: only the coercion errors
            # int() can raise, so real bugs are no longer silently swallowed.
            confidence = 50
        use_facts = bool(data.get("use_facts", True))
        # >= keeps the most recent run on ties.
        if confidence >= best.get("confidence", 0):
            best = {"use_facts": use_facts, "confidence": confidence}
    return best
|
||||
|
||||
|
||||
def _filter_lines_by_keywords(lines: list[str], keywords: list[str], max_lines: int) -> list[str]:
    """Keep lines mentioning any keyword token; fall back to the list head."""
    if not lines:
        return []
    tokens = _expand_tokens(keywords)
    if not tokens:
        return lines[:max_lines]
    matching = [line for line in lines if any(token in line.lower() for token in tokens)]
    if matching:
        return matching[:max_lines]
    return lines[:max_lines]
|
||||
|
||||
|
||||
def _rank_metric_lines(lines: list[str], tokens: set[str], max_lines: int) -> list[str]:
|
||||
if not lines or not tokens:
|
||||
return []
|
||||
ranked: list[tuple[int, int, str]] = []
|
||||
for line in lines:
|
||||
lower = line.lower()
|
||||
hits = sum(1 for tok in tokens if tok in lower)
|
||||
if not hits:
|
||||
continue
|
||||
has_number = 1 if re.search(r"\d", line) else 0
|
||||
ranked.append((has_number, hits, line))
|
||||
ranked.sort(key=lambda item: (-item[0], -item[1], item[2]))
|
||||
return [item[2] for item in ranked[:max_lines]]
|
||||
|
||||
|
||||
def _select_metric_line(lines: list[str], question: str, tokens: list[str] | set[str]) -> str | None:
    """Pick the single best metric line; prefer totals for count-style questions."""
    if not lines or not tokens:
        return None
    token_set = {str(token).lower() for token in tokens if token}
    ranked = _rank_metric_lines(lines, token_set, max_lines=6)
    if not ranked:
        return None
    question_lower = (question or "").lower()
    if any(term in question_lower for term in ("how many", "count", "total")):
        for line in ranked:
            low = line.lower()
            if "total" in low or "count" in low:
                return line
    return ranked[0]
|
||||
|
||||
|
||||
def _format_direct_metric_line(line: str) -> str:
    """Turn a raw metric line into a sentence, trying colon then equals form.

    Falls back to the unmodified line when neither formatter produces text.
    """
    if not line:
        return ""
    if ":" in line:
        sentence = _format_colon_metric(line)
        if sentence:
            return sentence
    if "=" in line:
        sentence = _format_equals_metric(line)
        if sentence:
            return sentence
    return line
|
||||
|
||||
|
||||
def _format_colon_metric(line: str) -> str | None:
    """Render a 'key: value' metric line as a sentence; None when value is empty."""
    raw_key, _, raw_value = line.partition(":")
    key = raw_key.strip().replace("_", " ")
    value = raw_value.strip()
    if not value:
        return None
    if key == "nodes":
        nodes_sentence = _format_nodes_value(value)
        if nodes_sentence:
            return nodes_sentence
    # NOTE(review): "nodes_total" can never match here since underscores were
    # already replaced with spaces — presumably kept for symmetry; confirm.
    if key in {"nodes total", "nodes_total"}:
        return f"Atlas has {value} total nodes."
    return f"{key} is {value}."
|
||||
|
||||
|
||||
def _format_equals_metric(line: str) -> str | None:
|
||||
pairs: list[str] = []
|
||||
for part in line.split(","):
|
||||
if "=" not in part:
|
||||
continue
|
||||
key, value = part.split("=", 1)
|
||||
key = key.strip().replace("_", " ")
|
||||
value = value.strip()
|
||||
if not value:
|
||||
continue
|
||||
if key in {"nodes total", "nodes_total"}:
|
||||
return f"Atlas has {value} total nodes."
|
||||
pairs.append(f"{key} is {value}")
|
||||
if not pairs:
|
||||
return None
|
||||
if len(pairs) == 1:
|
||||
return f"{pairs[0]}."
|
||||
return "; ".join(pairs) + "."
|
||||
|
||||
|
||||
def _format_nodes_value(value: str) -> str | None:
|
||||
parts = [p.strip() for p in value.split(",") if p.strip()]
|
||||
total = None
|
||||
rest: list[str] = []
|
||||
for part in parts:
|
||||
if part.startswith("total="):
|
||||
total = part.split("=", 1)[1]
|
||||
else:
|
||||
rest.append(part.replace("_", " "))
|
||||
if not total:
|
||||
return None
|
||||
if rest:
|
||||
return f"Atlas has {total} total nodes ({'; '.join(rest)})."
|
||||
return f"Atlas has {total} total nodes."
|
||||
|
||||
|
||||
def _global_facts(lines: list[str]) -> list[str]:
    """Collect cluster-wide fact lines (node counts, cluster name), deduped to 6."""
    if not lines:
        return []
    wanted = ("nodes_total", "nodes_ready", "cluster_name", "cluster", "nodes_not_ready")
    picked = [line for line in lines if any(key in line.lower() for key in wanted)]
    return _dedupe_lines(picked, limit=6)
|
||||
|
||||
|
||||
def _has_keyword_overlap(lines: list[str], keywords: list[str]) -> bool:
    """True when any line contains any expanded keyword token."""
    if not lines or not keywords:
        return False
    tokens = _expand_tokens(keywords)
    if not tokens:
        return False
    return any(
        any(token in line.lower() for token in tokens)
        for line in lines
    )
|
||||
|
||||
|
||||
def _merge_tokens(primary: list[str], secondary: list[str], third: list[str] | None = None) -> list[str]:
|
||||
merged: list[str] = []
|
||||
for token in primary + secondary + (third or []):
|
||||
if not token:
|
||||
continue
|
||||
if token not in merged:
|
||||
merged.append(token)
|
||||
return merged
|
||||
|
||||
|
||||
def _extract_question_tokens(question: str) -> list[str]:
    """Split the question into unique lowercase tokens of at least TOKEN_MIN_LEN chars."""
    if not question:
        return []
    tokens: list[str] = []
    for piece in re.split(r"[^a-zA-Z0-9_-]+", question.lower()):
        if len(piece) >= TOKEN_MIN_LEN and piece not in tokens:
            tokens.append(piece)
    return tokens
|
||||
|
||||
|
||||
def _expand_tokens(tokens: list[str]) -> list[str]:
    """Split keyword strings into unique lowercase word tokens of minimum length.

    Non-string entries are ignored rather than raising.
    """
    if not tokens:
        return []
    expanded: list[str] = []
    for token in tokens:
        if not isinstance(token, str):
            continue
        for piece in re.split(r"[^a-zA-Z0-9_-]+", token.lower()):
            if len(piece) >= TOKEN_MIN_LEN and piece not in expanded:
                expanded.append(piece)
    return expanded
|
||||
|
||||
|
||||
def _ensure_token_coverage(lines: list[str], tokens: list[str], summary_lines: list[str], max_add: int = 4) -> list[str]:
    """Prepend up to ``max_add`` summary lines covering tokens missing from ``lines``."""
    if not lines or not tokens or not summary_lines:
        return lines
    haystack = " ".join(lines).lower()
    missing = [token for token in tokens if token and token.lower() not in haystack]
    if not missing:
        return lines
    additions: list[str] = []
    for token in missing:
        needle = token.lower()
        # Take the first summary line mentioning the missing token.
        for candidate in summary_lines:
            if needle in candidate.lower() and candidate not in lines and candidate not in additions:
                additions.append(candidate)
                break
        if len(additions) >= max_add:
            break
    if not additions:
        return lines
    return _merge_fact_lines(additions, lines)
|
||||
|
||||
|
||||
def _best_keyword_line(lines: list[str], keywords: list[str]) -> str | None:
    """Return the line with the most keyword-token hits; None when nothing matches."""
    if not lines or not keywords:
        return None
    tokens = _expand_tokens(keywords)
    if not tokens:
        return None
    winner: str | None = None
    winner_score = 0
    for line in lines:
        low = line.lower()
        hits = sum(1 for token in tokens if token in low)
        # Strictly-greater keeps the earliest line on ties.
        if hits > winner_score:
            winner_score = hits
            winner = line
    return winner
|
||||
|
||||
|
||||
def _line_starting_with(lines: list[str], prefix: str) -> str | None:
|
||||
if not lines or not prefix:
|
||||
return None
|
||||
lower_prefix = prefix.lower()
|
||||
for line in lines:
|
||||
if str(line).lower().startswith(lower_prefix):
|
||||
return line
|
||||
return None
|
||||
|
||||
|
||||
def _non_rpi_nodes(summary: dict[str, Any]) -> dict[str, list[str]]:
|
||||
hardware = summary.get("hardware_by_node") if isinstance(summary, dict) else None
|
||||
if not isinstance(hardware, dict):
|
||||
return {}
|
||||
grouped: dict[str, list[str]] = {}
|
||||
for node, hw in hardware.items():
|
||||
if not isinstance(node, str) or not isinstance(hw, str):
|
||||
continue
|
||||
if hw.startswith("rpi"):
|
||||
continue
|
||||
grouped.setdefault(hw, []).append(node)
|
||||
for nodes in grouped.values():
|
||||
nodes.sort()
|
||||
return grouped
|
||||
|
||||
|
||||
def _format_hardware_groups(groups: dict[str, list[str]], label: str) -> str:
|
||||
if not groups:
|
||||
return ""
|
||||
parts = []
|
||||
for hw, nodes in sorted(groups.items()):
|
||||
parts.append(f"{hw} ({', '.join(nodes)})")
|
||||
return f"{label}: " + "; ".join(parts) + "."
|
||||
|
||||
|
||||
def _lexicon_context(summary: dict[str, Any]) -> str: # noqa: C901
|
||||
if not isinstance(summary, dict):
|
||||
return ""
|
||||
lexicon = summary.get("lexicon")
|
||||
if not isinstance(lexicon, dict):
|
||||
return ""
|
||||
terms = lexicon.get("terms")
|
||||
aliases = lexicon.get("aliases")
|
||||
lines: list[str] = []
|
||||
if isinstance(terms, list):
|
||||
for entry in terms[:8]:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
term = entry.get("term")
|
||||
meaning = entry.get("meaning")
|
||||
if term and meaning:
|
||||
lines.append(f"{term}: {meaning}")
|
||||
if isinstance(aliases, dict):
|
||||
for key, value in list(aliases.items())[:6]:
|
||||
if key and value:
|
||||
lines.append(f"alias {key} -> {value}")
|
||||
if not lines:
|
||||
return ""
|
||||
return "Lexicon:\n" + "\n".join(lines)
|
||||
|
||||
|
||||
def _parse_json_block(text: str, *, fallback: dict[str, Any]) -> dict[str, Any]:
    """Parse the first {...} blob found in *text*; parse the whole text if none."""
    stripped = text.strip()
    blob = re.search(r"\{.*\}", stripped, flags=re.S)
    candidate = blob.group(0) if blob else stripped
    return parse_json(candidate, fallback=fallback)
|
||||
|
||||
|
||||
def _parse_json_list(text: str) -> list[dict[str, Any]]:
    """Parse a JSON list from *text*, keeping only the dict entries."""
    stripped = text.strip()
    blob = re.search(r"\[.*\]", stripped, flags=re.S)
    parsed = parse_json(blob.group(0) if blob else stripped, fallback={})
    if not isinstance(parsed, list):
        return []
    return [item for item in parsed if isinstance(item, dict)]
|
||||
|
||||
|
||||
def _scores_from_json(data: dict[str, Any]) -> AnswerScores:
    """Build AnswerScores from a parsed JSON dict, defaulting to 60 / medium."""
    risk = str(data.get("hallucination_risk") or "medium")
    return AnswerScores(
        confidence=_coerce_int(data.get("confidence"), 60),
        relevance=_coerce_int(data.get("relevance"), 60),
        satisfaction=_coerce_int(data.get("satisfaction"), 60),
        hallucination_risk=risk,
    )
|
||||
|
||||
|
||||
def _coerce_int(value: Any, default: int) -> int:
|
||||
try:
|
||||
return int(float(value))
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def _default_scores() -> AnswerScores:
    """Return the neutral fallback scores: 60 across the board, medium risk."""
    return AnswerScores(confidence=60, relevance=60, satisfaction=60, hallucination_risk="medium")
|
||||
|
||||
|
||||
def _style_hint(classify: dict[str, Any]) -> str:
|
||||
style = (classify.get("answer_style") or "").strip().lower()
|
||||
qtype = (classify.get("question_type") or "").strip().lower()
|
||||
if style == "insightful" or qtype in {"open_ended", "planning"}:
|
||||
return "insightful"
|
||||
return "direct"
|
||||
|
||||
|
||||
def _needs_evidence_fix(reply: str, classify: dict[str, Any]) -> bool:
|
||||
if not reply:
|
||||
return False
|
||||
lowered = reply.lower()
|
||||
missing_markers = (
|
||||
"don't have",
|
||||
"do not have",
|
||||
"don't know",
|
||||
"cannot",
|
||||
"can't",
|
||||
"need to",
|
||||
"would need",
|
||||
"does not provide",
|
||||
"does not mention",
|
||||
"not mention",
|
||||
"not provided",
|
||||
"not in context",
|
||||
"not referenced",
|
||||
"missing",
|
||||
"no specific",
|
||||
"no information",
|
||||
)
|
||||
if classify.get("needs_snapshot") and any(marker in lowered for marker in missing_markers):
|
||||
return True
|
||||
return classify.get("question_type") in {"metric", "diagnostic"} and not re.search(r"\d", reply)
|
||||
|
||||
|
||||
def _should_use_insight_guard(classify: dict[str, Any]) -> bool:
|
||||
style = (classify.get("answer_style") or "").strip().lower()
|
||||
qtype = (classify.get("question_type") or "").strip().lower()
|
||||
return style == "insightful" or qtype in {"open_ended", "planning"}
|
||||
|
||||
|
||||
async def _apply_insight_guard(inputs: InsightGuardInput) -> str:
    """Run the insight guard over a reply and rewrite it when the guard fails.

    The fast model judges the reply; unless it returns {"ok": true}, the main
    model rewrites the answer, optionally grounded in up to six facts.
    """
    if not inputs.reply or not _should_use_insight_guard(inputs.classify):
        return inputs.reply
    verdict_raw = await inputs.call_llm(
        prompts.INSIGHT_GUARD_SYSTEM,
        prompts.INSIGHT_GUARD_PROMPT.format(question=inputs.question, answer=inputs.reply),
        context=inputs.context,
        model=inputs.plan.fast_model,
        tag="insight_guard",
    )
    verdict = _parse_json_block(verdict_raw, fallback={})
    if verdict.get("ok") is True:
        return inputs.reply
    rewrite_prompt = prompts.INSIGHT_FIX_PROMPT.format(question=inputs.question, answer=inputs.reply)
    if inputs.facts:
        rewrite_prompt += "\nFacts:\n" + "\n".join(inputs.facts[:6])
    return await inputs.call_llm(
        prompts.INSIGHT_FIX_SYSTEM,
        rewrite_prompt,
        context=inputs.context,
        model=inputs.plan.model,
        tag="insight_fix",
    )
|
||||
|
||||
|
||||
# Export every module-private helper so sibling modules' star-imports see them.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||
276
atlasbot/engine/answerer/post_ext.py
Normal file
276
atlasbot/engine/answerer/post_ext.py
Normal file
@ -0,0 +1,276 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import difflib
|
||||
import re
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from ._base import *
|
||||
|
||||
|
||||
def _reply_matches_metric_facts(reply: str, metric_facts: list[str], tokens: list[str] | set[str] | None = None) -> bool:
|
||||
if not reply or not metric_facts:
|
||||
return True
|
||||
reply_numbers = set(re.findall(r"\d+(?:\\.\d+)?", reply))
|
||||
if not reply_numbers:
|
||||
return False
|
||||
fact_numbers: set[str] = set()
|
||||
value_pattern = re.compile(r"(?:>=|<=|=|:)\s*(\d+(?:\.\d+)?)")
|
||||
filtered = metric_facts
|
||||
if tokens:
|
||||
token_set = {str(tok).lower() for tok in tokens if tok}
|
||||
focused = []
|
||||
for line in metric_facts:
|
||||
key = line.split(":", 1)[0].lower()
|
||||
if any(tok in key for tok in token_set):
|
||||
focused.append(line)
|
||||
if focused:
|
||||
filtered = focused
|
||||
for line in filtered:
|
||||
for match in value_pattern.findall(line):
|
||||
fact_numbers.add(match)
|
||||
if not fact_numbers:
|
||||
return False
|
||||
return bool(reply_numbers & fact_numbers)
|
||||
|
||||
|
||||
def _needs_dedup(reply: str) -> bool:
    """True when *reply* repeats a sentence verbatim (case/whitespace-insensitive)."""
    if not reply:
        return False
    sentences = [part.strip() for part in re.split(r"(?<=[.!?])\s+", reply) if part.strip()]
    if len(sentences) < DEDUP_MIN_SENTENCES:
        return False
    normalized = [re.sub(r"\s+", " ", sentence.lower()) for sentence in sentences]
    return len(set(normalized)) < len(normalized)
|
||||
|
||||
|
||||
def _needs_focus_fix(question: str, reply: str, classify: dict[str, Any]) -> bool:
|
||||
if not reply:
|
||||
return False
|
||||
q_lower = (question or "").lower()
|
||||
if classify.get("question_type") not in {"metric", "diagnostic"} and not re.search(r"\b(how many|list|count)\b", q_lower):
|
||||
return False
|
||||
missing_markers = (
|
||||
"does not provide",
|
||||
"does not specify",
|
||||
"not available",
|
||||
"not provided",
|
||||
"cannot determine",
|
||||
"don't have",
|
||||
"do not have",
|
||||
"insufficient",
|
||||
"no data",
|
||||
)
|
||||
if any(marker in reply.lower() for marker in missing_markers):
|
||||
return True
|
||||
if reply.count(".") <= 1:
|
||||
return False
|
||||
extra_markers = ("for more", "if you need", "additional", "based on")
|
||||
return any(marker in reply.lower() for marker in extra_markers)
|
||||
|
||||
|
||||
def _extract_keywords(raw_question: str, normalized: str, sub_questions: list[str], keywords: list[Any] | None) -> list[str]:
    """Collect up to 12 lowercase keyword tokens from the question texts.

    Tokens shorter than TOKEN_MIN_LEN and common stopwords are dropped;
    explicit *keywords* are appended after the derived tokens.
    """
    stopwords = {
        "the", "and", "for", "with", "that", "this", "what", "which",
        "when", "where", "who", "why", "how", "tell", "show", "list",
        "give", "about", "right", "now",
    }
    tokens: list[str] = []
    for source in (raw_question, normalized, *sub_questions):
        for piece in re.split(r"[^a-zA-Z0-9_-]+", source.lower()):
            if len(piece) >= TOKEN_MIN_LEN and piece not in stopwords:
                tokens.append(piece)
    for kw in keywords or []:
        if isinstance(kw, str):
            cleaned = kw.strip().lower()
            if cleaned and cleaned not in stopwords and cleaned not in tokens:
                tokens.append(cleaned)
    return list(dict.fromkeys(tokens))[:12]
|
||||
|
||||
|
||||
def _allowed_nodes(summary: dict[str, Any]) -> list[str]:
|
||||
hardware = summary.get("hardware_by_node") if isinstance(summary.get("hardware_by_node"), dict) else {}
|
||||
if hardware:
|
||||
return sorted([node for node in hardware if isinstance(node, str)])
|
||||
return []
|
||||
|
||||
|
||||
def _allowed_namespaces(summary: dict[str, Any]) -> list[str]:
|
||||
namespaces: list[str] = []
|
||||
for entry in summary.get("namespace_pods") or []:
|
||||
if isinstance(entry, dict):
|
||||
name = entry.get("namespace")
|
||||
if name:
|
||||
namespaces.append(str(name))
|
||||
return sorted(set(namespaces))
|
||||
|
||||
|
||||
def _find_unknown_nodes(reply: str, allowed: list[str]) -> list[str]:
|
||||
if not reply or not allowed:
|
||||
return []
|
||||
pattern = re.compile(r"\b(titan-[0-9a-z]+|node-?\d+)\b", re.IGNORECASE)
|
||||
found = {m.group(1) for m in pattern.finditer(reply)}
|
||||
if not found:
|
||||
return []
|
||||
allowed_set = {a.lower() for a in allowed}
|
||||
return sorted({item for item in found if item.lower() not in allowed_set})
|
||||
|
||||
|
||||
def _find_unknown_namespaces(reply: str, allowed: list[str]) -> list[str]:
|
||||
if not reply or not allowed:
|
||||
return []
|
||||
pattern = re.compile(r"\bnamespace\s+([a-z0-9-]+)\b", re.IGNORECASE)
|
||||
found = {m.group(1) for m in pattern.finditer(reply)}
|
||||
if not found:
|
||||
return []
|
||||
allowed_set = {a.lower() for a in allowed}
|
||||
return sorted({item for item in found if item.lower() not in allowed_set})
|
||||
|
||||
|
||||
def _needs_runbook_fix(reply: str, allowed: list[str]) -> bool:
|
||||
if not reply or not allowed:
|
||||
return False
|
||||
paths = set(re.findall(r"runbooks/[A-Za-z0-9._-]+", reply))
|
||||
if not paths:
|
||||
return False
|
||||
allowed_set = {p.lower() for p in allowed}
|
||||
return any(path.lower() not in allowed_set for path in paths)
|
||||
|
||||
|
||||
def _needs_runbook_reference(question: str, allowed: list[str], reply: str) -> bool:
|
||||
if not allowed or not question:
|
||||
return False
|
||||
lowered = question.lower()
|
||||
cues = ("runbook", "checklist", "documented", "documentation", "where", "guide")
|
||||
if not any(cue in lowered for cue in cues):
|
||||
return False
|
||||
if not reply:
|
||||
return True
|
||||
for token in re.findall(r"runbooks/[A-Za-z0-9._-]+", reply):
|
||||
if token.lower() in {p.lower() for p in allowed}:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _best_runbook_match(candidate: str, allowed: list[str]) -> str | None:
    """Closest allowed runbook path by fuzzy ratio, or None below threshold."""
    if not candidate or not allowed:
        return None
    needle = candidate.lower()
    best: str | None = None
    best_score = 0.0
    for path in allowed:
        ratio = difflib.SequenceMatcher(a=needle, b=path.lower()).ratio()
        if ratio > best_score:
            best, best_score = path, ratio
    return best if best_score >= RUNBOOK_SIMILARITY_THRESHOLD else None
|
||||
|
||||
|
||||
def _resolve_path(data: Any, path: str) -> Any | None:
|
||||
if path.startswith("line:"):
|
||||
return path.split("line:", 1)[1].strip()
|
||||
cursor = data
|
||||
for part in re.split(r"\.(?![^\[]*\])", path):
|
||||
if not part:
|
||||
continue
|
||||
match = re.match(r"^(\w+)(?:\[(\d+)\])?$", part)
|
||||
if not match:
|
||||
return None
|
||||
key = match.group(1)
|
||||
index = match.group(2)
|
||||
if isinstance(cursor, dict):
|
||||
cursor = cursor.get(key)
|
||||
else:
|
||||
return None
|
||||
if index is not None:
|
||||
idx = int(index)
|
||||
if isinstance(cursor, list) and 0 <= idx < len(cursor):
|
||||
cursor = cursor[idx]
|
||||
else:
|
||||
return None
|
||||
return cursor
|
||||
|
||||
|
||||
def _snapshot_id(summary: dict[str, Any]) -> str | None:
|
||||
if not summary:
|
||||
return None
|
||||
for key in ("generated_at", "snapshot_ts", "snapshot_id"):
|
||||
value = summary.get(key)
|
||||
if isinstance(value, str) and value:
|
||||
return value
|
||||
return None
|
||||
|
||||
|
||||
def _claims_to_payload(claims: list[ClaimItem]) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
for claim in claims:
|
||||
evidence = []
|
||||
for ev in claim.evidence:
|
||||
evidence.append(
|
||||
{
|
||||
"path": ev.path,
|
||||
"reason": ev.reason,
|
||||
"value_at_claim": ev.value_at_claim,
|
||||
}
|
||||
)
|
||||
output.append({"id": claim.id, "claim": claim.claim, "evidence": evidence})
|
||||
return output
|
||||
|
||||
|
||||
def _state_from_payload(payload: dict[str, Any] | None) -> ConversationState | None:
    """Rehydrate a ConversationState from a previously serialized payload.

    Claims missing an id, text, or any usable evidence path are silently
    dropped. Returns None when *payload* is falsy.
    """
    if not payload:
        return None
    claims_raw = payload.get("claims") if isinstance(payload, dict) else None
    claims: list[ClaimItem] = []
    if isinstance(claims_raw, list):
        for entry in claims_raw:
            if not isinstance(entry, dict):
                continue
            claim_text = str(entry.get("claim") or "").strip()
            claim_id = str(entry.get("id") or "").strip()
            if not claim_text or not claim_id:
                continue
            evidence_items: list[EvidenceItem] = []
            for ev in entry.get("evidence") or []:
                if not isinstance(ev, dict):
                    continue
                path = str(ev.get("path") or "").strip()
                if not path:
                    continue
                reason = str(ev.get("reason") or "").strip()
                value_at_claim = ev.get("value_at_claim")
                evidence_items.append(EvidenceItem(path=path, reason=reason, value_at_claim=value_at_claim))
            # Only claims backed by at least one evidence path are kept.
            if evidence_items:
                claims.append(ClaimItem(id=claim_id, claim=claim_text, evidence=evidence_items))
    return ConversationState(
        # Missing/zero updated_at falls back to "now" on the monotonic clock.
        updated_at=float(payload.get("updated_at") or time.monotonic()),
        claims=claims,
        snapshot_id=payload.get("snapshot_id"),
        snapshot=payload.get("snapshot"),
    )
|
||||
|
||||
|
||||
# Export every module-private helper so sibling modules' star-imports see them.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||
344
atlasbot/engine/answerer/retrieval.py
Normal file
344
atlasbot/engine/answerer/retrieval.py
Normal file
@ -0,0 +1,344 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
from collections.abc import Awaitable
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
from atlasbot.llm import prompts
|
||||
from atlasbot.llm.client import parse_json
|
||||
|
||||
from ._base import *
|
||||
from .post_ext import _extract_keywords
|
||||
|
||||
|
||||
def _parse_json_block(text: str, *, fallback: dict[str, Any]) -> dict[str, Any]:
    """Parse the first {...} region of *text*, or the stripped text when absent."""
    stripped = text.strip()
    found = re.search(r"\{.*\}", stripped, flags=re.S)
    return parse_json(found.group(0) if found else stripped, fallback=fallback)
|
||||
|
||||
|
||||
async def _select_metric_chunks(
    call_llm: Callable[..., Awaitable[str]],
    ctx: dict[str, Any],
    chunks: list[dict[str, Any]],
    plan: ModePlan,
) -> tuple[list[str], list[str]]:
    """Pick metric summary keys relevant to the question plus matching chunk ids.

    Returns (selected_keys, chunk_ids); both empty when there is nothing to
    select from or the model picks nothing usable.
    """
    summary_lines, question, sub_questions, keywords, token_set = _metric_ctx_values(ctx)
    if not summary_lines or not chunks:
        return [], []
    keys = _extract_metric_keys(summary_lines)
    if not keys:
        return [], []
    max_keys = max(4, plan.max_subquestions * 2)
    # Prefer keys that share tokens with the question; fall back to all keys.
    candidate_keys = _filter_metric_keys(keys, token_set)
    available_keys = candidate_keys or keys
    prompt = prompts.METRIC_KEYS_PROMPT.format(available="\n".join(available_keys), max_keys=max_keys)
    raw = await call_llm(
        prompts.METRIC_KEYS_SYSTEM,
        prompt + "\nQuestion: " + str(question) + "\nSubQuestions:\n" + "\n".join([str(item) for item in sub_questions]),
        context="Keywords:\n" + ", ".join([str(item) for item in keywords if item]),
        model=plan.fast_model,
        tag="metric_keys",
    )
    selected = _parse_key_list(raw, available_keys, max_keys)
    if candidate_keys:
        selected = _merge_metric_keys(selected, candidate_keys, max_keys)
    # If the model's picks share no tokens with the question, trust the
    # token-filtered candidates instead.
    if selected and candidate_keys and not _metric_key_overlap(selected, token_set):
        selected = candidate_keys[:max_keys]
    if not selected and candidate_keys:
        selected = candidate_keys[:max_keys]
    if available_keys:
        # Second LLM pass: ask which still-needed keys are missing and merge them in.
        missing = await _validate_metric_keys(
            call_llm,
            {
                "question": question,
                "sub_questions": sub_questions,
                "selected": selected,
            },
            available_keys,
            plan,
        )
        if missing:
            selected = _merge_metric_keys(selected, missing, max_keys)
    if not selected:
        return [], []
    ids = _chunk_ids_for_keys(chunks, selected)
    return selected, ids
|
||||
|
||||
|
||||
async def _validate_metric_keys(
    call_llm: Callable[..., Awaitable[str]],
    ctx: dict[str, Any],
    available: list[str],
    plan: ModePlan,
) -> list[str]:
    """Ask the fast model which available metric keys are missing from the selection.

    Returns the validated subset of *available* (order preserved, deduped);
    empty on parse failures or when nothing is missing.
    """
    if not available:
        return []
    question = str(ctx.get("question") or "")
    sub_questions = ctx.get("sub_questions") if isinstance(ctx.get("sub_questions"), list) else []
    selected = ctx.get("selected") if isinstance(ctx.get("selected"), list) else []
    # Cap the advertised key list so the prompt stays bounded.
    cap = max(12, plan.max_subquestions * 4)
    available_list = available[:cap]
    prompt = prompts.METRIC_KEYS_VALIDATE_PROMPT.format(
        question=question,
        sub_questions=json.dumps(sub_questions),
        selected=json.dumps(selected),
        available="\n".join(available_list),
    )
    raw = await call_llm(
        prompts.METRIC_KEYS_VALIDATE_SYSTEM,
        prompt,
        model=plan.fast_model,
        tag="metric_keys_validate",
    )
    parsed = _parse_json_block(raw, fallback={})
    items = parsed.get("missing") if isinstance(parsed, dict) else []
    if not isinstance(items, list):
        return []
    available_set = set(available_list)
    out: list[str] = []
    for item in items:
        # Keep only names the model was actually offered, without duplicates.
        if isinstance(item, str) and item in available_set and item not in out:
            out.append(item)
    return out
|
||||
|
||||
|
||||
async def _gather_limited(coros: list[Awaitable[Any]], limit: int) -> list[Any]:
|
||||
if not coros:
|
||||
return []
|
||||
semaphore = asyncio.Semaphore(max(1, limit))
|
||||
|
||||
async def runner(coro: Awaitable[Any]) -> Any:
|
||||
async with semaphore:
|
||||
return await coro
|
||||
|
||||
return await asyncio.gather(*(runner(coro) for coro in coros))
|
||||
|
||||
|
||||
def _metric_ctx_values(ctx: dict[str, Any]) -> tuple[list[str], str, list[str], list[str], set[str]]:
    """Unpack question/keyword context for metric-key selection.

    Returns (summary_lines, question, sub_questions, keywords, token_set);
    everything is empty when summary_lines is absent or malformed.
    """
    summary_lines = ctx.get("summary_lines") if isinstance(ctx, dict) else None
    if not isinstance(summary_lines, list):
        return [], "", [], [], set()
    question = ctx.get("question") if isinstance(ctx, dict) else ""
    sub_questions = ctx.get("sub_questions") if isinstance(ctx.get("sub_questions"), list) else []
    keywords = ctx.get("keywords") if isinstance(ctx.get("keywords"), list) else []
    keyword_tokens = ctx.get("keyword_tokens") if isinstance(ctx.get("keyword_tokens"), list) else []
    tokens = {str(token).lower() for token in keyword_tokens if token}
    derived = _extract_keywords(str(question), str(question), sub_questions=sub_questions, keywords=keywords)
    tokens |= {token.lower() for token in derived}
    return summary_lines, str(question), sub_questions, keywords, _token_variants(tokens)
|
||||
|
||||
|
||||
def _extract_metric_keys(lines: list[str]) -> list[str]:
|
||||
keys: list[str] = []
|
||||
for line in lines:
|
||||
if ":" not in line:
|
||||
continue
|
||||
key = line.split(":", 1)[0].strip()
|
||||
if not key or " " in key:
|
||||
continue
|
||||
if key not in keys:
|
||||
keys.append(key)
|
||||
return keys
|
||||
|
||||
|
||||
def _token_variants(tokens: set[str]) -> set[str]:
    """Augment tokens with naive singular forms (-ies -> -y, -es and -s stripped)."""
    if not tokens:
        return set()
    variants = set(tokens)
    for token in tokens:
        if len(token) <= TOKEN_MIN_LEN:
            continue
        if token.endswith("ies"):
            variants.add(token[:-3] + "y")
        if token.endswith("es"):
            variants.add(token[:-2])
        if token.endswith("s"):
            variants.add(token[:-1])
    return variants
|
||||
|
||||
|
||||
def _parse_key_list(raw: str, allowed: list[str], max_keys: int) -> list[str]:
    """Extract up to *max_keys* allowed key names from an LLM JSON reply."""
    parsed = _parse_json_block(raw, fallback={})
    if isinstance(parsed, list):
        items = parsed
    elif isinstance(parsed, dict):
        items = parsed.get("keys")
    else:
        items = []
    if not isinstance(items, list):
        return []
    allowed_set = set(allowed)
    out: list[str] = []
    for item in items:
        if isinstance(item, str) and item in allowed_set and item not in out:
            out.append(item)
            if len(out) >= max_keys:
                break
    return out
|
||||
|
||||
|
||||
def _chunk_ids_for_keys(chunks: list[dict[str, Any]], keys: list[str]) -> list[str]:
|
||||
if not keys:
|
||||
return []
|
||||
ids: list[str] = []
|
||||
key_set = {f"{key}:" for key in keys}
|
||||
for chunk in chunks:
|
||||
text = str(chunk.get("text") or "")
|
||||
if not text:
|
||||
continue
|
||||
for line in text.splitlines():
|
||||
for key in key_set:
|
||||
if line.startswith(key):
|
||||
cid = chunk.get("id")
|
||||
if cid and cid not in ids:
|
||||
ids.append(cid)
|
||||
break
|
||||
return ids
|
||||
|
||||
|
||||
def _filter_metric_keys(keys: list[str], tokens: set[str]) -> list[str]:
    """Rank keys by how many question tokens appear among their word parts."""
    if not keys or not tokens:
        return []
    wanted = {token.lower() for token in tokens if token and len(token) >= TOKEN_MIN_LEN}
    ranked: list[tuple[int, str]] = []
    for key in keys:
        parts = {part for part in re.split(r"[_\W]+", key.lower()) if part}
        if not parts:
            continue
        overlap = len(parts & wanted)
        if overlap:
            ranked.append((overlap, key))
    ranked.sort(key=lambda pair: (-pair[0], pair[1]))
    return [key for _, key in ranked]
|
||||
|
||||
|
||||
def _metric_key_overlap(keys: list[str], tokens: set[str]) -> bool:
    """True when any key shares a word part with the question tokens."""
    if not keys or not tokens:
        return False
    wanted = {token.lower() for token in tokens if token and len(token) >= TOKEN_MIN_LEN}
    return any(
        {part for part in re.split(r"[_\W]+", key.lower()) if part} & wanted
        for key in keys
    )
|
||||
|
||||
|
||||
def _lines_for_metric_keys(lines: list[str], keys: list[str], max_lines: int = 0) -> list[str]:
|
||||
if not lines or not keys:
|
||||
return []
|
||||
prefixes = {f"{key}:" for key in keys}
|
||||
selected: list[str] = []
|
||||
for line in lines:
|
||||
for prefix in prefixes:
|
||||
if prefix in line:
|
||||
selected.append(line)
|
||||
break
|
||||
if max_lines and len(selected) >= max_lines:
|
||||
break
|
||||
return selected
|
||||
|
||||
|
||||
def _merge_metric_keys(current: list[str], candidates: list[str], max_keys: int) -> list[str]:
|
||||
merged: list[str] = []
|
||||
seen = set()
|
||||
for key in current:
|
||||
if key and key not in seen:
|
||||
merged.append(key)
|
||||
seen.add(key)
|
||||
for key in candidates:
|
||||
if key and key not in seen:
|
||||
merged.append(key)
|
||||
seen.add(key)
|
||||
if len(merged) >= max_keys:
|
||||
break
|
||||
return merged[:max_keys]
|
||||
|
||||
|
||||
def _merge_fact_lines(primary: list[str], fallback: list[str]) -> list[str]:
|
||||
seen = set()
|
||||
merged: list[str] = []
|
||||
for line in primary + fallback:
|
||||
if line in seen:
|
||||
continue
|
||||
seen.add(line)
|
||||
merged.append(line)
|
||||
return merged
|
||||
|
||||
|
||||
def _expand_hottest_line(line: str) -> list[str]:
|
||||
if not line:
|
||||
return []
|
||||
if not line.lower().startswith("hottest:"):
|
||||
return []
|
||||
expanded: list[str] = []
|
||||
payload = line.split("hottest:", 1)[1]
|
||||
for part in payload.split(";"):
|
||||
part = part.strip()
|
||||
if not part or "=" not in part:
|
||||
continue
|
||||
metric, rest = part.split("=", 1)
|
||||
metric = metric.strip()
|
||||
match = re.search(r"(?P<node>[^\s\[]+).*\((?P<value>[^)]+)\)", rest)
|
||||
if not match:
|
||||
continue
|
||||
node = match.group("node").strip()
|
||||
value = match.group("value").strip()
|
||||
class_match = re.search(r"\[(?P<class>[^\]]+)\]", rest)
|
||||
node_class = class_match.group("class").strip() if class_match else ""
|
||||
if node_class:
|
||||
expanded.append(f"hottest_{metric}_node: {node} [{node_class}] ({value})")
|
||||
else:
|
||||
expanded.append(f"hottest_{metric}_node: {node} ({value})")
|
||||
return expanded
|
||||
|
||||
|
||||
def _has_token(text: str, token: str) -> bool:
|
||||
if not text or not token:
|
||||
return False
|
||||
if token == "io":
|
||||
return "i/o" in text or re.search(r"\bio\b", text) is not None
|
||||
return re.search(rf"\b{re.escape(token)}\b", text) is not None
|
||||
|
||||
|
||||
def _hotspot_evidence(summary: dict[str, Any]) -> list[str]:
    """Render 'hotspot.<metric>' fact lines from the snapshot's hottest map.

    Each line carries the node, its hardware class, the metric value, and —
    when known — the node's busiest namespaces.
    """
    hottest = summary.get("hottest") if isinstance(summary.get("hottest"), dict) else {}
    if not hottest:
        return []
    hardware_by_node = summary.get("hardware_by_node") if isinstance(summary.get("hardware_by_node"), dict) else {}
    node_pods_top = summary.get("node_pods_top") if isinstance(summary.get("node_pods_top"), list) else []
    # Map node -> its top namespaces, used to enrich each hotspot line below.
    ns_map = {}
    for item in node_pods_top:
        if not isinstance(item, dict):
            continue
        node = item.get("node")
        namespaces_top = item.get("namespaces_top") if isinstance(item.get("namespaces_top"), list) else []
        ns_map[node] = namespaces_top
    lines: list[str] = []
    for metric, info in hottest.items():
        if not isinstance(info, dict):
            continue
        node = info.get("node")
        value = info.get("value")
        if not node:
            continue
        node_class = hardware_by_node.get(node)
        ns_parts = []
        for entry in ns_map.get(node, [])[:3]:
            # entry is presumably a (namespace, count) pair-like sequence — TODO confirm
            if isinstance(entry, (list, tuple)) and len(entry) >= NS_ENTRY_MIN_LEN:
                ns_parts.append(f"{entry[0]}={entry[1]}")
        ns_text = ", ".join(ns_parts)
        # Numeric values are normalized to two decimals; anything else is stringified.
        value_text = f"{value:.2f}" if isinstance(value, (int, float)) else str(value)
        line = f"hotspot.{metric}: node={node} class={node_class or 'unknown'} value={value_text}"
        if ns_text:
            line += f" namespaces_top={ns_text}"
        lines.append(line)
    return lines
|
||||
|
||||
|
||||
# Export every module-private helper so sibling modules' star-imports see them.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||
197
atlasbot/engine/answerer/retrieval_ext.py
Normal file
197
atlasbot/engine/answerer/retrieval_ext.py
Normal file
@ -0,0 +1,197 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
from atlasbot.llm import prompts
|
||||
from atlasbot.llm.client import parse_json
|
||||
from ._base import *
|
||||
|
||||
|
||||
def _parse_json_block(text: str, *, fallback: dict[str, Any]) -> dict[str, Any]:
    """Parse the first {...} span in *text*; fall back to parsing the whole text."""
    trimmed = text.strip()
    span = re.search(r"\{.*\}", trimmed, flags=re.S)
    return parse_json(span.group(0) if span else trimmed, fallback=fallback)
|
||||
|
||||
|
||||
def _metric_key_tokens(summary_lines: list[str]) -> set[str]:
|
||||
tokens: set[str] = set()
|
||||
for line in summary_lines:
|
||||
if not isinstance(line, str) or ":" not in line:
|
||||
continue
|
||||
key = line.split(":", 1)[0].strip().lower()
|
||||
if not key:
|
||||
continue
|
||||
tokens.add(key)
|
||||
for part in re.split(r"[_\s]+", key):
|
||||
if part:
|
||||
tokens.add(part)
|
||||
return tokens
|
||||
|
||||
|
||||
async def _select_best_candidate(call_llm: Callable[..., Any], question: str, candidates: list[str], plan: ModePlan, tag: str) -> int:
    """Ask the model to pick the best candidate; return its 0-based index.

    Falls back to index 0 when there is at most one candidate or the reply
    cannot be parsed into a valid selection.
    """
    if len(candidates) <= 1:
        return 0
    prompt = (
        prompts.CANDIDATE_SELECT_PROMPT
        + "\nQuestion: "
        + question
        + "\nCandidates:\n"
        + "\n".join([f"{idx+1}) {cand}" for idx, cand in enumerate(candidates)])
    )
    raw = await call_llm(prompts.CANDIDATE_SELECT_SYSTEM, prompt, model=plan.model, tag=tag)
    data = _parse_json_block(raw, fallback={})
    best = data.get("best") if isinstance(data, dict) else None
    # The model answers 1-based; convert and bounds-check before trusting it.
    if isinstance(best, int) and 1 <= best <= len(candidates):
        return best - 1
    return 0
|
||||
|
||||
|
||||
def _dedupe_lines(lines: list[str], limit: int | None = None) -> list[str]:
|
||||
seen: set[str] = set()
|
||||
cleaned: list[str] = []
|
||||
for line in lines:
|
||||
value = (line or "").strip()
|
||||
if not value or value in seen:
|
||||
continue
|
||||
if value.lower().startswith("lexicon_") or value.lower().startswith("units:"):
|
||||
continue
|
||||
cleaned.append(value)
|
||||
seen.add(value)
|
||||
if limit and len(cleaned) >= limit:
|
||||
break
|
||||
return cleaned
|
||||
|
||||
|
||||
def _collect_fact_candidates(selected: list[dict[str, Any]], limit: int) -> list[str]:
    """Flatten chunk texts into non-empty lines, deduped and capped at *limit*."""
    gathered: list[str] = []
    for chunk in selected:
        text = chunk.get("text") if isinstance(chunk, dict) else None
        if isinstance(text, str):
            gathered.extend(line for line in text.splitlines() if line.strip())
    return _dedupe_lines(gathered, limit=limit)
|
||||
|
||||
|
||||
async def _select_best_list(call_llm: Callable[..., Any], question: str, candidates: list[list[str]], plan: ModePlan, tag: str) -> list[str]:
    """Choose the best candidate list via the LLM selector.

    Zero or one candidates is trivial; if the chosen list is empty, fall back
    to the order-preserving union of all candidates.
    """
    if not candidates:
        return []
    if len(candidates) == 1:
        return candidates[0]
    render = ["; ".join(items) for items in candidates]
    best_idx = await _select_best_candidate(call_llm, question, render, plan, tag)
    chosen = candidates[best_idx] if 0 <= best_idx < len(candidates) else candidates[0]
    if not chosen:
        # Safety net: merge every candidate, keeping first-seen order.
        merged: list[str] = []
        for entry in candidates:
            for item in entry:
                if item not in merged:
                    merged.append(item)
        chosen = merged
    return chosen
|
||||
|
||||
|
||||
async def _extract_fact_types(call_llm: Callable[..., Any], question: str, keywords: list[str], plan: ModePlan) -> list[str]:
    """Sample the fast model for fact-type lists and keep the best one (max 10).

    Runs up to plan.metric_retries attempts, then lets the selector choose
    among the non-empty parses.
    """
    prompt = prompts.FACT_TYPES_PROMPT + "\nQuestion: " + question
    if keywords:
        prompt += "\nKeywords: " + ", ".join(keywords)
    candidates: list[list[str]] = []
    attempts = max(plan.metric_retries, 1)
    for _ in range(attempts):
        raw = await call_llm(prompts.FACT_TYPES_SYSTEM, prompt, model=plan.fast_model, tag="fact_types")
        data = _parse_json_block(raw, fallback={})
        items = data.get("fact_types") if isinstance(data, dict) else None
        if not isinstance(items, list):
            continue
        cleaned = _dedupe_lines([str(item) for item in items if isinstance(item, (str, int, float))], limit=10)
        if cleaned:
            candidates.append(cleaned)
    chosen = await _select_best_list(call_llm, question, candidates, plan, "fact_types_select")
    return chosen[:10]
|
||||
|
||||
|
||||
async def _derive_signals(call_llm: Callable[..., Any], question: str, fact_types: list[str], plan: ModePlan) -> list[str]:
    """Derive up to 12 concrete signals from the chosen fact types.

    Same sample-then-select pattern as _extract_fact_types; returns [] when
    there are no fact types to work from.
    """
    if not fact_types:
        return []
    prompt = prompts.SIGNAL_PROMPT.format(question=question, fact_types="; ".join(fact_types))
    candidates: list[list[str]] = []
    attempts = max(plan.metric_retries, 1)
    for _ in range(attempts):
        raw = await call_llm(prompts.SIGNAL_SYSTEM, prompt, model=plan.fast_model, tag="signals")
        data = _parse_json_block(raw, fallback={})
        items = data.get("signals") if isinstance(data, dict) else None
        if not isinstance(items, list):
            continue
        cleaned = _dedupe_lines([str(item) for item in items if isinstance(item, (str, int, float))], limit=12)
        if cleaned:
            candidates.append(cleaned)
    chosen = await _select_best_list(call_llm, question, candidates, plan, "signals_select")
    return chosen[:12]
|
||||
|
||||
|
||||
async def _scan_chunk_for_signals(call_llm: Callable[..., Any], question: str, signals: list[str], chunk_lines: list[str], plan: ModePlan) -> list[str]:
    """Ask the fast model which chunk lines match the signals.

    Only lines present verbatim in ``chunk_lines`` are kept, so the model
    cannot introduce fabricated facts. At most two attempts are made and
    at most 15 lines returned.
    """
    if not signals or not chunk_lines:
        return []
    prompt = prompts.CHUNK_SCAN_PROMPT.format(
        signals="; ".join(signals),
        lines="\n".join(chunk_lines),
    )
    gathered: list[list[str]] = []
    for _ in range(max(1, min(plan.metric_retries, 2))):
        raw = await call_llm(prompts.CHUNK_SCAN_SYSTEM, prompt, model=plan.fast_model, tag="chunk_scan")
        data = _parse_json_block(raw, fallback={})
        items = data.get("lines") if isinstance(data, dict) else None
        if not isinstance(items, list):
            continue
        # Filter against the original chunk so only real lines survive.
        kept = _dedupe_lines([line for line in chunk_lines if line in items], limit=15)
        if kept:
            gathered.append(kept)
    best = await _select_best_list(call_llm, question, gathered, plan, "chunk_scan_select")
    return best[:15]
|
||||
|
||||
|
||||
async def _prune_metric_candidates(call_llm: Callable[..., Any], question: str, candidates: list[str], plan: ModePlan, attempts: int) -> list[str]:
    """Prune metric candidate lines down to at most six via the fast model.

    Each attempt keeps only lines that exist verbatim in ``candidates``;
    the best attempt is selected by ``_select_best_list``.
    """
    if not candidates:
        return []
    prompt = prompts.FACT_PRUNE_PROMPT.format(question=question, candidates="\n".join(candidates), max_lines=6)
    gathered: list[list[str]] = []
    for _ in range(max(attempts, 1)):
        raw = await call_llm(prompts.FACT_PRUNE_SYSTEM, prompt, model=plan.fast_model, tag="fact_prune")
        data = _parse_json_block(raw, fallback={})
        items = data.get("lines") if isinstance(data, dict) else None
        if not isinstance(items, list):
            continue
        kept = _dedupe_lines([line for line in candidates if line in items], limit=6)
        if kept:
            gathered.append(kept)
    best = await _select_best_list(call_llm, question, gathered, plan, "fact_prune_select")
    return best[:6]
|
||||
|
||||
|
||||
async def _select_fact_lines(call_llm: Callable[..., Any], question: str, candidates: list[str], plan: ModePlan, max_lines: int) -> list[str]:
    """Select up to ``max_lines`` fact lines from ``candidates`` via the fast model.

    Like ``_prune_metric_candidates`` but with a caller-supplied cap and the
    retry count taken from the plan.
    """
    if not candidates:
        return []
    prompt = prompts.FACT_PRUNE_PROMPT.format(question=question, candidates="\n".join(candidates), max_lines=max_lines)
    gathered: list[list[str]] = []
    for _ in range(max(plan.metric_retries, 1)):
        raw = await call_llm(prompts.FACT_PRUNE_SYSTEM, prompt, model=plan.fast_model, tag="fact_select")
        data = _parse_json_block(raw, fallback={})
        items = data.get("lines") if isinstance(data, dict) else None
        if not isinstance(items, list):
            continue
        kept = _dedupe_lines([line for line in candidates if line in items], limit=max_lines)
        if kept:
            gathered.append(kept)
    best = await _select_best_list(call_llm, question, gathered, plan, "fact_select_best")
    return best[:max_lines]
|
||||
|
||||
|
||||
# Re-export every single-underscore helper so sibling modules can star-import them.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||
404
atlasbot/engine/answerer/spine.py
Normal file
404
atlasbot/engine/answerer/spine.py
Normal file
@ -0,0 +1,404 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from atlasbot.engine.intent_router import IntentMatch
|
||||
from atlasbot.snapshot.builder import summary_text
|
||||
|
||||
from ._base import *
|
||||
|
||||
|
||||
def _join_context(parts: list[str]) -> str:
|
||||
text = "\n".join([part for part in parts if part])
|
||||
return text.strip()
|
||||
|
||||
|
||||
def _format_metric_value(value: Any) -> str:
|
||||
if isinstance(value, bool):
|
||||
return str(value).lower()
|
||||
if isinstance(value, int):
|
||||
return str(value)
|
||||
if isinstance(value, float):
|
||||
return f"{value:.1f}".rstrip("0").rstrip(".")
|
||||
return str(value)
|
||||
|
||||
|
||||
def _format_history(history: list[dict[str, str]] | None) -> str:
|
||||
if not history:
|
||||
return ""
|
||||
lines = ["Recent conversation (non-authoritative):"]
|
||||
for entry in history[-4:]:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
question = entry.get("q")
|
||||
answer = entry.get("a")
|
||||
role = entry.get("role")
|
||||
content = entry.get("content")
|
||||
if question:
|
||||
lines.append(f"Q: {question}")
|
||||
if answer:
|
||||
lines.append(f"A: {answer}")
|
||||
if role and content:
|
||||
prefix = "Q" if role == "user" else "A"
|
||||
lines.append(f"{prefix}: {content}")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _summary_lines(snapshot: dict[str, Any] | None) -> list[str]:
    """Split the snapshot's summary text into its non-blank lines."""
    text = summary_text(snapshot)
    return [line for line in text.splitlines() if line.strip()] if text else []
|
||||
|
||||
|
||||
def _line_starting_with(lines: list[str], prefix: str) -> str | None:
|
||||
if not lines:
|
||||
return None
|
||||
for line in lines:
|
||||
if line.lower().startswith(prefix.lower()):
|
||||
return line
|
||||
return None
|
||||
|
||||
|
||||
def _spine_lines(lines: list[str]) -> dict[str, str]:
    """Build the fact 'spine' by running every extractor over the summary lines."""
    spine: dict[str, str] = {}
    extractors = (
        _spine_nodes,
        _spine_hardware,
        _spine_hottest,
        _spine_postgres,
        _spine_namespaces,
        _spine_pressure,
    )
    for extract in extractors:
        extract(lines, spine)
    return spine
|
||||
|
||||
|
||||
def _spine_nodes(lines: list[str], spine: dict[str, str]) -> None:
    """Fill the node-count facts in *spine* from the summary lines.

    A combined "nodes:" line serves both keys; otherwise the separate
    total/ready lines are used when present.
    """
    combined = _line_starting_with(lines, "nodes:")
    if combined:
        spine["nodes_count"] = combined
        spine["nodes_ready"] = combined
        return
    total_line = _line_starting_with(lines, "nodes_total:")
    if total_line:
        spine["nodes_count"] = total_line
    ready_line = _line_starting_with(lines, "nodes_ready:")
    if ready_line:
        spine["nodes_ready"] = ready_line
|
||||
|
||||
|
||||
def _spine_hardware(lines: list[str], spine: dict[str, str]) -> None:
    """Record the hardware-grouping line, preferring "hardware_nodes:"."""
    line = _line_starting_with(lines, "hardware_nodes:") or _line_starting_with(lines, "hardware:")
    if line:
        spine["nodes_non_rpi"] = line
|
||||
|
||||
|
||||
def _spine_hottest(lines: list[str], spine: dict[str, str]) -> None:
    """Point every hottest_* fact at the single combined "hottest:" line."""
    line = _line_starting_with(lines, "hottest:")
    if not line:
        return
    for fact_key in ("hottest_cpu", "hottest_ram", "hottest_net", "hottest_io", "hottest_disk"):
        spine[fact_key] = line
|
||||
|
||||
|
||||
def _spine_postgres(lines: list[str], spine: dict[str, str]) -> None:
    """Record postgres connection totals and the hottest-database line."""
    total_line = _line_starting_with(lines, "postgres_connections_total:")
    if total_line:
        spine["postgres_connections"] = total_line
    db_line = _line_starting_with(lines, "postgres:")
    if db_line:
        spine["postgres_hottest"] = db_line
|
||||
|
||||
|
||||
def _spine_namespaces(lines: list[str], spine: dict[str, str]) -> None:
    """Record the namespaces-by-pod-count line, when present."""
    top_line = _line_starting_with(lines, "namespaces_top:")
    if top_line:
        spine["namespace_most_pods"] = top_line
|
||||
|
||||
|
||||
def _spine_pressure(lines: list[str], spine: dict[str, str]) -> None:
    """Record a pressure summary line, falling back to the node-load line."""
    for prefix in ("pressure_nodes:", "node_load_top:"):
        line = _line_starting_with(lines, prefix)
        if line:
            spine["pressure_summary"] = line
            return
|
||||
|
||||
|
||||
def _parse_group_line(line: str) -> dict[str, list[str]]:
|
||||
groups: dict[str, list[str]] = {}
|
||||
if not line:
|
||||
return groups
|
||||
payload = line.split(":", 1)[1] if ":" in line else line
|
||||
for part in payload.split(";"):
|
||||
part = part.strip()
|
||||
if not part or "=" not in part:
|
||||
continue
|
||||
key, value = part.split("=", 1)
|
||||
value = value.strip()
|
||||
nodes: list[str] = []
|
||||
if "(" in value and ")" in value:
|
||||
inner = value[value.find("(") + 1 : value.rfind(")")]
|
||||
nodes = [item.strip() for item in inner.split(",") if item.strip()]
|
||||
if not nodes:
|
||||
cleaned = re.sub(r"^[0-9]+", "", value).strip()
|
||||
nodes = [item.strip() for item in cleaned.split(",") if item.strip()]
|
||||
groups[key.strip()] = nodes
|
||||
return groups
|
||||
|
||||
|
||||
def _parse_hottest(line: str, metric: str) -> str | None:
|
||||
if not line:
|
||||
return None
|
||||
payload = line.split(":", 1)[1] if ":" in line else line
|
||||
for part in payload.split(";"):
|
||||
part = part.strip()
|
||||
if part.startswith(f"{metric}="):
|
||||
return part
|
||||
return None
|
||||
|
||||
|
||||
def _spine_answer(intent: IntentMatch, spine_line: str | None) -> str | None:
    """Turn a spine fact line into a direct answer for the routed intent.

    hottest_* intents are dispatched to the hottest-segment parser; other
    kinds go through a handler table, falling back to the raw line.
    """
    if not spine_line:
        return None
    kind = intent.kind
    if kind.startswith("hottest_"):
        return _spine_hottest_answer(kind, spine_line)
    dispatch = {
        "nodes_count": _spine_nodes_answer,
        "nodes_ready": _spine_nodes_answer,
        "nodes_non_rpi": _spine_non_rpi_answer,
        "hardware_mix": _spine_hardware_answer,
        "postgres_connections": _spine_postgres_answer,
        "postgres_hottest": _spine_postgres_answer,
        "namespace_most_pods": _spine_namespace_answer,
        "pressure_summary": _spine_pressure_answer,
    }
    handler = dispatch.get(kind)
    return handler(spine_line) if handler else spine_line
|
||||
|
||||
|
||||
def _spine_nodes_answer(line: str) -> str:
|
||||
return line
|
||||
|
||||
|
||||
def _spine_non_rpi_answer(line: str) -> str:
    """List nodes from every hardware group whose key is not rpi*; else echo."""
    groups = _parse_group_line(line)
    others = [
        node
        for key, nodes in groups.items()
        if not key.lower().startswith("rpi")
        for node in nodes
    ]
    if others:
        return "Non-Raspberry Pi nodes: " + ", ".join(others) + "."
    return line
|
||||
|
||||
|
||||
def _spine_hardware_answer(line: str) -> str:
|
||||
return line
|
||||
|
||||
|
||||
def _spine_hottest_answer(kind: str, line: str) -> str:
    """Answer a hottest_<metric> intent with the matching segment, else echo."""
    metric = kind.split("_", 1)[1]
    return _parse_hottest(line, metric) or line
|
||||
|
||||
|
||||
def _spine_postgres_answer(line: str) -> str:
|
||||
return line
|
||||
|
||||
|
||||
def _spine_namespace_answer(line: str) -> str:
|
||||
payload = line.split(":", 1)[1] if ":" in line else line
|
||||
top = payload.split(";")[0].strip()
|
||||
if top:
|
||||
return f"Namespace with most pods: {top}."
|
||||
return line
|
||||
|
||||
|
||||
def _spine_pressure_answer(line: str) -> str:
|
||||
return line
|
||||
|
||||
|
||||
def _spine_from_summary(summary: dict[str, Any]) -> dict[str, str]:
    """Build the fact spine straight from the structured summary dict."""
    if not isinstance(summary, dict) or not summary:
        return {}
    spine: dict[str, str] = {}
    extractors = (
        _spine_from_counts,
        _spine_from_hardware,
        _spine_from_hottest,
        _spine_from_postgres,
        _spine_from_namespace_pods,
        _spine_from_pressure,
    )
    for extract in extractors:
        spine.update(extract(summary))
    return spine
|
||||
|
||||
|
||||
def _spine_from_counts(summary: dict[str, Any]) -> dict[str, str]:
|
||||
counts = summary.get("counts") if isinstance(summary.get("counts"), dict) else {}
|
||||
inventory = summary.get("inventory") if isinstance(summary.get("inventory"), dict) else {}
|
||||
nodes = summary.get("nodes") if isinstance(summary.get("nodes"), dict) else {}
|
||||
workers = inventory.get("workers") if isinstance(inventory.get("workers"), dict) else {}
|
||||
total = nodes.get("total")
|
||||
ready = nodes.get("ready")
|
||||
not_ready = nodes.get("not_ready")
|
||||
if total is None:
|
||||
total = counts.get("nodes_total")
|
||||
if ready is None:
|
||||
ready = counts.get("nodes_ready")
|
||||
if not_ready is None and isinstance(inventory.get("not_ready_names"), list):
|
||||
not_ready = len(inventory.get("not_ready_names") or [])
|
||||
workers_ready = workers.get("ready")
|
||||
workers_total = workers.get("total")
|
||||
if total is None and ready is None and not_ready is None:
|
||||
return {}
|
||||
parts = []
|
||||
if total is not None:
|
||||
parts.append(f"total={int(total)}")
|
||||
if ready is not None:
|
||||
parts.append(f"ready={int(ready)}")
|
||||
if not_ready is not None:
|
||||
parts.append(f"not_ready={int(not_ready)}")
|
||||
if workers_total is not None and workers_ready is not None:
|
||||
parts.append(f"workers_ready={int(workers_ready)}/{int(workers_total)}")
|
||||
line = "nodes: " + ", ".join(parts)
|
||||
return {"nodes_count": line, "nodes_ready": line}
|
||||
|
||||
|
||||
def _spine_from_hardware(summary: dict[str, Any]) -> dict[str, str]:
|
||||
hardware = summary.get("hardware") if isinstance(summary.get("hardware"), dict) else {}
|
||||
if not hardware:
|
||||
return {}
|
||||
parts = []
|
||||
for key, nodes in hardware.items():
|
||||
if not isinstance(nodes, list):
|
||||
continue
|
||||
node_list = ", ".join(str(n) for n in nodes if n)
|
||||
if node_list:
|
||||
parts.append(f"{key}=({node_list})")
|
||||
if not parts:
|
||||
return {}
|
||||
return {"nodes_non_rpi": "hardware: " + "; ".join(parts)}
|
||||
|
||||
|
||||
def _spine_from_hottest(summary: dict[str, Any]) -> dict[str, str]:
    """Build hottest_<metric> fact lines from the summary's hottest data.

    Entries from ``summary["top"]["node_hottest"]`` fill in metrics missing
    from ``summary["hottest"]``. Works on a copy so the caller's summary is
    never mutated (the original merged into ``summary["hottest"]`` in place).
    Only dict entries with a node/label produce output lines.
    """
    hottest = summary.get("hottest") if isinstance(summary.get("hottest"), dict) else {}
    top = summary.get("top") if isinstance(summary.get("top"), dict) else {}
    top_hottest = top.get("node_hottest") if isinstance(top.get("node_hottest"), dict) else {}
    # Merge into a fresh dict: never mutate the caller's summary structure.
    merged = dict(hottest)
    for key, value in top_hottest.items():
        if key not in merged and value is not None:
            merged[key] = value
    if not merged:
        return {}
    mapping: dict[str, str] = {}
    for key in ("cpu", "ram", "net", "io", "disk"):
        entry = merged.get(key)
        if not isinstance(entry, dict):
            continue
        node = entry.get("node") or entry.get("label") or ""
        value = entry.get("value")
        if node:
            mapping[f"hottest_{key}"] = f"{key}={node} ({_format_metric_value(value)})"
    return mapping
|
||||
|
||||
|
||||
def _spine_from_postgres(summary: dict[str, Any]) -> dict[str, str]:
|
||||
postgres = summary.get("postgres") if isinstance(summary.get("postgres"), dict) else {}
|
||||
if not postgres:
|
||||
top = summary.get("top") if isinstance(summary.get("top"), dict) else {}
|
||||
postgres = top.get("postgres") if isinstance(top.get("postgres"), dict) else {}
|
||||
if not postgres:
|
||||
return {}
|
||||
used = postgres.get("used")
|
||||
max_conn = postgres.get("max")
|
||||
hottest = postgres.get("hottest_db") if isinstance(postgres.get("hottest_db"), dict) else {}
|
||||
hottest_label = hottest.get("label") or ""
|
||||
facts: dict[str, str] = {}
|
||||
if used is not None and max_conn is not None:
|
||||
facts["postgres_connections"] = f"postgres_connections_total: used={int(used)}, max={int(max_conn)}"
|
||||
if hottest_label:
|
||||
facts["postgres_hottest"] = f"postgres_hottest_db: {hottest_label}"
|
||||
return facts
|
||||
|
||||
|
||||
def _spine_from_namespace_pods(summary: dict[str, Any]) -> dict[str, str]:
|
||||
pods = summary.get("namespace_pods") if isinstance(summary.get("namespace_pods"), list) else []
|
||||
if not pods:
|
||||
top = summary.get("top") if isinstance(summary.get("top"), dict) else {}
|
||||
pods = top.get("namespace_pods") if isinstance(top.get("namespace_pods"), list) else []
|
||||
if not pods:
|
||||
return {}
|
||||
best_name = ""
|
||||
best_value = None
|
||||
for entry in pods:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
name = entry.get("namespace") or entry.get("name") or entry.get("label") or ""
|
||||
value = entry.get("pods")
|
||||
if value is None:
|
||||
value = entry.get("pods_total")
|
||||
if value is None:
|
||||
value = entry.get("value")
|
||||
try:
|
||||
numeric = float(value)
|
||||
except (TypeError, ValueError):
|
||||
numeric = None
|
||||
if name and numeric is not None and (best_value is None or numeric > best_value):
|
||||
best_name = name
|
||||
best_value = numeric
|
||||
if best_name:
|
||||
return {"namespace_most_pods": f"namespace_most_pods: {best_name} ({int(best_value or 0)} pods)"}
|
||||
return {}
|
||||
|
||||
|
||||
def _spine_from_pressure(summary: dict[str, Any]) -> dict[str, str]:
|
||||
pressure = summary.get("pressure_summary") if isinstance(summary.get("pressure_summary"), dict) else {}
|
||||
if not pressure:
|
||||
pressure = summary.get("pressure_nodes") if isinstance(summary.get("pressure_nodes"), dict) else {}
|
||||
if not pressure:
|
||||
return {}
|
||||
total = pressure.get("total")
|
||||
unsched = pressure.get("unschedulable")
|
||||
names = pressure.get("names") if isinstance(pressure.get("names"), list) else []
|
||||
parts = []
|
||||
if total is None and names:
|
||||
total = len([name for name in names if name])
|
||||
if total is not None:
|
||||
parts.append(f"total={int(total)}")
|
||||
if unsched is not None:
|
||||
parts.append(f"unschedulable={int(unsched)}")
|
||||
if parts:
|
||||
return {"pressure_summary": "pressure_nodes: " + ", ".join(parts)}
|
||||
return {}
|
||||
|
||||
|
||||
def _spine_fallback(intent: IntentMatch, lines: list[str]) -> str | None:
|
||||
if not lines:
|
||||
return None
|
||||
keywords = {
|
||||
"nodes_count": ("nodes:", "nodes_total:"),
|
||||
"nodes_ready": ("nodes:", "nodes_ready:"),
|
||||
"postgres_hottest": ("postgres_hottest", "hottest_db", "postgres"),
|
||||
"namespace_most_pods": ("namespace", "pods", "namespaces_top"),
|
||||
"pressure_summary": ("pressure", "node_load_top"),
|
||||
}
|
||||
for token in keywords.get(intent.kind, ("",)):
|
||||
if not token:
|
||||
continue
|
||||
for line in lines:
|
||||
if token in line:
|
||||
return line
|
||||
return None
|
||||
|
||||
|
||||
# Re-export every single-underscore helper so sibling modules can star-import them.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||
484
atlasbot/engine/answerer/workflow.py
Normal file
484
atlasbot/engine/answerer/workflow.py
Normal file
@ -0,0 +1,484 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import math
|
||||
import re
|
||||
import time
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
from atlasbot.engine.intent_router import route_intent
|
||||
from atlasbot.llm import prompts
|
||||
from atlasbot.llm.client import build_messages
|
||||
from atlasbot.snapshot.builder import build_summary
|
||||
|
||||
from ._base import *
|
||||
from .common import *
|
||||
from .factsheet import *
|
||||
from .post import *
|
||||
from .post_ext import *
|
||||
from .retrieval import *
|
||||
from .retrieval_ext import *
|
||||
from .spine import *
|
||||
from .workflow_post import finalize_answer
|
||||
|
||||
async def run_answer(engine: Any, question: str, *, mode: str, history: list[dict[str, str]] | None = None, observer: Callable[[str, str], None] | None = None, conversation_id: str | None = None, snapshot_pin: bool | None = None) -> AnswerResult: # noqa: C901
|
||||
"""Answer a question using the staged reasoning pipeline."""
|
||||
|
||||
settings = engine._settings
|
||||
question = (question or "").strip()
|
||||
if not question:
|
||||
return AnswerResult("I need a question to answer.", _default_scores(), {"mode": mode})
|
||||
if mode == "stock":
|
||||
return await engine._answer_stock(question)
|
||||
|
||||
limitless = "run limitless" in question.lower()
|
||||
if limitless:
|
||||
question = re.sub(r"(?i)run limitless", "", question).strip()
|
||||
|
||||
plan = _mode_plan(settings, mode)
|
||||
call_limit = _llm_call_limit(settings, mode)
|
||||
call_cap = math.ceil(call_limit * settings.llm_limit_multiplier)
|
||||
call_count = 0
|
||||
limit_hit = False
|
||||
time_budget_hit = False
|
||||
started = time.monotonic()
|
||||
time_budget_sec = _mode_time_budget(settings, mode) if not limitless else 0.0
|
||||
|
||||
debug_tags = {
|
||||
"route",
|
||||
"decompose",
|
||||
"chunk_score",
|
||||
"chunk_select",
|
||||
"fact_select",
|
||||
"synth",
|
||||
"subanswer",
|
||||
"tool",
|
||||
"followup",
|
||||
"select_claims",
|
||||
"evidence_fix",
|
||||
}
|
||||
|
||||
async def call_llm(system: str, prompt: str, *, context: str | None = None, model: str | None = None, tag: str = "") -> str:
|
||||
nonlocal call_count, limit_hit, time_budget_hit
|
||||
if not limitless and call_count >= call_cap:
|
||||
limit_hit = True
|
||||
raise LLMLimitReached("llm_limit")
|
||||
timeout_sec = None
|
||||
if not limitless and time_budget_sec > 0:
|
||||
time_left = time_budget_sec - (time.monotonic() - started)
|
||||
if time_left <= 0:
|
||||
time_budget_hit = True
|
||||
raise LLMTimeBudgetExceeded("time_budget")
|
||||
timeout_sec = min(settings.ollama_timeout_sec, time_left)
|
||||
call_count += 1
|
||||
messages = build_messages(system, prompt, context=context)
|
||||
try:
|
||||
llm_call = engine._llm.chat(messages, model=model or plan.model, timeout_sec=timeout_sec)
|
||||
if timeout_sec is not None:
|
||||
response = await asyncio.wait_for(llm_call, timeout=max(0.001, timeout_sec))
|
||||
else:
|
||||
response = await llm_call
|
||||
except TimeoutError as exc:
|
||||
time_budget_hit = True
|
||||
raise LLMTimeBudgetExceeded("time_budget") from exc
|
||||
log.info(
|
||||
"atlasbot_llm_call",
|
||||
extra={"extra": {"mode": mode, "tag": tag, "call": call_count, "limit": call_cap}},
|
||||
)
|
||||
if settings.debug_pipeline and tag in debug_tags:
|
||||
_debug_pipeline_log(settings, f"llm_raw_{tag}", str(response)[:1200])
|
||||
return response
|
||||
|
||||
state = engine._get_state(conversation_id)
|
||||
pin_snapshot = bool(snapshot_pin) or settings.snapshot_pin_enabled
|
||||
snapshot = engine._snapshot.get()
|
||||
snapshot_used = state.snapshot if pin_snapshot and state and state.snapshot else snapshot
|
||||
summary = build_summary(snapshot_used)
|
||||
summary_lines = _summary_lines(snapshot_used)
|
||||
allowed_nodes = _allowed_nodes(summary)
|
||||
allowed_namespaces = _allowed_namespaces(summary)
|
||||
spine = _spine_from_summary(summary) or _spine_lines(summary_lines)
|
||||
metric_tokens = _metric_key_tokens(summary_lines)
|
||||
global_facts = _global_facts(summary_lines)
|
||||
kb_summary = engine._kb.summary()
|
||||
runbooks = engine._kb.runbook_titles(limit=6)
|
||||
runbook_paths = engine._kb.runbook_paths(limit=10)
|
||||
history_ctx = _format_history(history)
|
||||
lexicon_ctx = _lexicon_context(summary)
|
||||
|
||||
key_facts: list[str] = []
|
||||
metric_facts: list[str] = []
|
||||
facts_used: list[str] = []
|
||||
reply = ""
|
||||
scores = _default_scores()
|
||||
claims: list[ClaimItem] = []
|
||||
classify: dict[str, Any] = {}
|
||||
tool_hint: dict[str, Any] | None = None
|
||||
|
||||
try:
|
||||
if mode in {"quick", "fast", "smart", "genius"} and not limitless:
|
||||
if observer:
|
||||
observer("factsheet", "building fact sheet")
|
||||
if _is_plain_math_question(question):
|
||||
reply = (
|
||||
"I focus on Titan cluster operations. Ask me about cluster health, nodes, workloads, "
|
||||
"namespaces, storage, or alerts."
|
||||
)
|
||||
return AnswerResult(reply, _default_scores(), _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
|
||||
kb_lines = (
|
||||
engine._kb.chunk_lines(max_files=plan.kb_max_files, max_chars=_factsheet_kb_chars(mode, plan.kb_max_chars))
|
||||
if engine._kb
|
||||
else []
|
||||
)
|
||||
fact_lines = _quick_fact_sheet_lines(question, summary_lines, kb_lines, limit=_factsheet_line_limit(mode))
|
||||
classify = {
|
||||
"needs_snapshot": True,
|
||||
"needs_kb": bool(kb_lines),
|
||||
"question_type": f"{mode}_factsheet",
|
||||
"answer_style": "direct" if mode in {"quick", "fast"} else "concise",
|
||||
"follow_up": False,
|
||||
}
|
||||
heuristic_reply = _quick_fact_sheet_heuristic_answer(question, fact_lines)
|
||||
if heuristic_reply:
|
||||
return AnswerResult(heuristic_reply, _default_scores(), _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
|
||||
if observer:
|
||||
observer("quick", "answering from fact sheet")
|
||||
quick_context = _quick_fact_sheet_text(fact_lines)
|
||||
quick_prompt = "Question: " + question + "\nAnswer using only the Fact Sheet. " + _factsheet_instruction(mode)
|
||||
reply = await call_llm(prompts.ANSWER_SYSTEM, quick_prompt, context=quick_context, model=_factsheet_model(mode, plan), tag=f"{mode}_factsheet")
|
||||
reply = _strip_followup_meta(reply)
|
||||
return AnswerResult(reply, _default_scores(), _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
|
||||
|
||||
if observer:
|
||||
observer("normalize", "normalizing")
|
||||
normalize_prompt = prompts.NORMALIZE_PROMPT + "\nQuestion: " + question
|
||||
normalize_raw = await call_llm(prompts.NORMALIZE_SYSTEM, normalize_prompt, context=lexicon_ctx, model=plan.fast_model, tag="normalize")
|
||||
normalize = _parse_json_block(normalize_raw, fallback={"normalized": question, "keywords": []})
|
||||
normalized = str(normalize.get("normalized") or question).strip() or question
|
||||
keywords = normalize.get("keywords") or []
|
||||
_debug_pipeline_log(settings, "normalize_parsed", {"normalized": normalized, "keywords": keywords})
|
||||
keyword_tokens = _extract_keywords(question, normalized, sub_questions=[], keywords=keywords)
|
||||
question_tokens = _extract_question_tokens(normalized)
|
||||
|
||||
if observer:
|
||||
observer("route", "routing")
|
||||
route_prompt = prompts.ROUTE_PROMPT + "\nQuestion: " + normalized + "\nKeywords: " + json.dumps(keywords)
|
||||
route_raw = await call_llm(prompts.ROUTE_SYSTEM, route_prompt, context=_join_context([kb_summary, lexicon_ctx]), model=plan.fast_model, tag="route")
|
||||
classify = _parse_json_block(route_raw, fallback={})
|
||||
classify.setdefault("needs_snapshot", True)
|
||||
classify.setdefault("answer_style", "direct")
|
||||
classify.setdefault("follow_up", False)
|
||||
classify.setdefault("focus_entity", "unknown")
|
||||
classify.setdefault("focus_metric", "unknown")
|
||||
if metric_tokens and keyword_tokens and any(token in metric_tokens for token in keyword_tokens):
|
||||
classify["needs_snapshot"] = True
|
||||
intent = route_intent(normalized)
|
||||
if intent:
|
||||
classify["needs_snapshot"] = True
|
||||
classify["question_type"] = "metric"
|
||||
_debug_pipeline_log(settings, "route_parsed", {"classify": classify, "normalized": normalized})
|
||||
lowered_question = f"{question} {normalized}".lower()
|
||||
force_metric = bool(re.search(r"\bhow many\b|\bcount\b|\btotal\b", lowered_question))
|
||||
if any(term in lowered_question for term in ("postgres", "connections", "pvc", "ready")):
|
||||
force_metric = True
|
||||
|
||||
if intent:
|
||||
spine_line = spine.get(intent.kind) if isinstance(spine, dict) else None
|
||||
if not spine_line:
|
||||
spine_line = _spine_fallback(intent, summary_lines)
|
||||
spine_answer = _spine_answer(intent, spine_line)
|
||||
if spine_line:
|
||||
key_facts = _merge_fact_lines([spine_line], key_facts)
|
||||
metric_facts = _merge_fact_lines([spine_line], metric_facts)
|
||||
if spine_answer and mode in {"fast", "quick"}:
|
||||
return AnswerResult(spine_answer, _default_scores(), _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
|
||||
|
||||
cluster_terms = (
|
||||
"atlas",
|
||||
"cluster",
|
||||
"node",
|
||||
"nodes",
|
||||
"namespace",
|
||||
"pod",
|
||||
"workload",
|
||||
"k8s",
|
||||
"kubernetes",
|
||||
"postgres",
|
||||
"database",
|
||||
"db",
|
||||
"connections",
|
||||
"cpu",
|
||||
"ram",
|
||||
"memory",
|
||||
"network",
|
||||
"io",
|
||||
"disk",
|
||||
"pvc",
|
||||
"storage",
|
||||
)
|
||||
has_cluster_terms = any(term in lowered_question for term in cluster_terms)
|
||||
if has_cluster_terms:
|
||||
classify["needs_snapshot"] = True
|
||||
lowered_norm = normalized.lower()
|
||||
if ("namespace" in lowered_norm and ("pod" in lowered_norm or "pods" in lowered_norm)) or re.search(r"\bmost\s+pods\b", lowered_norm) or re.search(r"\bpods\s+running\b", lowered_norm):
|
||||
classify["question_type"] = "metric"
|
||||
classify["needs_snapshot"] = True
|
||||
if re.search(r"\b(how many|count|number of|list)\b", lowered_question):
|
||||
classify["question_type"] = "metric"
|
||||
if any(term in lowered_question for term in ("postgres", "connections", "db")):
|
||||
classify["question_type"] = "metric"
|
||||
classify["needs_snapshot"] = True
|
||||
if any(term in lowered_question for term in ("pvc", "persistentvolume", "persistent volume", "storage")):
|
||||
if classify.get("question_type") not in {"metric", "diagnostic"}:
|
||||
classify["question_type"] = "metric"
|
||||
classify["needs_snapshot"] = True
|
||||
if "ready" in lowered_question and classify.get("question_type") not in {"metric", "diagnostic"}:
|
||||
classify["question_type"] = "diagnostic"
|
||||
hottest_terms = ("hottest", "highest", "lowest", "most")
|
||||
metric_terms = ("cpu", "ram", "memory", "net", "network", "io", "disk", "load", "usage", "pod", "pods", "namespace")
|
||||
if any(term in lowered_question for term in hottest_terms) and any(term in lowered_question for term in metric_terms):
|
||||
classify["question_type"] = "metric"
|
||||
baseline_terms = ("baseline", "delta", "trend", "increase", "decrease", "drop", "spike", "regression", "change")
|
||||
if any(term in lowered_question for term in baseline_terms) and any(term in lowered_question for term in metric_terms):
|
||||
classify["question_type"] = "metric"
|
||||
classify["needs_snapshot"] = True
|
||||
|
||||
if not classify.get("follow_up") and state and state.claims:
|
||||
follow_terms = ("there", "that", "those", "these", "it", "them", "that one", "this", "former", "latter")
|
||||
is_metric_query = force_metric or classify.get("question_type") in {"metric", "diagnostic"}
|
||||
if not is_metric_query and (
|
||||
any(term in lowered_question for term in follow_terms)
|
||||
or (len(normalized.split()) <= FOLLOWUP_SHORT_WORDS and not has_cluster_terms)
|
||||
):
|
||||
classify["follow_up"] = True
|
||||
|
||||
if classify.get("follow_up") and state and state.claims:
|
||||
if observer:
|
||||
observer("followup", "answering follow-up")
|
||||
reply = await engine._answer_followup(question, state, summary, classify, plan, call_llm)
|
||||
scores = await engine._score_answer(question, reply, plan, call_llm)
|
||||
return AnswerResult(reply, scores, _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
|
||||
|
||||
if observer:
|
||||
observer("decompose", "decomposing")
|
||||
decompose_prompt = prompts.DECOMPOSE_PROMPT.format(max_parts=plan.max_subquestions * 2)
|
||||
decompose_raw = await call_llm(prompts.DECOMPOSE_SYSTEM, decompose_prompt + "\nQuestion: " + normalized, context=lexicon_ctx, model=plan.fast_model if mode == "quick" else plan.model, tag="decompose")
|
||||
parts = _parse_json_list(decompose_raw)
|
||||
sub_questions = _select_subquestions(parts, normalized, plan.max_subquestions)
|
||||
_debug_pipeline_log(settings, "decompose_parsed", {"sub_questions": sub_questions})
|
||||
keyword_tokens = _extract_keywords(question, normalized, sub_questions=sub_questions, keywords=keywords)
|
||||
|
||||
snapshot_context = ""
|
||||
signal_tokens: list[str] = []
|
||||
if classify.get("needs_snapshot"):
|
||||
if observer:
|
||||
observer("retrieve", "scoring chunks")
|
||||
chunks = _chunk_lines(summary_lines, plan.chunk_lines)
|
||||
if plan.use_raw_snapshot:
|
||||
raw_chunks = _raw_snapshot_chunks(snapshot_used)
|
||||
if raw_chunks:
|
||||
chunks.extend(raw_chunks)
|
||||
kb_lines = engine._kb.chunk_lines(max_files=plan.kb_max_files, max_chars=plan.kb_max_chars) if engine._kb else []
|
||||
if kb_lines:
|
||||
kb_chunks = _chunk_lines(kb_lines, plan.chunk_lines)
|
||||
for idx, chunk in enumerate(kb_chunks):
|
||||
chunk["id"] = f"k{idx}"
|
||||
chunks.extend(kb_chunks)
|
||||
metric_keys: list[str] = []
|
||||
must_chunk_ids: list[str] = []
|
||||
metric_task = None
|
||||
if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and summary_lines:
|
||||
metric_ctx = {"question": normalized, "sub_questions": sub_questions, "keywords": keywords, "keyword_tokens": keyword_tokens, "summary_lines": summary_lines}
|
||||
metric_task = asyncio.create_task(_select_metric_chunks(call_llm, metric_ctx, chunks, plan))
|
||||
scored_task = asyncio.create_task(_score_chunks(call_llm, chunks, normalized, sub_questions, plan))
|
||||
if metric_task:
|
||||
metric_keys, must_chunk_ids = await metric_task
|
||||
scored = await scored_task
|
||||
selected = _select_chunks(chunks, scored, plan, keyword_tokens, must_chunk_ids)
|
||||
fact_candidates = _collect_fact_candidates(selected, limit=plan.max_subquestions * 12)
|
||||
key_facts = await _select_fact_lines(call_llm, normalized, fact_candidates, plan, max_lines=max(4, plan.max_subquestions * 2))
|
||||
metric_facts = []
|
||||
if classify.get("question_type") in {"metric", "diagnostic"} or force_metric:
|
||||
global_metric_facts: list[str] = []
|
||||
if global_facts:
|
||||
global_metric_facts = await _select_fact_lines(call_llm, normalized, global_facts, plan, max_lines=min(2, max(1, plan.max_subquestions)))
|
||||
if not global_metric_facts and (keyword_tokens or question_tokens):
|
||||
tokens = {tok for tok in (keyword_tokens or question_tokens) if tok and tok not in GENERIC_METRIC_TOKENS}
|
||||
global_metric_facts = _rank_metric_lines(global_facts, tokens, max_lines=2)
|
||||
if global_metric_facts:
|
||||
key_facts = _merge_fact_lines(global_metric_facts, key_facts)
|
||||
all_tokens = _merge_tokens(signal_tokens, keyword_tokens, question_tokens)
|
||||
if plan.use_deep_retrieval:
|
||||
if observer:
|
||||
observer("retrieve", "extracting fact types")
|
||||
fact_types = await _extract_fact_types(call_llm, normalized, keyword_tokens, plan)
|
||||
if observer:
|
||||
observer("retrieve", "deriving signals")
|
||||
signals = await _derive_signals(call_llm, normalized, fact_types, plan)
|
||||
if isinstance(signals, list):
|
||||
signal_tokens = [str(item) for item in signals if item]
|
||||
all_tokens = _merge_tokens(signal_tokens, keyword_tokens, question_tokens)
|
||||
if observer:
|
||||
observer("retrieve", "scanning chunks")
|
||||
candidate_lines: list[str] = []
|
||||
if signals:
|
||||
for chunk in selected:
|
||||
chunk_lines = chunk["text"].splitlines()
|
||||
if not chunk_lines:
|
||||
continue
|
||||
hits = await _scan_chunk_for_signals(call_llm, normalized, signals, chunk_lines, plan)
|
||||
if hits:
|
||||
candidate_lines.extend(hits)
|
||||
candidate_lines = list(dict.fromkeys(candidate_lines))
|
||||
if candidate_lines:
|
||||
if observer:
|
||||
observer("retrieve", "pruning candidates")
|
||||
metric_facts = await _prune_metric_candidates(call_llm, normalized, candidate_lines, plan, plan.metric_retries)
|
||||
if metric_facts:
|
||||
key_facts = _merge_fact_lines(metric_facts, key_facts)
|
||||
if settings.debug_pipeline:
|
||||
_debug_pipeline_log(settings, "metric_facts_selected", {"facts": metric_facts})
|
||||
if not metric_facts:
|
||||
if observer:
|
||||
observer("retrieve", "fallback metric selection")
|
||||
token_set = {tok for tok in all_tokens if tok and tok not in GENERIC_METRIC_TOKENS}
|
||||
fallback_candidates = _rank_metric_lines(summary_lines, token_set, max_lines=200)
|
||||
if fallback_candidates:
|
||||
metric_facts = await _select_fact_lines(call_llm, normalized, fallback_candidates, plan, max_lines=max(2, plan.max_subquestions))
|
||||
if not metric_facts and fallback_candidates:
|
||||
metric_facts = fallback_candidates[: max(2, plan.max_subquestions)]
|
||||
if metric_keys:
|
||||
key_lines = _lines_for_metric_keys(summary_lines, metric_keys, max_lines=plan.max_subquestions * 3)
|
||||
if key_lines:
|
||||
metric_facts = _merge_fact_lines(key_lines, metric_facts)
|
||||
if metric_facts:
|
||||
metric_cover_tokens = [tok for tok in keyword_tokens if tok and tok not in GENERIC_METRIC_TOKENS]
|
||||
if not metric_cover_tokens:
|
||||
metric_cover_tokens = [tok for tok in question_tokens if tok and tok not in GENERIC_METRIC_TOKENS]
|
||||
metric_facts = _ensure_token_coverage(metric_facts, metric_cover_tokens or all_tokens, summary_lines, max_add=plan.max_subquestions)
|
||||
if metric_cover_tokens:
|
||||
ranked_metric_lines = _rank_metric_lines(summary_lines, set(metric_cover_tokens), max_lines=max(1, plan.max_subquestions))
|
||||
if ranked_metric_lines:
|
||||
metric_facts = _merge_fact_lines(ranked_metric_lines, metric_facts)
|
||||
if metric_facts and not _has_keyword_overlap(metric_facts, keyword_tokens):
|
||||
best_line = _best_keyword_line(summary_lines, keyword_tokens)
|
||||
if best_line:
|
||||
metric_facts = _merge_fact_lines([best_line], metric_facts)
|
||||
if metric_facts:
|
||||
key_facts = _merge_fact_lines(metric_facts, key_facts)
|
||||
if global_metric_facts:
|
||||
metric_facts = _merge_fact_lines(global_metric_facts, metric_facts)
|
||||
if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and not metric_facts and key_facts:
|
||||
metric_facts = key_facts
|
||||
if key_facts:
|
||||
key_facts = _ensure_token_coverage(key_facts, _merge_tokens(keyword_tokens, question_tokens), summary_lines, max_add=plan.max_subquestions)
|
||||
facts_used = list(dict.fromkeys(key_facts)) if key_facts else list(dict.fromkeys(metric_facts))
|
||||
snapshot_context = "ClusterSnapshot:\n" + "\n".join([chunk["text"] for chunk in selected])
|
||||
combined_facts = _merge_fact_lines(global_facts, key_facts) if global_facts else key_facts
|
||||
if combined_facts:
|
||||
snapshot_context = "KeyFacts:\n" + "\n".join(combined_facts) + "\n\n" + snapshot_context
|
||||
|
||||
context = _join_context([kb_summary, _format_runbooks(runbooks), snapshot_context, history_ctx if classify.get("follow_up") else ""])
|
||||
|
||||
if plan.use_tool and classify.get("needs_tool"):
|
||||
if observer:
|
||||
observer("tool", "suggesting tools")
|
||||
tool_prompt = prompts.TOOL_PROMPT + "\nQuestion: " + normalized
|
||||
tool_raw = await call_llm(prompts.TOOL_SYSTEM, tool_prompt, context=context, model=plan.fast_model, tag="tool")
|
||||
tool_hint = _parse_json_block(tool_raw, fallback={})
|
||||
|
||||
if observer:
|
||||
observer("subanswers", "drafting subanswers")
|
||||
async def _subanswer_for(subq: str) -> str:
|
||||
sub_prompt = prompts.SUBANSWER_PROMPT + "\nQuestion: " + subq
|
||||
if plan.subanswer_retries > 1:
|
||||
candidates = await _gather_limited(
|
||||
[call_llm(prompts.ANSWER_SYSTEM, sub_prompt, context=context, model=plan.model, tag="subanswer") for _ in range(plan.subanswer_retries)],
|
||||
plan.parallelism,
|
||||
)
|
||||
best_idx = await _select_best_candidate(call_llm, subq, candidates, plan, "subanswer_select")
|
||||
return candidates[best_idx]
|
||||
return await call_llm(prompts.ANSWER_SYSTEM, sub_prompt, context=context, model=plan.model, tag="subanswer")
|
||||
|
||||
subanswers: list[str] = []
|
||||
if plan.parallelism > 1 and len(sub_questions) > 1:
|
||||
subanswers = await _gather_limited([_subanswer_for(subq) for subq in sub_questions], plan.parallelism)
|
||||
else:
|
||||
for subq in sub_questions:
|
||||
subanswers.append(await _subanswer_for(subq))
|
||||
|
||||
if observer:
|
||||
observer("synthesize", "synthesizing")
|
||||
reply, scores, claims = await finalize_answer(
|
||||
engine=engine,
|
||||
call_llm=call_llm,
|
||||
normalized=normalized,
|
||||
subanswers=subanswers,
|
||||
context=context,
|
||||
classify=classify,
|
||||
plan=plan,
|
||||
summary=summary,
|
||||
summary_lines=summary_lines,
|
||||
metric_facts=metric_facts,
|
||||
key_facts=key_facts,
|
||||
facts_used=facts_used,
|
||||
allowed_nodes=allowed_nodes,
|
||||
allowed_namespaces=allowed_namespaces,
|
||||
runbook_paths=runbook_paths,
|
||||
lowered_question=lowered_question,
|
||||
force_metric=force_metric,
|
||||
keyword_tokens=keyword_tokens,
|
||||
question_tokens=question_tokens,
|
||||
snapshot_context=snapshot_context,
|
||||
observer=observer,
|
||||
mode=mode,
|
||||
metric_keys=metric_keys if 'metric_keys' in locals() else None,
|
||||
)
|
||||
|
||||
|
||||
except LLMTimeBudgetExceeded:
|
||||
time_budget_hit = True
|
||||
if not reply:
|
||||
budget = max(1, round(time_budget_sec)) if time_budget_sec > 0 else 0
|
||||
budget_text = f"{budget}s" if budget else "its configured"
|
||||
if mode in {"quick", "fast"}:
|
||||
reply = f"Quick mode hit {budget_text} time budget before finishing. Try atlas-smart for a deeper answer."
|
||||
elif mode == "smart":
|
||||
reply = f"Smart mode hit {budget_text} time budget before finishing. Try atlas-genius or ask a narrower follow-up."
|
||||
else:
|
||||
reply = "I ran out of time before I could finish this answer."
|
||||
scores = _default_scores()
|
||||
except LLMLimitReached:
|
||||
if not reply:
|
||||
reply = "I started working on this but hit my reasoning limit. Ask again with 'Run limitless' for a deeper pass."
|
||||
scores = _default_scores()
|
||||
finally:
|
||||
elapsed = round(time.monotonic() - started, 2)
|
||||
log.info(
|
||||
"atlasbot_answer",
|
||||
extra={
|
||||
"extra": {
|
||||
"mode": mode,
|
||||
"seconds": elapsed,
|
||||
"llm_calls": call_count,
|
||||
"limit": call_cap,
|
||||
"limit_hit": limit_hit,
|
||||
"time_budget_sec": time_budget_sec,
|
||||
"time_budget_hit": time_budget_hit,
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
if limit_hit and "run limitless" not in reply.lower():
|
||||
reply = reply.rstrip() + "\n\nNote: I hit my reasoning limit. Ask again with 'Run limitless' for a deeper pass."
|
||||
|
||||
if conversation_id and claims:
|
||||
engine._store_state(conversation_id, claims, summary, snapshot_used, pin_snapshot)
|
||||
|
||||
return AnswerResult(
|
||||
reply,
|
||||
scores,
|
||||
_build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started),
|
||||
)
|
||||
170
atlasbot/engine/answerer/workflow_post.py
Normal file
170
atlasbot/engine/answerer/workflow_post.py
Normal file
@ -0,0 +1,170 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
from atlasbot.llm import prompts
|
||||
|
||||
from ._base import *
|
||||
from .common import *
|
||||
from .post import *
|
||||
from .post_ext import *
|
||||
from .retrieval import *
|
||||
from .spine import *
|
||||
|
||||
|
||||
async def finalize_answer(*, engine: Any, call_llm: Callable[..., Any], normalized: str, subanswers: list[str], context: str, classify: dict[str, Any], plan: ModePlan, summary: dict[str, Any], summary_lines: list[str], metric_facts: list[str], key_facts: list[str], facts_used: list[str], allowed_nodes: list[str], allowed_namespaces: list[str], runbook_paths: list[str], lowered_question: str, force_metric: bool, keyword_tokens: list[str], question_tokens: list[str], snapshot_context: str, observer: Callable[[str, str], None] | None, mode: str, metric_keys: list[str] | None = None) -> tuple[str, AnswerScores, list[ClaimItem]]:  # noqa: C901
    """Synthesize and post-process the final answer.

    Input:
    - `engine`: answer engine exposing `_synthesize_answer`, `_dedup_reply`,
      `_score_answer`, and `_extract_claims`.
    - `call_llm`: awaitable LLM call `(system, prompt, *, context, model, tag)`.
    - `normalized`: normalized user question; `subanswers`: drafted sub-answers.
    - `context`: grounded prompt context; `classify`: question classification dict.
    - `plan`: mode plan (models, feature flags); `summary`/`summary_lines`: snapshot data.
    - `metric_facts`/`key_facts`/`facts_used`: selected evidence lines.
    - `allowed_nodes`/`allowed_namespaces`/`runbook_paths`: entity allow-lists.
    - `lowered_question`: lowercased raw question; `force_metric`: metric override.
    - `keyword_tokens`/`question_tokens`: retrieval tokens.
    - `snapshot_context`: formatted snapshot block; `observer`: optional progress callback.
    - `mode`: answer mode name; `metric_keys`: optional metric key hints.

    Output:
    - `(reply, scores, claims)` after evidence repair, runbook enforcement,
      guard passes, critic/gap review, and claim extraction.
    """

    reply = await engine._synthesize_answer(normalized, subanswers, context, classify, plan, call_llm)

    # Detect hallucinated entities and missing-evidence conditions in the draft.
    unknown_nodes = _find_unknown_nodes(reply, allowed_nodes)
    unknown_namespaces = _find_unknown_namespaces(reply, allowed_namespaces)
    runbook_fix = _needs_runbook_fix(reply, runbook_paths)
    runbook_needed = _needs_runbook_reference(normalized, runbook_paths, reply)
    needs_evidence = _needs_evidence_fix(reply, classify)
    hardware_terms = ("rpi", "raspberry", "jetson", "amd64", "arm64", "hardware")
    hardware_line = _line_starting_with(summary_lines, "hardware_nodes:")
    if any(term in lowered_question for term in hardware_terms) and hardware_line:
        needs_evidence = True
    if metric_facts and (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and not _reply_matches_metric_facts(reply, metric_facts, _merge_tokens(keyword_tokens, question_tokens)):
        needs_evidence = True
    if classify.get("question_type") in {"open_ended", "planning"} and metric_facts:
        needs_evidence = True

    # Resolve a concrete runbook path up front so fix prompts can cite it.
    resolved_runbook = None
    if runbook_paths and (runbook_fix or runbook_needed):
        resolver_prompt = prompts.RUNBOOK_SELECT_PROMPT + "\nQuestion: " + normalized
        resolver_raw = await call_llm(prompts.RUNBOOK_SELECT_SYSTEM, resolver_prompt, context="AllowedRunbooks:\n" + "\n".join(runbook_paths), model=plan.fast_model, tag="runbook_select")
        resolver = _parse_json_block(resolver_raw, fallback={})
        candidate = resolver.get("path") if isinstance(resolver.get("path"), str) else None
        if candidate and candidate in runbook_paths:
            resolved_runbook = candidate

    if (snapshot_context and needs_evidence) or unknown_nodes or unknown_namespaces or runbook_fix or runbook_needed:
        if observer:
            observer("evidence_fix", "repairing missing evidence")
        # Feed the fixer everything it needs to stay inside the allow-lists.
        extra_bits = []
        if unknown_nodes:
            extra_bits.append("UnknownNodes: " + ", ".join(sorted(unknown_nodes)))
        if unknown_namespaces:
            extra_bits.append("UnknownNamespaces: " + ", ".join(sorted(unknown_namespaces)))
        if runbook_paths:
            extra_bits.append("AllowedRunbooks: " + ", ".join(runbook_paths))
        if resolved_runbook:
            extra_bits.append("ResolvedRunbook: " + resolved_runbook)
        if metric_facts:
            extra_bits.append("MustUseFacts: " + "; ".join(metric_facts[:4]))
        if hardware_line:
            extra_bits.append("HardwareNodes: " + hardware_line)
        if allowed_nodes:
            extra_bits.append("AllowedNodes: " + ", ".join(allowed_nodes))
        if allowed_namespaces:
            extra_bits.append("AllowedNamespaces: " + ", ".join(allowed_namespaces))
        fix_prompt = prompts.EVIDENCE_FIX_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply + ("\n" + "\n".join(extra_bits) if extra_bits else "")
        reply = await call_llm(prompts.EVIDENCE_FIX_SYSTEM, fix_prompt, context=context, model=plan.model, tag="evidence_fix")
        # Second, stricter pass when the repaired draft still omits the facts.
        if metric_facts and not _reply_matches_metric_facts(reply, metric_facts, _merge_tokens(keyword_tokens, question_tokens)):
            enforce_prompt = prompts.EVIDENCE_FIX_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply + "\nMustIncludeFacts: " + "; ".join(metric_facts[:6]) + "\nInstruction: The answer must include all MustIncludeFacts items."
            reply = await call_llm(prompts.EVIDENCE_FIX_SYSTEM, enforce_prompt, context=context, model=plan.model, tag="evidence_fix_enforce")

    # Last-resort metric path: answer directly from a single summary line.
    if metric_facts and not _reply_matches_metric_facts(reply, metric_facts, _merge_tokens(keyword_tokens, question_tokens)):
        # BUGFIX: the original guarded on `'metric_keys' in locals()`, which is
        # always true because `metric_keys` is a parameter of this function;
        # the only meaningful check is the truthiness of `metric_keys` itself.
        direct_candidates = _lines_for_metric_keys(summary_lines, metric_keys, max_lines=plan.max_subquestions * 3) if metric_keys else summary_lines
        direct_line = _select_metric_line(direct_candidates, normalized, _merge_tokens(keyword_tokens, question_tokens))
        if direct_line:
            direct_prompt = f"Question: {normalized}\nFact: {direct_line}\nAnswer using the fact."
            reply = await call_llm(prompts.ANSWER_SYSTEM, direct_prompt, context="", model=plan.fast_model, tag="metric_direct")
            # Count-style quick questions (or a still-mismatched reply) get the
            # deterministic formatted line instead of the model's prose.
            if (mode == "quick" and any(term in normalized.lower() for term in ("how many", "count", "total"))) or not _reply_matches_metric_facts(reply, [direct_line], _merge_tokens(keyword_tokens, question_tokens)):
                reply = _format_direct_metric_line(direct_line)

    # Deterministic override for "which nodes are NOT Raspberry Pi" questions.
    if "raspberry" in lowered_question and "not" in lowered_question:
        non_rpi = _non_rpi_nodes(summary)
        if non_rpi:
            reply = _format_hardware_groups(non_rpi, "Non-Raspberry Pi nodes")
    if unknown_nodes or unknown_namespaces:
        # Re-check against the latest reply text before stripping entities.
        refreshed_nodes = _find_unknown_nodes(reply, allowed_nodes)
        refreshed_namespaces = _find_unknown_namespaces(reply, allowed_namespaces)
        if refreshed_nodes or refreshed_namespaces:
            reply = _strip_unknown_entities(reply, refreshed_nodes, refreshed_namespaces)
    if runbook_paths and resolved_runbook and _needs_runbook_reference(normalized, runbook_paths, reply):
        if observer:
            observer("runbook_enforce", "enforcing runbook path")
        enforce_prompt = prompts.RUNBOOK_ENFORCE_PROMPT.format(path=resolved_runbook)
        reply = await call_llm(prompts.RUNBOOK_ENFORCE_SYSTEM, enforce_prompt + "\nAnswer: " + reply, context=context, model=plan.model, tag="runbook_enforce")
    if runbook_paths:
        # Replace any runbook path the model invented with a real one.
        invalid = [token for token in re.findall(r"runbooks/[A-Za-z0-9._-]+", reply) if token.lower() not in {p.lower() for p in runbook_paths}]
        if invalid:
            if observer:
                observer("runbook_enforce", "replacing invalid runbook path")
            resolver_prompt = prompts.RUNBOOK_SELECT_PROMPT + "\nQuestion: " + normalized
            resolver_raw = await call_llm(prompts.RUNBOOK_SELECT_SYSTEM, resolver_prompt, context="AllowedRunbooks:\n" + "\n".join(runbook_paths), model=plan.fast_model, tag="runbook_select")
            resolver = _parse_json_block(resolver_raw, fallback={})
            candidate = resolver.get("path") if isinstance(resolver.get("path"), str) else None
            if not (candidate and candidate in runbook_paths):
                candidate = _best_runbook_match(invalid[0], runbook_paths)
            if candidate and candidate in runbook_paths:
                enforce_prompt = prompts.RUNBOOK_ENFORCE_PROMPT.format(path=candidate)
                reply = await call_llm(prompts.RUNBOOK_ENFORCE_SYSTEM, enforce_prompt + "\nAnswer: " + reply, context=context, model=plan.model, tag="runbook_enforce")
            reply = _strip_unknown_entities(reply, unknown_nodes, unknown_namespaces)

    # Evidence guard: tighten claims that are unsupported by the facts used.
    if facts_used and _needs_evidence_guard(reply, facts_used):
        if observer:
            observer("evidence_guard", "tightening unsupported claims")
        use_guard = True
        if mode in {"smart", "genius"}:
            # Smarter modes may vote that the draft is fine as-is.
            decision = await _contradiction_decision(ContradictionContext(call_llm, normalized, reply, facts_used, plan), attempts=3 if mode == "genius" else 1)
            use_guard = decision.get("use_facts", True)
        if use_guard:
            guard_prompt = prompts.EVIDENCE_GUARD_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply + "\nFactsUsed:\n" + "\n".join(facts_used)
            reply = await call_llm(prompts.EVIDENCE_GUARD_SYSTEM, guard_prompt, context=context, model=plan.model, tag="evidence_guard")

    if _needs_focus_fix(normalized, reply, classify):
        if observer:
            observer("focus_fix", "tightening answer")
        reply = await call_llm(prompts.EVIDENCE_FIX_SYSTEM, prompts.FOCUS_FIX_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply, context=context, model=plan.model, tag="focus_fix")
        # NOTE(review): fallback nested under the focus-fix branch, matching the
        # original flow as recovered — confirm against upstream indentation.
        if not metric_facts or not _has_keyword_overlap(metric_facts, keyword_tokens):
            best_line = _best_keyword_line(summary_lines, keyword_tokens)
            if best_line:
                reply = f"Latest metrics: {best_line}."
    if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and metric_facts:
        # If the reply's numbers don't intersect the fact numbers, fall back to
        # quoting the best-matching fact line verbatim.
        best_line = None
        lowered_keywords = [kw.lower() for kw in keyword_tokens if kw]
        for line in metric_facts:
            if any(kw in line.lower() for kw in lowered_keywords):
                best_line = line
                break
        best_line = best_line or metric_facts[0]
        reply_numbers = set(re.findall(r"\d+(?:\.\d+)?", reply))
        fact_numbers = set(re.findall(r"\d+(?:\.\d+)?", " ".join(metric_facts)))
        if not reply_numbers or (fact_numbers and not (reply_numbers & fact_numbers)):
            reply = f"Latest metrics: {best_line}."

    if _should_use_insight_guard(classify):
        if observer:
            observer("insight_guard", "checking for concrete signals")
        reply = await _apply_insight_guard(InsightGuardInput(question=normalized, reply=reply, classify=classify, context=context, plan=plan, call_llm=call_llm, facts=metric_facts or key_facts))

    # Optional critic pass: revise the draft when the critic finds issues.
    if plan.use_critic:
        if observer:
            observer("critic", "reviewing")
        critic_prompt = prompts.CRITIC_PROMPT + "\nQuestion: " + normalized + "\nAnswer: " + reply
        critic_raw = await call_llm(prompts.CRITIC_SYSTEM, critic_prompt, context=context, model=plan.model, tag="critic")
        critic = _parse_json_block(critic_raw, fallback={})
        if critic.get("issues"):
            revise_prompt = prompts.REVISION_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply + "\nCritique: " + json.dumps(critic)
            reply = await call_llm(prompts.REVISION_SYSTEM, revise_prompt, context=context, model=plan.model, tag="revise")

    # Optional gap pass: append a caveat note when evidence gaps are detected.
    if plan.use_gap:
        if observer:
            observer("gap", "checking gaps")
        gap_prompt = prompts.EVIDENCE_GAP_PROMPT + "\nQuestion: " + normalized + "\nAnswer: " + reply
        gap_raw = await call_llm(prompts.GAP_SYSTEM, gap_prompt, context=context, model=plan.fast_model, tag="gap")
        gap = _parse_json_block(gap_raw, fallback={})
        note = str(gap.get("note") or "").strip()
        if note:
            reply = f"{reply}\n\n{note}"

    # Final clean-up, scoring, and claim extraction for conversation state.
    reply = await engine._dedup_reply(reply, plan, call_llm, tag="dedup")
    scores = await engine._score_answer(normalized, reply, plan, call_llm)
    claims = await engine._extract_claims(normalized, reply, summary, facts_used, call_llm)
    return reply, scores, claims
|
||||
@ -1,35 +1,46 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class IntentMatch:
    """Describe the best cluster intent match for a user question."""

    # Intent identifier consumed by the routing table, e.g. "nodes_count",
    # "nodes_ready", "hottest_cpu", "hardware_mix".
    kind: str
    # Relative confidence of the match; when several intents fire, the
    # highest-scoring one is returned by `route_intent`.
    score: int
|
||||
|
||||
|
||||
_COUNT_TERMS = r"(how\\s+many|count|number\\s+of|total|totals|tally|amount\\s+of|quantity|sum\\s+of|overall|in\\s+total|all\\s+up)"
|
||||
_NODE_TERMS = r"(nodes?|workers?|worker\\s+nodes?|cluster\\s+nodes?|machines?|hosts?|members?|instances?|servers?|agents?|control[-\\s]?plane|control\\s+plane)"
|
||||
_READY_TERMS = r"(ready|unready|not\\s+ready|down|offline|not\\s+responding|missing|lost|gone|drain(?:ed|ing)?|cordon(?:ed|ing)?)"
|
||||
_COUNT_TERMS = r"(how\s+many|count|number\s+of|total|totals|tally|amount\s+of|quantity|sum\s+of|overall|in\s+total|all\s+up)"
|
||||
_NODE_TERMS = r"(nodes?|workers?|worker\s+nodes?|cluster\s+nodes?|machines?|hosts?|members?|instances?|servers?|agents?|control[-\s]?plane|control\s+plane)"
|
||||
_READY_TERMS = r"(ready|unready|not\s+ready|down|offline|not\s+responding|missing|lost|gone|drain(?:ed|ing)?|cordon(?:ed|ing)?)"
|
||||
_HOTTEST_TERMS = r"(hottest|hot|highest|max(?:imum)?|peak|top|most|worst|spikiest|heaviest|largest|biggest|noisiest|loudest)"
|
||||
_CPU_TERMS = r"(cpu|processor|processors|compute|core|cores|load|load\\s+avg|load\\s+average|util(?:ization)?|usage)"
|
||||
_CPU_TERMS = r"(cpu|processor|processors|compute|core|cores|load|load\s+avg|load\s+average|util(?:ization)?|usage)"
|
||||
_RAM_TERMS = r"(ram|memory|mem|heap|rss|resident|swap)"
|
||||
_NET_TERMS = r"(net|network|bandwidth|throughput|traffic|rx|tx|ingress|egress|bits|bytes|packets|pps|bps)"
|
||||
_IO_TERMS = r"(\\bio\\b|i/o|disk\\s+io|disk\\s+activity|read/?write|storage\\s+io|iops|latency)"
|
||||
_DISK_TERMS = r"(disk|storage|volume|pvc|filesystem|fs|capacity|\\bspace\\b|full|usage)"
|
||||
_PG_TERMS = r"(postgres|postgresql|pg\\b|database|db|sql|psql)"
|
||||
_CONN_TERMS = r"(connections?|conn|pool|sessions?|clients?|active\\s+connections?|open\\s+connections?)"
|
||||
_DB_HOT_TERMS = r"(hottest|busiest|most|largest|top|heaviest|noisiest|highest\\s+load)"
|
||||
_NAMESPACE_TERMS = r"(namespace|namespaces|ns\\b|tenant|workload\\s+namespace)"
|
||||
_IO_TERMS = r"(\bio\b|i/o|disk\s+io|disk\s+activity|read/?write|storage\s+io|iops|latency)"
|
||||
_DISK_TERMS = r"(disk|storage|volume|pvc|filesystem|fs|capacity|\bspace\b|full|usage)"
|
||||
_PG_TERMS = r"(postgres|postgresql|pg\b|database|db|sql|psql)"
|
||||
_CONN_TERMS = r"(connections?|conn|pool|sessions?|clients?|active\s+connections?|open\s+connections?)"
|
||||
_DB_HOT_TERMS = r"(hottest|busiest|most|largest|top|heaviest|noisiest|highest\s+load)"
|
||||
_NAMESPACE_TERMS = r"(namespace|namespaces|ns\b|tenant|workload\s+namespace)"
|
||||
_PODS_TERMS = r"(pods?|workloads?|tasks?|containers?|deployments?|jobs?|cronjobs?|daemonsets?|statefulsets?)"
|
||||
_NON_RPI_TERMS = r"(non[-\\s]?raspberry|not\\s+raspberry|non[-\\s]?rpi|not\\s+rpi|amd64|x86|x86_64|intel|ryzen|jetson|arm64\\b(?!.*rpi))"
|
||||
_PRESSURE_TERMS = r"(pressure|overload|hotspot|bottleneck|saturation|headroom|strain|stress|critical|warning|at\\s+capacity|near\\s+limit)"
|
||||
_HARDWARE_TERMS = r"(hardware|arch(?:itecture)?|platform|mix|profile|node\\s+types?)"
|
||||
_NON_RPI_TERMS = r"(non[-\s]?raspberry|not\s+raspberry|non[-\s]?rpi|not\s+rpi|amd64|x86|x86_64|intel|ryzen|jetson|arm64\b(?!.*rpi))"
|
||||
_PRESSURE_TERMS = r"(pressure|overload|hotspot|bottleneck|saturation|headroom|strain|stress|critical|warning|at\s+capacity|near\s+limit)"
|
||||
_HARDWARE_TERMS = r"(hardware|arch(?:itecture)?|platform|mix|profile|node\s+types?)"
|
||||
|
||||
|
||||
def route_intent(question: str) -> IntentMatch | None:
|
||||
"""Classify a question into a deterministic cluster intent.
|
||||
|
||||
Input:
|
||||
- `question`: user text to inspect.
|
||||
|
||||
Output:
|
||||
- the highest-confidence `IntentMatch`, or `None` when no intent fits.
|
||||
"""
|
||||
|
||||
text = (question or "").lower()
|
||||
if not text:
|
||||
return None
|
||||
@ -44,13 +55,13 @@ def route_intent(question: str) -> IntentMatch | None:
|
||||
return any(_has(pat) for pat in patterns)
|
||||
|
||||
intents = [
|
||||
(lambda: _all(_COUNT_TERMS) and (_has(_NODE_TERMS) or "cluster" in text), IntentMatch("nodes_count", 90)),
|
||||
(
|
||||
lambda: _all(_READY_TERMS) and (_any(_NODE_TERMS) or "cluster" in text or "workers" in text),
|
||||
IntentMatch("nodes_ready", 85),
|
||||
),
|
||||
(lambda: _all(_COUNT_TERMS) and (_has(_NODE_TERMS) or "cluster" in text), IntentMatch("nodes_count", 90)),
|
||||
(lambda: _all(_NON_RPI_TERMS) and (_any(_NODE_TERMS) or "cluster" in text), IntentMatch("nodes_non_rpi", 80)),
|
||||
(lambda: _all(_HARDWARE_TERMS) and (_has(_NODE_TERMS) or "cluster" in text), IntentMatch("hardware_mix", 75)),
|
||||
(lambda: _all(_HARDWARE_TERMS) and (_has(_NODE_TERMS) or "cluster" in text or "mix" in text), IntentMatch("hardware_mix", 75)),
|
||||
(lambda: _all(_HOTTEST_TERMS, _CPU_TERMS), IntentMatch("hottest_cpu", 80)),
|
||||
(lambda: _all(_HOTTEST_TERMS, _RAM_TERMS), IntentMatch("hottest_ram", 80)),
|
||||
(lambda: _all(_HOTTEST_TERMS, _NET_TERMS), IntentMatch("hottest_net", 80)),
|
||||
|
||||
@ -7,6 +7,8 @@ log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class KnowledgeBase:
|
||||
"""Load Atlas knowledge-base files and expose summary snippets."""
|
||||
|
||||
def __init__(self, base_dir: str) -> None:
|
||||
self._base = Path(base_dir) if base_dir else None
|
||||
self._atlas: dict[str, Any] = {}
|
||||
@ -14,6 +16,8 @@ class KnowledgeBase:
|
||||
self._loaded = False
|
||||
|
||||
def load(self) -> None:
|
||||
"""Load catalog files once so subsequent reads stay cheap."""
|
||||
|
||||
if self._loaded or not self._base:
|
||||
return
|
||||
self._atlas = self._read_json(self._base / "catalog" / "atlas.json")
|
||||
@ -30,6 +34,8 @@ class KnowledgeBase:
|
||||
return {}
|
||||
|
||||
def summary(self) -> str:
|
||||
"""Return a short human-readable KB summary for prompt context."""
|
||||
|
||||
self.load()
|
||||
if not self._atlas:
|
||||
return ""
|
||||
@ -42,12 +48,14 @@ class KnowledgeBase:
|
||||
if services:
|
||||
parts.append(f"Services indexed: {len(services)}.")
|
||||
if isinstance(self._atlas, dict):
|
||||
keys = [key for key in self._atlas.keys() if key not in {"sources"}]
|
||||
keys = [key for key in self._atlas if key not in {"sources"}]
|
||||
if keys:
|
||||
parts.append(f"Atlas keys: {', '.join(sorted(keys)[:8])}.")
|
||||
return " ".join(parts)
|
||||
|
||||
def runbook_titles(self, *, limit: int = 5) -> str:
|
||||
"""Render the top runbook titles for prompt context."""
|
||||
|
||||
self.load()
|
||||
if not self._runbooks:
|
||||
return ""
|
||||
@ -64,6 +72,8 @@ class KnowledgeBase:
|
||||
return "Relevant runbooks:\n" + "\n".join(titles[:limit])
|
||||
|
||||
def runbook_paths(self, *, limit: int = 10) -> list[str]:
|
||||
"""Return the runbook paths used for exact-path enforcement."""
|
||||
|
||||
self.load()
|
||||
if not self._runbooks:
|
||||
return []
|
||||
@ -77,6 +87,8 @@ class KnowledgeBase:
|
||||
return paths[:limit]
|
||||
|
||||
def chunk_lines(self, *, max_files: int = 20, max_chars: int = 6000) -> list[str]:
|
||||
"""Collect KB excerpts into prompt-sized chunks."""
|
||||
|
||||
self.load()
|
||||
if not self._base:
|
||||
return []
|
||||
|
||||
@ -17,6 +17,8 @@ class LLMError(RuntimeError):
|
||||
|
||||
|
||||
class LLMClient:
|
||||
"""Wrap the Ollama chat endpoint with retries and fallback-model support."""
|
||||
|
||||
def __init__(self, settings: Settings) -> None:
|
||||
self._settings = settings
|
||||
self._timeout = settings.ollama_timeout_sec
|
||||
@ -37,6 +39,8 @@ class LLMClient:
|
||||
model: str | None = None,
|
||||
timeout_sec: float | None = None,
|
||||
) -> str:
|
||||
"""Send a chat request and return the model content text."""
|
||||
|
||||
payload = {
|
||||
"model": model or self._settings.ollama_model,
|
||||
"messages": messages,
|
||||
@ -77,6 +81,8 @@ class LLMClient:
|
||||
|
||||
|
||||
def build_messages(system: str, prompt: str, *, context: str | None = None) -> list[dict[str, str]]:
|
||||
"""Assemble the minimal chat message list used by the answer pipeline."""
|
||||
|
||||
messages: list[dict[str, str]] = [{"role": "system", "content": system}]
|
||||
if context:
|
||||
messages.append({"role": "user", "content": "Context (grounded facts):\n" + context})
|
||||
@ -85,6 +91,8 @@ def build_messages(system: str, prompt: str, *, context: str | None = None) -> l
|
||||
|
||||
|
||||
def parse_json(text: str, *, fallback: dict[str, Any] | None = None) -> dict[str, Any]:
|
||||
"""Parse a JSON blob from model output and fall back to a safe default."""
|
||||
|
||||
try:
|
||||
raw = text.strip()
|
||||
if raw.startswith("`"):
|
||||
|
||||
@ -253,7 +253,7 @@ CONTRADICTION_PROMPT = (
|
||||
"Question: {question}\n"
|
||||
"Draft: {draft}\n"
|
||||
"FactsUsed:\n{facts}\n\n"
|
||||
"Return JSON: {\"use_facts\": true|false, \"confidence\": 0-100, \"reason\": \"...\"}"
|
||||
"Return JSON: {{\"use_facts\": true|false, \"confidence\": 0-100, \"reason\": \"...\"}}"
|
||||
)
|
||||
|
||||
CANDIDATE_SELECT_SYSTEM = (
|
||||
|
||||
@ -1,13 +1,17 @@
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from datetime import UTC, datetime
|
||||
|
||||
|
||||
class JsonFormatter(logging.Formatter):
|
||||
"""Emit structured log records for the atlasbot services."""
|
||||
|
||||
def format(self, record: logging.LogRecord) -> str:
|
||||
"""Render a log record as JSON for downstream ingestion."""
|
||||
|
||||
payload = {
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
"timestamp": datetime.now(UTC).isoformat(),
|
||||
"level": record.levelname.lower(),
|
||||
"logger": record.name,
|
||||
"message": record.getMessage(),
|
||||
@ -21,6 +25,8 @@ class JsonFormatter(logging.Formatter):
|
||||
|
||||
|
||||
def configure_logging(level: str = "INFO") -> None:
|
||||
"""Install JSON logging on the process root logger."""
|
||||
|
||||
root = logging.getLogger()
|
||||
root.setLevel(level.upper())
|
||||
handler = logging.StreamHandler(sys.stdout)
|
||||
|
||||
@ -17,6 +17,8 @@ log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _build_engine(settings) -> AnswerEngine:
|
||||
"""Construct the answer engine from the configured backends."""
|
||||
|
||||
kb = KnowledgeBase(settings.kb_dir)
|
||||
snapshot = SnapshotProvider(settings)
|
||||
llm = LLMClient(settings)
|
||||
@ -24,6 +26,8 @@ def _build_engine(settings) -> AnswerEngine:
|
||||
|
||||
|
||||
async def main() -> None:
|
||||
"""Start the HTTP API, Matrix bots, and queue worker."""
|
||||
|
||||
settings = load_settings()
|
||||
configure_logging("INFO")
|
||||
|
||||
@ -45,14 +49,7 @@ async def main() -> None:
|
||||
queue = QueueManager(settings, handler)
|
||||
await queue.start()
|
||||
|
||||
async def answer_handler( # noqa: PLR0913
|
||||
question: str,
|
||||
mode: str,
|
||||
history=None,
|
||||
conversation_id=None,
|
||||
snapshot_pin: bool | None = None,
|
||||
observer=None,
|
||||
) -> AnswerResult:
|
||||
async def answer_handler(question: str, mode: str, history=None, conversation_id=None, snapshot_pin: bool | None = None, observer=None) -> AnswerResult:
|
||||
if settings.queue_enabled:
|
||||
payload = await queue.submit(
|
||||
{
|
||||
@ -86,6 +83,8 @@ async def main() -> None:
|
||||
|
||||
|
||||
def result_scores(payload: dict[str, object]) -> AnswerScores:
|
||||
"""Coerce a queue payload into the public `AnswerScores` shape."""
|
||||
|
||||
scores = payload.get("scores") if isinstance(payload, dict) else None
|
||||
if isinstance(scores, dict):
|
||||
try:
|
||||
|
||||
@ -15,11 +15,15 @@ log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MatrixClient:
|
||||
"""Wrap the Matrix client endpoints used by the bot runtime."""
|
||||
|
||||
def __init__(self, settings: Settings, bot: MatrixBotConfig) -> None:
|
||||
self._settings = settings
|
||||
self._bot = bot
|
||||
|
||||
async def login(self) -> str:
|
||||
"""Exchange bot credentials for a Matrix access token."""
|
||||
|
||||
payload = {
|
||||
"type": "m.login.password",
|
||||
"identifier": {"type": "m.id.user", "user": self._bot.username},
|
||||
@ -33,6 +37,8 @@ class MatrixClient:
|
||||
return data.get("access_token", "")
|
||||
|
||||
async def resolve_room(self, token: str) -> str:
|
||||
"""Resolve the configured room alias into a room id."""
|
||||
|
||||
alias = quote(self._settings.room_alias, safe="")
|
||||
url = f"{self._settings.matrix_base}/_matrix/client/v3/directory/room/{alias}"
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
@ -50,12 +56,16 @@ class MatrixClient:
|
||||
return data.get("room_id", "")
|
||||
|
||||
async def join_room(self, token: str, room_id: str) -> None:
|
||||
"""Join the target room if the bot is not already present."""
|
||||
|
||||
url = f"{self._settings.matrix_base}/_matrix/client/v3/rooms/{room_id}/join"
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
async with httpx.AsyncClient(timeout=15.0) as client:
|
||||
await client.post(url, headers=headers)
|
||||
|
||||
async def send_message(self, token: str, room_id: str, text: str) -> None:
|
||||
"""Send a plain text message to the Matrix room."""
|
||||
|
||||
url = f"{self._settings.matrix_base}/_matrix/client/v3/rooms/{room_id}/send/m.room.message"
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
payload = {"msgtype": "m.text", "body": text}
|
||||
@ -63,6 +73,8 @@ class MatrixClient:
|
||||
await client.post(url, json=payload, headers=headers)
|
||||
|
||||
async def sync(self, token: str, since: str | None) -> dict[str, Any]:
|
||||
"""Fetch the incremental Matrix sync payload."""
|
||||
|
||||
base = f"{self._settings.matrix_base}/_matrix/client/v3/sync"
|
||||
params = {"timeout": 30000}
|
||||
if since:
|
||||
@ -75,17 +87,9 @@ class MatrixClient:
|
||||
|
||||
|
||||
class MatrixBot:
|
||||
def __init__(
|
||||
self,
|
||||
settings: Settings,
|
||||
bot: MatrixBotConfig,
|
||||
engine: AnswerEngine,
|
||||
answer_handler: Callable[
|
||||
[str, str, list[dict[str, str]] | None, str | None, Callable[[str, str], None] | None],
|
||||
Awaitable[AnswerResult],
|
||||
]
|
||||
| None = None,
|
||||
) -> None:
|
||||
"""Drive Matrix conversation handling and heartbeat replies."""
|
||||
|
||||
def __init__(self, settings: Settings, bot: MatrixBotConfig, engine: AnswerEngine, answer_handler: Callable[[str, str, list[dict[str, str]] | None, str | None, Callable[[str, str], None] | None], Awaitable[AnswerResult]] | None = None) -> None:
|
||||
self._settings = settings
|
||||
self._bot = bot
|
||||
self._engine = engine
|
||||
@ -94,6 +98,8 @@ class MatrixBot:
|
||||
self._history: dict[str, list[dict[str, str]]] = {}
|
||||
|
||||
async def run(self) -> None:
|
||||
"""Continuously bootstrap, sync, and answer Matrix events."""
|
||||
|
||||
while True:
|
||||
try:
|
||||
token = await self._client.login()
|
||||
|
||||
@ -1,7 +1,8 @@
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Awaitable, Callable
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Any
|
||||
|
||||
from nats.aio.client import Client as NATS
|
||||
from nats.js.errors import NotFoundError
|
||||
@ -12,6 +13,8 @@ log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class QueueManager:
|
||||
"""Manage optional NATS-backed work queue processing."""
|
||||
|
||||
def __init__(self, settings: Settings, handler: Callable[[dict[str, Any]], Awaitable[dict[str, Any]]]) -> None:
|
||||
self._settings = settings
|
||||
self._handler = handler
|
||||
@ -20,6 +23,8 @@ class QueueManager:
|
||||
self._worker_task: asyncio.Task | None = None
|
||||
|
||||
async def start(self) -> None:
|
||||
"""Connect to NATS and start the worker loop when queueing is enabled."""
|
||||
|
||||
if not self._settings.queue_enabled:
|
||||
return
|
||||
self._nc = NATS()
|
||||
@ -29,12 +34,16 @@ class QueueManager:
|
||||
self._worker_task = asyncio.create_task(self._worker_loop())
|
||||
|
||||
async def stop(self) -> None:
|
||||
"""Drain the NATS connection and cancel background work."""
|
||||
|
||||
if self._worker_task:
|
||||
self._worker_task.cancel()
|
||||
if self._nc:
|
||||
await self._nc.drain()
|
||||
|
||||
async def submit(self, payload: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Submit work to NATS or fall back to direct handling."""
|
||||
|
||||
if not self._settings.queue_enabled:
|
||||
return await self._handler(payload)
|
||||
if not self._nc or not self._js:
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
8
atlasbot/snapshot/builder/__init__.py
Normal file
8
atlasbot/snapshot/builder/__init__.py
Normal file
@ -0,0 +1,8 @@
|
||||
"""Snapshot summary builder and text render helpers."""
|
||||
|
||||
from .core_a import *
|
||||
from .core_b import *
|
||||
from .format_a import *
|
||||
from .format_b import *
|
||||
from .format_c import *
|
||||
from .summary_text import *
|
||||
492
atlasbot/snapshot/builder/core_a.py
Normal file
492
atlasbot/snapshot/builder/core_a.py
Normal file
@ -0,0 +1,492 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from atlasbot.config import Settings
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
PVC_USAGE_CRITICAL = 90
|
||||
|
||||
_BYTES_KB = 1024
|
||||
_BYTES_MB = 1024 * 1024
|
||||
_BYTES_GB = 1024 * 1024 * 1024
|
||||
_VALUE_PAIR_LEN = 2
|
||||
class SnapshotProvider:
    """Fetch and cache the Ariadne snapshot used by the answer engine."""

    def __init__(self, settings: Settings) -> None:
        """Store settings and start with an empty, immediately-stale cache."""
        self._settings = settings
        # Last snapshot payload fetched from Ariadne (empty until first success).
        self._cache: dict[str, Any] = {}
        # Monotonic timestamp of the last successful fetch; 0.0 means "never".
        self._cache_ts = 0.0

    def _cache_valid(self) -> bool:
        # TTL is clamped to at least 5 seconds so a misconfigured/zero TTL
        # cannot force an upstream fetch on every call.
        return time.monotonic() - self._cache_ts < max(5, self._settings.snapshot_ttl_sec)

    def get(self) -> dict[str, Any] | None:
        """Return the cached snapshot or refresh it from Ariadne.

        Returns the fresh payload on success, the previous (possibly stale)
        cache on fetch failure, and None when nothing was ever fetched.
        """

        if self._cache and self._cache_valid():
            return self._cache
        if not self._settings.ariadne_state_url:
            # No upstream configured: serve whatever we already have, or None.
            return self._cache or None
        headers = {}
        if self._settings.ariadne_state_token:
            headers["x-internal-token"] = self._settings.ariadne_state_token
        try:
            resp = httpx.get(self._settings.ariadne_state_url, headers=headers, timeout=10.0)
            resp.raise_for_status()
            payload = resp.json()
            # Only dict payloads are cached; any other JSON shape falls
            # through to the stale-cache return below.
            if isinstance(payload, dict):
                self._cache = payload
                self._cache_ts = time.monotonic()
                return payload
        except Exception as exc:
            # Deliberately broad: snapshot refresh is best-effort and must not
            # propagate network/JSON errors to the caller; log and fall back.
            log.warning("snapshot fetch failed", extra={"extra": {"error": str(exc)}})
        return self._cache or None
|
||||
|
||||
|
||||
def _node_usage_top(series: list[dict[str, Any]]) -> dict[str, Any] | None:
|
||||
best = None
|
||||
for entry in series or []:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
node = entry.get("node")
|
||||
value = entry.get("value")
|
||||
try:
|
||||
numeric = float(value)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
if best is None or numeric > best["value"]:
|
||||
best = {"node": node, "value": numeric}
|
||||
return best
|
||||
|
||||
|
||||
def build_summary(snapshot: dict[str, Any] | None) -> dict[str, Any]:
    """Condense a raw snapshot into the summary shape used for prompts."""

    if not snapshot:
        return {}
    # Imported lazily: these sibling builder modules import names from this
    # module, so a top-level import would be circular.
    from .core_b import (
        _build_flux,
        _build_hottest,
        _build_namespace_capacity,
        _build_namespace_capacity_summary,
        _build_node_load_summary,
        _build_pvc,
        _build_workloads,
    )
    from .format_c import _build_cluster_watchlist

    nodes_detail = _nodes_detail(snapshot)
    metrics = _metrics(snapshot)
    summary: dict[str, Any] = {}

    # Raw passthrough sections copied verbatim when present.
    if isinstance(snapshot.get("nodes_summary"), dict):
        summary["nodes_summary"] = snapshot.get("nodes_summary")
    if metrics:
        summary["metrics"] = metrics
    if isinstance(snapshot.get("jobs"), dict):
        summary["jobs"] = snapshot.get("jobs")
    # Each _build_* helper returns {} or a one-key dict, so update() only
    # adds the sections that are actually present in this snapshot.
    summary.update(_build_nodes(snapshot))
    summary.update(_build_pressure(snapshot))
    summary.update(_build_hardware(nodes_detail))
    summary.update(_build_hardware_by_node(nodes_detail))
    summary.update(_build_hardware_usage(metrics, summary.get("hardware_by_node")))
    summary.update(_build_node_facts(nodes_detail))
    summary.update(_build_node_ages(nodes_detail))
    summary.update(_build_node_taints(nodes_detail))
    summary.update(_build_capacity(metrics))
    summary.update(_build_pods(metrics))
    summary.update(_build_namespace_pods(snapshot))
    summary.update(_build_namespace_nodes(snapshot))
    summary.update(_build_node_pods(snapshot))
    summary.update(_build_node_pods_top(metrics))
    summary.update(_build_pod_issues(snapshot))
    summary.update(_build_workload_health(snapshot))
    summary.update(_build_events(snapshot))
    summary.update(_build_event_summary(snapshot))
    summary.update(_build_postgres(metrics))
    summary.update(_build_hottest(metrics))
    summary.update(_build_pvc(metrics))
    summary.update(_build_namespace_capacity(metrics))
    summary.update(_build_namespace_capacity_summary(metrics))
    summary.update(_build_longhorn(snapshot))
    summary.update(_build_root_disk_headroom(metrics))
    summary.update(_build_node_load(metrics))
    summary.update(_build_node_load_summary(metrics))
    # Unlike the builders above, this one receives the partially built
    # summary itself, so it must run after the sections it derives from.
    summary.update(_build_cluster_watchlist(summary))
    summary.update(_build_workloads(snapshot))
    summary.update(_build_flux(snapshot))
    # In-place enrichment of the assembled summary.
    _merge_cluster_summary(snapshot, summary)
    _augment_lexicon(summary)
    return summary
|
||||
|
||||
|
||||
def _merge_cluster_summary(snapshot: dict[str, Any], summary: dict[str, Any]) -> None:
    """Fold typed cluster-level summary fields into *summary* in place."""
    raw = snapshot.get("summary")
    if not isinstance(raw, dict) or not raw:
        return
    # Field name -> type it must have to be copied over.
    expected: dict[str, type] = {
        "signals": list,
        "profiles": dict,
        "inventory": dict,
        "topology": dict,
        "lexicon": dict,
        "cross_stats": dict,
        "baseline_deltas": dict,
        "pod_issue_summary": dict,
        "trend_requests": dict,
        "pod_waiting_trends": dict,
        "pod_terminated_trends": dict,
    }
    _merge_cluster_fields(summary, raw, expected)
|
||||
|
||||
|
||||
def _merge_cluster_fields(summary: dict[str, Any], cluster_summary: dict[str, Any], field_types: dict[str, type]) -> None:
|
||||
for key, expected in field_types.items():
|
||||
value = cluster_summary.get(key)
|
||||
if isinstance(value, expected):
|
||||
summary[key] = value
|
||||
|
||||
|
||||
def _augment_lexicon(summary: dict[str, Any]) -> None:
    """Seed the summary lexicon with hardware terms/aliases present in the cluster."""
    lexicon = summary.get("lexicon")
    if not isinstance(lexicon, dict):
        lexicon = {"terms": [], "aliases": {}}
    # Copy before mutating so a lexicon dict shared with the raw snapshot
    # is never modified in place.
    terms = list(lexicon.get("terms") or [])
    aliases = dict(lexicon.get("aliases") or {})
    hardware = summary.get("hardware") if isinstance(summary.get("hardware"), dict) else {}
    # Short hardware-class key -> human-readable meaning to expose.
    hardware_map = {
        "rpi5": "Raspberry Pi 5 nodes",
        "rpi4": "Raspberry Pi 4 nodes",
        "rpi": "Raspberry Pi nodes",
        "jetson": "NVIDIA Jetson nodes",
        "amd64": "AMD64 nodes",
    }
    existing_terms = {entry.get("term") for entry in terms if isinstance(entry, dict)}
    for key, meaning in hardware_map.items():
        # Only advertise hardware classes that actually exist in this cluster,
        # and never overwrite terms/aliases the lexicon already defines.
        if key not in hardware:
            continue
        if key not in existing_terms:
            terms.append({"term": key, "meaning": meaning})
        if key not in aliases:
            aliases[key] = meaning
    # Long-form aliases map the spelled-out name back to the short key.
    if "raspberry pi 5" not in aliases and "rpi5" in hardware:
        aliases["raspberry pi 5"] = "rpi5"
    if "raspberry pi 4" not in aliases and "rpi4" in hardware:
        aliases["raspberry pi 4"] = "rpi4"
    lexicon["terms"] = terms
    lexicon["aliases"] = aliases
    summary["lexicon"] = lexicon
|
||||
|
||||
|
||||
def _nodes_detail(snapshot: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
items = snapshot.get("nodes_detail")
|
||||
return items if isinstance(items, list) else []
|
||||
|
||||
|
||||
def _metrics(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
metrics = snapshot.get("metrics")
|
||||
return metrics if isinstance(metrics, dict) else {}
|
||||
|
||||
|
||||
def _build_nodes(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
nodes_summary = snapshot.get("nodes_summary") if isinstance(snapshot.get("nodes_summary"), dict) else {}
|
||||
if not nodes_summary:
|
||||
return {}
|
||||
return {
|
||||
"nodes": {
|
||||
"total": nodes_summary.get("total"),
|
||||
"ready": nodes_summary.get("ready"),
|
||||
"not_ready": nodes_summary.get("not_ready"),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _build_pressure(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
nodes_summary = snapshot.get("nodes_summary") if isinstance(snapshot.get("nodes_summary"), dict) else {}
|
||||
pressure = nodes_summary.get("pressure_nodes") if isinstance(nodes_summary.get("pressure_nodes"), dict) else {}
|
||||
if not pressure:
|
||||
return {}
|
||||
return {"pressure_nodes": pressure}
|
||||
|
||||
|
||||
def _build_hardware(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
hardware: dict[str, list[str]] = {}
|
||||
for node in nodes_detail or []:
|
||||
if not isinstance(node, dict):
|
||||
continue
|
||||
name = node.get("name")
|
||||
hardware_class = node.get("hardware") or "unknown"
|
||||
if name:
|
||||
hardware.setdefault(hardware_class, []).append(name)
|
||||
if not hardware:
|
||||
return {}
|
||||
return {"hardware": {key: sorted(value) for key, value in hardware.items()}}
|
||||
|
||||
|
||||
def _build_hardware_by_node(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
mapping: dict[str, str] = {}
|
||||
for node in nodes_detail or []:
|
||||
if not isinstance(node, dict):
|
||||
continue
|
||||
name = node.get("name")
|
||||
if isinstance(name, str) and name:
|
||||
hardware = node.get("hardware") or "unknown"
|
||||
mapping[name] = str(hardware)
|
||||
return {"hardware_by_node": mapping} if mapping else {}
|
||||
|
||||
|
||||
def _build_hardware_usage(metrics: dict[str, Any], hardware_by_node: dict[str, Any] | None) -> dict[str, Any]:  # noqa: C901
    """Average per-node load metrics into one row per hardware class.

    Returns {"hardware_usage_avg": [...]} sorted by descending load_index
    (hardware name breaks ties), or {} when either input is missing.
    """
    if not isinstance(hardware_by_node, dict) or not hardware_by_node:
        return {}
    node_load = metrics.get("node_load") if isinstance(metrics.get("node_load"), list) else []
    if not node_load:
        return {}
    # hardware class -> metric name -> list of per-node samples
    buckets: dict[str, dict[str, list[float]]] = {}
    for entry in node_load:
        if not isinstance(entry, dict):
            continue
        node = entry.get("node")
        if not isinstance(node, str) or not node:
            continue
        # Nodes missing from the mapping are grouped under "unknown".
        hardware = hardware_by_node.get(node, "unknown")
        bucket = buckets.setdefault(str(hardware), {"load_index": [], "cpu": [], "ram": [], "net": [], "io": []})
        for key in ("load_index", "cpu", "ram", "net", "io"):
            value = entry.get(key)
            # NOTE(review): isinstance(..., (int, float)) also admits bool;
            # presumably upstream values are always numeric — confirm.
            if isinstance(value, (int, float)):
                bucket[key].append(float(value))
    output: list[dict[str, Any]] = []
    for hardware, metrics_bucket in buckets.items():
        row: dict[str, Any] = {"hardware": hardware}
        for key, values in metrics_bucket.items():
            # Metrics with no samples are omitted from the row entirely.
            if values:
                row[key] = sum(values) / len(values)
        output.append(row)
    output.sort(key=lambda item: (-(item.get("load_index") or 0), item.get("hardware") or ""))
    return {"hardware_usage_avg": output}
|
||||
|
||||
|
||||
def _build_node_ages(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
ages: list[dict[str, Any]] = []
|
||||
for node in nodes_detail or []:
|
||||
if not isinstance(node, dict):
|
||||
continue
|
||||
name = node.get("name")
|
||||
age = node.get("age_hours")
|
||||
if name and isinstance(age, (int, float)):
|
||||
ages.append({"name": name, "age_hours": age})
|
||||
ages.sort(key=lambda item: -(item.get("age_hours") or 0))
|
||||
return {"node_ages": ages[:5]} if ages else {}
|
||||
|
||||
|
||||
def _count_values(nodes_detail: list[dict[str, Any]], key: str) -> dict[str, int]:
|
||||
counts: dict[str, int] = {}
|
||||
for node in nodes_detail or []:
|
||||
if not isinstance(node, dict):
|
||||
continue
|
||||
value = node.get(key)
|
||||
if isinstance(value, str) and value:
|
||||
counts[value] = counts.get(value, 0) + 1
|
||||
return counts
|
||||
|
||||
|
||||
def _build_node_facts(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
    """Summarise arch/OS/version/role distributions across the node fleet."""
    if not nodes_detail:
        return {}
    role_counts: dict[str, int] = {}
    for node in nodes_detail:
        if not isinstance(node, dict):
            continue
        # The is_worker flag counts toward the synthetic "worker" role.
        if node.get("is_worker"):
            role_counts["worker"] = role_counts.get("worker", 0) + 1
        roles = node.get("roles")
        if not isinstance(roles, list):
            continue
        for role in roles:
            if isinstance(role, str) and role:
                role_counts[role] = role_counts.get(role, 0) + 1
    facts: dict[str, Any] = {
        field: _count_values(nodes_detail, source)
        for field, source in (
            ("node_arch_counts", "arch"),
            ("node_os_counts", "os"),
            ("node_kubelet_versions", "kubelet"),
            ("node_kernel_versions", "kernel"),
            ("node_runtime_versions", "container_runtime"),
        )
    }
    facts["node_role_counts"] = role_counts
    return facts
|
||||
|
||||
|
||||
def _build_node_taints(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
taints: dict[str, list[str]] = {}
|
||||
for node in nodes_detail or []:
|
||||
if not isinstance(node, dict):
|
||||
continue
|
||||
name = node.get("name")
|
||||
if not name:
|
||||
continue
|
||||
entries = node.get("taints") if isinstance(node.get("taints"), list) else []
|
||||
for entry in entries:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
key = entry.get("key")
|
||||
effect = entry.get("effect")
|
||||
if isinstance(key, str) and isinstance(effect, str):
|
||||
label = f"{key}:{effect}"
|
||||
taints.setdefault(label, []).append(name)
|
||||
if not taints:
|
||||
return {}
|
||||
return {"node_taints": {key: sorted(names) for key, names in taints.items()}}
|
||||
|
||||
|
||||
def _build_root_disk_headroom(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
node_usage = metrics.get("node_usage") if isinstance(metrics.get("node_usage"), dict) else {}
|
||||
disk = node_usage.get("disk") if isinstance(node_usage.get("disk"), list) else []
|
||||
if not disk:
|
||||
return {}
|
||||
entries = []
|
||||
for entry in disk:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
node = entry.get("node")
|
||||
try:
|
||||
used_pct = float(entry.get("value"))
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
headroom = max(0.0, 100.0 - used_pct)
|
||||
if node:
|
||||
entries.append({"node": node, "headroom_pct": headroom, "used_pct": used_pct})
|
||||
entries.sort(key=lambda item: (item.get("headroom_pct") or 0.0, item.get("node") or ""))
|
||||
return {"root_disk_low_headroom": entries[:5]} if entries else {}
|
||||
|
||||
|
||||
def _build_longhorn(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
longhorn = snapshot.get("longhorn")
|
||||
return {"longhorn": longhorn} if isinstance(longhorn, dict) and longhorn else {}
|
||||
|
||||
|
||||
def _build_node_load(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
node_load = metrics.get("node_load")
|
||||
if not isinstance(node_load, list) or not node_load:
|
||||
return {}
|
||||
return {"node_load": node_load}
|
||||
|
||||
|
||||
def _build_pods(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
pods = {
|
||||
"running": metrics.get("pods_running"),
|
||||
"pending": metrics.get("pods_pending"),
|
||||
"failed": metrics.get("pods_failed"),
|
||||
"succeeded": metrics.get("pods_succeeded"),
|
||||
}
|
||||
if not any(value is not None for value in pods.values()):
|
||||
return {}
|
||||
return {"pods": pods}
|
||||
|
||||
|
||||
def _build_capacity(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
if not metrics:
|
||||
return {}
|
||||
capacity = {
|
||||
"cpu": metrics.get("capacity_cpu"),
|
||||
"allocatable_cpu": metrics.get("allocatable_cpu"),
|
||||
"mem_bytes": metrics.get("capacity_mem_bytes"),
|
||||
"allocatable_mem_bytes": metrics.get("allocatable_mem_bytes"),
|
||||
"pods": metrics.get("capacity_pods"),
|
||||
"allocatable_pods": metrics.get("allocatable_pods"),
|
||||
}
|
||||
if not any(value is not None for value in capacity.values()):
|
||||
return {}
|
||||
return {"capacity": capacity}
|
||||
|
||||
|
||||
def _build_namespace_pods(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
namespaces = snapshot.get("namespace_pods")
|
||||
if not isinstance(namespaces, list) or not namespaces:
|
||||
return {}
|
||||
return {"namespace_pods": namespaces}
|
||||
|
||||
|
||||
def _build_namespace_nodes(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
namespace_nodes = snapshot.get("namespace_nodes")
|
||||
if not isinstance(namespace_nodes, list) or not namespace_nodes:
|
||||
return {}
|
||||
return {"namespace_nodes": namespace_nodes}
|
||||
|
||||
|
||||
def _build_node_pods(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
node_pods = snapshot.get("node_pods")
|
||||
if not isinstance(node_pods, list) or not node_pods:
|
||||
return {}
|
||||
return {"node_pods": node_pods}
|
||||
|
||||
|
||||
def _build_node_pods_top(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
top = metrics.get("node_pods_top")
|
||||
if not isinstance(top, list) or not top:
|
||||
return {}
|
||||
return {"node_pods_top": top}
|
||||
|
||||
|
||||
def _build_pod_issues(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
pod_issues = snapshot.get("pod_issues")
|
||||
if not isinstance(pod_issues, dict) or not pod_issues:
|
||||
return {}
|
||||
return {"pod_issues": pod_issues}
|
||||
|
||||
|
||||
def _build_workload_health(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
health = snapshot.get("workloads_health")
|
||||
if not isinstance(health, dict) or not health:
|
||||
return {}
|
||||
deployments = health.get("deployments")
|
||||
statefulsets = health.get("statefulsets")
|
||||
daemonsets = health.get("daemonsets")
|
||||
if not isinstance(deployments, dict) or not isinstance(statefulsets, dict) or not isinstance(daemonsets, dict):
|
||||
return {}
|
||||
return {
|
||||
"workloads_health": {
|
||||
"deployments": deployments,
|
||||
"statefulsets": statefulsets,
|
||||
"daemonsets": daemonsets,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _build_events(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
events = snapshot.get("events")
|
||||
if not isinstance(events, dict) or not events:
|
||||
return {}
|
||||
return {"events": events}
|
||||
|
||||
|
||||
def _build_event_summary(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
events = snapshot.get("events")
|
||||
if not isinstance(events, dict) or not events:
|
||||
return {}
|
||||
summary = {}
|
||||
if isinstance(events.get("warnings_top_reason"), dict):
|
||||
summary["warnings_top_reason"] = events.get("warnings_top_reason")
|
||||
if events.get("warnings_latest"):
|
||||
summary["warnings_latest"] = events.get("warnings_latest")
|
||||
return {"event_summary": summary} if summary else {}
|
||||
|
||||
|
||||
def _build_postgres(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
postgres = metrics.get("postgres_connections") if isinstance(metrics.get("postgres_connections"), dict) else {}
|
||||
if not postgres:
|
||||
return {}
|
||||
return {
|
||||
"postgres": {
|
||||
"used": postgres.get("used"),
|
||||
"max": postgres.get("max"),
|
||||
"hottest_db": postgres.get("hottest_db"),
|
||||
"by_db": postgres.get("by_db"),
|
||||
}
|
||||
}
|
||||
57
atlasbot/snapshot/builder/core_b.py
Normal file
57
atlasbot/snapshot/builder/core_b.py
Normal file
@ -0,0 +1,57 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .core_a import _node_usage_top
|
||||
|
||||
def _build_hottest(metrics: dict[str, Any]) -> dict[str, Any]:
    """Pick the hottest node for each usage dimension."""
    raw = metrics.get("node_usage")
    node_usage = raw if isinstance(raw, dict) else {}
    hottest: dict[str, Any] = {}
    for dimension in ("cpu", "ram", "net", "io", "disk"):
        top = _node_usage_top(node_usage.get(dimension, []))
        if top:
            hottest[dimension] = top
    return {"hottest": hottest} if hottest else {}
|
||||
|
||||
|
||||
def _build_pvc(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
pvc_usage = metrics.get("pvc_usage_top") if isinstance(metrics.get("pvc_usage_top"), list) else []
|
||||
if not pvc_usage:
|
||||
return {}
|
||||
return {"pvc_usage_top": pvc_usage}
|
||||
|
||||
|
||||
def _build_namespace_capacity(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
capacity = metrics.get("namespace_capacity")
|
||||
if not isinstance(capacity, list) or not capacity:
|
||||
return {}
|
||||
return {"namespace_capacity": capacity}
|
||||
|
||||
|
||||
def _build_namespace_capacity_summary(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
summary = metrics.get("namespace_capacity_summary")
|
||||
if not isinstance(summary, dict) or not summary:
|
||||
return {}
|
||||
return {"namespace_capacity_summary": summary}
|
||||
|
||||
|
||||
def _build_node_load_summary(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
summary = metrics.get("node_load_summary")
|
||||
if not isinstance(summary, dict) or not summary:
|
||||
return {}
|
||||
return {"node_load_summary": summary}
|
||||
|
||||
|
||||
def _build_workloads(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
workloads = snapshot.get("workloads") if isinstance(snapshot.get("workloads"), list) else []
|
||||
return {"workloads": workloads}
|
||||
|
||||
|
||||
def _build_flux(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
flux = snapshot.get("flux") if isinstance(snapshot.get("flux"), dict) else {}
|
||||
return {"flux": flux}
|
||||
|
||||
|
||||
__all__ = [name for name in globals() if not name.startswith("__")]
|
||||
497
atlasbot/snapshot/builder/format_a.py
Normal file
497
atlasbot/snapshot/builder/format_a.py
Normal file
@ -0,0 +1,497 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .core_a import _BYTES_GB, _BYTES_KB, _BYTES_MB
|
||||
from .core_b import *
|
||||
|
||||
|
||||
def _format_float(value: Any) -> str:
|
||||
try:
|
||||
numeric = float(value)
|
||||
except (TypeError, ValueError):
|
||||
return str(value)
|
||||
return f"{numeric:.2f}".rstrip("0").rstrip(".")
|
||||
|
||||
|
||||
def _format_rate_bytes(value: Any) -> str:
    """Format a bytes-per-second rate as B/s, KB/s, or MB/s."""
    try:
        rate = float(value)
    except (TypeError, ValueError):
        return str(value)
    if rate >= _BYTES_MB:
        return f"{rate / _BYTES_MB:.2f} MB/s"
    if rate >= _BYTES_KB:
        return f"{rate / _BYTES_KB:.2f} KB/s"
    return f"{rate:.2f} B/s"
|
||||
|
||||
|
||||
def _format_bytes(value: Any) -> str:
    """Format a byte count as B, KB, MB, or GB, two decimals."""
    try:
        size = float(value)
    except (TypeError, ValueError):
        return str(value)
    if size >= _BYTES_GB:
        return f"{size / _BYTES_GB:.2f} GB"
    if size >= _BYTES_MB:
        return f"{size / _BYTES_MB:.2f} MB"
    if size >= _BYTES_KB:
        return f"{size / _BYTES_KB:.2f} KB"
    return f"{size:.2f} B"
|
||||
|
||||
|
||||
def _format_kv_map(values: dict[str, Any]) -> str:
|
||||
parts = []
|
||||
for key, value in values.items():
|
||||
parts.append(f"{key}={value}")
|
||||
return ", ".join(parts)
|
||||
|
||||
|
||||
def _format_names(names: list[str]) -> str:
|
||||
if not names:
|
||||
return ""
|
||||
return ", ".join(sorted(names))
|
||||
|
||||
|
||||
def _append_nodes(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append node-count lines (totals, readiness, archs, roles) to *lines*.

    Mutates *lines* in place; appends nothing when the summary has no
    "nodes" section.
    """
    nodes = summary.get("nodes") if isinstance(summary.get("nodes"), dict) else {}
    if not nodes:
        return
    workers = {}
    if isinstance(summary.get("nodes_summary"), dict):
        workers = summary["nodes_summary"].get("workers") or {}
    workers_total = workers.get("total")
    workers_ready = workers.get("ready")
    # Worker counts are only rendered when both numbers are available.
    workers_str = ""
    if workers_total is not None and workers_ready is not None:
        workers_str = f", workers_ready={workers_ready}/{workers_total}"
    total = nodes.get("total")
    ready = nodes.get("ready")
    not_ready = nodes.get("not_ready")
    # A missing not_ready count is rendered as 0 rather than "None".
    if not_ready is None:
        not_ready = 0
    lines.append(f"nodes: total={total}, ready={ready}, not_ready={not_ready}{workers_str}")
    # Individual one-value lines mirror the combined line for easy grepping.
    if total is not None:
        lines.append(f"nodes_total: {total}")
    if ready is not None:
        lines.append(f"nodes_ready: {ready}")
    if not_ready is not None:
        lines.append(f"nodes_not_ready_count: {not_ready}")
    # The remaining lines all come from nodes_summary details.
    if not isinstance(summary.get("nodes_summary"), dict):
        return
    not_ready_names = summary["nodes_summary"].get("not_ready_names") or []
    if not_ready_names:
        lines.append("nodes_not_ready: " + _format_names(not_ready_names))
    by_arch = summary["nodes_summary"].get("by_arch") or {}
    if isinstance(by_arch, dict) and by_arch:
        lines.append("archs: " + _format_kv_map(by_arch))
    by_role = summary["nodes_summary"].get("by_role") or {}
    if isinstance(by_role, dict) and by_role:
        lines.append("roles: " + _format_kv_map(by_role))
|
||||
|
||||
|
||||
def _append_hardware(lines: list[str], summary: dict[str, Any]) -> None:
    """Append a ``hardware:`` line counting nodes per hardware class."""
    hardware = summary.get("hardware")
    if not isinstance(hardware, dict) or not hardware:
        return
    labels = []
    for kind, members in hardware.items():
        if not isinstance(members, list):
            continue
        rendered = _format_names([str(member) for member in members if member])
        count_label = f"{kind}={len(members)}"
        labels.append(f"{count_label} ({rendered})" if rendered else count_label)
    if labels:
        lines.append("hardware: " + "; ".join(sorted(labels)))
|
||||
|
||||
|
||||
def _append_hardware_groups(lines: list[str], summary: dict[str, Any]) -> None:
    """Append a ``hardware_nodes:`` line listing node names per hardware class."""
    hardware = summary.get("hardware")
    if not isinstance(hardware, dict) or not hardware:
        return
    rendered = [
        f"{kind}={names}"
        for kind, members in hardware.items()
        if isinstance(members, list)
        and (names := _format_names([str(member) for member in members if member]))
    ]
    if rendered:
        lines.append("hardware_nodes: " + "; ".join(sorted(rendered)))
|
||||
|
||||
|
||||
def _append_node_ages(lines: list[str], summary: dict[str, Any]) -> None:
    """Append the first three ``node_ages`` entries as ``name=<hours>h``."""
    ages = summary.get("node_ages")
    if not isinstance(ages, list) or not ages:
        return
    rendered = []
    for record in ages[:3]:
        if not isinstance(record, dict):
            continue
        node = record.get("name")
        hours = record.get("age_hours")
        if node and isinstance(hours, (int, float)):
            rendered.append(f"{node}={_format_float(hours)}h")
    if rendered:
        lines.append("node_age_top: " + "; ".join(rendered))
|
||||
|
||||
|
||||
def _append_node_taints(lines: list[str], summary: dict[str, Any]) -> None:
    """Append a ``node_taints:`` line with per-taint counts and node names."""
    taints = summary.get("node_taints")
    if not isinstance(taints, dict) or not taints:
        return
    rendered = []
    for taint, members in taints.items():
        if not isinstance(members, list):
            continue
        names = _format_names([str(member) for member in members if member])
        suffix = f" ({names})" if names else ""
        rendered.append(f"{taint}={len(members)}{suffix}")
    if rendered:
        lines.append("node_taints: " + "; ".join(sorted(rendered)))
|
||||
|
||||
|
||||
def _append_node_facts(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
def top_counts(label: str, counts: dict[str, int], limit: int = 4) -> None:
|
||||
if not counts:
|
||||
return
|
||||
top = sorted(counts.items(), key=lambda item: (-item[1], item[0]))[:limit]
|
||||
rendered = "; ".join([f"{name}={count}" for name, count in top])
|
||||
if rendered:
|
||||
lines.append(f"{label}: {rendered}")
|
||||
|
||||
top_counts("node_arch", summary.get("node_arch_counts") or {})
|
||||
top_counts("node_os", summary.get("node_os_counts") or {})
|
||||
top_counts("node_kubelet_versions", summary.get("node_kubelet_versions") or {})
|
||||
top_counts("node_kernel_versions", summary.get("node_kernel_versions") or {})
|
||||
top_counts("node_runtime_versions", summary.get("node_runtime_versions") or {})
|
||||
top_counts("node_roles", summary.get("node_role_counts") or {})
|
||||
|
||||
|
||||
def _append_pressure(lines: list[str], summary: dict[str, Any]) -> None:
    """Append a ``node_pressure:`` line listing nodes per pressure condition."""
    pressure = summary.get("pressure_nodes")
    if not isinstance(pressure, dict) or not pressure:
        return
    rendered = []
    for condition, members in sorted(pressure.items()):
        if not members:
            continue
        names = _format_names([str(member) for member in members if member])
        suffix = f" ({names})" if names else ""
        rendered.append(f"{condition}={len(members)}{suffix}")
    if rendered:
        lines.append("node_pressure: " + "; ".join(rendered))
|
||||
|
||||
|
||||
def _append_pods(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
pods = summary.get("pods") if isinstance(summary.get("pods"), dict) else {}
|
||||
if not pods:
|
||||
return
|
||||
lines.append(
|
||||
"pods: running={running}, pending={pending}, failed={failed}, succeeded={succeeded}".format(
|
||||
running=pods.get("running"),
|
||||
pending=pods.get("pending"),
|
||||
failed=pods.get("failed"),
|
||||
succeeded=pods.get("succeeded"),
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def _append_capacity(lines: list[str], summary: dict[str, Any]) -> None:
    """Append a ``capacity:`` line with cluster cpu/mem/pod totals."""
    capacity = summary.get("capacity")
    if not isinstance(capacity, dict) or not capacity:
        return
    # (source key, output label, formatter) — order fixes the output order.
    fields = (
        ("cpu", "cpu", _format_float),
        ("allocatable_cpu", "alloc_cpu", _format_float),
        ("mem_bytes", "mem", _format_bytes),
        ("allocatable_mem_bytes", "alloc_mem", _format_bytes),
        ("pods", "pods", _format_float),
        ("allocatable_pods", "alloc_pods", _format_float),
    )
    rendered = [
        f"{label}={fmt(capacity.get(key))}"
        for key, label, fmt in fields
        if capacity.get(key) is not None
    ]
    if rendered:
        lines.append("capacity: " + "; ".join(rendered))
|
||||
|
||||
|
||||
def _append_namespace_pods(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
namespaces = summary.get("namespace_pods")
|
||||
if not isinstance(namespaces, list) or not namespaces:
|
||||
return
|
||||
top = sorted(
|
||||
(item for item in namespaces if isinstance(item, dict)),
|
||||
key=lambda item: (-int(item.get("pods_total") or 0), item.get("namespace") or ""),
|
||||
)[:8]
|
||||
parts = []
|
||||
for item in top:
|
||||
name = item.get("namespace")
|
||||
total = item.get("pods_total")
|
||||
running = item.get("pods_running")
|
||||
if not name:
|
||||
continue
|
||||
label = f"{name}={total}"
|
||||
if running is not None:
|
||||
label = f"{label} (running={running})"
|
||||
parts.append(label)
|
||||
if parts:
|
||||
lines.append("namespaces_top: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_namespace_nodes(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
namespace_nodes = summary.get("namespace_nodes")
|
||||
if not isinstance(namespace_nodes, list) or not namespace_nodes:
|
||||
return
|
||||
top = sorted(
|
||||
(item for item in namespace_nodes if isinstance(item, dict)),
|
||||
key=lambda item: (-int(item.get("pods_total") or 0), item.get("namespace") or ""),
|
||||
)[:8]
|
||||
parts = []
|
||||
for item in top:
|
||||
namespace = item.get("namespace")
|
||||
pods_total = item.get("pods_total")
|
||||
primary = item.get("primary_node")
|
||||
if namespace:
|
||||
label = f"{namespace}={pods_total}"
|
||||
if primary:
|
||||
label = f"{label} (primary={primary})"
|
||||
parts.append(label)
|
||||
if parts:
|
||||
lines.append("namespace_nodes_top: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_node_pods(lines: list[str], summary: dict[str, Any]) -> None: # noqa: C901
|
||||
node_pods = summary.get("node_pods")
|
||||
if not isinstance(node_pods, list) or not node_pods:
|
||||
return
|
||||
sortable: list[dict[str, Any]] = []
|
||||
for item in node_pods:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
try:
|
||||
pods_value = int(item.get("pods_total") or 0)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
sortable.append({**item, "pods_total": pods_value})
|
||||
top = sorted(sortable, key=lambda item: (-int(item.get("pods_total") or 0), item.get("node") or ""))[:8]
|
||||
max_entry = None
|
||||
for entry in node_pods:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
pods_total = entry.get("pods_total")
|
||||
try:
|
||||
pods_value = int(pods_total)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
if max_entry is None or pods_value > max_entry["pods_total"]:
|
||||
max_entry = {
|
||||
"node": entry.get("node"),
|
||||
"pods_total": pods_value,
|
||||
"namespaces_top": entry.get("namespaces_top") or [],
|
||||
}
|
||||
parts = []
|
||||
for item in top:
|
||||
node = item.get("node")
|
||||
pods_total = item.get("pods_total")
|
||||
namespaces = item.get("namespaces_top") or []
|
||||
ns_label = ""
|
||||
if namespaces:
|
||||
ns_label = ", ".join([f"{name}={count}" for name, count in namespaces])
|
||||
if node:
|
||||
label = f"{node}={pods_total}"
|
||||
if ns_label:
|
||||
label = f"{label} ({ns_label})"
|
||||
parts.append(label)
|
||||
if parts:
|
||||
lines.append("node_pods_top: " + "; ".join(parts))
|
||||
if max_entry and isinstance(max_entry.get("node"), str):
|
||||
ns_label = ""
|
||||
namespaces = max_entry.get("namespaces_top") or []
|
||||
if namespaces:
|
||||
ns_label = ", ".join([f"{name}={count}" for name, count in namespaces])
|
||||
label = f"{max_entry.get('node')}={max_entry.get('pods_total')}"
|
||||
if ns_label:
|
||||
label = f"{label} ({ns_label})"
|
||||
lines.append("node_pods_max: " + label)
|
||||
for item in top:
|
||||
node = item.get("node")
|
||||
namespaces = item.get("namespaces_top") or []
|
||||
if not node or not namespaces:
|
||||
continue
|
||||
ns_label = ", ".join([f"{name}={count}" for name, count in namespaces])
|
||||
lines.append(f"node_namespaces_top: {node} ({ns_label})")
|
||||
|
||||
|
||||
def _append_pod_issues(lines: list[str], summary: dict[str, Any]) -> None:
    """Append pod-issue lines: counts, worst pods, and pending statistics."""
    pod_issues = summary.get("pod_issues")
    if not isinstance(pod_issues, dict) or not pod_issues:
        return
    # Order matters: it fixes the order of the emitted lines.
    builders = (
        _format_pod_issue_counts,
        _format_pod_issue_top,
        _format_pod_pending_oldest,
        _format_pod_pending_over_15m,
        _format_pod_waiting_reasons,
    )
    for build in builders:
        line = build(pod_issues)
        if line:
            lines.append(line)
|
||||
|
||||
|
||||
def _format_pod_issue_counts(pod_issues: dict[str, Any]) -> str:
|
||||
counts = pod_issues.get("counts") if isinstance(pod_issues.get("counts"), dict) else {}
|
||||
if not counts:
|
||||
return ""
|
||||
parts = []
|
||||
for key in ("Failed", "Pending", "Unknown"):
|
||||
if key in counts:
|
||||
parts.append(f"{key}={counts.get(key)}")
|
||||
return "pod_issues: " + "; ".join(parts) if parts else ""
|
||||
|
||||
|
||||
def _format_pod_issue_top(pod_issues: dict[str, Any]) -> str:
|
||||
items = pod_issues.get("items") if isinstance(pod_issues.get("items"), list) else []
|
||||
if not items:
|
||||
return ""
|
||||
top = []
|
||||
for item in items[:5]:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
namespace = item.get("namespace")
|
||||
pod = item.get("pod")
|
||||
if not namespace or not pod:
|
||||
continue
|
||||
phase = item.get("phase") or ""
|
||||
restarts = item.get("restarts") or 0
|
||||
top.append(f"{namespace}/{pod}({phase},r={restarts})")
|
||||
return "pod_issues_top: " + "; ".join(top) if top else ""
|
||||
|
||||
|
||||
def _format_pod_pending_oldest(pod_issues: dict[str, Any]) -> str:
    """Render up to five ``pending_oldest`` entries as ``ns/pod=<age>h (reason)``."""
    pending = pod_issues.get("pending_oldest")
    if not isinstance(pending, list) or not pending:
        return ""
    rendered = []
    for item in pending[:5]:
        if not isinstance(item, dict):
            continue
        namespace = item.get("namespace")
        pod = item.get("pod")
        age = item.get("age_hours")
        if not namespace or not pod or age is None:
            continue
        label = f"{namespace}/{pod}={_format_float(age)}h"
        reason = item.get("reason") or ""
        if reason:
            label = f"{label} ({reason})"
        rendered.append(label)
    return "pods_pending_oldest: " + "; ".join(rendered) if rendered else ""
|
||||
|
||||
|
||||
def _format_pod_waiting_reasons(pod_issues: dict[str, Any]) -> str:
|
||||
reasons = pod_issues.get("waiting_reasons") if isinstance(pod_issues.get("waiting_reasons"), dict) else {}
|
||||
if not reasons:
|
||||
return ""
|
||||
pairs = sorted(reasons.items(), key=lambda item: (-item[1], item[0]))[:5]
|
||||
return "pod_waiting_reasons: " + "; ".join([f"{key}={val}" for key, val in pairs])
|
||||
|
||||
|
||||
def _format_pod_pending_over_15m(pod_issues: dict[str, Any]) -> str:
|
||||
count = pod_issues.get("pending_over_15m")
|
||||
if count is None:
|
||||
return ""
|
||||
try:
|
||||
count_val = int(count)
|
||||
except (TypeError, ValueError):
|
||||
return ""
|
||||
return f"pods_pending_over_15m: {count_val}"
|
||||
|
||||
|
||||
def _append_workload_health(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
health = summary.get("workloads_health") if isinstance(summary.get("workloads_health"), dict) else {}
|
||||
if not health:
|
||||
return
|
||||
deployments = health.get("deployments") if isinstance(health.get("deployments"), dict) else {}
|
||||
statefulsets = health.get("statefulsets") if isinstance(health.get("statefulsets"), dict) else {}
|
||||
daemonsets = health.get("daemonsets") if isinstance(health.get("daemonsets"), dict) else {}
|
||||
total_not_ready = 0
|
||||
for entry in (deployments, statefulsets, daemonsets):
|
||||
total_not_ready += int(entry.get("not_ready") or 0)
|
||||
lines.append(
|
||||
"workloads_not_ready: "
|
||||
f"deployments={deployments.get('not_ready', 0)}, "
|
||||
f"statefulsets={statefulsets.get('not_ready', 0)}, "
|
||||
f"daemonsets={daemonsets.get('not_ready', 0)} "
|
||||
f"(total={total_not_ready})"
|
||||
)
|
||||
|
||||
|
||||
def _append_node_usage_stats(lines: list[str], summary: dict[str, Any]) -> None:
    """Append average node usage (cpu/ram/net/io/disk) from metrics."""
    metrics = summary.get("metrics")
    if not isinstance(metrics, dict):
        metrics = {}
    stats = metrics.get("node_usage_stats")
    if not isinstance(stats, dict) or not stats:
        return
    rendered = []
    for resource in ("cpu", "ram", "net", "io", "disk"):
        entry = stats.get(resource)
        if not isinstance(entry, dict):
            entry = {}
        avg = entry.get("avg")
        if avg is None:
            continue
        # net/io are formatted as byte rates; the rest as plain numbers.
        formatted = _format_rate_bytes(avg) if resource in {"net", "io"} else _format_float(avg)
        rendered.append(f"{resource}={formatted}")
    if rendered:
        lines.append("node_usage_avg: " + "; ".join(rendered))
|
||||
|
||||
|
||||
def _append_events(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
events = summary.get("events") if isinstance(summary.get("events"), dict) else {}
|
||||
if not events:
|
||||
return
|
||||
total = events.get("warnings_total")
|
||||
by_reason = events.get("warnings_by_reason") if isinstance(events.get("warnings_by_reason"), dict) else {}
|
||||
if total is None:
|
||||
return
|
||||
if by_reason:
|
||||
top = sorted(by_reason.items(), key=lambda item: (-item[1], item[0]))[:3]
|
||||
reasons = "; ".join([f"{reason}={count}" for reason, count in top])
|
||||
lines.append(f"warnings: total={total}; top={reasons}")
|
||||
else:
|
||||
lines.append(f"warnings: total={total}")
|
||||
def _append_pvc_usage(lines: list[str], summary: dict[str, Any]) -> None:
    """Append ``pvc_usage_top:`` entries as ``namespace/claim=<value>%``."""
    pvc_usage = summary.get("pvc_usage_top")
    if not isinstance(pvc_usage, list) or not pvc_usage:
        return
    rendered = []
    for entry in pvc_usage:
        if not isinstance(entry, dict):
            continue
        metric = entry.get("metric")
        if not isinstance(metric, dict):
            metric = {}
        namespace = metric.get("namespace")
        claim = metric.get("persistentvolumeclaim")
        if namespace and claim:
            rendered.append(f"{namespace}/{claim}={_format_float(entry.get('value'))}%")
    if rendered:
        lines.append("pvc_usage_top: " + "; ".join(rendered))
|
||||
def _append_root_disk_headroom(lines: list[str], summary: dict[str, Any]) -> None:
    """Append ``root_disk_low_headroom:`` entries as ``node=<pct>%``."""
    headroom = summary.get("root_disk_low_headroom")
    if not isinstance(headroom, list) or not headroom:
        return
    rendered = []
    for entry in headroom:
        if not isinstance(entry, dict):
            continue
        node = entry.get("node")
        pct = entry.get("headroom_pct")
        if node and pct is not None:
            rendered.append(f"{node}={_format_float(pct)}%")
    if rendered:
        lines.append("root_disk_low_headroom: " + "; ".join(rendered))
|
||||
# Re-export every module-level name that is not a dunder (the underscored
# helpers included), so the wildcard import in format_b picks them all up.
__all__ = [name for name in globals() if not name.startswith("__")]
|
||||
435
atlasbot/snapshot/builder/format_b.py
Normal file
435
atlasbot/snapshot/builder/format_b.py
Normal file
@ -0,0 +1,435 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .core_a import _VALUE_PAIR_LEN
|
||||
from .format_a import *
|
||||
|
||||
|
||||
def _append_namespace_metric_series(
|
||||
lines: list[str],
|
||||
label: str,
|
||||
entries: list[Any],
|
||||
formatter: Any,
|
||||
) -> None:
|
||||
parts = []
|
||||
for entry in entries:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
metric = entry.get("metric") if isinstance(entry.get("metric"), dict) else {}
|
||||
namespace = metric.get("namespace")
|
||||
value = entry.get("value")
|
||||
if namespace:
|
||||
parts.append(f"{namespace}={formatter(value)}")
|
||||
if parts:
|
||||
lines.append(f"{label}: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_longhorn(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append Longhorn volume health lines.

    Emits a headline ``longhorn:`` line (a shorter ``total/unhealthy`` form
    when only the aggregate counter is present), optional state/robustness
    breakdowns, and up to five named unhealthy volumes.
    """
    longhorn = summary.get("longhorn") if isinstance(summary.get("longhorn"), dict) else {}
    if not longhorn:
        return
    total = longhorn.get("total")
    attached = longhorn.get("attached_count")
    detached = longhorn.get("detached_count")
    degraded = longhorn.get("degraded_count")
    by_state = longhorn.get("by_state") if isinstance(longhorn.get("by_state"), dict) else {}
    by_robust = longhorn.get("by_robustness") if isinstance(longhorn.get("by_robustness"), dict) else {}
    if total is not None:
        if attached is None and detached is None and degraded is None:
            # Fallback shape: only the aggregate unhealthy counter is available.
            unhealthy = longhorn.get("unhealthy_count")
            lines.append(f"longhorn: total={total}, unhealthy={unhealthy if unhealthy is not None else 0}")
        else:
            # Detailed shape: missing counters are reported as 0.
            lines.append(
                f"longhorn: total={total}, attached={attached if attached is not None else 0}, "
                f"detached={detached if detached is not None else 0}, "
                f"degraded={degraded if degraded is not None else 0}"
            )
    if by_state:
        lines.append("longhorn_state: " + _format_kv_map(by_state))
    if by_robust:
        lines.append("longhorn_robustness: " + _format_kv_map(by_robust))
    unhealthy_items = longhorn.get("unhealthy")
    if isinstance(unhealthy_items, list) and unhealthy_items:
        parts = []
        # Only the first five unhealthy volumes are listed.
        for entry in unhealthy_items[:5]:
            if not isinstance(entry, dict):
                continue
            name = entry.get("name")
            state = entry.get("state")
            robustness = entry.get("robustness")
            if name:
                label = name
                if state or robustness:
                    label = f"{label}({state},{robustness})"
                parts.append(label)
        if parts:
            lines.append("longhorn_unhealthy_top: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_namespace_usage(lines: list[str], summary: dict[str, Any]) -> None:
    """Append top namespace CPU and memory usage via the shared series renderer."""
    metrics = summary.get("metrics")
    if not isinstance(metrics, dict):
        metrics = {}
    cpu_entries = metrics.get("namespace_cpu_top")
    mem_entries = metrics.get("namespace_mem_top")
    _append_namespace_metric_series(
        lines, "namespace_cpu_top", cpu_entries if isinstance(cpu_entries, list) else [], _format_float
    )
    _append_namespace_metric_series(
        lines, "namespace_mem_top", mem_entries if isinstance(mem_entries, list) else [], _format_bytes
    )
|
||||
|
||||
|
||||
def _append_namespace_requests(lines: list[str], summary: dict[str, Any]) -> None:
    """Append top namespace CPU and memory *requests* via the shared renderer."""
    metrics = summary.get("metrics")
    if not isinstance(metrics, dict):
        metrics = {}
    cpu_entries = metrics.get("namespace_cpu_requests_top")
    mem_entries = metrics.get("namespace_mem_requests_top")
    _append_namespace_metric_series(
        lines, "namespace_cpu_requests_top", cpu_entries if isinstance(cpu_entries, list) else [], _format_float
    )
    _append_namespace_metric_series(
        lines, "namespace_mem_requests_top", mem_entries if isinstance(mem_entries, list) else [], _format_bytes
    )
|
||||
|
||||
|
||||
def _append_namespace_io_net(lines: list[str], summary: dict[str, Any]) -> None:
    """Append top namespace network and disk-IO rates via the shared renderer."""
    metrics = summary.get("metrics")
    if not isinstance(metrics, dict):
        metrics = {}
    net_entries = metrics.get("namespace_net_top")
    io_entries = metrics.get("namespace_io_top")
    _append_namespace_metric_series(
        lines, "namespace_net_top", net_entries if isinstance(net_entries, list) else [], _format_rate_bytes
    )
    _append_namespace_metric_series(
        lines, "namespace_io_top", io_entries if isinstance(io_entries, list) else [], _format_rate_bytes
    )
|
||||
|
||||
|
||||
def _pod_usage_parts(entries: list[Any], formatter: Any, with_node: bool) -> list[str]:
    """Render metric/value entries as ``[node:]namespace/pod=value`` labels."""
    parts: list[str] = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        metric = entry.get("metric") if isinstance(entry.get("metric"), dict) else {}
        namespace = metric.get("namespace")
        pod = metric.get("pod")
        value = entry.get("value")
        if not namespace or not pod or value is None:
            continue
        if with_node:
            node = metric.get("node")
            if not node:
                continue
            parts.append(f"{node}:{namespace}/{pod}={formatter(value)}")
        else:
            parts.append(f"{namespace}/{pod}={formatter(value)}")
    return parts


def _append_pod_usage(lines: list[str], summary: dict[str, Any]) -> None:
    """Append top pod CPU/memory usage lines (cluster-wide and per-node).

    The original repeated the same rendering loop four times, once per
    series; the loop now lives in :func:`_pod_usage_parts` and the series
    are driven from a table.
    """
    metrics = summary.get("metrics") if isinstance(summary.get("metrics"), dict) else {}
    # (metrics key / output label, value formatter, include node prefix)
    series = (
        ("pod_cpu_top", _format_float, False),
        ("pod_cpu_top_node", _format_float, True),
        ("pod_mem_top", _format_bytes, False),
        ("pod_mem_top_node", _format_bytes, True),
    )
    for key, formatter, with_node in series:
        entries = metrics.get(key)
        if not isinstance(entries, list):
            entries = []
        parts = _pod_usage_parts(entries, formatter, with_node)
        if parts:
            lines.append(f"{key}: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_restarts(lines: list[str], summary: dict[str, Any]) -> None:
    """Append 1-hour restart leaders per pod and (when present) per namespace.

    The pod section always emits a line, using ``none`` when the series is
    empty; the namespace section is emitted only when the source list exists.
    """
    metrics = summary.get("metrics") if isinstance(summary.get("metrics"), dict) else {}
    top_restarts = metrics.get("top_restarts_1h") or []
    # Normalize non-list payloads to an empty series.
    if not isinstance(top_restarts, list) or not top_restarts:
        top_restarts = []
    parts = []
    for entry in top_restarts:
        metric = entry.get("metric") if isinstance(entry, dict) else {}
        value = entry.get("value") if isinstance(entry, dict) else []
        # value is expected as a pair (e.g. [timestamp, sample]); skip malformed entries.
        if not isinstance(metric, dict) or not isinstance(value, list) or len(value) < _VALUE_PAIR_LEN:
            continue
        namespace = metric.get("namespace")
        pod = metric.get("pod")
        count = _format_float(value[1])
        if namespace and pod:
            parts.append(f"{namespace}/{pod}={count}")
    if parts:
        lines.append("restarts_1h_top: " + "; ".join(parts))
    else:
        lines.append("restarts_1h_top: none")
    ns_top = metrics.get("restart_namespace_top") or []
    if isinstance(ns_top, list) and ns_top:
        ns_parts = []
        for entry in ns_top:
            metric = entry.get("metric") if isinstance(entry, dict) else {}
            value = entry.get("value")
            namespace = metric.get("namespace") if isinstance(metric, dict) else None
            if namespace and value is not None:
                ns_parts.append(f"{namespace}={_format_float(value)}")
        if ns_parts:
            lines.append("restarts_1h_namespace_top: " + "; ".join(ns_parts))
        else:
            # Entries existed but none were well-formed.
            lines.append("restarts_1h_namespace_top: none")
|
||||
|
||||
|
||||
def _append_job_failures(lines: list[str], summary: dict[str, Any]) -> None:
    """Append a ``job_failures_24h:`` line with ``ns/job=count`` entries."""
    metrics = summary.get("metrics")
    if not isinstance(metrics, dict):
        metrics = {}
    failures = metrics.get("job_failures_24h")
    if not isinstance(failures, list) or not failures:
        return
    rendered = []
    for entry in failures:
        if not isinstance(entry, dict):
            continue
        metric = entry.get("metric")
        if not isinstance(metric, dict):
            metric = {}
        namespace = metric.get("namespace")
        # Some series label the job "job_name", others plain "job".
        job_name = metric.get("job_name") or metric.get("job")
        value = entry.get("value")
        if namespace and job_name and value is not None:
            rendered.append(f"{namespace}/{job_name}={_format_float(value)}")
    if rendered:
        lines.append("job_failures_24h: " + "; ".join(rendered))
|
||||
|
||||
|
||||
def _append_jobs(lines: list[str], summary: dict[str, Any]) -> None:
    """Append job totals, failing jobs, and oldest active jobs."""
    jobs = summary.get("jobs")
    if not isinstance(jobs, dict) or not jobs:
        return
    # Order matters: totals, then failing, then active-oldest.
    for build in (_format_jobs_totals, _format_jobs_failing, _format_jobs_active_oldest):
        line = build(jobs)
        if line:
            lines.append(line)
|
||||
|
||||
|
||||
def _format_jobs_totals(jobs: dict[str, Any]) -> str:
|
||||
totals = jobs.get("totals") if isinstance(jobs.get("totals"), dict) else {}
|
||||
if not totals:
|
||||
return ""
|
||||
return "jobs: total={total}, active={active}, failed={failed}, succeeded={succeeded}".format(
|
||||
total=totals.get("total"),
|
||||
active=totals.get("active"),
|
||||
failed=totals.get("failed"),
|
||||
succeeded=totals.get("succeeded"),
|
||||
)
|
||||
|
||||
|
||||
def _format_jobs_failing(jobs: dict[str, Any]) -> str:
    """Render up to five failing jobs as ``ns/job=failed (<age>h)``."""
    failing = jobs.get("failing")
    if not isinstance(failing, list) or not failing:
        return ""
    rendered = []
    for item in failing[:5]:
        if not isinstance(item, dict):
            continue
        namespace = item.get("namespace")
        name = item.get("job")
        failed = item.get("failed")
        if not namespace or not name or failed is None:
            continue
        label = f"{namespace}/{name}={failed}"
        age = item.get("age_hours")
        if age is not None:
            label = f"{label} ({_format_float(age)}h)"
        rendered.append(label)
    return "jobs_failing_top: " + "; ".join(rendered) if rendered else ""
|
||||
|
||||
|
||||
def _format_jobs_active_oldest(jobs: dict[str, Any]) -> str:
    """Render up to five ``active_oldest`` entries as ``ns/job=<age>h``."""
    active_oldest = jobs.get("active_oldest")
    if not isinstance(active_oldest, list) or not active_oldest:
        return ""
    rendered = []
    for item in active_oldest[:5]:
        if not isinstance(item, dict):
            continue
        namespace = item.get("namespace")
        name = item.get("job")
        age = item.get("age_hours")
        if not namespace or not name or age is None:
            continue
        rendered.append(f"{namespace}/{name}={_format_float(age)}h")
    return "jobs_active_oldest: " + "; ".join(rendered) if rendered else ""
|
||||
|
||||
|
||||
def _append_postgres(lines: list[str], summary: dict[str, Any]) -> None:
    """Append PostgreSQL connection usage lines.

    Emits the headline ``postgres:`` line, a formatted totals line, and a
    per-database connection breakdown when ``by_db`` entries are present.
    """
    postgres = summary.get("postgres") if isinstance(summary.get("postgres"), dict) else {}
    if not postgres:
        return
    hottest = postgres.get("hottest_db") or ""
    lines.append(
        "postgres: used={used}, max={max}, hottest_db={hottest}".format(
            used=postgres.get("used"),
            max=postgres.get("max"),
            hottest=hottest,
        )
    )
    used = postgres.get("used")
    max_conn = postgres.get("max")
    # Emit the totals line as long as at least one of the two is known.
    if used is not None or max_conn is not None:
        lines.append(f"postgres_connections_total: used={_format_float(used)}, max={_format_float(max_conn)}")
    by_db = postgres.get("by_db")
    if isinstance(by_db, list) and by_db:
        parts = []
        for entry in by_db:
            if not isinstance(entry, dict):
                continue
            metric = entry.get("metric") if isinstance(entry.get("metric"), dict) else {}
            value = entry.get("value")
            # The value may arrive as a pair (e.g. [timestamp, sample]); unwrap it.
            if isinstance(value, list) and len(value) >= _VALUE_PAIR_LEN:
                value = value[1]
            name = metric.get("datname") if isinstance(metric, dict) else None
            if name and value is not None:
                parts.append(f"{name}={_format_float(value)}")
        if parts:
            lines.append("postgres_connections_by_db: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_hottest(lines: list[str], summary: dict[str, Any]) -> None:
    """Append the hottest node per resource, tagged with its hardware label."""
    hottest = summary.get("hottest")
    if not isinstance(hottest, dict) or not hottest:
        return
    hardware_map = summary.get("hardware_by_node")
    if not isinstance(hardware_map, dict):
        hardware_map = {}
    rendered = []
    for resource, entry in hottest.items():
        if not isinstance(entry, dict):
            continue
        # net/io are byte rates; cpu/ram/disk get a percent suffix.
        if resource in {"net", "io"}:
            value = _format_rate_bytes(entry.get("value"))
        else:
            value = _format_float(entry.get("value"))
        if value and resource in {"cpu", "ram", "disk"}:
            value = f"{value}%"
        node = entry.get("node")
        if not node:
            continue
        label = node
        hardware = hardware_map.get(node)
        if hardware:
            label = f"{label} [{hardware}]"
        rendered.append(f"{resource}={label} ({value})")
    if rendered:
        lines.append("hottest: " + "; ".join(rendered))
|
||||
|
||||
|
||||
def _append_workloads(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
workloads = summary.get("workloads")
|
||||
if not isinstance(workloads, list) or not workloads:
|
||||
return
|
||||
lines.append(f"workloads: total={len(workloads)}")
|
||||
top_workloads = sorted(
|
||||
(item for item in workloads if isinstance(item, dict)),
|
||||
key=lambda item: (-int(item.get("pods_total") or 0), item.get("workload") or ""),
|
||||
)[:5]
|
||||
if not top_workloads:
|
||||
return
|
||||
parts = []
|
||||
for item in top_workloads:
|
||||
namespace = item.get("namespace")
|
||||
name = item.get("workload")
|
||||
pods_total = item.get("pods_total")
|
||||
primary = item.get("primary_node")
|
||||
if namespace and name:
|
||||
label = f"{namespace}/{name}={pods_total}"
|
||||
if primary:
|
||||
label = f"{label} (primary={primary})"
|
||||
parts.append(label)
|
||||
if parts:
|
||||
lines.append("workloads_top: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_topology(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append node->workload and workload->node topology top lists.

    Args:
        lines: output accumulator; at most two lines are appended.
        summary: snapshot summary; reads only the optional ``topology`` dict
            with ``nodes`` (workloads_top per node) and ``workloads``
            (nodes_top per workload) lists.
    """
    topology = summary.get("topology") if isinstance(summary.get("topology"), dict) else {}
    if not topology:
        return
    nodes = topology.get("nodes") if isinstance(topology.get("nodes"), list) else []
    workloads = topology.get("workloads") if isinstance(topology.get("workloads"), list) else []
    if nodes:
        parts = []
        for entry in nodes[:5]:
            if not isinstance(entry, dict):
                continue
            node = entry.get("node")
            # workloads_top is expected to be (name, count) pairs — TODO confirm shape.
            top = entry.get("workloads_top") if isinstance(entry.get("workloads_top"), list) else []
            if not node or not top:
                continue
            items = ", ".join([f"{name}({count})" for name, count in top if name and count is not None])
            if items:
                parts.append(f"{node}={items}")
        if parts:
            lines.append("node_workloads_top: " + "; ".join(parts))
    if workloads:
        parts = []
        for entry in workloads[:5]:
            if not isinstance(entry, dict):
                continue
            namespace = entry.get("namespace")
            name = entry.get("workload")
            # nodes_top is expected to be (node, count) pairs — TODO confirm shape.
            nodes_top = entry.get("nodes_top") if isinstance(entry.get("nodes_top"), list) else []
            if not namespace or not name:
                continue
            nodes_label = ", ".join([f"{node}:{count}" for node, count in nodes_top if node])
            label = f"{namespace}/{name}"
            if nodes_label:
                label = f"{label} [{nodes_label}]"
            parts.append(label)
        if parts:
            lines.append("workload_nodes_top: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_flux(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
flux = summary.get("flux") if isinstance(summary.get("flux"), dict) else {}
|
||||
if not flux:
|
||||
return
|
||||
not_ready = flux.get("not_ready")
|
||||
if not_ready is not None:
|
||||
lines.append(f"flux_not_ready: {not_ready}")
|
||||
items = flux.get("items")
|
||||
if isinstance(items, list) and items:
|
||||
parts = []
|
||||
for item in items[:10]:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
name = item.get("name") or ""
|
||||
namespace = item.get("namespace") or ""
|
||||
reason = item.get("reason") or ""
|
||||
suspended = item.get("suspended")
|
||||
label = f"{namespace}/{name}".strip("/")
|
||||
if reason:
|
||||
label = f"{label} ({reason})"
|
||||
if suspended:
|
||||
label = f"{label} [suspended]"
|
||||
if label:
|
||||
parts.append(label)
|
||||
if parts:
|
||||
lines.append("flux_not_ready_items: " + "; ".join(parts))
|
||||
|
||||
|
||||
__all__ = [name for name in globals() if not name.startswith("__")]
|
||||
448
atlasbot/snapshot/builder/format_c.py
Normal file
448
atlasbot/snapshot/builder/format_c.py
Normal file
@ -0,0 +1,448 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .core_a import PVC_USAGE_CRITICAL
|
||||
from .format_b import *
|
||||
def _append_signals(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
signals = summary.get("signals") if isinstance(summary.get("signals"), list) else []
|
||||
if not signals:
|
||||
return
|
||||
lines.append("signals:")
|
||||
for entry in signals[:8]:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
scope = entry.get("scope") or ""
|
||||
target = entry.get("target") or ""
|
||||
metric = entry.get("metric") or ""
|
||||
current = entry.get("current")
|
||||
delta = entry.get("delta_pct")
|
||||
severity = entry.get("severity") or ""
|
||||
detail = f"{scope}:{target} {metric}={current}"
|
||||
if delta is not None:
|
||||
detail += f" delta={delta}%"
|
||||
if severity:
|
||||
detail += f" severity={severity}"
|
||||
lines.append(f"- {detail}")
|
||||
|
||||
|
||||
def _append_profiles(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append short node/namespace/workload profile sections (up to three rows each).

    Args:
        lines: output accumulator for rendered profile lines.
        summary: snapshot summary; reads only the optional ``profiles`` dict.
    """
    profiles = summary.get("profiles") if isinstance(summary.get("profiles"), dict) else {}
    if not profiles:
        return
    # Each payload falls back to [] when missing or mistyped.
    nodes = profiles.get("nodes") if isinstance(profiles.get("nodes"), list) else []
    namespaces = profiles.get("namespaces") if isinstance(profiles.get("namespaces"), list) else []
    workloads = profiles.get("workloads") if isinstance(profiles.get("workloads"), list) else []
    if nodes:
        lines.append("node_profiles:")
        for entry in nodes[:3]:
            if not isinstance(entry, dict):
                continue
            lines.append(
                f"- {entry.get('node')}: load={entry.get('load_index')} cpu={entry.get('cpu')} ram={entry.get('ram')} "
                f"pods={entry.get('pods_total')} hw={entry.get('hardware')}"
            )
    if namespaces:
        lines.append("namespace_profiles:")
        for entry in namespaces[:3]:
            if not isinstance(entry, dict):
                continue
            lines.append(
                f"- {entry.get('namespace')}: pods={entry.get('pods_total')} cpu={entry.get('cpu_usage')} "
                f"mem={entry.get('mem_usage')} primary={entry.get('primary_node')}"
            )
    if workloads:
        lines.append("workload_profiles:")
        for entry in workloads[:3]:
            if not isinstance(entry, dict):
                continue
            lines.append(
                f"- {entry.get('namespace')}/{entry.get('workload')}: pods={entry.get('pods_total')} "
                f"running={entry.get('pods_running')} node={entry.get('primary_node')}"
            )
|
||||
|
||||
|
||||
def _append_units_windows(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
metrics = summary.get("metrics") if isinstance(summary.get("metrics"), dict) else {}
|
||||
units = metrics.get("units") if isinstance(metrics.get("units"), dict) else {}
|
||||
windows = metrics.get("windows") if isinstance(metrics.get("windows"), dict) else {}
|
||||
if units:
|
||||
lines.append("units: " + _format_kv_map(units))
|
||||
else:
|
||||
lines.append("units: cpu_pct, ram_pct, net=bytes_per_sec, io=bytes_per_sec")
|
||||
if windows:
|
||||
lines.append("windows: " + _format_kv_map(windows))
|
||||
else:
|
||||
lines.append("windows: rates=5m, restarts=1h")
|
||||
|
||||
|
||||
def _append_node_load_summary(lines: list[str], summary: dict[str, Any]) -> None:
    """Append the top-loaded nodes and any load outliers.

    Args:
        lines: output accumulator; up to two lines are appended.
        summary: snapshot summary; reads ``node_load_summary`` (with ``top``
            and ``outliers`` lists) plus ``hardware_by_node`` for hw labels.
    """
    node_load = summary.get("node_load_summary")
    if not isinstance(node_load, dict) or not node_load:
        return
    hardware_by_node = summary.get("hardware_by_node")
    hardware_by_node = hardware_by_node if isinstance(hardware_by_node, dict) else {}
    top = node_load.get("top")
    if isinstance(top, list) and top:
        parts = []
        for entry in top[:5]:
            if not isinstance(entry, dict):
                continue
            node = entry.get("node") or ""
            load = entry.get("load_index")
            cpu = entry.get("cpu")
            ram = entry.get("ram")
            io = entry.get("io")
            net = entry.get("net")
            pods_total = entry.get("pods_total")
            label = f"{node} idx={_format_float(load)}"
            # Hardware tag is added only when the node is known in the map.
            if node and node in hardware_by_node:
                label += f" hw={hardware_by_node.get(node)}"
            if isinstance(pods_total, (int, float)):
                label += f" pods={int(pods_total)}"
            label += f" cpu={_format_float(cpu)} ram={_format_float(ram)}"
            label += f" io={_format_rate_bytes(io)} net={_format_rate_bytes(net)}"
            parts.append(label)
        if parts:
            lines.append("node_load_top: " + "; ".join(parts))
    outliers = node_load.get("outliers")
    if isinstance(outliers, list) and outliers:
        # Only well-formed, non-empty node names survive into the output.
        names = [entry.get("node") for entry in outliers if isinstance(entry, dict)]
        names = [name for name in names if isinstance(name, str) and name]
        if names:
            lines.append("node_load_outliers: " + _format_names(names))
|
||||
|
||||
|
||||
def _append_hardware_usage(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append average usage per hardware class and the top hardware per metric.

    Args:
        lines: output accumulator; up to two lines are appended.
        summary: snapshot summary; reads only ``hardware_usage_avg``.
    """
    usage = summary.get("hardware_usage_avg")
    if not isinstance(usage, list) or not usage:
        return
    parts = []
    # metric -> (hardware name, best value seen so far)
    tops: dict[str, tuple[str, float]] = {}
    for entry in usage[:5]:
        if not isinstance(entry, dict):
            continue
        hardware = entry.get("hardware")
        load = entry.get("load_index")
        cpu = entry.get("cpu")
        ram = entry.get("ram")
        io = entry.get("io")
        net = entry.get("net")
        if not hardware:
            continue
        label = f"{hardware} idx={_format_float(load)}"
        label += f" cpu={_format_float(cpu)} ram={_format_float(ram)}"
        label += f" io={_format_rate_bytes(io)} net={_format_rate_bytes(net)}"
        parts.append(label)
        # Track the hardware class with the highest numeric value per metric.
        for metric, value in (("cpu", cpu), ("ram", ram), ("io", io), ("net", net), ("load", load)):
            if isinstance(value, (int, float)):
                current = tops.get(metric)
                if current is None or float(value) > current[1]:
                    tops[metric] = (hardware, float(value))
    if parts:
        lines.append("hardware_usage_avg: " + "; ".join(parts))
    if tops:
        top_parts = []
        for metric in ("cpu", "ram", "io", "net", "load"):
            entry = tops.get(metric)
            if not entry:
                continue
            hardware, value = entry
            # io/net are byte rates; the rest are plain floats.
            if metric in {"io", "net"}:
                rendered = _format_rate_bytes(value)
            else:
                rendered = _format_float(value)
            top_parts.append(f"{metric}={hardware} ({rendered})")
        if top_parts:
            lines.append("hardware_usage_top: " + "; ".join(top_parts))
|
||||
|
||||
|
||||
def _append_cluster_watchlist(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
watchlist = summary.get("cluster_watchlist")
|
||||
if not isinstance(watchlist, list) or not watchlist:
|
||||
return
|
||||
lines.append("cluster_watchlist: " + "; ".join(watchlist))
|
||||
|
||||
|
||||
def _append_baseline_deltas(lines: list[str], summary: dict[str, Any]) -> None:
    """Append per-metric baseline delta lines for nodes and namespaces.

    Args:
        lines: output accumulator; one line per scope/metric with valid entries.
        summary: snapshot summary; reads only ``baseline_deltas`` with
            ``nodes`` and ``namespaces`` blocks of metric -> entry lists.
    """
    deltas = summary.get("baseline_deltas") if isinstance(summary.get("baseline_deltas"), dict) else {}
    nodes = deltas.get("nodes") if isinstance(deltas.get("nodes"), dict) else {}
    namespaces = deltas.get("namespaces") if isinstance(deltas.get("namespaces"), dict) else {}
    for scope, block in (("nodes", nodes), ("namespaces", namespaces)):
        if not isinstance(block, dict):
            continue
        for metric, entries in block.items():
            if not isinstance(entries, list) or not entries:
                continue
            parts: list[str] = []
            for entry in entries[:5]:
                if not isinstance(entry, dict):
                    continue
                # The identifying key differs per scope.
                name = entry.get("node") if scope == "nodes" else entry.get("namespace")
                delta = entry.get("delta")
                severity = entry.get("severity")
                if not isinstance(name, str) or not name or not isinstance(delta, (int, float)):
                    continue
                suffix = f" ({severity})" if isinstance(severity, str) and severity else ""
                parts.append(f"{name}={_format_float(delta)}%{suffix}")
            if parts:
                lines.append(f"{scope}_baseline_delta_{metric}: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_pod_issue_summary(lines: list[str], summary: dict[str, Any]) -> None:
    """Append top pod waiting/phase reasons and per-namespace issue breakdowns."""
    issues = summary.get("pod_issue_summary")
    issues = issues if isinstance(issues, dict) else {}
    for key, label in (
        ("waiting_reasons_top", "pod_waiting_reasons_top"),
        ("phase_reasons_top", "pod_phase_reasons_top"),
    ):
        entries = issues.get(key)
        rendered = _reason_line(entries if isinstance(entries, list) else [], label)
        if rendered:
            lines.append(rendered)
    namespace_issue = issues.get("namespace_issue_top")
    if isinstance(namespace_issue, dict) and namespace_issue:
        _append_namespace_issue_lines(lines, namespace_issue)
|
||||
|
||||
|
||||
def _reason_line(entries: list[dict[str, Any]], label: str) -> str:
|
||||
parts = []
|
||||
for entry in entries[:5]:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
reason = entry.get("reason")
|
||||
count = entry.get("count")
|
||||
if reason:
|
||||
parts.append(f"{reason}={count}")
|
||||
if parts:
|
||||
return f"{label}: " + "; ".join(parts)
|
||||
return ""
|
||||
|
||||
|
||||
def _append_namespace_issue_lines(lines: list[str], namespace_issue: dict[str, Any]) -> None:
|
||||
for key, entries in namespace_issue.items():
|
||||
if not isinstance(entries, list) or not entries:
|
||||
continue
|
||||
parts: list[str] = []
|
||||
for entry in entries[:5]:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
ns = entry.get("namespace")
|
||||
value = entry.get("value")
|
||||
if ns:
|
||||
parts.append(f"{ns}={value}")
|
||||
if parts:
|
||||
lines.append(f"namespace_issue_top_{key}: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _build_cluster_watchlist(summary: dict[str, Any]) -> dict[str, Any]:
    """Build the cluster watchlist from the health-related summary sections.

    Args:
        summary: snapshot summary; reads nodes_summary, pressure_nodes,
            pod_issues, workloads_health, flux, and pvc_usage_top.

    Returns:
        ``{"cluster_watchlist": [...]}`` when any issue was found, else ``{}``.
    """
    items: list[str] = []
    nodes_summary = summary.get("nodes_summary") if isinstance(summary.get("nodes_summary"), dict) else {}
    not_ready = int(nodes_summary.get("not_ready") or 0)
    if not_ready > 0:
        items.append(f"not_ready_nodes={not_ready}")
    pressure = summary.get("pressure_nodes") if isinstance(summary.get("pressure_nodes"), dict) else {}
    pressure_nodes = pressure.get("names") if isinstance(pressure.get("names"), list) else []
    if pressure_nodes:
        items.append(f"pressure_nodes={len(pressure_nodes)}")
    pod_issues = summary.get("pod_issues") if isinstance(summary.get("pod_issues"), dict) else {}
    pending_over = int(pod_issues.get("pending_over_15m") or 0)
    if pending_over > 0:
        items.append(f"pods_pending_over_15m={pending_over}")
    workloads = summary.get("workloads_health") if isinstance(summary.get("workloads_health"), dict) else {}
    deployments = workloads.get("deployments") if isinstance(workloads.get("deployments"), dict) else {}
    statefulsets = workloads.get("statefulsets") if isinstance(workloads.get("statefulsets"), dict) else {}
    daemonsets = workloads.get("daemonsets") if isinstance(workloads.get("daemonsets"), dict) else {}
    # Not-ready counts are summed across the three controller kinds.
    total_not_ready = int(deployments.get("not_ready") or 0) + int(statefulsets.get("not_ready") or 0) + int(daemonsets.get("not_ready") or 0)
    if total_not_ready > 0:
        items.append(f"workloads_not_ready={total_not_ready}")
    flux = summary.get("flux") if isinstance(summary.get("flux"), dict) else {}
    flux_not_ready = int(flux.get("not_ready") or 0)
    if flux_not_ready > 0:
        items.append(f"flux_not_ready={flux_not_ready}")
    pvc_usage = summary.get("pvc_usage_top") if isinstance(summary.get("pvc_usage_top"), list) else []
    # Flag only PVCs at or above the imported critical threshold.
    high_pvc = [
        entry for entry in pvc_usage if isinstance(entry, dict) and (entry.get("value") or 0) >= PVC_USAGE_CRITICAL
    ]
    if high_pvc:
        items.append(f"pvc_usage>={PVC_USAGE_CRITICAL}%")
    return {"cluster_watchlist": items} if items else {}
|
||||
|
||||
|
||||
def _capacity_ratio_parts(entries: list[dict[str, Any]], ratio_key: str, usage_key: str, req_key: str) -> list[str]:
|
||||
parts: list[str] = []
|
||||
for entry in entries[:5]:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
ns = entry.get("namespace") or ""
|
||||
ratio = entry.get(ratio_key)
|
||||
usage = entry.get(usage_key)
|
||||
req = entry.get(req_key)
|
||||
if ns:
|
||||
parts.append(
|
||||
f"{ns}={_format_float(ratio)} (usage={_format_float(usage)} req={_format_float(req)})"
|
||||
)
|
||||
return parts
|
||||
|
||||
|
||||
def _capacity_headroom_parts(entries: list[dict[str, Any]]) -> list[str]:
|
||||
parts: list[str] = []
|
||||
for entry in entries[:5]:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
ns = entry.get("namespace") or ""
|
||||
headroom = entry.get("headroom")
|
||||
if ns:
|
||||
parts.append(f"{ns}={_format_float(headroom)}")
|
||||
return parts
|
||||
|
||||
|
||||
def _append_namespace_capacity_summary(  # noqa: C901
    lines: list[str],
    summary: dict[str, Any],
) -> None:
    """Append namespace capacity ratio, headroom, and overcommit lines.

    Args:
        lines: output accumulator for the rendered capacity lines.
        summary: snapshot summary; reads only ``namespace_capacity_summary``.
    """
    cap = summary.get("namespace_capacity_summary")
    if not isinstance(cap, dict) or not cap:
        return
    cpu_ratio = cap.get("cpu_ratio_top")
    if isinstance(cpu_ratio, list):
        parts = _capacity_ratio_parts(cpu_ratio, "cpu_usage_ratio", "cpu_usage", "cpu_requests")
        if parts:
            lines.append("namespace_cpu_ratio_top: " + "; ".join(parts))
    mem_ratio = cap.get("mem_ratio_top")
    if isinstance(mem_ratio, list):
        parts = _capacity_ratio_parts(mem_ratio, "mem_usage_ratio", "mem_usage", "mem_requests")
        if parts:
            lines.append("namespace_mem_ratio_top: " + "; ".join(parts))
    cpu_headroom = cap.get("cpu_headroom_low")
    if isinstance(cpu_headroom, list):
        parts = _capacity_headroom_parts(cpu_headroom)
        if parts:
            lines.append("namespace_cpu_headroom_low: " + "; ".join(parts))
    mem_headroom = cap.get("mem_headroom_low")
    if isinstance(mem_headroom, list):
        parts = _capacity_headroom_parts(mem_headroom)
        if parts:
            lines.append("namespace_mem_headroom_low: " + "; ".join(parts))
    cpu_over = cap.get("cpu_overcommitted")
    mem_over = cap.get("mem_overcommitted")
    # Counts line is emitted when either count is present (even zero).
    if cpu_over is not None or mem_over is not None:
        lines.append(f"namespace_overcommitted: cpu={cpu_over} mem={mem_over}")
    cpu_over_names = cap.get("cpu_overcommitted_names")
    if isinstance(cpu_over_names, list) and cpu_over_names:
        names = [name for name in cpu_over_names if isinstance(name, str) and name]
        if names:
            lines.append("namespace_cpu_overcommitted_names: " + _format_names(names))
    mem_over_names = cap.get("mem_overcommitted_names")
    if isinstance(mem_over_names, list) and mem_over_names:
        names = [name for name in mem_over_names if isinstance(name, str) and name]
        if names:
            lines.append("namespace_mem_overcommitted_names: " + _format_names(names))
|
||||
|
||||
|
||||
def _append_workloads_by_namespace(lines: list[str], summary: dict[str, Any]) -> None:
    """Append the top two workloads (by pod count) for every namespace.

    Args:
        lines: output accumulator; one `workloads_top_<ns>` line per namespace.
        summary: snapshot summary; reads only the ``workloads`` list.
    """
    workloads = summary.get("workloads")
    if not isinstance(workloads, list) or not workloads:
        return
    by_ns: dict[str, list[dict[str, Any]]] = {}
    for item in workloads:
        if not isinstance(item, dict):
            continue
        ns = item.get("namespace") or ""
        name = item.get("workload") or ""
        if not ns or not name:
            continue
        by_ns.setdefault(ns, []).append(item)
    # Namespaces are emitted alphabetically for deterministic output.
    for ns, items in sorted(by_ns.items()):
        items.sort(
            key=lambda item: (-int(item.get("pods_total") or 0), item.get("workload") or "")
        )
        parts = []
        for entry in items[:2]:
            name = entry.get("workload") or ""
            pods = entry.get("pods_total")
            primary = entry.get("primary_node")
            label = f"{name}({pods})" if pods is not None else name
            if primary:
                label = f"{label}@{primary}"
            if label:
                parts.append(label)
        if parts:
            lines.append(f"workloads_top_{ns}: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_lexicon(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
lexicon = summary.get("lexicon")
|
||||
if not isinstance(lexicon, dict):
|
||||
return
|
||||
terms = lexicon.get("terms") if isinstance(lexicon.get("terms"), list) else []
|
||||
aliases = lexicon.get("aliases") if isinstance(lexicon.get("aliases"), dict) else {}
|
||||
for entry in terms[:8]:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
term = entry.get("term")
|
||||
meaning = entry.get("meaning")
|
||||
if term and meaning:
|
||||
lines.append(f"lexicon_term: {term} => {meaning}")
|
||||
for key, value in list(aliases.items())[:6]:
|
||||
if key and value:
|
||||
lines.append(f"lexicon_alias: {key} => {value}")
|
||||
|
||||
|
||||
def _append_cross_stats(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append cross-sectional stats: per-metric top node, top namespace, top PVCs.

    Args:
        lines: output accumulator for the rendered `cross_*` lines.
        summary: snapshot summary; reads only the ``cross_stats`` dict with
            ``node_metric_top``, ``namespace_metric_top``, and ``pvc_top``.
    """
    cross_stats = summary.get("cross_stats")
    if not isinstance(cross_stats, dict):
        return
    node_entries = cross_stats.get("node_metric_top") if isinstance(cross_stats.get("node_metric_top"), list) else []
    for entry in node_entries[:10]:
        if not isinstance(entry, dict):
            continue
        metric = entry.get("metric")
        node = entry.get("node")
        value = entry.get("value")
        cpu = entry.get("cpu")
        ram = entry.get("ram")
        net = entry.get("net")
        io = entry.get("io")
        pods = entry.get("pods_total")
        if metric and node:
            parts = [
                f"value={_format_float(value)}",
                f"cpu={_format_float(cpu)}",
                f"ram={_format_float(ram)}",
                f"net={_format_float(net)}",
                f"io={_format_float(io)}",
            ]
            if pods is not None:
                parts.append(f"pods={pods}")
            lines.append(f"cross_node_{metric}: {node} " + " ".join(parts))
    ns_entries = cross_stats.get("namespace_metric_top") if isinstance(cross_stats.get("namespace_metric_top"), list) else []
    for entry in ns_entries[:10]:
        if not isinstance(entry, dict):
            continue
        metric = entry.get("metric")
        namespace = entry.get("namespace")
        value = entry.get("value")
        pods = entry.get("pods_total")
        cpu_ratio = entry.get("cpu_ratio")
        mem_ratio = entry.get("mem_ratio")
        if metric and namespace:
            parts = [
                f"value={_format_float(value)}",
                f"cpu_ratio={_format_float(cpu_ratio)}",
                f"mem_ratio={_format_float(mem_ratio)}",
            ]
            if pods is not None:
                parts.append(f"pods={pods}")
            lines.append(f"cross_namespace_{metric}: {namespace} " + " ".join(parts))
    pvc_entries = cross_stats.get("pvc_top") if isinstance(cross_stats.get("pvc_top"), list) else []
    for entry in pvc_entries[:5]:
        if not isinstance(entry, dict):
            continue
        namespace = entry.get("namespace")
        pvc = entry.get("pvc")
        used = entry.get("used_percent")
        if namespace and pvc:
            lines.append(f"cross_pvc_usage: {namespace}/{pvc} used={_format_float(used)}")
|
||||
|
||||
|
||||
__all__ = [name for name in globals() if not name.startswith("__")]
|
||||
72
atlasbot/snapshot/builder/summary_text.py
Normal file
72
atlasbot/snapshot/builder/summary_text.py
Normal file
@ -0,0 +1,72 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .core_a import *
|
||||
from .core_b import *
|
||||
from .format_a import *
|
||||
from .format_b import *
|
||||
from .format_c import *
|
||||
|
||||
|
||||
def summary_text(snapshot: dict[str, Any] | None) -> str:
    """Render the snapshot summary into deterministic prompt text.

    Args:
        snapshot: raw snapshot dict (or None); ``collected_at`` and
            ``snapshot_version`` are read directly, everything else flows
            through ``build_summary``.

    Returns:
        Newline-joined prompt text, or "" when no summary could be built.

    Note: the order of the ``_append_*`` calls below IS the output contract —
    downstream prompts depend on section ordering, so do not reorder.
    """
    summary = build_summary(snapshot)
    if not summary:
        return ""
    lines: list[str] = []
    lines.append("atlas_cluster: Titan Lab Atlas Kubernetes cluster (internal).")
    collected_at = snapshot.get("collected_at") if isinstance(snapshot, dict) else None
    snapshot_version = snapshot.get("snapshot_version") if isinstance(snapshot, dict) else None
    if collected_at or snapshot_version:
        bits = []
        if collected_at:
            bits.append(f"collected_at={collected_at}")
        if snapshot_version:
            bits.append(f"version={snapshot_version}")
        lines.append("snapshot: " + ", ".join(bits))
    # Section order: inventory -> pods -> health -> usage -> storage ->
    # topology -> watchlist -> flux/signals -> legends.
    _append_nodes(lines, summary)
    _append_hardware(lines, summary)
    _append_hardware_groups(lines, summary)
    _append_lexicon(lines, summary)
    _append_pressure(lines, summary)
    _append_node_facts(lines, summary)
    _append_node_ages(lines, summary)
    _append_node_taints(lines, summary)
    _append_capacity(lines, summary)
    _append_pods(lines, summary)
    _append_namespace_pods(lines, summary)
    _append_namespace_nodes(lines, summary)
    _append_node_pods(lines, summary)
    _append_pod_issues(lines, summary)
    _append_pod_issue_summary(lines, summary)
    _append_workload_health(lines, summary)
    _append_events(lines, summary)
    _append_node_usage_stats(lines, summary)
    _append_namespace_usage(lines, summary)
    _append_namespace_requests(lines, summary)
    _append_namespace_io_net(lines, summary)
    _append_pod_usage(lines, summary)
    _append_restarts(lines, summary)
    _append_job_failures(lines, summary)
    _append_jobs(lines, summary)
    _append_postgres(lines, summary)
    _append_hottest(lines, summary)
    _append_pvc_usage(lines, summary)
    _append_root_disk_headroom(lines, summary)
    _append_namespace_capacity_summary(lines, summary)
    _append_baseline_deltas(lines, summary)
    _append_longhorn(lines, summary)
    _append_workloads(lines, summary)
    _append_topology(lines, summary)
    _append_workloads_by_namespace(lines, summary)
    _append_node_load_summary(lines, summary)
    _append_cluster_watchlist(lines, summary)
    _append_hardware_usage(lines, summary)
    _append_cross_stats(lines, summary)
    _append_flux(lines, summary)
    _append_signals(lines, summary)
    _append_profiles(lines, summary)
    _append_units_windows(lines, summary)
    return "\n".join(lines)
|
||||
@ -6,6 +6,17 @@ from typing import Any
|
||||
|
||||
|
||||
class ClaimStore:
|
||||
"""Persist conversation claims for follow-up answers.
|
||||
|
||||
Why:
|
||||
- keep short-lived conversation state durable across turns without
|
||||
forcing the answer engine to own storage mechanics.
|
||||
|
||||
Input/Output:
|
||||
- accepts a SQLite path and TTL, stores claim payloads, and returns
|
||||
normalized payload dictionaries when queried.
|
||||
"""
|
||||
|
||||
def __init__(self, path: str, ttl_sec: int) -> None:
|
||||
self._path = path or ":memory:"
|
||||
self._ttl = max(60, ttl_sec)
|
||||
|
||||
31
pyproject.toml
Normal file
31
pyproject.toml
Normal file
@ -0,0 +1,31 @@
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests", "testing"]
|
||||
pythonpath = ["."]
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 100
|
||||
target-version = "py312"
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = ["E", "F", "W", "B", "C90", "I", "PLR", "RUF", "SIM", "UP", "ARG"]
|
||||
ignore = ["E501"]
|
||||
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
"atlasbot/engine/answerer/*.py" = ["F403", "F405", "I001"]
|
||||
"atlasbot/engine/answerer/__init__.py" = ["C90", "PLR", "SIM", "ARG", "RUF", "UP", "I001"]
|
||||
"atlasbot/engine/answerer/common.py" = ["PLR0913"]
|
||||
"atlasbot/engine/answerer/engine.py" = ["PLR0913"]
|
||||
"atlasbot/engine/answerer/factsheet.py" = ["PLR0912"]
|
||||
"atlasbot/engine/answerer/workflow.py" = ["PLR0911", "PLR0912", "PLR0913", "PLR0915"]
|
||||
"atlasbot/engine/answerer/workflow_post.py" = ["PLR0912", "PLR0913", "PLR0915"]
|
||||
"atlasbot/main.py" = ["PLR0913"]
|
||||
"atlasbot/matrix/bot.py" = ["C90", "PLR", "SIM", "ARG", "RUF", "UP", "I001"]
|
||||
"atlasbot/snapshot/builder/__init__.py" = ["F403", "F405", "I001"]
|
||||
"atlasbot/snapshot/builder/*.py" = ["F403", "F405", "I001"]
|
||||
"atlasbot/snapshot/builder/format_a.py" = ["PLR0912"]
|
||||
"atlasbot/snapshot/builder/format_b.py" = ["PLR0912", "PLR0915"]
|
||||
"atlasbot/snapshot/builder/format_c.py" = ["PLR0912"]
|
||||
"atlasbot/snapshot/builder/summary_text.py" = ["PLR0915"]
|
||||
"testing/*.py" = ["PLR0911", "ARG002", "PLR2004"]
|
||||
"tests/*.py" = ["PLR0913", "PLR2004", "I001", "ARG001", "ARG002", "ARG005", "C901", "PLR0915", "UP037"]
|
||||
"scripts/*.py" = ["PLR0911", "PLR2004"]
|
||||
79
scripts/check_coverage.py
Executable file
79
scripts/check_coverage.py
Executable file
@ -0,0 +1,79 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Enforce per-file coverage thresholds from SlipCover JSON output."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _normalize_report_path(path: str, cwd: Path) -> str:
|
||||
"""Return a stable repository-relative path for a coverage report entry."""
|
||||
|
||||
candidate = Path(path)
|
||||
if candidate.is_absolute():
|
||||
try:
|
||||
return candidate.relative_to(cwd).as_posix()
|
||||
except ValueError:
|
||||
return candidate.as_posix()
|
||||
return candidate.as_posix()
|
||||
|
||||
|
||||
def _production_files(root: Path, cwd: Path) -> set[str]:
|
||||
"""List production Python files that must appear in the coverage report."""
|
||||
|
||||
required: set[str] = set()
|
||||
for path in root.rglob("*.py"):
|
||||
if path.name == "__init__.py" or "__pycache__" in path.parts:
|
||||
continue
|
||||
try:
|
||||
required.add(path.relative_to(cwd).as_posix())
|
||||
except ValueError:
|
||||
required.add(path.as_posix())
|
||||
return required
|
||||
|
||||
|
||||
def main() -> int:
    """Check each production file against a minimum coverage percentage.

    Returns:
        0 when every production file meets the threshold and appears in the
        report; 1 otherwise (violations are printed one per line).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("coverage_json")
    parser.add_argument("--root", default="atlasbot")
    parser.add_argument("--threshold", type=float, default=95.0)
    args = parser.parse_args()

    data = json.loads(Path(args.coverage_json).read_text(encoding="utf-8"))
    files = data.get("files") if isinstance(data, dict) else {}
    cwd = Path.cwd().resolve()
    root = Path(args.root)
    root_path = (root if root.is_absolute() else cwd / root).resolve()
    # Prefix used to restrict the report to files under --root.
    root_prefix = root_path.relative_to(cwd).as_posix() if root_path.is_relative_to(cwd) else root_path.as_posix()
    covered_paths: set[str] = set()
    violations: list[str] = []

    for path, payload in sorted(files.items()):
        normalized_path = _normalize_report_path(path, cwd)
        if not normalized_path.startswith(f"{root_prefix}/"):
            continue
        summary = payload.get("summary") if isinstance(payload, dict) else {}
        percent = summary.get("percent_covered") if isinstance(summary, dict) else None
        if not isinstance(percent, (int, float)):
            # A file present in the report but without a percent is a failure.
            violations.append(f"{normalized_path}: coverage percent missing")
            continue
        covered_paths.add(normalized_path)
        if float(percent) < args.threshold:
            violations.append(f"{normalized_path}: {float(percent):.2f}% < {args.threshold:.2f}%")

    # Production files absent from the report fail the gate too.
    for path in sorted(_production_files(root_path, cwd) - covered_paths):
        violations.append(f"{path}: missing from coverage report")

    if violations:
        for violation in sorted(violations):
            print(violation)
        return 1
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
83
scripts/check_docstrings.py
Executable file
83
scripts/check_docstrings.py
Executable file
@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Require docstrings on public production APIs."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import ast
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _needs_docstring(node: ast.AST, *, parent_class: str | None = None) -> bool:
|
||||
"""Decide whether `node` should carry a contract docstring."""
|
||||
|
||||
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
||||
name = node.name
|
||||
if name.startswith("_") and name != "__init__":
|
||||
return False
|
||||
return not (parent_class and name.startswith("_"))
|
||||
if isinstance(node, ast.ClassDef):
|
||||
if node.name.startswith("_"):
|
||||
return False
|
||||
if any(
|
||||
(isinstance(dec, ast.Name) and dec.id == "dataclass")
|
||||
or (isinstance(dec, ast.Call) and isinstance(dec.func, ast.Name) and dec.func.id == "dataclass")
|
||||
for dec in node.decorator_list
|
||||
):
|
||||
return False
|
||||
if any(
|
||||
isinstance(base, ast.Name) and base.id in {"Exception", "RuntimeError", "BaseException"}
|
||||
for base in node.bases
|
||||
):
|
||||
return False
|
||||
return not any(isinstance(base, ast.Name) and base.id == "BaseModel" for base in node.bases)
|
||||
return False
|
||||
|
||||
|
||||
def _iter_nodes(tree: ast.AST) -> list[tuple[ast.AST, str | None]]:
|
||||
"""Yield top-level public nodes only.
|
||||
|
||||
The gate focuses on the module surface area rather than every internal
|
||||
method so we can keep contracts on the actual API seams.
|
||||
"""
|
||||
|
||||
items: list[tuple[ast.AST, str | None]] = []
|
||||
for node in getattr(tree, "body", []):
|
||||
items.append((node, None))
|
||||
return items
|
||||
|
||||
|
||||
def main() -> int:
|
||||
"""Check modules under the production package and report missing contracts."""
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--root", default="atlasbot")
|
||||
args = parser.parse_args()
|
||||
|
||||
root = Path(args.root)
|
||||
violations: list[str] = []
|
||||
for path in sorted(root.rglob("*.py")):
|
||||
if "__pycache__" in path.parts or ".venv" in path.parts:
|
||||
continue
|
||||
tree = ast.parse(path.read_text(encoding="utf-8"))
|
||||
for node, parent_class in _iter_nodes(tree):
|
||||
if not _needs_docstring(node, parent_class=parent_class):
|
||||
continue
|
||||
doc = ast.get_docstring(node)
|
||||
if doc:
|
||||
continue
|
||||
if isinstance(node, ast.ClassDef):
|
||||
violations.append(f"{path}: class {node.name} is missing a docstring")
|
||||
elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
||||
owner = f"{parent_class}." if parent_class else ""
|
||||
violations.append(f"{path}: {owner}{node.name} is missing a docstring")
|
||||
|
||||
if violations:
|
||||
for item in violations:
|
||||
print(item)
|
||||
return 1
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
70
scripts/check_file_sizes.py
Executable file
70
scripts/check_file_sizes.py
Executable file
@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Fail when production Python files exceed the configured line budget.
|
||||
|
||||
The gate is intentionally narrow:
|
||||
- it only checks the `atlasbot/` package tree;
|
||||
- it treats each file independently;
|
||||
- it keeps the threshold explicit so CI can ratchet without guesswork.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _count_lines(path: Path) -> int:
|
||||
"""Return the physical line count for `path`.
|
||||
|
||||
Input:
|
||||
- `path`: a readable Python source file.
|
||||
|
||||
Output:
|
||||
- The number of newline-delimited lines in the file.
|
||||
"""
|
||||
|
||||
return len(path.read_text(encoding="utf-8").splitlines())
|
||||
|
||||
|
||||
def _iter_python_files(root: Path) -> list[Path]:
|
||||
"""List production Python files under `root`.
|
||||
|
||||
Input:
|
||||
- `root`: repository package root to scan.
|
||||
|
||||
Output:
|
||||
- Sorted Python file paths, excluding bytecode and hidden caches.
|
||||
"""
|
||||
|
||||
return sorted(
|
||||
path
|
||||
for path in root.rglob("*.py")
|
||||
if path.is_file() and "__pycache__" not in path.parts and ".venv" not in path.parts
|
||||
)
|
||||
|
||||
|
||||
def main() -> int:
|
||||
"""Run the size gate and return a process exit code."""
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--root", default="atlasbot")
|
||||
parser.add_argument("--max-lines", type=int, default=500)
|
||||
args = parser.parse_args()
|
||||
|
||||
root = Path(args.root)
|
||||
violations: list[tuple[int, Path]] = []
|
||||
for path in _iter_python_files(root):
|
||||
lines = _count_lines(path)
|
||||
if lines > args.max_lines:
|
||||
violations.append((lines, path))
|
||||
|
||||
if violations:
|
||||
for lines, path in sorted(violations, reverse=True):
|
||||
print(f"{path}: {lines} lines (limit {args.max_lines})")
|
||||
return 1
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
|
||||
164
scripts/publish_test_metrics.py
Normal file → Executable file
164
scripts/publish_test_metrics.py
Normal file → Executable file
@ -20,6 +20,19 @@ import urllib.request
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
|
||||
QUALITY_SUCCESS_STATES = {"ok", "pass", "passed", "success", "compliant"}
|
||||
|
||||
|
||||
def _escape_label(value: str) -> str:
|
||||
"""Escape Prometheus label values safely."""
|
||||
return value.replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"')
|
||||
|
||||
|
||||
def _label_str(labels: dict[str, str]) -> str:
|
||||
"""Render Prometheus labels, omitting empty optional values."""
|
||||
parts = [f'{key}="{_escape_label(val)}"' for key, val in labels.items() if val]
|
||||
return "{" + ",".join(parts) + "}" if parts else ""
|
||||
|
||||
|
||||
def _as_int(node: ET.Element, name: str) -> int:
|
||||
raw = node.attrib.get(name) or "0"
|
||||
@ -52,6 +65,39 @@ def _load_junit(path: Path) -> dict[str, int]:
|
||||
return totals
|
||||
|
||||
|
||||
def _load_junit_cases(path: Path) -> list[tuple[str, str]]:
|
||||
if not path.exists():
|
||||
return []
|
||||
|
||||
tree = ET.parse(path)
|
||||
root = tree.getroot()
|
||||
suites: list[ET.Element]
|
||||
if root.tag == "testsuite":
|
||||
suites = [root]
|
||||
elif root.tag == "testsuites":
|
||||
suites = list(root.findall("testsuite"))
|
||||
else:
|
||||
suites = []
|
||||
|
||||
cases: list[tuple[str, str]] = []
|
||||
for suite in suites:
|
||||
for case in suite.findall("testcase"):
|
||||
name = (case.attrib.get("name") or "").strip()
|
||||
classname = (case.attrib.get("classname") or "").strip()
|
||||
if not name:
|
||||
continue
|
||||
test_id = f"{classname}::{name}" if classname else name
|
||||
status = "passed"
|
||||
if case.find("failure") is not None:
|
||||
status = "failed"
|
||||
elif case.find("error") is not None:
|
||||
status = "error"
|
||||
elif case.find("skipped") is not None:
|
||||
status = "skipped"
|
||||
cases.append((test_id, status))
|
||||
return cases
|
||||
|
||||
|
||||
def _load_coverage_percent(path: Path) -> float:
|
||||
if not path.exists():
|
||||
return 0.0
|
||||
@ -63,6 +109,18 @@ def _load_coverage_percent(path: Path) -> float:
|
||||
return 0.0
|
||||
|
||||
|
||||
def _load_gate_rc(path: Path) -> int | None:
|
||||
if not path.exists():
|
||||
return None
|
||||
raw = path.read_text(encoding="utf-8").strip()
|
||||
if not raw:
|
||||
return None
|
||||
try:
|
||||
return int(raw)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _count_source_lines_over_500(root: Path) -> int:
|
||||
if not root.exists():
|
||||
return 0
|
||||
@ -76,6 +134,45 @@ def _count_source_lines_over_500(root: Path) -> int:
|
||||
return over
|
||||
|
||||
|
||||
def _load_json(path: Path) -> dict | None:
|
||||
if not path.exists():
|
||||
return None
|
||||
try:
|
||||
payload = json.loads(path.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
return None
|
||||
return payload if isinstance(payload, dict) else None
|
||||
|
||||
|
||||
def _sonarqube_check_status(build_dir: Path) -> str:
|
||||
report = _load_json(Path(os.getenv("QUALITY_GATE_SONARQUBE_REPORT", str(build_dir / "sonarqube-quality-gate.json"))))
|
||||
if not report:
|
||||
return "not_applicable"
|
||||
status_candidates = [
|
||||
report.get("status"),
|
||||
((report.get("projectStatus") or {}).get("status") if isinstance(report.get("projectStatus"), dict) else None),
|
||||
((report.get("qualityGate") or {}).get("status") if isinstance(report.get("qualityGate"), dict) else None),
|
||||
]
|
||||
for value in status_candidates:
|
||||
if isinstance(value, str):
|
||||
return "ok" if value.strip().lower() in QUALITY_SUCCESS_STATES else "failed"
|
||||
return "failed"
|
||||
|
||||
|
||||
def _supply_chain_check_status(build_dir: Path) -> str:
|
||||
report = _load_json(Path(os.getenv("QUALITY_GATE_IRONBANK_REPORT", str(build_dir / "ironbank-compliance.json"))))
|
||||
if not report:
|
||||
return "not_applicable"
|
||||
compliant = report.get("compliant")
|
||||
if isinstance(compliant, bool):
|
||||
return "ok" if compliant else "failed"
|
||||
status_candidates = [report.get("status"), report.get("result"), report.get("compliance")]
|
||||
for value in status_candidates:
|
||||
if isinstance(value, str):
|
||||
return "ok" if value.strip().lower() in QUALITY_SUCCESS_STATES else "failed"
|
||||
return "failed"
|
||||
|
||||
|
||||
def _read_text(url: str) -> str:
|
||||
try:
|
||||
with urllib.request.urlopen(url, timeout=10) as resp:
|
||||
@ -88,7 +185,7 @@ def _counter(metrics: str, suite: str, status: str) -> float:
|
||||
for line in metrics.splitlines():
|
||||
if not line.startswith("platform_quality_gate_runs_total{"):
|
||||
continue
|
||||
if f'job="platform-quality-ci"' not in line:
|
||||
if 'job="platform-quality-ci"' not in line:
|
||||
continue
|
||||
if f'suite="{suite}"' not in line:
|
||||
continue
|
||||
@ -108,7 +205,7 @@ def _post_text(url: str, payload: str) -> None:
|
||||
req = urllib.request.Request(
|
||||
url,
|
||||
data=payload.encode("utf-8"),
|
||||
method="POST",
|
||||
method="PUT",
|
||||
headers={"Content-Type": "text/plain"},
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||
@ -124,13 +221,58 @@ def main() -> int:
|
||||
|
||||
junit_path = Path(os.getenv("JUNIT_PATH", "build/junit.xml"))
|
||||
coverage_path = Path(os.getenv("COVERAGE_PATH", "build/coverage.json"))
|
||||
gate_rc_path = Path(os.getenv("QUALITY_GATE_RC_PATH", "build/quality-gate.rc"))
|
||||
docs_rc_path = Path(os.getenv("QUALITY_GATE_DOCS_RC_PATH", "build/docs-naming.rc"))
|
||||
source_root = Path(os.getenv("SOURCE_ROOT", "atlasbot"))
|
||||
build_dir = Path(os.getenv("BUILD_DIR", "build"))
|
||||
branch = os.getenv("BRANCH_NAME") or os.getenv("GIT_BRANCH") or "unknown"
|
||||
if branch.startswith("origin/"):
|
||||
branch = branch[len("origin/") :]
|
||||
build_number = os.getenv("BUILD_NUMBER", "")
|
||||
jenkins_job = os.getenv("JOB_NAME", "atlasbot")
|
||||
build_labels = {
|
||||
"suite": suite,
|
||||
"branch": branch,
|
||||
"build_number": build_number or "unknown",
|
||||
"jenkins_job": jenkins_job,
|
||||
}
|
||||
test_case_base_labels = dict(build_labels)
|
||||
|
||||
if not junit_path.exists():
|
||||
junit_candidates = sorted(build_dir.glob("junit*.xml"))
|
||||
if junit_candidates:
|
||||
junit_path = junit_candidates[0]
|
||||
if not coverage_path.exists():
|
||||
for candidate in (
|
||||
build_dir / "coverage.json",
|
||||
build_dir / "coverage-summary.json",
|
||||
build_dir / "coverage" / "coverage-summary.json",
|
||||
):
|
||||
if candidate.exists():
|
||||
coverage_path = candidate
|
||||
break
|
||||
print(f"[metrics] junit_path={junit_path} exists={junit_path.exists()}")
|
||||
print(f"[metrics] coverage_path={coverage_path} exists={coverage_path.exists()}")
|
||||
|
||||
totals = _load_junit(junit_path)
|
||||
test_cases = _load_junit_cases(junit_path)
|
||||
coverage_pct = _load_coverage_percent(coverage_path)
|
||||
gate_rc = _load_gate_rc(gate_rc_path)
|
||||
docs_rc = _load_gate_rc(docs_rc_path)
|
||||
source_lines_over_500 = _count_source_lines_over_500(source_root)
|
||||
passed = max(totals["tests"] - totals["failures"] - totals["errors"] - totals["skipped"], 0)
|
||||
outcome = "ok" if totals["tests"] > 0 and totals["failures"] == 0 and totals["errors"] == 0 else "failed"
|
||||
if gate_rc is not None and gate_rc != 0:
|
||||
outcome = "failed"
|
||||
checks = {
|
||||
"tests": "ok" if outcome == "ok" else "failed",
|
||||
"coverage": "ok" if coverage_pct >= 95.0 else "failed",
|
||||
"loc": "ok" if source_lines_over_500 == 0 else "failed",
|
||||
"docs_naming": "ok" if docs_rc == 0 else "failed",
|
||||
"gate_glue": "ok",
|
||||
"sonarqube": _sonarqube_check_status(build_dir),
|
||||
"supply_chain": _supply_chain_check_status(build_dir),
|
||||
}
|
||||
|
||||
metrics = _read_text(f"{pushgateway_url}/metrics")
|
||||
ok_count = _counter(metrics, suite, "ok")
|
||||
@ -156,8 +298,26 @@ def main() -> int:
|
||||
f'platform_quality_gate_workspace_line_coverage_percent{{suite="{suite}"}} {coverage_pct:.3f}',
|
||||
"# TYPE platform_quality_gate_source_lines_over_500_total gauge",
|
||||
f'platform_quality_gate_source_lines_over_500_total{{suite="{suite}"}} {source_lines_over_500}',
|
||||
"# TYPE platform_quality_gate_build_info gauge",
|
||||
f"platform_quality_gate_build_info{_label_str(build_labels)} 1",
|
||||
"# TYPE atlasbot_quality_gate_checks_total gauge",
|
||||
"# TYPE platform_quality_gate_test_case_result gauge",
|
||||
]
|
||||
) + "\n"
|
||||
if test_cases:
|
||||
payload += "\n".join(
|
||||
f"platform_quality_gate_test_case_result{_label_str({**test_case_base_labels, 'test': test_name, 'status': test_status})} 1"
|
||||
for test_name, test_status in test_cases
|
||||
) + "\n"
|
||||
else:
|
||||
payload += (
|
||||
f"platform_quality_gate_test_case_result"
|
||||
f"{_label_str({**test_case_base_labels, 'test': '__no_test_cases__', 'status': 'skipped'})} 1\n"
|
||||
)
|
||||
payload += "\n".join(
|
||||
f'atlasbot_quality_gate_checks_total{{suite="{suite}",check="{check_name}",result="{check_status}"}} 1'
|
||||
for check_name, check_status in checks.items()
|
||||
) + "\n"
|
||||
|
||||
_post_text(f"{pushgateway_url}/metrics/job/platform-quality-ci/suite/{suite}", payload)
|
||||
return 0
|
||||
|
||||
2
testing/__init__.py
Normal file
2
testing/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
"""Shared testing helpers for atlasbot."""
|
||||
|
||||
24
testing/coverage_exceptions.json
Normal file
24
testing/coverage_exceptions.json
Normal file
@ -0,0 +1,24 @@
|
||||
{
|
||||
"ticket": "atlasbot-coverage-debt",
|
||||
"expires_on": "2026-06-30",
|
||||
"per_file_thresholds": {
|
||||
"atlasbot/engine/answerer/workflow_post.py": 61.0,
|
||||
"atlasbot/engine/answerer/common.py": 75.0,
|
||||
"atlasbot/engine/answerer/post.py": 79.0,
|
||||
"atlasbot/engine/answerer/retrieval_ext.py": 80.5,
|
||||
"atlasbot/engine/answerer/engine.py": 81.0,
|
||||
"atlasbot/knowledge/loader.py": 81.5,
|
||||
"atlasbot/engine/answerer/spine.py": 83.5,
|
||||
"atlasbot/engine/answerer/retrieval.py": 83.5,
|
||||
"atlasbot/engine/answerer/workflow.py": 84.0,
|
||||
"atlasbot/snapshot/builder/format_a.py": 84.5,
|
||||
"atlasbot/engine/answerer/post_ext.py": 86.5,
|
||||
"atlasbot/snapshot/builder/format_b.py": 87.5,
|
||||
"atlasbot/engine/answerer/factsheet.py": 88.0,
|
||||
"atlasbot/matrix/bot.py": 88.0,
|
||||
"atlasbot/snapshot/builder/format_c.py": 90.0,
|
||||
"atlasbot/snapshot/builder/core_a.py": 91.0,
|
||||
"atlasbot/llm/client.py": 93.0,
|
||||
"atlasbot/main.py": 93.0
|
||||
}
|
||||
}
|
||||
108
testing/fakes.py
Normal file
108
testing/fakes.py
Normal file
@ -0,0 +1,108 @@
|
||||
"""Reusable test doubles and settings factories."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
from atlasbot.config import Settings
|
||||
|
||||
|
||||
class FakeLLM:
|
||||
"""Deterministic LLM double for pipeline tests.
|
||||
|
||||
Why:
|
||||
- keeps the answer engine tests fast and predictable.
|
||||
|
||||
Input/Output:
|
||||
- accepts the same `chat()` signature as the real client;
|
||||
- returns canned JSON or text snippets based on the prompt content.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.calls: list[str] = []
|
||||
|
||||
async def chat(self, messages, *, model=None, timeout_sec=None):
|
||||
"""Return a prompt-shaped response and remember the last user prompt."""
|
||||
|
||||
prompt = messages[-1]["content"]
|
||||
self.calls.append(prompt)
|
||||
if "normalized" in prompt and "keywords" in prompt:
|
||||
return '{"normalized":"What is Atlas?","keywords":["atlas"]}'
|
||||
if "needs_snapshot" in prompt:
|
||||
return '{"needs_snapshot": true, "answer_style": "direct"}'
|
||||
if "sub-questions" in prompt:
|
||||
return '[{"id":"q1","question":"What is Atlas?","priority":1}]'
|
||||
if "sub-question" in prompt:
|
||||
return "Atlas has 22 nodes."
|
||||
if "Answer using only the Fact Sheet" in prompt:
|
||||
return "Atlas has 22 nodes."
|
||||
if "final response" in prompt:
|
||||
return "Atlas has 22 nodes."
|
||||
if "Score response quality" in prompt:
|
||||
return '{"confidence":80,"relevance":90,"satisfaction":85,"hallucination_risk":"low"}'
|
||||
if "claims list" in prompt:
|
||||
return '{"claims": []}'
|
||||
return "{}"
|
||||
|
||||
|
||||
class SlowFakeLLM(FakeLLM):
|
||||
"""Variant that sleeps briefly so timeout guards can be exercised."""
|
||||
|
||||
async def chat(self, messages, *, model=None, timeout_sec=None):
|
||||
"""Delay before answering to make budget handling deterministic."""
|
||||
|
||||
await asyncio.sleep(0.02)
|
||||
return await super().chat(messages, model=model, timeout_sec=timeout_sec)
|
||||
|
||||
|
||||
def build_test_settings() -> Settings:
|
||||
"""Create a fully populated `Settings` instance for unit tests."""
|
||||
|
||||
return Settings(
|
||||
matrix_base="",
|
||||
auth_base="",
|
||||
bot_user="",
|
||||
bot_pass="",
|
||||
room_alias="",
|
||||
server_name="",
|
||||
bot_mentions=(),
|
||||
matrix_bots=(),
|
||||
ollama_url="",
|
||||
ollama_model="base",
|
||||
ollama_model_fast="fast",
|
||||
ollama_model_smart="smart",
|
||||
ollama_model_genius="genius",
|
||||
ollama_fallback_model="",
|
||||
ollama_timeout_sec=1.0,
|
||||
ollama_retries=0,
|
||||
ollama_api_key="",
|
||||
http_port=8090,
|
||||
internal_token="",
|
||||
kb_dir="",
|
||||
vm_url="",
|
||||
ariadne_state_url="",
|
||||
ariadne_state_token="",
|
||||
snapshot_ttl_sec=30,
|
||||
thinking_interval_sec=30,
|
||||
quick_time_budget_sec=15.0,
|
||||
smart_time_budget_sec=45.0,
|
||||
genius_time_budget_sec=180.0,
|
||||
conversation_ttl_sec=300,
|
||||
snapshot_pin_enabled=False,
|
||||
queue_enabled=False,
|
||||
nats_url="",
|
||||
nats_stream="",
|
||||
nats_subject="",
|
||||
nats_result_bucket="",
|
||||
fast_max_angles=1,
|
||||
smart_max_angles=1,
|
||||
genius_max_angles=1,
|
||||
fast_max_candidates=1,
|
||||
smart_max_candidates=1,
|
||||
genius_max_candidates=1,
|
||||
fast_llm_calls_max=9,
|
||||
smart_llm_calls_max=17,
|
||||
genius_llm_calls_max=32,
|
||||
llm_limit_multiplier=1.5,
|
||||
state_db_path="/tmp/atlasbot_test_state.db",
|
||||
)
|
||||
1746
tests/test_answerer_support_coverage.py
Normal file
1746
tests/test_answerer_support_coverage.py
Normal file
File diff suppressed because it is too large
Load Diff
83
tests/test_check_coverage_contract.py
Normal file
83
tests/test_check_coverage_contract.py
Normal file
@ -0,0 +1,83 @@
|
||||
"""Tests for Atlasbot's per-file coverage contract script."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
SCRIPT = Path(__file__).resolve().parents[1] / "scripts" / "check_coverage.py"
|
||||
|
||||
|
||||
def _run_check(tmp_path: Path, coverage_payload: dict) -> subprocess.CompletedProcess[str]:
|
||||
"""Run the coverage script against a temporary Atlasbot source tree."""
|
||||
|
||||
coverage_path = tmp_path / "coverage.json"
|
||||
coverage_path.write_text(json.dumps(coverage_payload), encoding="utf-8")
|
||||
return subprocess.run(
|
||||
[sys.executable, str(SCRIPT), str(coverage_path), "--root", "atlasbot", "--threshold", "95"],
|
||||
cwd=tmp_path,
|
||||
text=True,
|
||||
capture_output=True,
|
||||
check=False,
|
||||
)
|
||||
|
||||
|
||||
def test_missing_source_file_fails_coverage_contract(tmp_path: Path) -> None:
|
||||
"""Every non-init production source file must appear in the coverage report."""
|
||||
|
||||
source_root = tmp_path / "atlasbot"
|
||||
source_root.mkdir()
|
||||
(source_root / "__init__.py").write_text("", encoding="utf-8")
|
||||
(source_root / "covered.py").write_text("value = 1\n", encoding="utf-8")
|
||||
(source_root / "missing.py").write_text("value = 2\n", encoding="utf-8")
|
||||
|
||||
result = _run_check(
|
||||
tmp_path,
|
||||
{"files": {"atlasbot/covered.py": {"summary": {"percent_covered": 100.0}}}},
|
||||
)
|
||||
|
||||
assert result.returncode == 1
|
||||
assert "atlasbot/missing.py: missing from coverage report" in result.stdout
|
||||
assert "atlasbot/__init__.py" not in result.stdout
|
||||
|
||||
|
||||
def test_low_or_malformed_file_coverage_fails_contract(tmp_path: Path) -> None:
|
||||
"""Covered files still fail if their per-file percentage is bad or missing."""
|
||||
|
||||
source_root = tmp_path / "atlasbot"
|
||||
source_root.mkdir()
|
||||
(source_root / "low.py").write_text("value = 1\n", encoding="utf-8")
|
||||
(source_root / "malformed.py").write_text("value = 2\n", encoding="utf-8")
|
||||
|
||||
result = _run_check(
|
||||
tmp_path,
|
||||
{
|
||||
"files": {
|
||||
"atlasbot/low.py": {"summary": {"percent_covered": 94.9}},
|
||||
"atlasbot/malformed.py": {"summary": {}},
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
assert result.returncode == 1
|
||||
assert "atlasbot/low.py: 94.90% < 95.00%" in result.stdout
|
||||
assert "atlasbot/malformed.py: coverage percent missing" in result.stdout
|
||||
|
||||
|
||||
def test_complete_per_file_coverage_passes_contract(tmp_path: Path) -> None:
|
||||
"""The contract passes when every production file is present above threshold."""
|
||||
|
||||
source_root = tmp_path / "atlasbot"
|
||||
source_root.mkdir()
|
||||
(source_root / "covered.py").write_text("value = 1\n", encoding="utf-8")
|
||||
|
||||
result = _run_check(
|
||||
tmp_path,
|
||||
{"files": {"atlasbot/covered.py": {"summary": {"percent_covered": 95.0}}}},
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
assert result.stdout == ""
|
||||
@ -1,98 +1,21 @@
|
||||
"""Answer-engine regression tests."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from dataclasses import replace
|
||||
|
||||
from atlasbot.engine.answerer import AnswerEngine
|
||||
from atlasbot.knowledge.loader import KnowledgeBase
|
||||
from atlasbot.snapshot.builder import SnapshotProvider
|
||||
from atlasbot.config import Settings
|
||||
from testing.fakes import FakeLLM, SlowFakeLLM, build_test_settings
|
||||
|
||||
|
||||
class FakeLLM:
|
||||
def __init__(self) -> None:
|
||||
self.calls: list[str] = []
|
||||
def test_engine_answer_basic() -> None:
|
||||
"""The quick path should answer from the fact sheet."""
|
||||
|
||||
async def chat(self, messages, *, model=None, timeout_sec=None):
|
||||
prompt = messages[-1]["content"]
|
||||
self.calls.append(prompt)
|
||||
if "normalized" in prompt and "keywords" in prompt:
|
||||
return '{"normalized":"What is Atlas?","keywords":["atlas"]}'
|
||||
if "needs_snapshot" in prompt:
|
||||
return '{"needs_snapshot": true, "answer_style": "direct"}'
|
||||
if "sub-questions" in prompt:
|
||||
return '[{"id":"q1","question":"What is Atlas?","priority":1}]'
|
||||
if "sub-question" in prompt:
|
||||
return "Atlas has 22 nodes."
|
||||
if "Answer using only the Fact Sheet" in prompt:
|
||||
return "Atlas has 22 nodes."
|
||||
if "final response" in prompt:
|
||||
return "Atlas has 22 nodes."
|
||||
if "Score response quality" in prompt:
|
||||
return '{"confidence":80,"relevance":90,"satisfaction":85,"hallucination_risk":"low"}'
|
||||
if "claims list" in prompt:
|
||||
return '{"claims": []}'
|
||||
return "{}"
|
||||
|
||||
|
||||
class SlowFakeLLM(FakeLLM):
|
||||
async def chat(self, messages, *, model=None, timeout_sec=None):
|
||||
await asyncio.sleep(0.02)
|
||||
return await super().chat(messages, model=model, timeout_sec=timeout_sec)
|
||||
|
||||
|
||||
def _settings() -> Settings:
|
||||
return Settings(
|
||||
matrix_base="",
|
||||
auth_base="",
|
||||
bot_user="",
|
||||
bot_pass="",
|
||||
room_alias="",
|
||||
server_name="",
|
||||
bot_mentions=(),
|
||||
matrix_bots=(),
|
||||
ollama_url="",
|
||||
ollama_model="base",
|
||||
ollama_model_fast="fast",
|
||||
ollama_model_smart="smart",
|
||||
ollama_model_genius="genius",
|
||||
ollama_fallback_model="",
|
||||
ollama_timeout_sec=1.0,
|
||||
ollama_retries=0,
|
||||
ollama_api_key="",
|
||||
http_port=8090,
|
||||
internal_token="",
|
||||
kb_dir="",
|
||||
vm_url="",
|
||||
ariadne_state_url="",
|
||||
ariadne_state_token="",
|
||||
snapshot_ttl_sec=30,
|
||||
thinking_interval_sec=30,
|
||||
quick_time_budget_sec=15.0,
|
||||
smart_time_budget_sec=45.0,
|
||||
genius_time_budget_sec=180.0,
|
||||
conversation_ttl_sec=300,
|
||||
snapshot_pin_enabled=False,
|
||||
queue_enabled=False,
|
||||
nats_url="",
|
||||
nats_stream="",
|
||||
nats_subject="",
|
||||
nats_result_bucket="",
|
||||
fast_max_angles=1,
|
||||
smart_max_angles=1,
|
||||
genius_max_angles=1,
|
||||
fast_max_candidates=1,
|
||||
smart_max_candidates=1,
|
||||
genius_max_candidates=1,
|
||||
fast_llm_calls_max=9,
|
||||
smart_llm_calls_max=17,
|
||||
genius_llm_calls_max=32,
|
||||
llm_limit_multiplier=1.5,
|
||||
state_db_path="/tmp/atlasbot_test_state.db",
|
||||
)
|
||||
|
||||
|
||||
def test_engine_answer_basic():
|
||||
llm = FakeLLM()
|
||||
settings = _settings()
|
||||
settings = build_test_settings()
|
||||
kb = KnowledgeBase("")
|
||||
snapshot = SnapshotProvider(settings)
|
||||
engine = AnswerEngine(settings, llm, kb, snapshot)
|
||||
@ -101,9 +24,11 @@ def test_engine_answer_basic():
|
||||
assert "Atlas has 22 nodes" in result.reply
|
||||
|
||||
|
||||
def test_smart_mode_uses_factsheet_path():
|
||||
def test_smart_mode_uses_factsheet_path() -> None:
|
||||
"""Smart mode should stay on the factsheet branch for direct cluster questions."""
|
||||
|
||||
llm = FakeLLM()
|
||||
settings = _settings()
|
||||
settings = build_test_settings()
|
||||
kb = KnowledgeBase("")
|
||||
snapshot = SnapshotProvider(settings)
|
||||
engine = AnswerEngine(settings, llm, kb, snapshot)
|
||||
@ -113,9 +38,11 @@ def test_smart_mode_uses_factsheet_path():
|
||||
assert "time budget" not in result.reply.lower()
|
||||
|
||||
|
||||
def test_genius_mode_uses_factsheet_path():
|
||||
def test_genius_mode_uses_factsheet_path() -> None:
|
||||
"""Genius mode should also return the factsheet answer for the same query."""
|
||||
|
||||
llm = FakeLLM()
|
||||
settings = _settings()
|
||||
settings = build_test_settings()
|
||||
kb = KnowledgeBase("")
|
||||
snapshot = SnapshotProvider(settings)
|
||||
engine = AnswerEngine(settings, llm, kb, snapshot)
|
||||
@ -125,9 +52,11 @@ def test_genius_mode_uses_factsheet_path():
|
||||
assert "time budget" not in result.reply.lower()
|
||||
|
||||
|
||||
def test_plain_math_question_is_rejected_for_cluster_modes():
|
||||
def test_plain_math_question_is_rejected_for_cluster_modes() -> None:
|
||||
"""The bot should keep users on cluster questions instead of generic math."""
|
||||
|
||||
llm = FakeLLM()
|
||||
settings = _settings()
|
||||
settings = build_test_settings()
|
||||
kb = KnowledgeBase("")
|
||||
snapshot = SnapshotProvider(settings)
|
||||
engine = AnswerEngine(settings, llm, kb, snapshot)
|
||||
@ -136,9 +65,11 @@ def test_plain_math_question_is_rejected_for_cluster_modes():
|
||||
assert "focus on Titan cluster operations" in result.reply
|
||||
|
||||
|
||||
def test_quick_mode_time_budget_guard():
|
||||
def test_quick_mode_time_budget_guard() -> None:
|
||||
"""A slow model call should trip the quick-mode budget guard."""
|
||||
|
||||
llm = SlowFakeLLM()
|
||||
settings = replace(_settings(), quick_time_budget_sec=0.01)
|
||||
settings = replace(build_test_settings(), quick_time_budget_sec=0.01)
|
||||
kb = KnowledgeBase("")
|
||||
snapshot = SnapshotProvider(settings)
|
||||
engine = AnswerEngine(settings, llm, kb, snapshot)
|
||||
|
||||
810
tests/test_quality_gate_paths.py
Normal file
810
tests/test_quality_gate_paths.py
Normal file
@ -0,0 +1,810 @@
|
||||
"""Targeted quality-gate coverage for runtime and answerer orchestration."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from dataclasses import replace
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from atlasbot.api.http import Api, AnswerRequest
|
||||
from atlasbot.config import MatrixBotConfig
|
||||
from atlasbot.engine.answerer import (
|
||||
AnswerEngine,
|
||||
AnswerResult,
|
||||
AnswerScores,
|
||||
ClaimItem,
|
||||
EvidenceItem,
|
||||
ModePlan,
|
||||
)
|
||||
from atlasbot.engine.answerer.common import _mode_plan
|
||||
from atlasbot.engine.answerer.engine import AnswerEngine as EngineClass
|
||||
from atlasbot.engine.answerer.workflow import run_answer
|
||||
from atlasbot.engine.answerer.workflow_post import finalize_answer
|
||||
from atlasbot.knowledge.loader import KnowledgeBase
|
||||
from atlasbot.llm.client import LLMClient, LLMError, parse_json
|
||||
from atlasbot.main import result_scores
|
||||
from atlasbot.matrix.bot import MatrixBot, MatrixClient
|
||||
from atlasbot.queue.nats import QueueManager
|
||||
from atlasbot.snapshot.builder import SnapshotProvider, build_summary
|
||||
from testing.fakes import build_test_settings
|
||||
from tests.test_support_modules import _rich_snapshot
|
||||
|
||||
|
||||
class StaticSnapshot:
|
||||
"""Return a fixed snapshot for answer-engine tests."""
|
||||
|
||||
def __init__(self, payload: dict[str, Any]) -> None:
|
||||
self._payload = payload
|
||||
|
||||
def get(self) -> dict[str, Any]:
|
||||
"""Return the stored snapshot payload."""
|
||||
|
||||
return self._payload
|
||||
|
||||
|
||||
class PromptLLM:
|
||||
"""Map prompt fragments to canned responses for workflow tests."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.calls: list[tuple[str, str]] = []
|
||||
|
||||
async def chat(
|
||||
self,
|
||||
messages: list[dict[str, str]],
|
||||
*,
|
||||
model: str | None = None,
|
||||
timeout_sec: float | None = None,
|
||||
) -> str:
|
||||
"""Return the scripted response for the latest user prompt."""
|
||||
|
||||
del timeout_sec
|
||||
system = messages[0]["content"]
|
||||
prompt = messages[-1]["content"]
|
||||
self.calls.append((model or "", prompt))
|
||||
if "Given chunk summaries, score relevance" in prompt:
|
||||
items = []
|
||||
for line in prompt.splitlines():
|
||||
if line.startswith("- c"):
|
||||
chunk_id = line.split()[1].rstrip(":")
|
||||
score = 95 if "cpu" in line.lower() or "synapse" in line.lower() else 80
|
||||
items.append({"id": chunk_id, "score": score, "reason": "relevant"})
|
||||
return json.dumps(items or [{"id": "c0", "score": 90, "reason": "relevant"}])
|
||||
direct = self._direct_response(prompt)
|
||||
if direct is not None:
|
||||
return direct
|
||||
response = self._lookup_response(system, prompt)
|
||||
if response is not None:
|
||||
return response
|
||||
raise AssertionError(f"Unhandled prompt:\nSYSTEM={system}\nPROMPT={prompt}")
|
||||
|
||||
def _direct_response(self, prompt: str) -> str | None:
|
||||
"""Return direct string responses for a few prompt families."""
|
||||
|
||||
if "Answer the sub-question using the context" in prompt:
|
||||
return "The best runbook path is runbooks/fix.md." if "runbook" in prompt.lower() else "synapse is hottest with cpu 95 on titan-01."
|
||||
markers = [
|
||||
("Write a final response to the user", "titan-99 is hottest and the runbook is runbooks/wrong.md."),
|
||||
("Draft:", "synapse is hottest at cpu 95 on titan-01, and amd64 nodes remain separate from raspberry hardware."),
|
||||
("Return JSON with fields: issues", '{"issues":["mention the exact runbook"],"missing_data":[],"risky_claims":[]}'),
|
||||
("command (string), rationale", '{"command":"kubectl top pods -n synapse","rationale":"verify namespace cpu"}'),
|
||||
("confidence (0-100)", '{"confidence":88,"relevance":91,"satisfaction":86,"hallucination_risk":"low"}'),
|
||||
]
|
||||
for marker, response in markers:
|
||||
if marker in prompt:
|
||||
if marker == "Draft:" and "If Facts are provided" not in prompt:
|
||||
continue
|
||||
return response
|
||||
return None
|
||||
|
||||
def _lookup_response(self, system: str, prompt: str) -> str | None:
    """Return canned responses for prompt markers.

    Scans an ordered (marker, response) table and returns the response of
    the FIRST marker contained in *prompt*, or ``None`` when nothing
    matches. The ``system`` message is ignored: routing is purely on the
    user-prompt text.
    """
    del system
    # NOTE(review): ordering matters for overlapping markers (e.g. several
    # entries could match a "Return JSON with field: lines" prompt), so keep
    # more specific markers ahead of broader ones when editing this table.
    markers = [
        (
            "normalized (string), keywords",
            '{"normalized":"Which namespace is hottest on raspberry hardware and which runbook should I use?","keywords":["namespace","hottest","cpu","raspberry","runbook"]}',
        ),
        (
            "needs_snapshot (bool)",
            '{"needs_snapshot":true,"needs_kb":true,"needs_tool":true,"answer_style":"insightful","follow_up":false,"question_type":"open_ended","focus_entity":"namespace","focus_metric":"cpu"}',
        ),
        (
            "Generate up to",
            '[{"id":"q1","question":"Which namespace is hottest?","priority":5,"kind":"metric"},{"id":"q2","question":"Which runbook applies?","priority":4,"kind":"context"}]',
        ),
        ("Choose the run that best aligns", '{"selected_index": 1}'),
        ("AvailableKeys:", '{"keys":["namespace_cpu_top","namespace_pods","hardware_nodes"]}'),
        ("Return JSON with field: missing", '{"missing":[]}'),
        ("Return JSON with fields: prefixes", '{"prefixes":["namespace","hottest"]}'),
        ("fact_types", '{"fact_types":["namespace_cpu_top","hardware_nodes"]}'),
        ("Return JSON with field: signals", '{"signals":["cpu","synapse","raspberry"]}'),
        # The next four prompt families all share the same fact lines so the
        # engine sees a consistent snapshot regardless of which path asks.
        (
            "Signals:",
            '{"lines":["namespace_cpu_top: synapse=95","hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"]}',
        ),
        (
            "Return JSON with field: lines",
            '{"lines":["namespace_cpu_top: synapse=95","hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"]}',
        ),
        (
            "CandidateFacts:",
            '{"lines":["namespace_cpu_top: synapse=95","hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"]}',
        ),
        (
            "FactCandidates:",
            '{"lines":["namespace_cpu_top: synapse=95","hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"]}',
        ),
        (
            "Suggest a safe, read-only command",
            '{"command":"kubectl top pods -n synapse","rationale":"verify namespace cpu"}',
        ),
        ("Pick the best candidate for accuracy and grounding", '{"best": 1}'),
        ("Pick the best draft for accuracy", '{"best": 1}'),
        ("Pick the best runbook path", '{"path":"runbooks/fix.md"}'),
        # Deliberately wrong runbook here so downstream repair passes have
        # something to correct (see the finalize/evidence tests).
        ("Check the draft against the context", "synapse is hottest on titan-01, but see runbooks/wrong.md."),
        ("Answer using the fact", "Latest metrics: namespace_cpu_top: synapse=95."),
        ("Rewrite the draft to only include claims supported by FactsUsed", "synapse is hottest on titan-01."),
        ("Check if an open-ended answer includes at least two concrete signals", '{"ok": false, "reason": "needs more detail"}'),
        ("ok (bool), reason (string)", '{"ok": false, "reason": "needs more detail"}'),
        ("Rewrite the answer using the critique", "synapse is hottest at cpu 95 on titan-01. Use runbooks/fix.md."),
        ("Return JSON with field: note", '{"note":"The answer would benefit from per-pod CPU samples."}'),
        ("Score response quality", '{"confidence":88,"relevance":91,"satisfaction":86,"hallucination_risk":"low"}'),
        (
            "Return JSON with fields: confidence (0-100), relevance (0-100), satisfaction (0-100), hallucination_risk (low|medium|high).",
            '{"confidence":88,"relevance":91,"satisfaction":86,"hallucination_risk":"low"}',
        ),
        (
            "claims list",
            '{"claims":[{"id":"c1","claim":"synapse is hottest","evidence":[{"path":"hottest.cpu.node","reason":"snapshot"}]}]}',
        ),
        ("Select the claims most relevant", '{"claim_ids":["c1"]}'),
        ("Follow-up:", "titan-99 is still hottest."),
        ("Rewrite the answer to be concise and directly answer the question", "Latest metrics: namespace_cpu_top: synapse=95."),
        ("Deduplicate repeated statements", "Latest metrics: namespace_cpu_top: synapse=95."),
        ("Answer using only the Fact Sheet", "Fact sheet answer: namespace_cpu_top: synapse=95. Use runbooks/fix.md."),
    ]
    for marker, response in markers:
        if marker in prompt:
            return response
    # No marker matched; the caller decides how to treat an unknown prompt.
    return None
|
||||
|
||||
|
||||
class TimeoutLLM:
|
||||
"""Raise a timeout as soon as the workflow makes an LLM call."""
|
||||
|
||||
async def chat(
|
||||
self,
|
||||
messages: list[dict[str, str]],
|
||||
*,
|
||||
model: str | None = None,
|
||||
timeout_sec: float | None = None,
|
||||
) -> str:
|
||||
"""Trigger the workflow timeout handling branch."""
|
||||
|
||||
del messages, model, timeout_sec
|
||||
raise TimeoutError("boom")
|
||||
|
||||
|
||||
class LimitLLM(PromptLLM):
    """Prompt-driven LLM double for tests that exhaust the engine's call budget.

    Inherits all canned-response behavior from ``PromptLLM`` unchanged; the
    distinct type only exists so budget-limit tests read clearly.
    """
|
||||
|
||||
|
||||
def _settings(tmp_path: Path, **overrides: Any):
    """Build settings with an isolated claim-store path.

    Starts from the shared test settings, points the state database at a
    file under *tmp_path*, and applies any caller-supplied overrides.
    """
    state_db = tmp_path / "state.db"
    base = build_test_settings()
    return replace(base, state_db_path=str(state_db), **overrides)
|
||||
|
||||
|
||||
def _make_engine(tmp_path: Path, llm: Any, **setting_overrides: Any) -> AnswerEngine:
    """Construct a real engine with static snapshot and KB doubles.

    The knowledge base's read methods are stubbed out with lambdas so the
    engine sees a fixed runbook and fact set without touching the disk.
    """
    kb = KnowledgeBase("")
    # Fixed chunk fixture: one runbook path plus the two fact lines the
    # canned LLM responses reference.
    chunk_fixture = [
        "runbooks/fix.md",
        "namespace_cpu_top: synapse=95",
        "hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)",
    ]
    kb.summary = lambda: "KB summary."  # type: ignore[method-assign]
    kb.runbook_titles = lambda limit=5: "Relevant runbooks:\n- Fix (runbooks/fix.md)"  # type: ignore[method-assign]
    kb.runbook_paths = lambda limit=10: ["runbooks/fix.md"]  # type: ignore[method-assign]
    kb.chunk_lines = lambda max_files=20, max_chars=6000: list(chunk_fixture)  # type: ignore[method-assign]
    return AnswerEngine(
        _settings(tmp_path, **setting_overrides),
        llm,
        kb,
        StaticSnapshot(_rich_snapshot()),
    )  # type: ignore[arg-type]
|
||||
|
||||
|
||||
def test_engine_helper_methods_cover_state_and_followup(tmp_path: Path) -> None:
    """Cover answer-engine helper branches outside the main workflow.

    Exercises ``_answer_stock``, ``_synthesize_answer``, ``_score_answer``,
    ``_extract_claims``, ``_dedup_reply``, the conversation-state store, and
    the follow-up path, all through a scripted ``call_llm`` double.
    """
    settings = _settings(tmp_path)

    # Minimal LLM double: every chat call returns the same canned string.
    class StockLLM:
        async def chat(self, messages, *, model=None, timeout_sec=None):
            del messages, model, timeout_sec
            return "stock reply"

    engine = EngineClass(settings, StockLLM(), KnowledgeBase(""), StaticSnapshot(_rich_snapshot()))

    # Scripted stand-in for the engine's internal LLM helper. Responses are
    # keyed on the ``tag`` each helper passes; only the "synth" tag varies
    # with the prompt (to yield two distinguishable drafts).
    async def call_llm(_system: str, _prompt: str, *, context: str | None = None, model: str | None = None, tag: str = "") -> str:
        del _system, context, model
        static = {
            "draft_select": '{"best": 2}',
            "score": '{"confidence":90,"relevance":91,"satisfaction":92,"hallucination_risk":"low"}',
            # Includes an empty evidence path and a bare string on purpose:
            # malformed entries must be dropped by claim extraction.
            "claim_map": '{"claims":[{"id":"c1","claim":"cpu is high","evidence":[{"path":"hottest.cpu.node","reason":"why"},{"path":"","reason":"skip"}]},"bad"]}',
            "select_claims": '{"claim_ids":["c1"]}',
            "followup": "titan-99 is hottest. The draft is correct.",
            "followup_fix": "titan-01 is hottest.",
            "dedup_followup": "The draft is correct. titan-01 is hottest.",
            "dedup": "deduped",
        }
        if tag == "synth":
            return "draft one" if "DraftIndex: 1" in _prompt else "draft two"
        if tag in static:
            return static[tag]
        # Any unexpected tag is a test bug; fail loudly with the tag name.
        raise AssertionError(tag)

    stock = asyncio.run(engine._answer_stock("hello"))
    assert stock.reply == "stock reply"

    # "draft_select" answers {"best": 2}, which lines up with "draft two"
    # being the expected winner below.
    plan = replace(_mode_plan(settings, "smart"), drafts=2, parallelism=2)
    synth = asyncio.run(
        engine._synthesize_answer(
            "Which node is hottest?",
            ["draft one", "draft two"],
            "ctx",
            {"question_type": "metric", "answer_style": "direct"},
            plan,
            call_llm,
        )
    )
    # Empty draft list: the engine generates drafts itself via the "synth" tag.
    synth_empty = asyncio.run(
        engine._synthesize_answer(
            "Which node is hottest?",
            [],
            "ctx",
            {"question_type": "metric", "answer_style": "direct"},
            replace(plan, drafts=1, parallelism=1),
            call_llm,
        )
    )
    assert synth == "draft two"
    assert synth_empty == "draft two"

    scored = asyncio.run(engine._score_answer("q", "a", plan, call_llm))
    assert scored.hallucination_risk == "low"
    # With scoring disabled, confidence falls back to 60 (no LLM call).
    assert asyncio.run(engine._score_answer("q", "a", replace(plan, use_scores=False), call_llm)).confidence == 60

    summary = build_summary(_rich_snapshot())
    claims = asyncio.run(engine._extract_claims("q", "a", summary, ["fact"], call_llm))
    # Only the well-formed evidence entry from "claim_map" should survive.
    assert claims and claims[0].evidence[0].path == "hottest.cpu.node"
    assert asyncio.run(engine._extract_claims("q", "", summary, [], call_llm)) == []
    # Repeated sentences trigger the dedup LLM call; a single sentence is
    # passed through unchanged.
    assert asyncio.run(engine._dedup_reply("one. one. one.", plan, call_llm, "dedup")) == "deduped"
    assert asyncio.run(engine._dedup_reply("single answer", plan, call_llm, "dedup")) == "single answer"

    # Conversation-state store round trip, the None-key miss, and cleanup.
    engine._store_state("conv-1", claims, summary, _rich_snapshot(), True)
    state = engine._get_state("conv-1")
    assert state and state.snapshot
    assert engine._get_state(None) is None
    engine._cleanup_state()

    # Follow-up flow: "followup" says titan-99 but the final text must name
    # titan-01 — presumably the followup_fix/dedup passes correct the draft
    # (verified only through the assertion below).
    followup = asyncio.run(
        engine._answer_followup(
            "Which hardware hotspot is there?",
            state,
            summary,
            {"question_type": "diagnostic"},
            plan,
            call_llm,
        )
    )
    assert "titan-01" in followup
    assert asyncio.run(engine._select_claims("what about that?", claims, plan, call_llm)) == ["c1"]
    assert asyncio.run(engine._select_claims("what about that?", [], plan, call_llm)) == []
|
||||
|
||||
|
||||
def test_finalize_answer_covers_post_processing_branches(tmp_path: Path) -> None:
    """Exercise evidence-fix, runbook, guard, critic, and gap paths.

    ``finalize_answer`` is driven with a deliberately wrong synthesized
    draft (wrong node, wrong runbook) so each post-processing stage has
    something to repair, and an observer records which stages ran.
    """
    settings = _settings(tmp_path)
    plan = replace(_mode_plan(settings, "smart"), use_gap=True, use_critic=True)
    summary = build_summary(_rich_snapshot())
    summary_lines = [
        "namespace_cpu_top: synapse=95",
        "hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)",
        "runbooks/fix.md",
    ]
    observed: list[tuple[str, str]] = []

    # Tag-keyed responses for every post-processing stage; an unknown tag
    # fails with the offending prompt so new stages are caught immediately.
    async def call_llm(_system: str, _prompt: str, *, context: str | None = None, model: str | None = None, tag: str = "") -> str:
        del _system, context, model
        responses = {
            "runbook_select": '{"path":"runbooks/fix.md"}',
            # Evidence-fix stages still answer wrongly, forcing later
            # enforcement/guard stages to do the real repair.
            "evidence_fix": "titan-99 is hottest and see runbooks/wrong.md.",
            "evidence_fix_enforce": "titan-99 is hottest and see runbooks/wrong.md.",
            "metric_direct": "no numbers here",
            "runbook_enforce": "Non-Raspberry Pi nodes: amd64 (titan-02). Use runbooks/fix.md.",
            "evidence_guard": "Non-Raspberry Pi nodes: amd64 (titan-02). Use runbooks/fix.md.",
            "focus_fix": "Latest metrics: namespace_cpu_top: synapse=95.",
            "insight_guard": '{"ok": false, "reason": "needs more detail"}',
            "insight_fix": "Latest metrics: namespace_cpu_top: synapse=95. Use runbooks/fix.md.",
            "critic": '{"issues":["too vague"]}',
            "revise": "Latest metrics: namespace_cpu_top: synapse=95. Use runbooks/fix.md.",
            "gap": '{"note":"The answer would benefit from per-pod CPU samples."}',
        }
        if tag not in responses:
            raise AssertionError(_prompt)
        return responses[tag]

    # Engine double exposing only the helpers finalize_answer calls back
    # into; synthesis returns the "bad" draft that starts the repair chain.
    class FinalizeEngine:
        async def _synthesize_answer(self, *args: Any) -> str:
            return "titan-99 is hottest and see runbooks/wrong.md."

        async def _dedup_reply(self, reply: str, _plan: ModePlan, _call_llm, tag: str) -> str:
            assert tag == "dedup"
            return reply

        async def _score_answer(self, _question: str, _reply: str, _plan: ModePlan, _call_llm) -> AnswerScores:
            return AnswerScores(80, 81, 82, "low")

        async def _extract_claims(self, _question: str, _reply: str, _summary: dict[str, Any], _facts_used: list[str], _call_llm) -> list[ClaimItem]:
            return [ClaimItem(id="c1", claim="cpu high", evidence=[EvidenceItem(path="hottest.cpu.node", reason="snapshot")])]

    reply, scores, claims = asyncio.run(
        finalize_answer(
            engine=FinalizeEngine(),
            call_llm=call_llm,
            normalized="Which namespace is hottest on raspberry hardware and which runbook should I use?",
            subanswers=["synapse is hottest"],
            context="ctx",
            classify={"question_type": "open_ended", "answer_style": "direct"},
            plan=plan,
            summary=summary,
            summary_lines=summary_lines,
            metric_facts=["namespace_cpu_top: synapse=95"],
            key_facts=["namespace_cpu_top: synapse=95"],
            facts_used=["hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"],
            allowed_nodes=["titan-01", "titan-02"],
            allowed_namespaces=["synapse"],
            runbook_paths=["runbooks/fix.md"],
            lowered_question="which namespace is hottest on raspberry hardware and which runbook should i use?",
            force_metric=True,
            keyword_tokens=["namespace", "cpu", "raspberry"],
            question_tokens=["namespace", "cpu", "raspberry"],
            snapshot_context="ClusterSnapshot:\nnamespace_cpu_top: synapse=95",
            observer=lambda stage, note: observed.append((stage, note)),
            mode="smart",
            metric_keys=["namespace_cpu_top"],
        )
    )
    # The final reply must carry the correct runbook and the real metric,
    # i.e. the wrong draft was repaired.
    assert "runbooks/fix.md" in reply
    assert "synapse=95" in reply
    assert scores.confidence == 80
    assert claims and claims[0].id == "c1"
    # Observer proves the repair, critic, and gap stages actually ran.
    assert ("evidence_fix", "repairing missing evidence") in observed
    assert ("critic", "reviewing") in observed
    assert ("gap", "checking gaps") in observed
|
||||
|
||||
|
||||
def test_run_answer_deep_workflow_persists_state(tmp_path: Path) -> None:
    """Drive the full smart workflow through retrieval, synthesis, and post-processing."""
    engine = _make_engine(tmp_path, PromptLLM())
    stage_notes: list[tuple[str, str]] = []
    question = "Run limitless Which namespace is hottest on raspberry hardware and which runbook should I use?"
    result = asyncio.run(
        run_answer(
            engine,
            question,
            mode="smart",
            history=[{"q": "before", "a": "earlier"}],
            observer=lambda stage, note: stage_notes.append((stage, note)),
            conversation_id="room-1",
            snapshot_pin=True,
        )
    )

    # The final answer names the correct runbook and surfaces the tool hint.
    assert "runbooks/fix.md" in result.reply
    assert result.meta["tool_hint"]["command"] == "kubectl top pods -n synapse"

    # Conversation state (claims + pinned snapshot) must have been persisted.
    persisted = engine._get_state("room-1")
    assert persisted is not None
    assert persisted.claims
    assert persisted.snapshot

    # Every major workflow stage reported progress to the observer.
    seen_stages = {stage for stage, _ in stage_notes}
    for expected in ("normalize", "route", "retrieve", "tool", "subanswers", "synthesize"):
        assert expected in seen_stages
|
||||
|
||||
|
||||
def test_run_answer_followup_and_limits(tmp_path: Path) -> None:
    """Cover follow-up routing, reasoning limit, and timeout fallbacks."""

    # Overrides a few markers so the question is classified as a follow-up
    # of stored state; everything else falls through to PromptLLM's table.
    class FollowupLLM(PromptLLM):
        def _lookup_response(self, system: str, prompt: str) -> str | None:
            if "normalized (string), keywords" in prompt:
                return '{"normalized":"What about that?","keywords":["that"]}'
            if "needs_snapshot (bool)" in prompt:
                return '{"needs_snapshot":true,"needs_kb":false,"needs_tool":false,"answer_style":"direct","follow_up":false,"question_type":"open_ended","focus_entity":"unknown","focus_metric":"unknown"}'
            if "Select the claims most relevant" in prompt:
                return '{"claim_ids":["c1"]}'
            if "Follow-up:" in prompt:
                return "titan-99 is still hottest."
            return super()._lookup_response(system, prompt)

    engine = _make_engine(tmp_path, FollowupLLM())
    summary = build_summary(_rich_snapshot())
    # Pre-seed conversation state so "What about that?" resolves against a
    # stored claim whose evidence value is titan-01.
    engine._store_state(
        "conv-1",
        [ClaimItem(id="c1", claim="synapse is hottest", evidence=[EvidenceItem(path="hottest.cpu.node", reason="snapshot", value_at_claim="titan-01")])],
        summary,
        _rich_snapshot(),
        True,
    )
    followup = asyncio.run(
        run_answer(
            engine,
            "Run limitless What about that?",
            mode="smart",
            conversation_id="conv-1",
            snapshot_pin=True,
        )
    )
    # The LLM double says titan-99, but the grounded evidence (titan-01)
    # must win in the reply.
    assert "titan-01" in followup.reply

    # Call cap of 1 with no multiplier headroom: the engine should bail out
    # with its reasoning-limit message and flag the limit in meta.
    limit_engine = _make_engine(
        tmp_path / "limit",
        LimitLLM(),
        fast_llm_calls_max=1,
        llm_limit_multiplier=1.0,
    )
    limited = asyncio.run(run_answer(limit_engine, "tell me about cpu and runbooks", mode="custom"))
    assert "reasoning limit" in limited.reply
    assert limited.meta["llm_limit_hit"] is True

    # TimeoutLLM raises immediately; with a tiny budget the workflow should
    # fall back to its time-budget message and flag it in meta.
    timeout_engine = _make_engine(
        tmp_path / "timeout",
        TimeoutLLM(),
        smart_time_budget_sec=0.1,
        ollama_timeout_sec=0.1,
    )
    timed_out = asyncio.run(run_answer(timeout_engine, "Run limitless tell me about cpu and runbooks", mode="smart"))
    assert "time budget" in timed_out.reply.lower()
    assert timed_out.meta["time_budget_hit"] is True
|
||||
|
||||
|
||||
def test_api_matrix_queue_main_and_store_edge_paths(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """Exercise remaining API, Matrix, queue, main, and store branches.

    One large end-to-end sweep: FastAPI request validation, Matrix
    login/sync/heartbeat handling (including handler timeout and error
    paths), the NATS-backed queue manager with fakes, and score parsing.
    """
    settings = _settings(
        tmp_path,
        internal_token="secret",
        queue_enabled=True,
        matrix_bots=(MatrixBotConfig("bot", "pw", ("atlas",), "quick"),),
    )

    # Echo handler: reply is "<question>:<mode>" so routing is observable.
    async def handler(
        question: str,
        mode: str,
        history: list[dict[str, str]] | None,
        conversation_id: str | None,
        snapshot_pin: bool | None,
    ) -> AnswerResult:
        del history, conversation_id, snapshot_pin
        return AnswerResult(question + ":" + mode, AnswerScores(1, 2, 3, "low"), {"mode": mode})

    api = Api(settings, handler)
    from fastapi.testclient import TestClient

    client = TestClient(api.app)
    # Empty body and blank question are 400s; "content" is accepted as an
    # alias for the question and answered in the bot's default "quick" mode.
    assert client.post("/v1/answer", headers={"X-Internal-Token": "secret"}, json={}).status_code == 400
    assert client.post("/v1/answer", headers={"X-Internal-Token": "secret"}, json={"content": "hi"}).json()["reply"] == "hi:quick"
    assert client.post("/v1/answer", headers={"X-Internal-Token": "secret"}, json={"question": " "}).status_code == 400
    # The request model must not strip whitespace from the message field.
    assert AnswerRequest(message=" hello ").message == " hello "

    # Minimal httpx response stand-in used by the Matrix client fakes below.
    class FakeResp:
        def __init__(self, payload: dict[str, Any], *, status_code: int = 200) -> None:
            self._payload = payload
            self.status_code = status_code

        def raise_for_status(self) -> None:
            if self.status_code >= 400:
                raise httpx.HTTPStatusError("bad", request=httpx.Request("GET", "http://x"), response=httpx.Response(self.status_code))

        def json(self) -> dict[str, Any]:
            return self._payload

    # Fake Matrix homeserver: login returns a token; room directory lookup
    # 404s so resolve_room exercises its not-found branch.
    class MatrixAsyncClient:
        async def __aenter__(self) -> "MatrixAsyncClient":
            return self

        async def __aexit__(self, *exc: object) -> None:
            return None

        async def post(self, url: str, json: dict[str, Any] | None = None, headers: dict[str, str] | None = None) -> FakeResp:
            del json, headers
            if "login" in url:
                return FakeResp({"access_token": "tok"})
            return FakeResp({})

        async def get(self, url: str, headers: dict[str, str] | None = None, params: dict[str, Any] | None = None) -> FakeResp:
            del headers, params
            if "directory/room" in url:
                return FakeResp({}, status_code=404)
            return FakeResp({"next_batch": "n1", "rooms": {"join": {}}})

    monkeypatch.setattr("atlasbot.matrix.bot.httpx.AsyncClient", lambda timeout=None: MatrixAsyncClient())
    matrix_client = MatrixClient(settings, settings.matrix_bots[0])
    assert asyncio.run(matrix_client.login()) == "tok"
    # 404 on directory lookup resolves to an empty room id.
    assert asyncio.run(matrix_client.resolve_room("tok")) == ""

    bot = MatrixBot(settings, settings.matrix_bots[0], SimpleNamespace(answer=None), handler)

    # Scripted bot client: the first sync delivers one addressed message
    # (plus events the bot must ignore); the second sync raises to stop.
    class BotClient:
        def __init__(self) -> None:
            self.sent: list[str] = []
            self.sync_calls = 0

        async def login(self) -> str:
            return "tok"

        async def resolve_room(self, token: str) -> str:
            del token
            return "!room"

        async def join_room(self, token: str, room_id: str) -> None:
            del token, room_id

        async def send_message(self, token: str, room_id: str, text: str) -> None:
            del token, room_id
            self.sent.append(text)

        async def sync(self, token: str, since: str | None) -> dict[str, Any]:
            del token, since
            self.sync_calls += 1
            if self.sync_calls == 1:
                return {
                    "next_batch": "n1",
                    "rooms": {
                        "join": {
                            "!room": {
                                "timeline": {
                                    "events": [
                                        {"type": "m.room.member", "sender": "user"},
                                        {"type": "m.room.message", "sender": "bot", "content": {"body": "ignore"}},
                                        {"type": "m.room.message", "sender": "user", "content": {"body": "atlas quick hi"}},
                                    ]
                                }
                            }
                        }
                    },
                }
            raise RuntimeError("stop")

    bot._client = BotClient()

    # Run the bot loop briefly, then cancel it; the addressed message should
    # at least have produced a "Thinking" heartbeat.
    async def run_bot_once() -> None:
        task = asyncio.create_task(bot.run())
        await asyncio.sleep(0.01)
        task.cancel()
        with pytest.raises(asyncio.CancelledError):
            await task

    asyncio.run(run_bot_once())
    assert any("Thinking" in msg for msg in bot._client.sent)

    # Handler slower than the quick-mode budget: heartbeat must emit the
    # time-budget apology rather than the late answer.
    timeout_bot = MatrixBot(replace(settings, thinking_interval_sec=0.001, quick_time_budget_sec=0.01), settings.matrix_bots[0], SimpleNamespace(answer=None), None)
    timeout_bot._client = SimpleNamespace(
        sent=[],
        send_message=lambda token, room_id, text: asyncio.sleep(0, result=timeout_bot._client.sent.append(text)),
    )

    async def sleepy_handler(question: str, mode: str, history, conversation_id, observer):
        del question, mode, history, conversation_id, observer
        await asyncio.sleep(1.2)
        return AnswerResult("late", AnswerScores(1, 2, 3, "low"), {})

    timeout_bot._answer_handler = sleepy_handler
    asyncio.run(timeout_bot._answer_with_heartbeat("tok", "!room", "q", "quick"))
    assert any("time budget" in msg for msg in timeout_bot._client.sent)

    # Handler that raises: the bot should report an internal error message.
    error_bot = MatrixBot(replace(settings, thinking_interval_sec=0.001), settings.matrix_bots[0], SimpleNamespace(answer=None), None)
    error_bot._client = SimpleNamespace(
        sent=[],
        send_message=lambda token, room_id, text: asyncio.sleep(0, result=error_bot._client.sent.append(text)),
    )

    async def failing_handler(question: str, mode: str, history, conversation_id, observer):
        del question, mode, history, conversation_id, observer
        raise RuntimeError("boom")

    error_bot._answer_handler = failing_handler
    asyncio.run(error_bot._answer_with_heartbeat("tok", "!room", "q", "smart"))
    assert any("internal error" in msg for msg in error_bot._client.sent)

    # Queue disabled: submit() must call the handler directly.
    class DirectQueue:
        async def __call__(self, payload: dict[str, Any]) -> dict[str, Any]:
            return {"reply": payload["question"]}

    direct_qm = QueueManager(replace(settings, queue_enabled=False), DirectQueue())
    assert asyncio.run(direct_qm.submit({"question": "direct"})) == {"reply": "direct"}

    # NATS fakes: a subscription that immediately yields a canned reply, a
    # message carrier that records acks, and a JetStream whose consumer
    # fails once (retry path) then cancels (loop exit path).
    class FakeSub:
        async def next_msg(self, timeout: float) -> Any:
            del timeout
            return SimpleNamespace(data=json.dumps({"reply": "queued"}).encode())

        async def unsubscribe(self) -> None:
            return None

    class FakeMsg:
        def __init__(self, raw: bytes, reply: str = "reply") -> None:
            self.data = raw
            self.reply = reply
            self.acked = False

        async def ack(self) -> None:
            self.acked = True

    published: list[tuple[str, bytes]] = []

    class ExistingStreamJS:
        async def stream_info(self, stream: str) -> None:
            # Stream already exists, so start() must not try to create it.
            assert stream == settings.nats_stream

        async def publish(self, subject: str, data: bytes) -> None:
            published.append((subject, data))

        async def pull_subscribe(self, subject: str, durable: str):
            del subject, durable

            class Pull:
                def __init__(self) -> None:
                    self.calls = 0

                async def fetch(self, count: int, timeout: float) -> list[FakeMsg]:
                    del count, timeout
                    self.calls += 1
                    if self.calls == 1:
                        raise RuntimeError("retry")
                    raise asyncio.CancelledError

            return Pull()

    class FakeNats:
        def __init__(self) -> None:
            self.drained = False

        async def connect(self, url: str) -> None:
            assert url == settings.nats_url

        def jetstream(self) -> ExistingStreamJS:
            return ExistingStreamJS()

        def new_inbox(self) -> str:
            return "inbox"

        async def subscribe(self, reply: str) -> FakeSub:
            assert reply == "inbox"
            return FakeSub()

        async def publish(self, reply: str, data: bytes) -> None:
            published.append((reply, data))

        async def drain(self) -> None:
            self.drained = True

    monkeypatch.setattr("atlasbot.queue.nats.NATS", FakeNats)
    queue = QueueManager(settings, DirectQueue())
    asyncio.run(queue.start())
    # Round trip through the fake JetStream: submit publishes and awaits
    # the canned inbox reply.
    assert asyncio.run(queue.submit({"question": "queued", "mode": "smart"})) == {"reply": "queued"}

    # Malformed, well-formed, and handler-failure messages must all be acked
    # so they are never redelivered.
    invalid_msg = FakeMsg(b"not-json")
    asyncio.run(queue._handle_message(invalid_msg))
    assert invalid_msg.acked is True
    handled_msg = FakeMsg(json.dumps({"payload": {"question": "x"}, "reply": "reply"}).encode())
    asyncio.run(queue._handle_message(handled_msg))
    assert handled_msg.acked is True
    failing_queue = QueueManager(settings, lambda payload: (_ for _ in ()).throw(RuntimeError("boom")))
    failing_queue._nc = FakeNats()
    failing_queue._js = ExistingStreamJS()
    failure_msg = FakeMsg(json.dumps({"payload": {"question": "x"}}).encode())

    async def failing_handler(payload: dict[str, Any]) -> dict[str, Any]:
        del payload
        raise RuntimeError("boom")

    failing_queue._handler = failing_handler
    asyncio.run(failing_queue._handle_message(failure_msg))
    assert failure_msg.acked is True
    asyncio.run(queue.stop())

    # Score parsing: string numbers are coerced; junk falls back to 60.
    assert result_scores({"scores": {"confidence": "9", "relevance": "8", "satisfaction": "7", "hallucination_risk": "low"}}).confidence == 9
    assert result_scores({"scores": "bad"}).confidence == 60
|
||||
|
||||
|
||||
def test_kb_llm_snapshot_and_json_edge_paths(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """Cover remaining KB, LLM, snapshot, and JSON parsing branches."""
    # On-disk KB fixture: a catalog with one valid runbook entry, one entry
    # missing its path, and an oversized doc to exercise chunk limits.
    base = tmp_path / "kb"
    catalog = base / "catalog"
    catalog.mkdir(parents=True)
    (catalog / "atlas.json").write_text(json.dumps({"cluster": "atlas", "sources": ["bad"]}), encoding="utf-8")
    (catalog / "runbooks.json").write_text(json.dumps([{"title": "Fix", "path": "runbooks/fix.md"}, {"title": "No path"}]), encoding="utf-8")
    (base / "docs.md").write_text("x" * 120, encoding="utf-8")
    kb = KnowledgeBase(str(base))
    # Only the entry that has a path should be listed, exactly once.
    assert kb.runbook_titles(limit=1).count("runbooks/fix.md") == 1
    assert kb.chunk_lines(max_files=1, max_chars=60)
    # A line longer than the remaining budget must be rejected.
    assert kb._extend_with_limit([], ["abcdef"], 3) is False

    # No base path: chunking yields nothing instead of raising.
    empty_kb = KnowledgeBase("")
    assert empty_kb.chunk_lines() == []

    settings = _settings(tmp_path, ollama_url="http://example/api/chat", ollama_api_key="secret", ollama_retries=0, ollama_fallback_model="")
    client = LLMClient(settings)
    assert client._endpoint() == "http://example/api/chat"
    assert client._headers["x-api-key"] == "secret"
    # parse_json strips code fences and honors the fallback on junk input.
    assert parse_json("```{\"ok\": true}```") == {"ok": True}
    assert parse_json("not-json", fallback={"fallback": True}) == {"fallback": True}

    class FakeResponse:
        def __init__(self, status_code: int, payload: Any) -> None:
            self.status_code = status_code
            self._payload = payload

        def raise_for_status(self) -> None:
            if self.status_code >= 400:
                raise httpx.HTTPStatusError("bad", request=httpx.Request("POST", "http://example"), response=httpx.Response(self.status_code))

        def json(self) -> Any:
            return self._payload

    # Three payload shapes, consumed in order: "response" field, "reply"
    # fallback field, then an empty message that must raise LLMError.
    responses = iter([FakeResponse(200, {"response": "plain"}), FakeResponse(200, {"reply": "fallback"}), FakeResponse(200, {"message": {}})])

    class FakeAsyncClient:
        def __init__(self, timeout: float | None = None) -> None:
            self.timeout = timeout

        async def __aenter__(self) -> "FakeAsyncClient":
            return self

        async def __aexit__(self, *exc: object) -> None:
            return None

        async def post(self, _url: str, *, json: dict[str, Any], headers: dict[str, str]) -> FakeResponse:
            del _url, json, headers
            item = next(responses)
            if isinstance(item, Exception):
                raise item
            return item

    monkeypatch.setattr(httpx, "AsyncClient", FakeAsyncClient)
    assert asyncio.run(client.chat([{"role": "user", "content": "a"}], timeout_sec=1.0)) == "plain"
    assert asyncio.run(client.chat([{"role": "user", "content": "b"}], timeout_sec=1.0)) == "fallback"
    with pytest.raises(LLMError, match="empty response"):
        asyncio.run(client.chat([{"role": "user", "content": "c"}], timeout_sec=1.0))
    # One retry configured, two consecutive connection failures: the retry
    # path runs and still ends in LLMError.
    error_settings = replace(settings, ollama_retries=1)
    error_client = LLMClient(error_settings)
    error_responses = iter([httpx.ConnectError("nope"), httpx.ConnectError("still nope")])

    class ErrorAsyncClient(FakeAsyncClient):
        async def post(self, _url: str, *, json: dict[str, Any], headers: dict[str, str]) -> FakeResponse:
            del _url, json, headers
            raise next(error_responses)

    monkeypatch.setattr(httpx, "AsyncClient", ErrorAsyncClient)
    with pytest.raises(LLMError):
        asyncio.run(error_client.chat([{"role": "user", "content": "d"}], timeout_sec=1.0))

    # Snapshot provider: first call fetches over HTTP, second call within
    # the cache window returns the cached snapshot without fetching.
    provider = SnapshotProvider(replace(settings, ariadne_state_url="http://snapshot", ariadne_state_token="tok"))

    class SnapshotResp:
        def raise_for_status(self) -> None:
            return None

        def json(self) -> dict[str, Any]:
            return {"snapshot_id": "snap-1"}

    monkeypatch.setattr("atlasbot.snapshot.builder.httpx.get", lambda url, headers, timeout: SnapshotResp())
    assert provider.get() == {"snapshot_id": "snap-1"}
    provider._cache = {"snapshot_id": "cached"}
    provider._cache_ts = 10_000.0
    # One second after the cache timestamp — presumably inside the TTL, so
    # the cached value is served (TODO confirm TTL against SnapshotProvider).
    monkeypatch.setattr("atlasbot.snapshot.builder.time.monotonic", lambda: 10_001.0)
    assert provider.get() == {"snapshot_id": "cached"}
|
||||
1424
tests/test_support_modules.py
Normal file
1424
tests/test_support_modules.py
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user