quality(atlasbot): enforce strict gate split

This commit is contained in:
jenkins 2026-04-21 00:53:47 -03:00
parent 6ecf531bac
commit b7543d7e57
44 changed files with 9781 additions and 5716 deletions

View File

@ -6,11 +6,13 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
WORKDIR /app WORKDIR /app
COPY requirements.txt /app/requirements.txt COPY requirements.txt /app/requirements.txt
COPY requirements-dev.txt /app/requirements-dev.txt COPY requirements-dev.txt /app/requirements-dev.txt
COPY pyproject.toml /app/pyproject.toml
RUN pip install --no-cache-dir -r /app/requirements.txt -r /app/requirements-dev.txt RUN pip install --no-cache-dir -r /app/requirements.txt -r /app/requirements-dev.txt
COPY atlasbot /app/atlasbot COPY atlasbot /app/atlasbot
FROM base AS test FROM base AS test
COPY testing /app/testing
COPY tests /app/tests COPY tests /app/tests
COPY scripts /app/scripts COPY scripts /app/scripts

4
Jenkinsfile vendored
View File

@ -75,6 +75,10 @@ spec:
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json' QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json' QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
} }
options {
disableConcurrentBuilds()
buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
}
stages { stages {
stage('Checkout') { stage('Checkout') {
steps { steps {

View File

@ -1,7 +1,6 @@
import logging import logging
from typing import Any
from collections.abc import Awaitable, Callable from collections.abc import Awaitable, Callable
from typing import Any
from fastapi import FastAPI, Header, HTTPException from fastapi import FastAPI, Header, HTTPException
from pydantic import BaseModel from pydantic import BaseModel
@ -29,6 +28,16 @@ class AnswerResponse(BaseModel):
class Api: class Api:
"""Expose the answer API and enforce the shared internal token.
Input:
- `settings`: runtime configuration, including the optional internal token;
- `answer_handler`: async adapter that answers a normalized question.
Output:
- registers the HTTP routes on `self.app`.
"""
def __init__( def __init__(
self, self,
settings: Settings, settings: Settings,

View File

@ -1,6 +1,7 @@
import os import os
from dataclasses import dataclass from dataclasses import dataclass
def _env_bool(name: str, default: str = "false") -> bool: def _env_bool(name: str, default: str = "false") -> bool:
value = os.getenv(name, default).strip().lower() value = os.getenv(name, default).strip().lower()
return value in {"1", "true", "yes", "y", "on"} return value in {"1", "true", "yes", "y", "on"}
@ -121,6 +122,12 @@ def _load_matrix_bots(bot_mentions: tuple[str, ...]) -> tuple[MatrixBotConfig, .
def load_settings() -> Settings: def load_settings() -> Settings:
"""Load process settings from environment variables.
Output:
- a fully populated `Settings` instance with defaults for missing values.
"""
bot_mentions = tuple( bot_mentions = tuple(
[ [
item.strip() item.strip()

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,12 @@
"""Answer engine package."""
from ._base import *
from .common import *
from .engine import *
from .factsheet import *
from .post import *
from .post_ext import *
from .retrieval import *
from .retrieval_ext import *
from .spine import *
from .workflow import *

View File

@ -0,0 +1,116 @@
from __future__ import annotations
import logging
from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from typing import Any
log = logging.getLogger(__name__)

# Tuning constants shared across the answer-engine modules. Their consumers
# live in sibling modules (pulled in via `from ._base import *`), so the
# precise use of each threshold is defined there.
FOLLOWUP_SHORT_WORDS = 6
TOKEN_MIN_LEN = 3
# Tokens too generic to discriminate between metric lines.
GENERIC_METRIC_TOKENS = {"atlas", "cluster", "kubernetes", "k8s", "titan", "lab"}
NS_ENTRY_MIN_LEN = 2
DEDUP_MIN_SENTENCES = 3
RUNBOOK_SIMILARITY_THRESHOLD = 0.4
# Byte-size multipliers for human-readable size formatting.
BYTES_KB = 1024
BYTES_MB = 1024 * 1024


class LLMLimitReached(RuntimeError):
    """Signal that the per-run LLM call cap has been reached."""

    pass


class LLMTimeBudgetExceeded(RuntimeError):
    """Signal that the per-run wall-clock budget has been exhausted."""

    pass


@dataclass
class AnswerScores:
    """Self-assessed quality scores attached to an answer."""

    confidence: int
    relevance: int
    satisfaction: int
    hallucination_risk: str


@dataclass
class AnswerResult:
    """Final answer payload: reply text, scores, and diagnostic metadata."""

    reply: str
    scores: AnswerScores
    meta: dict[str, Any]


@dataclass(frozen=True)
class InsightGuardInput:
    """Inputs for the insight-guard pass (consumed by a sibling module)."""

    question: str
    reply: str
    classify: dict[str, Any]
    context: str
    # Forward reference to ModePlan (declared below); resolved lazily thanks
    # to `from __future__ import annotations` at module top.
    plan: ModePlan
    call_llm: Callable[..., Awaitable[str]]
    facts: list[str]


@dataclass
class ContradictionContext:
    """Everything needed to run the contradiction check on a draft reply."""

    call_llm: Callable[..., Awaitable[str]]
    question: str
    reply: str
    facts: list[str]
    plan: ModePlan


@dataclass
class EvidenceItem:
    """One snapshot path backing a claim, with values then and now."""

    path: str
    reason: str
    value: Any | None = None
    value_at_claim: Any | None = None


@dataclass
class ClaimItem:
    """A single answer claim plus the evidence paths supporting it."""

    id: str
    claim: str
    evidence: list[EvidenceItem]


@dataclass
class ConversationState:
    """Per-conversation memory: claims plus an optional pinned snapshot."""

    updated_at: float
    claims: list[ClaimItem]
    snapshot_id: str | None = None
    snapshot: dict[str, Any] | None = None


@dataclass
class ModePlan:
    """Per-mode execution knobs: models, retrieval sizes, feature flags."""

    model: str
    fast_model: str
    max_subquestions: int
    chunk_lines: int
    chunk_top: int
    chunk_group: int
    kb_max_chars: int
    kb_max_files: int
    use_raw_snapshot: bool
    parallelism: int
    score_retries: int
    use_deep_retrieval: bool
    use_tool: bool
    use_critic: bool
    use_gap: bool
    use_scores: bool
    drafts: int
    metric_retries: int
    subanswer_retries: int


@dataclass
class ScoreContext:
    """Parameters for one chunk-scoring pass."""

    question: str
    sub_questions: list[str]
    retries: int
    parallelism: int
    select_best: bool
    fast_model: str

View File

@ -0,0 +1,395 @@
from __future__ import annotations
import json
import time
from collections.abc import Awaitable, Callable
from typing import Any
from atlasbot.config import Settings
from atlasbot.llm import prompts
from atlasbot.llm.client import parse_json
from ._base import *
from .factsheet import *
from .post import *
from .post_ext import *
from .retrieval import _gather_limited
from .retrieval_ext import *
from .spine import *
def _strip_followup_meta(reply: str) -> str:
cleaned = reply.strip()
if not cleaned:
return cleaned
prefixes = [
"The draft is correct based on the provided context.",
"The draft is correct based on the context.",
"The draft is correct based on the provided evidence.",
"The draft is correct.",
"Based on the provided context,",
"Based on the context,",
"Based on the provided evidence,",
]
for prefix in prefixes:
if cleaned.lower().startswith(prefix.lower()):
cleaned = cleaned[len(prefix) :].lstrip(" .")
break
return cleaned
def _build_meta(mode: str, call_count: int, call_cap: int, limit_hit: bool, time_budget_hit: bool, time_budget_sec: float, classify: dict[str, Any], tool_hint: dict[str, Any] | None, started: float) -> dict[str, Any]:
return {
"mode": mode,
"llm_calls": call_count,
"llm_limit": call_cap,
"llm_limit_hit": limit_hit,
"time_budget_sec": time_budget_sec,
"time_budget_hit": time_budget_hit,
"classify": classify,
"tool_hint": tool_hint,
"elapsed_sec": round(time.monotonic() - started, 2),
}
def _debug_pipeline_log(settings: Settings, name: str, payload: Any) -> None:
"""Write a structured debug event when pipeline tracing is enabled."""
if not settings.debug_pipeline:
return
log.info("atlasbot_debug", extra={"extra": {"name": name, "payload": payload}})
def _mode_plan(settings: Settings, mode: str) -> ModePlan:
    """Return the execution plan (models, budgets, feature flags) for `mode`.

    Unknown modes fall through to the cheapest "quick" plan.
    """
    if mode == "genius":
        # Most expensive plan: deep retrieval, raw snapshot, two drafts.
        return ModePlan(
            model=settings.ollama_model_genius,
            fast_model=settings.ollama_model_fast,
            max_subquestions=6,
            chunk_lines=6,
            chunk_top=10,
            chunk_group=4,
            kb_max_chars=200000,
            kb_max_files=200,
            use_raw_snapshot=True,
            parallelism=4,
            score_retries=3,
            use_deep_retrieval=True,
            use_tool=True,
            use_critic=True,
            use_gap=True,
            use_scores=True,
            drafts=2,
            metric_retries=3,
            subanswer_retries=3,
        )
    if mode == "smart":
        # Middle ground: deep retrieval but a trimmed KB and one draft.
        return ModePlan(
            model=settings.ollama_model_smart,
            fast_model=settings.ollama_model_fast,
            max_subquestions=4,
            chunk_lines=8,
            chunk_top=8,
            chunk_group=4,
            kb_max_chars=3000,
            kb_max_files=12,
            use_raw_snapshot=False,
            parallelism=2,
            score_retries=2,
            use_deep_retrieval=True,
            use_tool=True,
            use_critic=True,
            use_gap=True,
            use_scores=True,
            drafts=1,
            metric_retries=2,
            subanswer_retries=2,
        )
    # Default/quick plan: single pass, all optional stages disabled.
    return ModePlan(
        model=settings.ollama_model_fast,
        fast_model=settings.ollama_model_fast,
        max_subquestions=1,
        chunk_lines=16,
        chunk_top=3,
        chunk_group=5,
        kb_max_chars=800,
        kb_max_files=4,
        use_raw_snapshot=False,
        parallelism=1,
        score_retries=1,
        use_deep_retrieval=False,
        use_tool=False,
        use_critic=False,
        use_gap=False,
        use_scores=False,
        drafts=1,
        metric_retries=1,
        subanswer_retries=1,
    )
def _llm_call_limit(settings: Settings, mode: str) -> int:
if mode == "genius":
return settings.genius_llm_calls_max
if mode == "smart":
return settings.smart_llm_calls_max
return settings.fast_llm_calls_max
def _mode_time_budget(settings: Settings, mode: str) -> float:
if mode == "genius":
return max(0.0, settings.genius_time_budget_sec)
if mode == "smart":
return max(0.0, settings.smart_time_budget_sec)
return max(0.0, settings.quick_time_budget_sec)
def _select_subquestions(parts: list[dict[str, Any]], fallback: str, limit: int) -> list[str]:
if not parts:
return [fallback]
ranked = []
for entry in parts:
if not isinstance(entry, dict):
continue
question = str(entry.get("question") or "").strip()
if not question:
continue
priority = entry.get("priority")
try:
weight = float(priority)
except (TypeError, ValueError):
weight = 1.0
ranked.append((weight, question))
ranked.sort(key=lambda item: item[0], reverse=True)
questions = [item[1] for item in ranked][:limit]
return questions or [fallback]
def _chunk_lines(lines: list[str], lines_per_chunk: int) -> list[dict[str, Any]]:
chunks: list[dict[str, Any]] = []
if not lines:
return chunks
for idx in range(0, len(lines), lines_per_chunk):
chunk_lines = lines[idx : idx + lines_per_chunk]
text = "\n".join(chunk_lines)
summary = " | ".join(chunk_lines[:4])
chunks.append({"id": f"c{idx//lines_per_chunk}", "text": text, "summary": summary})
return chunks
def _raw_snapshot_chunks(snapshot: dict[str, Any] | None) -> list[dict[str, Any]]:
if not isinstance(snapshot, dict) or not snapshot:
return []
chunks: list[dict[str, Any]] = []
for key, value in snapshot.items():
try:
payload = json.dumps({key: value}, indent=2)
except Exception:
continue
summary = f"raw:{key}"
chunks.append({"id": f"r{key}", "text": payload, "summary": summary})
return chunks
def _build_chunk_groups(chunks: list[dict[str, Any]], group_size: int) -> list[list[dict[str, Any]]]:
groups: list[list[dict[str, Any]]] = []
group: list[dict[str, Any]] = []
for chunk in chunks:
group.append({"id": chunk["id"], "summary": chunk["summary"]})
if len(group) >= group_size:
groups.append(group)
group = []
if group:
groups.append(group)
return groups
async def _score_chunks(call_llm: Callable[..., Any], chunks: list[dict[str, Any]], question: str, sub_questions: list[str], plan: ModePlan) -> dict[str, float]:
    """Score every chunk for relevance via the LLM, batched per plan settings.

    Returns a chunk-id -> score map. NOTE(review): the zero-seeded `scores`
    map is only returned on the empty-input path; on the normal path the
    serial/parallel result is returned directly, so chunk ids the LLM omitted
    will be absent rather than 0.0 — confirm callers use `.get(..., 0.0)`.
    """
    scores: dict[str, float] = {chunk["id"]: 0.0 for chunk in chunks}
    if not chunks:
        return scores
    groups = _build_chunk_groups(chunks, plan.chunk_group)
    ctx = ScoreContext(
        question=question,
        sub_questions=sub_questions,
        retries=max(1, plan.score_retries),
        parallelism=plan.parallelism,
        select_best=plan.score_retries > 1,
        fast_model=plan.fast_model,
    )
    # Serial path when there is no parallelism budget or only one run total.
    if ctx.parallelism <= 1 or len(groups) * ctx.retries <= 1:
        return await _score_groups_serial(call_llm, groups, ctx)
    return await _score_groups_parallel(call_llm, groups, ctx)
async def _score_groups_serial(call_llm: Callable[..., Any], groups: list[list[dict[str, Any]]], ctx: ScoreContext) -> dict[str, float]:
    """Score groups one at a time; with retries, pick or merge the runs."""
    scores: dict[str, float] = {}
    for grp in groups:
        # Repeat each group `ctx.retries` times for stability.
        runs = [await _score_chunk_group(call_llm, grp, ctx.question, ctx.sub_questions) for _ in range(ctx.retries)]
        if ctx.select_best and len(runs) > 1:
            # Let the LLM choose the most trustworthy run.
            best = await _select_best_score_run(call_llm, grp, runs, ctx)
            scores.update(best)
        else:
            scores.update(_merge_score_runs(runs))
    return scores
async def _score_groups_parallel(call_llm: Callable[..., Any], groups: list[list[dict[str, Any]]], ctx: ScoreContext) -> dict[str, float]:
    """Fan out group scoring runs with bounded concurrency, then combine.

    Each (group, retry) pair becomes one coroutine; results are re-grouped by
    group index before merging/selection.
    """
    coros: list[Awaitable[tuple[int, dict[str, float]]]] = []
    for idx, grp in enumerate(groups):
        for _ in range(ctx.retries):
            coros.append(_score_chunk_group_run(call_llm, idx, grp, ctx.question, ctx.sub_questions))
    # _gather_limited caps in-flight coroutines at ctx.parallelism.
    results = await _gather_limited(coros, ctx.parallelism)
    grouped: dict[int, list[dict[str, float]]] = {}
    for idx, result in results:
        grouped.setdefault(idx, []).append(result)
    scores: dict[str, float] = {}
    for idx, runs in grouped.items():
        if ctx.select_best and len(runs) > 1:
            group = groups[idx]
            best = await _select_best_score_run(call_llm, group, runs, ctx)
            scores.update(best)
        else:
            scores.update(_merge_score_runs(runs))
    return scores
async def _score_chunk_group(call_llm: Callable[..., Any], group: list[dict[str, Any]], question: str, sub_questions: list[str]) -> dict[str, float]:
    """Ask the LLM to score one group of chunk summaries.

    Returns a chunk-id -> score map; malformed entries are skipped and an
    unparseable score coerces to 0.0.
    """
    prompt = (
        prompts.CHUNK_SCORE_PROMPT
        + "\nQuestion: "
        + question
        + "\nSubQuestions: "
        + json.dumps(sub_questions)
        + "\nChunks: "
        + json.dumps(group)
    )
    raw = await call_llm(prompts.RETRIEVER_SYSTEM, prompt, model=None, tag="chunk_score")
    data = _parse_json_list(raw)
    scored: dict[str, float] = {}
    for entry in data:
        if not isinstance(entry, dict):
            continue
        cid = str(entry.get("id") or "").strip()
        if not cid:
            continue
        try:
            score = float(entry.get("score") or 0)
        except (TypeError, ValueError):
            score = 0.0
        scored[cid] = score
    return scored
async def _score_chunk_group_run(call_llm: Callable[..., Any], idx: int, group: list[dict[str, Any]], question: str, sub_questions: list[str]) -> tuple[int, dict[str, float]]:
    """Tag a scoring run with its group index so parallel results can be regrouped."""
    return idx, await _score_chunk_group(call_llm, group, question, sub_questions)
def _merge_score_runs(runs: list[dict[str, float]]) -> dict[str, float]:
if not runs:
return {}
totals: dict[str, float] = {}
counts: dict[str, int] = {}
for run in runs:
for key, value in run.items():
totals[key] = totals.get(key, 0.0) + float(value)
counts[key] = counts.get(key, 0) + 1
return {key: totals[key] / counts[key] for key in totals}
async def _select_best_score_run(call_llm: Callable[..., Any], group: list[dict[str, Any]], runs: list[dict[str, float]], ctx: ScoreContext) -> dict[str, float]:
    """Ask the fast model which scoring run to trust; default to the first.

    Any malformed/out-of-range `selected_index` falls back to run 0.
    """
    if not runs:
        return {}
    prompt = (
        prompts.RETRIEVER_SELECT_PROMPT
        + "\nQuestion: "
        + ctx.question
        + "\nSubQuestions: "
        + json.dumps(ctx.sub_questions)
        + "\nChunks: "
        + json.dumps(group)
        + "\nRuns: "
        + json.dumps(runs)
    )
    raw = await call_llm(prompts.RETRIEVER_SELECT_SYSTEM, prompt, model=ctx.fast_model, tag="chunk_select")
    data = parse_json(raw)
    idx = 0
    if isinstance(data, dict):
        try:
            idx = int(data.get("selected_index") or 0)
        except (TypeError, ValueError):
            idx = 0
    if idx < 0 or idx >= len(runs):
        idx = 0
    return runs[idx]
def _keyword_hits(ranked: list[dict[str, Any]], head: dict[str, Any], keywords: list[str] | None) -> list[dict[str, Any]]:
if not keywords:
return []
lowered = [kw.lower() for kw in keywords if isinstance(kw, str) and kw.strip()]
if not lowered:
return []
hits: list[dict[str, Any]] = []
for item in ranked:
if item is head:
continue
text = str(item.get("text") or "").lower()
if any(kw in text for kw in lowered):
hits.append(item)
return hits
def _select_chunks(chunks: list[dict[str, Any]], scores: dict[str, float], plan: ModePlan, keywords: list[str] | None = None, must_ids: list[str] | None = None) -> list[dict[str, Any]]:
    """Pick up to plan.chunk_top chunks: must-have ids, then keyword hits, then rank."""
    if not chunks:
        return []
    ranked = sorted(chunks, key=lambda item: scores.get(item["id"], 0.0), reverse=True)
    # NOTE(review): the seed is chunks[0] (document order), not ranked[0] —
    # looks intentional (always keep the leading chunk), but confirm it is not
    # meant to be the top-scored chunk.
    selected: list[dict[str, Any]] = [chunks[0]]
    if _append_must_chunks(chunks, selected, must_ids, plan.chunk_top):
        return selected
    if _append_keyword_chunks(ranked, selected, keywords, plan.chunk_top):
        return selected
    _append_ranked_chunks(ranked, selected, plan.chunk_top)
    return selected
def _append_must_chunks(chunks: list[dict[str, Any]], selected: list[dict[str, Any]], must_ids: list[str] | None, limit: int) -> bool:
if not must_ids:
return False
id_map = {item["id"]: item for item in chunks}
for cid in must_ids:
item = id_map.get(cid)
if item and item not in selected:
selected.append(item)
if len(selected) >= limit:
return True
return False
def _append_keyword_chunks(ranked: list[dict[str, Any]], selected: list[dict[str, Any]], keywords: list[str] | None, limit: int) -> bool:
    """Append keyword-matching chunks; True when `limit` was reached."""
    if not ranked:
        return False
    top = ranked[0]
    for match in _keyword_hits(ranked, top, keywords):
        if match in selected:
            continue
        selected.append(match)
        if len(selected) >= limit:
            return True
    return False
def _append_ranked_chunks(ranked: list[dict[str, Any]], selected: list[dict[str, Any]], limit: int) -> None:
for item in ranked:
if len(selected) >= limit:
break
if item not in selected:
selected.append(item)
def _format_runbooks(runbooks: list[str]) -> str:
if not runbooks:
return ""
return "Relevant runbooks:\n" + "\n".join([f"- {item}" for item in runbooks])
# Re-export single-underscore helpers so sibling modules can `from .common
# import *` them (wildcard import skips them otherwise). Note this also
# re-exports underscore names imported from other modules.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]

View File

@ -0,0 +1,267 @@
from __future__ import annotations
from collections.abc import Callable
import json
import time
from typing import Any
from atlasbot.config import Settings
from atlasbot.knowledge.loader import KnowledgeBase
from atlasbot.llm import prompts
from atlasbot.llm.client import LLMClient, build_messages
from atlasbot.snapshot.builder import SnapshotProvider
from atlasbot.state.store import ClaimStore
from ._base import *
from .common import *
from .factsheet import *
from .post import *
from .post_ext import *
from .retrieval import *
from .retrieval_ext import *
from .spine import *
from .workflow import run_answer
class AnswerEngine:
    """Coordinate Atlas question answering across snapshots, KB, and LLMs.

    Why:
    - keep the public answer surface in one place while the retrieval and
      post-processing helpers stay split across smaller modules.
    """

    def __init__(self, settings: Settings, llm: LLMClient, kb: KnowledgeBase, snapshot: SnapshotProvider) -> None:
        # Wire collaborators; ClaimStore persists per-conversation claims with
        # a TTL so follow-up questions can reuse prior evidence.
        self._settings = settings
        self._llm = llm
        self._kb = kb
        self._snapshot = snapshot
        self._store = ClaimStore(settings.state_db_path, settings.conversation_ttl_sec)

    async def answer(
        self,
        question: str,
        *,
        mode: str,
        history: list[dict[str, str]] | None = None,
        observer: Callable[[str, str], None] | None = None,
        conversation_id: str | None = None,
        snapshot_pin: bool | None = None,
    ) -> AnswerResult:
        """Answer a question by delegating to the staged workflow."""
        return await run_answer(
            self,
            question,
            mode=mode,
            history=history,
            observer=observer,
            conversation_id=conversation_id,
            snapshot_pin=snapshot_pin,
        )

    async def _answer_stock(self, question: str) -> AnswerResult:
        """Answer without cluster context using the stock system prompt."""
        messages = build_messages(prompts.STOCK_SYSTEM, question)
        reply = await self._llm.chat(messages, model=self._settings.ollama_model)
        return AnswerResult(reply, _default_scores(), {"mode": "stock"})

    async def _synthesize_answer(self, question: str, subanswers: list[str], context: str, classify: dict[str, Any], plan: ModePlan, call_llm: Callable[..., Any]) -> str:
        """Compose the final reply from subanswers, optionally via multiple drafts.

        With no subanswers, synthesize directly from context. With several
        drafts, have the fast model pick the best one.
        """
        style_hint = _style_hint(classify)
        if not subanswers:
            prompt = (
                prompts.SYNTHESIZE_PROMPT
                + "\nQuestion: "
                + question
                + "\nStyle: "
                + style_hint
                + "\nQuestionType: "
                + (classify.get("question_type") or "unknown")
            )
            return await call_llm(prompts.SYNTHESIZE_SYSTEM, prompt, context=context, model=plan.model, tag="synth")
        draft_prompts = []
        for idx in range(plan.drafts):
            # DraftIndex varies the prompt so drafts diverge.
            draft_prompts.append(
                prompts.SYNTHESIZE_PROMPT
                + "\nQuestion: "
                + question
                + "\nStyle: "
                + style_hint
                + "\nQuestionType: "
                + (classify.get("question_type") or "unknown")
                + "\nSubanswers:\n"
                + "\n".join([f"- {item}" for item in subanswers])
                + f"\nDraftIndex: {idx + 1}"
            )
        drafts: list[str] = []
        if plan.parallelism > 1 and len(draft_prompts) > 1:
            drafts = await _gather_limited(
                [
                    call_llm(
                        prompts.SYNTHESIZE_SYSTEM,
                        prompt,
                        context=context,
                        model=plan.model,
                        tag="synth",
                    )
                    for prompt in draft_prompts
                ],
                plan.parallelism,
            )
        else:
            for prompt in draft_prompts:
                drafts.append(
                    await call_llm(
                        prompts.SYNTHESIZE_SYSTEM,
                        prompt,
                        context=context,
                        model=plan.model,
                        tag="synth",
                    )
                )
        if len(drafts) == 1:
            return drafts[0]
        select_prompt = (
            prompts.DRAFT_SELECT_PROMPT
            + "\nQuestion: "
            + question
            + "\nDrafts:\n"
            + "\n\n".join([f"Draft {idx + 1}: {text}" for idx, text in enumerate(drafts)])
        )
        select_raw = await call_llm(prompts.CRITIC_SYSTEM, select_prompt, context=context, model=plan.fast_model, tag="draft_select")
        selection = _parse_json_block(select_raw, fallback={})
        # NOTE(review): int(...) raises on a non-numeric "best"; the {} fallback
        # covers missing JSON but not a junk value — confirm this is acceptable.
        idx = int(selection.get("best", 1)) - 1
        if 0 <= idx < len(drafts):
            return drafts[idx]
        return drafts[0]

    async def _score_answer(self, question: str, reply: str, plan: ModePlan, call_llm: Callable[..., Any]) -> AnswerScores:
        """Self-score the reply; returns defaults when scoring is disabled."""
        if not plan.use_scores:
            return _default_scores()
        prompt = prompts.SCORE_PROMPT + "\nQuestion: " + question + "\nAnswer: " + reply
        raw = await call_llm(prompts.SCORE_SYSTEM, prompt, model=plan.fast_model, tag="score")
        data = _parse_json_block(raw, fallback={})
        return _scores_from_json(data)

    async def _extract_claims(self, question: str, reply: str, summary: dict[str, Any], facts_used: list[str], call_llm: Callable[..., Any]) -> list[ClaimItem]:
        """Map reply claims to snapshot paths so follow-ups can re-verify them.

        Claims without resolvable evidence are dropped.
        """
        if not reply or not summary:
            return []
        summary_json = _json_excerpt(summary)
        facts_used = [line.strip() for line in (facts_used or []) if line and line.strip()]
        facts_block = ""
        if facts_used:
            facts_block = "\nFactsUsed:\n" + "\n".join([f"- {line}" for line in facts_used[:12]])
        prompt = prompts.CLAIM_MAP_PROMPT + "\nQuestion: " + question + "\nAnswer: " + reply + facts_block
        raw = await call_llm(
            prompts.CLAIM_SYSTEM,
            prompt,
            context=f"SnapshotSummaryJson:{summary_json}",
            model=self._settings.ollama_model_fast,
            tag="claim_map",
        )
        data = _parse_json_block(raw, fallback={})
        claims_raw = data.get("claims") if isinstance(data, dict) else None
        claims: list[ClaimItem] = []
        if isinstance(claims_raw, list):
            for entry in claims_raw:
                if not isinstance(entry, dict):
                    continue
                claim_text = str(entry.get("claim") or "").strip()
                claim_id = str(entry.get("id") or "").strip() or f"c{len(claims)+1}"
                evidence_items: list[EvidenceItem] = []
                for ev in entry.get("evidence") or []:
                    if not isinstance(ev, dict):
                        continue
                    path = str(ev.get("path") or "").strip()
                    if not path:
                        continue
                    reason = str(ev.get("reason") or "").strip()
                    # Record the value at claim time so drift can be shown later.
                    value = _resolve_path(summary, path)
                    evidence_items.append(EvidenceItem(path=path, reason=reason, value=value, value_at_claim=value))
                if claim_text and evidence_items:
                    claims.append(ClaimItem(id=claim_id, claim=claim_text, evidence=evidence_items))
        return claims

    async def _dedup_reply(self, reply: str, plan: ModePlan, call_llm: Callable[..., Any], tag: str) -> str:
        """Run a dedup pass only when the reply looks repetitive."""
        if not _needs_dedup(reply):
            return reply
        dedup_prompt = prompts.DEDUP_PROMPT + "\nDraft: " + reply
        return await call_llm(prompts.DEDUP_SYSTEM, dedup_prompt, model=plan.fast_model, tag=tag)

    async def _answer_followup(self, question: str, state: ConversationState, summary: dict[str, Any], classify: dict[str, Any], plan: ModePlan, call_llm: Callable[..., Any]) -> str:  # noqa: C901, ARG002
        """Answer a follow-up from stored claims, re-checking evidence values.

        Unknown node/namespace mentions trigger an evidence-fix pass before
        dedup and boilerplate stripping.
        """
        claim_ids = await self._select_claims(question, state.claims, plan, call_llm)
        selected = [claim for claim in state.claims if claim.id in claim_ids] if claim_ids else state.claims[:2]
        evidence_lines = []
        lowered = question.lower()
        for claim in selected:
            evidence_lines.append(f"Claim: {claim.claim}")
            for ev in claim.evidence:
                # Refresh the live value; annotate when it drifted since claim time.
                current = _resolve_path(summary, ev.path)
                ev.value = current
                delta_note = ""
                if ev.value_at_claim is not None and current is not None and current != ev.value_at_claim:
                    delta_note = f" (now {current})"
                evidence_lines.append(f"- {ev.path}: {ev.value_at_claim}{delta_note}")
        if any(term in lowered for term in ("hotspot", "hot spot", "hottest", "jetson", "rpi", "amd64", "arm64", "hardware", "class")):
            hotspot_lines = _hotspot_evidence(summary)
            if hotspot_lines:
                evidence_lines.append("HotspotSummary:")
                evidence_lines.extend(hotspot_lines)
        evidence_ctx = "\n".join(evidence_lines)
        prompt = prompts.FOLLOWUP_PROMPT + "\nFollow-up: " + question + "\nEvidence:\n" + evidence_ctx
        reply = await call_llm(prompts.FOLLOWUP_SYSTEM, prompt, model=plan.model, tag="followup")
        allowed_nodes = _allowed_nodes(summary)
        allowed_namespaces = _allowed_namespaces(summary)
        unknown_nodes = _find_unknown_nodes(reply, allowed_nodes)
        unknown_namespaces = _find_unknown_namespaces(reply, allowed_namespaces)
        extra_bits = []
        if unknown_nodes:
            extra_bits.append("UnknownNodes: " + ", ".join(sorted(unknown_nodes)))
        if unknown_namespaces:
            extra_bits.append("UnknownNamespaces: " + ", ".join(sorted(unknown_namespaces)))
        if allowed_nodes:
            extra_bits.append("AllowedNodes: " + ", ".join(allowed_nodes))
        if allowed_namespaces:
            extra_bits.append("AllowedNamespaces: " + ", ".join(allowed_namespaces))
        if extra_bits:
            fix_prompt = (
                prompts.EVIDENCE_FIX_PROMPT
                + "\nQuestion: "
                + question
                + "\nDraft: "
                + reply
                + "\n"
                + "\n".join(extra_bits)
            )
            reply = await call_llm(
                prompts.EVIDENCE_FIX_SYSTEM,
                fix_prompt,
                context="Evidence:\n" + evidence_ctx,
                model=plan.model,
                tag="followup_fix",
            )
        reply = await self._dedup_reply(reply, plan, call_llm, tag="dedup_followup")
        reply = _strip_followup_meta(reply)
        return reply

    async def _select_claims(self, question: str, claims: list[ClaimItem], plan: ModePlan, call_llm: Callable[..., Any]) -> list[str]:
        """Ask the fast model which stored claims a follow-up refers to."""
        if not claims:
            return []
        claims_brief = [{"id": claim.id, "claim": claim.claim} for claim in claims]
        prompt = prompts.SELECT_CLAIMS_PROMPT + "\nFollow-up: " + question + "\nClaims: " + json.dumps(claims_brief)
        raw = await call_llm(prompts.FOLLOWUP_SYSTEM, prompt, model=plan.fast_model, tag="select_claims")
        data = _parse_json_block(raw, fallback={})
        ids = data.get("claim_ids") if isinstance(data, dict) else []
        if isinstance(ids, list):
            return [str(item) for item in ids if item]
        return []

    def _get_state(self, conversation_id: str | None) -> ConversationState | None:
        """Load stored conversation state, or None when absent/unidentified."""
        if not conversation_id:
            return None
        state_payload = self._store.get(conversation_id)
        return _state_from_payload(state_payload) if state_payload else None

    def _store_state(self, conversation_id: str, claims: list[ClaimItem], summary: dict[str, Any], snapshot: dict[str, Any] | None, pin_snapshot: bool) -> None:
        """Persist claims (and optionally the snapshot) for later follow-ups."""
        snapshot_id = _snapshot_id(summary)
        pinned_snapshot = snapshot if pin_snapshot else None
        # NOTE(review): time.monotonic() is process-local; if ClaimStore
        # persists across restarts, TTL math on this value may misbehave —
        # confirm, or consider time.time().
        payload = {
            "updated_at": time.monotonic(),
            "claims": _claims_to_payload(claims),
            "snapshot_id": snapshot_id,
            "snapshot": pinned_snapshot,
        }
        self._store.set(conversation_id, payload)

    def _cleanup_state(self) -> None:
        """Evict expired conversation entries from the store."""
        self._store.cleanup()

View File

@ -0,0 +1,189 @@
from __future__ import annotations
import json
import re
from typing import Any
from ._base import *
MAX_FACT_LINE_CHARS = 180
MAX_KB_LINE_CHARS = 220
def _factsheet_kb_chars(mode: str, default_chars: int) -> int:
if mode == "genius":
return min(max(default_chars, 4000), 6000)
if mode == "smart":
return min(max(default_chars, 3000), 4500)
return max(1200, default_chars)
def _factsheet_line_limit(mode: str) -> int:
if mode == "genius":
return 30
if mode == "smart":
return 22
return 14
def _factsheet_instruction(mode: str) -> str:
if mode == "genius":
return (
"Start with a direct conclusion, then include the strongest supporting facts and one caveat. "
"Keep it to 4-8 sentences. If data is missing, name the missing metric explicitly."
)
if mode == "smart":
return (
"Start with a direct conclusion and support it with key facts. Keep it to 2-5 sentences. "
"If data is missing, say exactly what is missing and suggest atlas-genius."
)
return "Keep it to 1-3 sentences. If key data is missing, say what is missing and suggest atlas-smart."
def _factsheet_model(mode: str, plan: ModePlan) -> str:
if mode in {"quick", "fast"}:
return plan.fast_model
return plan.model
def _is_plain_math_question(question: str) -> bool:
lowered = question.lower().strip()
if not lowered:
return False
cluster_markers = (
"titan",
"atlas",
"cluster",
"node",
"pod",
"namespace",
"workload",
"grafana",
"alert",
"k8s",
"kubernetes",
"rpi",
"longhorn",
"postgres",
"victoria",
"ollama",
)
if any(token in lowered for token in cluster_markers):
return False
return bool(
re.fullmatch(r"[0-9\s+\-*/().=]+", lowered)
or re.search(r"\bwhat(?:'s| is)\s+\d+\s*[-+*/]\s*\d+\b", lowered)
)
def _quick_fact_sheet_lines(question: str, summary_lines: list[str], kb_lines: list[str], *, limit: int) -> list[str]:  # noqa: C901
    """Select up to `limit` snapshot/KB lines most relevant to `question`.

    Summary lines are scored by priority markers and question-token overlap;
    a small quota of matching KB lines is appended, then the merged list is
    de-duplicated preserving order.
    """
    # Question tokens (3+ chars), minus tokens too generic to discriminate.
    tokens = {
        token
        for token in re.findall(r"[a-z0-9][a-z0-9_-]{2,}", question.lower())
        if token not in GENERIC_METRIC_TOKENS
    }
    priority_markers = (
        "snapshot:",
        "nodes_total",
        "nodes_ready",
        "nodes_not_ready",
        "workers_ready",
        "workers_not_ready",
        "control_plane",
        "worker_nodes",
        "hottest",
        "postgres",
        "pods",
        "longhorn",
        "titan-",
        "rpi5",
        "rpi4",
        "jetson",
        "amd64",
    )
    scored: list[tuple[int, str]] = []
    for raw in summary_lines:
        line = raw.strip()
        if not line:
            continue
        lowered = line.lower()
        score = 0
        if any(marker in lowered for marker in priority_markers):
            score += 4
        # Token overlap outweighs a single marker hit.
        overlap = sum(1 for token in tokens if token in lowered)
        score += overlap * 3
        if len(line) <= MAX_FACT_LINE_CHARS:
            score += 1
        if score > 0:
            scored.append((score, line))
    scored.sort(key=lambda item: item[0], reverse=True)
    selected = [line for _, line in scored[:limit]]
    if not selected:
        # Nothing scored: fall back to the first non-blank summary lines.
        selected = [line.strip() for line in summary_lines if line.strip()][:limit]
    kb_selected: list[str] = []
    for raw in kb_lines:
        line = raw.strip()
        if not line or len(line) > MAX_KB_LINE_CHARS:
            continue
        lowered = line.lower()
        # Skip KB bookkeeping lines.
        if "kb file:" in lowered or "kb: atlas.json" in lowered:
            continue
        overlap = sum(1 for token in tokens if token in lowered)
        if overlap > 0 or any(marker in lowered for marker in ("runbook", "titan-", "rpi5", "rpi4", "amd64", "jetson")):
            kb_selected.append(line)
        if len(kb_selected) >= max(4, limit // 3):
            break
    merged = []
    seen: set[str] = set()
    for line in selected + kb_selected:
        if line not in seen:
            seen.add(line)
            merged.append(line)
        if len(merged) >= limit:
            break
    return merged
def _quick_fact_sheet_text(lines: list[str]) -> str:
if not lines:
return "Fact Sheet:\n- No snapshot facts available."
body = "\n".join([f"- {line}" for line in lines])
return "Fact Sheet:\n" + body
def _quick_fact_sheet_heuristic_answer(question: str, fact_lines: list[str]) -> str:
lowered = question.lower()
if (
any(token in lowered for token in ("placement", "schedule", "last resort", "last-resort"))
and any(token in lowered for token in ("node", "workload", "worker", "titan"))
):
return (
"General workload placement is: prefer rpi5 workers first, then rpi4 workers. "
"titan-22 is the last-resort general compute node, and titan-24 is the absolute last resort "
"reserved for heavy one-offs."
)
for line in fact_lines:
compact = line.replace(" ", "")
match = re.search(r"nodes_total[:=](\d+),ready[:=](\d+),not_ready[:=](\d+)", compact)
if not match:
continue
total = match.group(1)
ready = match.group(2)
not_ready = match.group(3)
if "how many" in lowered and "ready" in lowered and "node" in lowered:
return f"The latest snapshot shows {ready} ready nodes out of {total} total ({not_ready} not ready)."
if ("not ready" in lowered or "unready" in lowered) and "node" in lowered:
return f"The latest snapshot shows {not_ready} not-ready nodes ({ready} ready out of {total} total)."
return ""
def _json_excerpt(summary: dict[str, Any], max_chars: int = 12000) -> str:
raw = json.dumps(summary, ensure_ascii=False)
return raw[:max_chars]
# Re-export single-underscore helpers so sibling modules can wildcard-import
# them (underscore names are skipped by `import *` otherwise).
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]

View File

@ -0,0 +1,459 @@
from __future__ import annotations
import re
from typing import Any
from atlasbot.llm import prompts
from atlasbot.llm.client import parse_json
from ._base import *
from .retrieval_ext import _dedupe_lines
def _merge_fact_lines(primary: list[str], fallback: list[str]) -> list[str]:
merged: list[str] = []
for line in primary + fallback:
value = (line or "").strip()
if value and value not in merged:
merged.append(value)
return merged
def _strip_unknown_entities(reply: str, unknown_nodes: list[str], unknown_namespaces: list[str]) -> str:
if not reply:
return reply
if not unknown_nodes and not unknown_namespaces:
return reply
sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", reply) if s.strip()]
if not sentences:
return reply
lowered_nodes = [node.lower() for node in unknown_nodes]
lowered_namespaces = [ns.lower() for ns in unknown_namespaces]
kept: list[str] = []
for sent in sentences:
lower = sent.lower()
if lowered_nodes and any(node in lower for node in lowered_nodes):
continue
if lowered_namespaces and any(f"namespace {ns}" in lower for ns in lowered_namespaces):
continue
kept.append(sent)
cleaned = " ".join(kept).strip()
return cleaned or reply
def _needs_evidence_guard(reply: str, facts: list[str]) -> bool:
if not reply or not facts:
return False
lower_reply = reply.lower()
fact_text = " ".join(facts).lower()
node_pattern = re.compile(r"\b(titan-[0-9a-z]+|node-?\d+)\b", re.IGNORECASE)
nodes = {m.group(1).lower() for m in node_pattern.finditer(reply)}
if nodes:
missing = [node for node in nodes if node not in fact_text]
if missing:
return True
pressure_terms = ("pressure", "diskpressure", "memorypressure", "pidpressure", "headroom")
if any(term in lower_reply for term in pressure_terms) and not any(term in fact_text for term in pressure_terms):
return True
arch_terms = ("amd64", "arm64", "rpi", "rpi4", "rpi5", "jetson")
return any(term in lower_reply for term in arch_terms) and not any(term in fact_text for term in arch_terms)
async def _contradiction_decision(ctx: ContradictionContext, attempts: int = 1) -> dict[str, Any]:
    """Ask the fast model whether the drafted reply should defer to the facts.

    Runs the contradiction prompt up to *attempts* times and keeps the verdict
    with the highest self-reported confidence.  Defaults to trusting the facts
    (``use_facts=True``, confidence 50) when the model output is unusable.
    """
    best = {"use_facts": True, "confidence": 50}
    # Cap the evidence block so the prompt stays small.
    facts_block = "\n".join(ctx.facts[:12])
    for idx in range(max(1, attempts)):
        # Label retries so repeated calls do not look identical to the model.
        variant = f"Variant: {idx + 1}" if attempts > 1 else ""
        prompt = (
            prompts.CONTRADICTION_PROMPT.format(question=ctx.question, draft=ctx.reply, facts=facts_block)
            + ("\n" + variant if variant else "")
        )
        raw = await ctx.call_llm(
            prompts.CONTRADICTION_SYSTEM,
            prompt,
            model=ctx.plan.fast_model,
            tag="contradiction",
        )
        data = _parse_json_block(raw, fallback={})
        try:
            confidence = int(data.get("confidence", 50))
        except Exception:
            # Non-numeric confidence from the model: fall back to neutral.
            confidence = 50
        use_facts = bool(data.get("use_facts", True))
        # ">=" so a later attempt at equal confidence wins (freshest verdict).
        if confidence >= best.get("confidence", 0):
            best = {"use_facts": use_facts, "confidence": confidence}
    return best
def _filter_lines_by_keywords(lines: list[str], keywords: list[str], max_lines: int) -> list[str]:
    """Keep lines containing any expanded keyword token; fall back to the originals."""
    if not lines:
        return []
    tokens = _expand_tokens(keywords)
    if not tokens:
        return lines[:max_lines]

    def _hits(candidate: str) -> bool:
        lowered = candidate.lower()
        return any(tok in lowered for tok in tokens)

    matching = [candidate for candidate in lines if _hits(candidate)]
    return (matching or lines)[:max_lines]
def _rank_metric_lines(lines: list[str], tokens: set[str], max_lines: int) -> list[str]:
if not lines or not tokens:
return []
ranked: list[tuple[int, int, str]] = []
for line in lines:
lower = line.lower()
hits = sum(1 for tok in tokens if tok in lower)
if not hits:
continue
has_number = 1 if re.search(r"\d", line) else 0
ranked.append((has_number, hits, line))
ranked.sort(key=lambda item: (-item[0], -item[1], item[2]))
return [item[2] for item in ranked[:max_lines]]
def _select_metric_line(lines: list[str], question: str, tokens: list[str] | set[str]) -> str | None:
    """Pick the single best metric line, preferring totals for counting questions."""
    if not lines or not tokens:
        return None
    normalized = {str(tok).lower() for tok in tokens if tok}
    ranked = _rank_metric_lines(lines, normalized, max_lines=6)
    if not ranked:
        return None
    question_lower = (question or "").lower()
    wants_count = any(term in question_lower for term in ("how many", "count", "total"))
    if wants_count:
        for candidate in ranked:
            lowered = candidate.lower()
            if "total" in lowered or "count" in lowered:
                return candidate
    return ranked[0]
def _format_direct_metric_line(line: str) -> str:
    """Render a raw summary line as a sentence, trying ':' then '=' formats."""
    if not line:
        return ""
    for marker, formatter in ((":", _format_colon_metric), ("=", _format_equals_metric)):
        if marker in line:
            rendered = formatter(line)
            if rendered:
                return rendered
    return line
def _format_colon_metric(line: str) -> str | None:
    """Turn a 'key: value' line into a sentence; special-cases node totals."""
    raw_key, _, raw_value = line.partition(":")
    key = raw_key.strip().replace("_", " ")
    value = raw_value.strip()
    if not value:
        return None
    if key == "nodes":
        rendered = _format_nodes_value(value)
        if rendered:
            return rendered
    if key in {"nodes total", "nodes_total"}:
        return f"Atlas has {value} total nodes."
    return f"{key} is {value}."
def _format_equals_metric(line: str) -> str | None:
pairs: list[str] = []
for part in line.split(","):
if "=" not in part:
continue
key, value = part.split("=", 1)
key = key.strip().replace("_", " ")
value = value.strip()
if not value:
continue
if key in {"nodes total", "nodes_total"}:
return f"Atlas has {value} total nodes."
pairs.append(f"{key} is {value}")
if not pairs:
return None
if len(pairs) == 1:
return f"{pairs[0]}."
return "; ".join(pairs) + "."
def _format_nodes_value(value: str) -> str | None:
parts = [p.strip() for p in value.split(",") if p.strip()]
total = None
rest: list[str] = []
for part in parts:
if part.startswith("total="):
total = part.split("=", 1)[1]
else:
rest.append(part.replace("_", " "))
if not total:
return None
if rest:
return f"Atlas has {total} total nodes ({'; '.join(rest)})."
return f"Atlas has {total} total nodes."
def _global_facts(lines: list[str]) -> list[str]:
    """Pull cluster-wide summary lines (node counts, cluster name), capped at six."""
    if not lines:
        return []
    wanted = ("nodes_total", "nodes_ready", "cluster_name", "cluster", "nodes_not_ready")
    matches = [entry for entry in lines if any(key in entry.lower() for key in wanted)]
    return _dedupe_lines(matches, limit=6)
def _has_keyword_overlap(lines: list[str], keywords: list[str]) -> bool:
    """Return True when any line contains any expanded keyword token."""
    if not lines or not keywords:
        return False
    tokens = _expand_tokens(keywords)
    if not tokens:
        return False
    return any(any(tok in entry.lower() for tok in tokens) for entry in lines)
def _merge_tokens(primary: list[str], secondary: list[str], third: list[str] | None = None) -> list[str]:
merged: list[str] = []
for token in primary + secondary + (third or []):
if not token:
continue
if token not in merged:
merged.append(token)
return merged
def _extract_question_tokens(question: str) -> list[str]:
    """Split a question into unique lowercase tokens above the minimum length."""
    if not question:
        return []
    collected: list[str] = []
    for fragment in re.split(r"[^a-zA-Z0-9_-]+", question.lower()):
        if len(fragment) >= TOKEN_MIN_LEN and fragment not in collected:
            collected.append(fragment)
    return collected
def _expand_tokens(tokens: list[str]) -> list[str]:
    """Split string tokens into unique lowercase word fragments above the minimum length."""
    if not tokens:
        return []
    expanded: list[str] = []
    for token in tokens:
        if not isinstance(token, str):
            continue
        for fragment in re.split(r"[^a-zA-Z0-9_-]+", token.lower()):
            if len(fragment) >= TOKEN_MIN_LEN and fragment not in expanded:
                expanded.append(fragment)
    return expanded
def _ensure_token_coverage(lines: list[str], tokens: list[str], summary_lines: list[str], max_add: int = 4) -> list[str]:
    """Prepend up to *max_add* summary lines covering tokens the facts miss."""
    if not lines or not tokens or not summary_lines:
        return lines
    haystack = " ".join(lines).lower()
    uncovered = [tok for tok in tokens if tok and tok.lower() not in haystack]
    if not uncovered:
        return lines
    additions: list[str] = []
    for token in uncovered:
        if len(additions) >= max_add:
            break
        needle = token.lower()
        extra = next(
            (entry for entry in summary_lines if needle in entry.lower() and entry not in lines and entry not in additions),
            None,
        )
        if extra is not None:
            additions.append(extra)
    if not additions:
        return lines
    return _merge_fact_lines(additions, lines)
def _best_keyword_line(lines: list[str], keywords: list[str]) -> str | None:
    """Return the line with the most expanded-keyword hits, or None when no hits."""
    if not lines or not keywords:
        return None
    tokens = _expand_tokens(keywords)
    if not tokens:
        return None
    winner: str | None = None
    top = 0
    for entry in lines:
        lowered = entry.lower()
        hits = sum(1 for tok in tokens if tok in lowered)
        if hits > top:
            top = hits
            winner = entry
    return winner
def _line_starting_with(lines: list[str], prefix: str) -> str | None:
if not lines or not prefix:
return None
lower_prefix = prefix.lower()
for line in lines:
if str(line).lower().startswith(lower_prefix):
return line
return None
def _non_rpi_nodes(summary: dict[str, Any]) -> dict[str, list[str]]:
hardware = summary.get("hardware_by_node") if isinstance(summary, dict) else None
if not isinstance(hardware, dict):
return {}
grouped: dict[str, list[str]] = {}
for node, hw in hardware.items():
if not isinstance(node, str) or not isinstance(hw, str):
continue
if hw.startswith("rpi"):
continue
grouped.setdefault(hw, []).append(node)
for nodes in grouped.values():
nodes.sort()
return grouped
def _format_hardware_groups(groups: dict[str, list[str]], label: str) -> str:
if not groups:
return ""
parts = []
for hw, nodes in sorted(groups.items()):
parts.append(f"{hw} ({', '.join(nodes)})")
return f"{label}: " + "; ".join(parts) + "."
def _lexicon_context(summary: dict[str, Any]) -> str: # noqa: C901
if not isinstance(summary, dict):
return ""
lexicon = summary.get("lexicon")
if not isinstance(lexicon, dict):
return ""
terms = lexicon.get("terms")
aliases = lexicon.get("aliases")
lines: list[str] = []
if isinstance(terms, list):
for entry in terms[:8]:
if not isinstance(entry, dict):
continue
term = entry.get("term")
meaning = entry.get("meaning")
if term and meaning:
lines.append(f"{term}: {meaning}")
if isinstance(aliases, dict):
for key, value in list(aliases.items())[:6]:
if key and value:
lines.append(f"alias {key} -> {value}")
if not lines:
return ""
return "Lexicon:\n" + "\n".join(lines)
def _parse_json_block(text: str, *, fallback: dict[str, Any]) -> dict[str, Any]:
    """Extract the first {...} span from *text* and parse it, else parse the whole text."""
    stripped = text.strip()
    found = re.search(r"\{.*\}", stripped, flags=re.S)
    candidate = found.group(0) if found else stripped
    return parse_json(candidate, fallback=fallback)
def _parse_json_list(text: str) -> list[dict[str, Any]]:
    """Parse the first [...] span (or the whole text) and keep only dict entries."""
    stripped = text.strip()
    found = re.search(r"\[.*\]", stripped, flags=re.S)
    payload = found.group(0) if found else stripped
    data = parse_json(payload, fallback={})
    if not isinstance(data, list):
        return []
    return [entry for entry in data if isinstance(entry, dict)]
def _scores_from_json(data: dict[str, Any]) -> AnswerScores:
    """Build AnswerScores from parsed grader JSON, defaulting to 60 / medium risk."""
    risk = str(data.get("hallucination_risk") or "medium")
    return AnswerScores(
        confidence=_coerce_int(data.get("confidence"), 60),
        relevance=_coerce_int(data.get("relevance"), 60),
        satisfaction=_coerce_int(data.get("satisfaction"), 60),
        hallucination_risk=risk,
    )
def _coerce_int(value: Any, default: int) -> int:
try:
return int(float(value))
except (TypeError, ValueError):
return default
def _default_scores() -> AnswerScores:
    """Neutral mid-scale scores used when the grading LLM output cannot be parsed."""
    return AnswerScores(confidence=60, relevance=60, satisfaction=60, hallucination_risk="medium")
def _style_hint(classify: dict[str, Any]) -> str:
style = (classify.get("answer_style") or "").strip().lower()
qtype = (classify.get("question_type") or "").strip().lower()
if style == "insightful" or qtype in {"open_ended", "planning"}:
return "insightful"
return "direct"
def _needs_evidence_fix(reply: str, classify: dict[str, Any]) -> bool:
if not reply:
return False
lowered = reply.lower()
missing_markers = (
"don't have",
"do not have",
"don't know",
"cannot",
"can't",
"need to",
"would need",
"does not provide",
"does not mention",
"not mention",
"not provided",
"not in context",
"not referenced",
"missing",
"no specific",
"no information",
)
if classify.get("needs_snapshot") and any(marker in lowered for marker in missing_markers):
return True
return classify.get("question_type") in {"metric", "diagnostic"} and not re.search(r"\d", reply)
def _should_use_insight_guard(classify: dict[str, Any]) -> bool:
style = (classify.get("answer_style") or "").strip().lower()
qtype = (classify.get("question_type") or "").strip().lower()
return style == "insightful" or qtype in {"open_ended", "planning"}
async def _apply_insight_guard(inputs: InsightGuardInput) -> str:
    """Run a quality guard over "insightful" answers and rewrite them if needed.

    Returns the original reply when the guard does not apply or approves;
    otherwise asks the main model for a fixed version, seeded with facts.
    """
    if not inputs.reply or not _should_use_insight_guard(inputs.classify):
        return inputs.reply
    guard_prompt = prompts.INSIGHT_GUARD_PROMPT.format(question=inputs.question, answer=inputs.reply)
    guard_raw = await inputs.call_llm(
        prompts.INSIGHT_GUARD_SYSTEM,
        guard_prompt,
        context=inputs.context,
        model=inputs.plan.fast_model,  # cheap model suffices for a yes/no check
        tag="insight_guard",
    )
    guard = _parse_json_block(guard_raw, fallback={})
    # Strict identity check: anything other than literal true triggers a rewrite.
    if guard.get("ok") is True:
        return inputs.reply
    fix_prompt = prompts.INSIGHT_FIX_PROMPT.format(question=inputs.question, answer=inputs.reply)
    if inputs.facts:
        fix_prompt = fix_prompt + "\nFacts:\n" + "\n".join(inputs.facts[:6])  # cap fact spam
    return await inputs.call_llm(
        prompts.INSIGHT_FIX_SYSTEM,
        fix_prompt,
        context=inputs.context,
        model=inputs.plan.model,  # full model writes the corrected answer
        tag="insight_fix",
    )
# Export every single-underscore helper defined above; dunder names stay private.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]

View File

@ -0,0 +1,276 @@
from __future__ import annotations
import difflib
import re
import time
from typing import Any
from ._base import *
def _reply_matches_metric_facts(reply: str, metric_facts: list[str], tokens: list[str] | set[str] | None = None) -> bool:
if not reply or not metric_facts:
return True
reply_numbers = set(re.findall(r"\d+(?:\\.\d+)?", reply))
if not reply_numbers:
return False
fact_numbers: set[str] = set()
value_pattern = re.compile(r"(?:>=|<=|=|:)\s*(\d+(?:\.\d+)?)")
filtered = metric_facts
if tokens:
token_set = {str(tok).lower() for tok in tokens if tok}
focused = []
for line in metric_facts:
key = line.split(":", 1)[0].lower()
if any(tok in key for tok in token_set):
focused.append(line)
if focused:
filtered = focused
for line in filtered:
for match in value_pattern.findall(line):
fact_numbers.add(match)
if not fact_numbers:
return False
return bool(reply_numbers & fact_numbers)
def _needs_dedup(reply: str) -> bool:
    """True when the reply repeats a normalized sentence (and is long enough to matter)."""
    if not reply:
        return False
    sentences = [part.strip() for part in re.split(r"(?<=[.!?])\s+", reply) if part.strip()]
    if len(sentences) < DEDUP_MIN_SENTENCES:
        return False
    normalized = [re.sub(r"\s+", " ", part.lower()) for part in sentences]
    return len(set(normalized)) != len(normalized)
def _needs_focus_fix(question: str, reply: str, classify: dict[str, Any]) -> bool:
if not reply:
return False
q_lower = (question or "").lower()
if classify.get("question_type") not in {"metric", "diagnostic"} and not re.search(r"\b(how many|list|count)\b", q_lower):
return False
missing_markers = (
"does not provide",
"does not specify",
"not available",
"not provided",
"cannot determine",
"don't have",
"do not have",
"insufficient",
"no data",
)
if any(marker in reply.lower() for marker in missing_markers):
return True
if reply.count(".") <= 1:
return False
extra_markers = ("for more", "if you need", "additional", "based on")
return any(marker in reply.lower() for marker in extra_markers)
def _extract_keywords(raw_question: str, normalized: str, sub_questions: list[str], keywords: list[Any] | None) -> list[str]:
    """Derive up to 12 unique lowercase keyword tokens from question text and hints."""
    stopwords = {
        "the", "and", "for", "with", "that", "this", "what", "which", "when",
        "where", "who", "why", "how", "tell", "show", "list", "give", "about",
        "right", "now",
    }
    collected: list[str] = []
    for source in (raw_question, normalized, *sub_questions):
        for fragment in re.split(r"[^a-zA-Z0-9_-]+", source.lower()):
            if len(fragment) >= TOKEN_MIN_LEN and fragment not in stopwords:
                collected.append(fragment)
    for hint in keywords or []:
        if not isinstance(hint, str):
            continue
        cleaned = hint.strip().lower()
        if cleaned and cleaned not in stopwords and cleaned not in collected:
            collected.append(cleaned)
    return list(dict.fromkeys(collected))[:12]
def _allowed_nodes(summary: dict[str, Any]) -> list[str]:
hardware = summary.get("hardware_by_node") if isinstance(summary.get("hardware_by_node"), dict) else {}
if hardware:
return sorted([node for node in hardware if isinstance(node, str)])
return []
def _allowed_namespaces(summary: dict[str, Any]) -> list[str]:
namespaces: list[str] = []
for entry in summary.get("namespace_pods") or []:
if isinstance(entry, dict):
name = entry.get("namespace")
if name:
namespaces.append(str(name))
return sorted(set(namespaces))
def _find_unknown_nodes(reply: str, allowed: list[str]) -> list[str]:
if not reply or not allowed:
return []
pattern = re.compile(r"\b(titan-[0-9a-z]+|node-?\d+)\b", re.IGNORECASE)
found = {m.group(1) for m in pattern.finditer(reply)}
if not found:
return []
allowed_set = {a.lower() for a in allowed}
return sorted({item for item in found if item.lower() not in allowed_set})
def _find_unknown_namespaces(reply: str, allowed: list[str]) -> list[str]:
if not reply or not allowed:
return []
pattern = re.compile(r"\bnamespace\s+([a-z0-9-]+)\b", re.IGNORECASE)
found = {m.group(1) for m in pattern.finditer(reply)}
if not found:
return []
allowed_set = {a.lower() for a in allowed}
return sorted({item for item in found if item.lower() not in allowed_set})
def _needs_runbook_fix(reply: str, allowed: list[str]) -> bool:
if not reply or not allowed:
return False
paths = set(re.findall(r"runbooks/[A-Za-z0-9._-]+", reply))
if not paths:
return False
allowed_set = {p.lower() for p in allowed}
return any(path.lower() not in allowed_set for path in paths)
def _needs_runbook_reference(question: str, allowed: list[str], reply: str) -> bool:
if not allowed or not question:
return False
lowered = question.lower()
cues = ("runbook", "checklist", "documented", "documentation", "where", "guide")
if not any(cue in lowered for cue in cues):
return False
if not reply:
return True
for token in re.findall(r"runbooks/[A-Za-z0-9._-]+", reply):
if token.lower() in {p.lower() for p in allowed}:
return False
return True
def _best_runbook_match(candidate: str, allowed: list[str]) -> str | None:
    """Closest allowed runbook path by SequenceMatcher ratio, above the threshold."""
    if not candidate or not allowed:
        return None
    needle = candidate.lower()
    best_path: str | None = None
    best_ratio = 0.0
    for path in allowed:
        ratio = difflib.SequenceMatcher(a=needle, b=path.lower()).ratio()
        if ratio > best_ratio:
            best_ratio = ratio
            best_path = path
    return best_path if best_ratio >= RUNBOOK_SIMILARITY_THRESHOLD else None
def _resolve_path(data: Any, path: str) -> Any | None:
if path.startswith("line:"):
return path.split("line:", 1)[1].strip()
cursor = data
for part in re.split(r"\.(?![^\[]*\])", path):
if not part:
continue
match = re.match(r"^(\w+)(?:\[(\d+)\])?$", part)
if not match:
return None
key = match.group(1)
index = match.group(2)
if isinstance(cursor, dict):
cursor = cursor.get(key)
else:
return None
if index is not None:
idx = int(index)
if isinstance(cursor, list) and 0 <= idx < len(cursor):
cursor = cursor[idx]
else:
return None
return cursor
def _snapshot_id(summary: dict[str, Any]) -> str | None:
if not summary:
return None
for key in ("generated_at", "snapshot_ts", "snapshot_id"):
value = summary.get(key)
if isinstance(value, str) and value:
return value
return None
def _claims_to_payload(claims: list[ClaimItem]) -> list[dict[str, Any]]:
    """Serialize claim objects into plain dicts suitable for persistence."""
    return [
        {
            "id": claim.id,
            "claim": claim.claim,
            "evidence": [
                {
                    "path": ev.path,
                    "reason": ev.reason,
                    "value_at_claim": ev.value_at_claim,
                }
                for ev in claim.evidence
            ],
        }
        for claim in claims
    ]
def _state_from_payload(payload: dict[str, Any] | None) -> ConversationState | None:
    """Rehydrate a ConversationState from a persisted payload dict.

    Claims missing an id, text, or any usable evidence entry are dropped
    rather than partially restored.  Returns None for an empty payload.
    """
    if not payload:
        return None
    claims_raw = payload.get("claims") if isinstance(payload, dict) else None
    claims: list[ClaimItem] = []
    if isinstance(claims_raw, list):
        for entry in claims_raw:
            if not isinstance(entry, dict):
                continue
            claim_text = str(entry.get("claim") or "").strip()
            claim_id = str(entry.get("id") or "").strip()
            if not claim_text or not claim_id:
                continue
            evidence_items: list[EvidenceItem] = []
            for ev in entry.get("evidence") or []:
                if not isinstance(ev, dict):
                    continue
                path = str(ev.get("path") or "").strip()
                if not path:
                    continue
                reason = str(ev.get("reason") or "").strip()
                value_at_claim = ev.get("value_at_claim")
                evidence_items.append(EvidenceItem(path=path, reason=reason, value_at_claim=value_at_claim))
            # A claim is only kept when at least one evidence item survived.
            if evidence_items:
                claims.append(ClaimItem(id=claim_id, claim=claim_text, evidence=evidence_items))
    return ConversationState(
        updated_at=float(payload.get("updated_at") or time.monotonic()),  # fall back to "now"
        claims=claims,
        snapshot_id=payload.get("snapshot_id"),
        snapshot=payload.get("snapshot"),
    )
# Export every single-underscore helper defined above; dunder names stay private.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]

View File

@ -0,0 +1,344 @@
from __future__ import annotations
import asyncio
import json
import re
from collections.abc import Awaitable
from collections.abc import Callable
from typing import Any
from atlasbot.llm import prompts
from atlasbot.llm.client import parse_json
from ._base import *
from .post_ext import _extract_keywords
def _parse_json_block(text: str, *, fallback: dict[str, Any]) -> dict[str, Any]:
    """Parse the first {...} object found in *text*, else parse the full text."""
    cleaned = text.strip()
    obj = re.search(r"\{.*\}", cleaned, flags=re.S)
    return parse_json(obj.group(0) if obj else cleaned, fallback=fallback)
async def _select_metric_chunks(
    call_llm: Callable[..., Awaitable[str]],
    ctx: dict[str, Any],
    chunks: list[dict[str, Any]],
    plan: ModePlan,
) -> tuple[list[str], list[str]]:
    """Choose which metric keys (and their chunk ids) are relevant to the question.

    Returns ``(selected_keys, chunk_ids)``; both empty when there is nothing to
    select from.  The LLM proposes keys, a keyword filter keeps the selection
    anchored to the question, and a validation pass can add missed keys.
    """
    summary_lines, question, sub_questions, keywords, token_set = _metric_ctx_values(ctx)
    if not summary_lines or not chunks:
        return [], []
    keys = _extract_metric_keys(summary_lines)
    if not keys:
        return [], []
    max_keys = max(4, plan.max_subquestions * 2)
    candidate_keys = _filter_metric_keys(keys, token_set)
    # Fall back to the full key list when no keyword-ranked candidates exist.
    available_keys = candidate_keys or keys
    prompt = prompts.METRIC_KEYS_PROMPT.format(available="\n".join(available_keys), max_keys=max_keys)
    raw = await call_llm(
        prompts.METRIC_KEYS_SYSTEM,
        prompt + "\nQuestion: " + str(question) + "\nSubQuestions:\n" + "\n".join([str(item) for item in sub_questions]),
        context="Keywords:\n" + ", ".join([str(item) for item in keywords if item]),
        model=plan.fast_model,
        tag="metric_keys",
    )
    selected = _parse_key_list(raw, available_keys, max_keys)
    if candidate_keys:
        # Merge keyword-derived candidates so the LLM cannot drop obvious matches.
        selected = _merge_metric_keys(selected, candidate_keys, max_keys)
    # If the LLM picked keys with no keyword overlap at all, trust the keywords.
    if selected and candidate_keys and not _metric_key_overlap(selected, token_set):
        selected = candidate_keys[:max_keys]
    if not selected and candidate_keys:
        selected = candidate_keys[:max_keys]
    if available_keys:
        # Second pass: ask the model which required keys are still missing.
        missing = await _validate_metric_keys(
            call_llm,
            {
                "question": question,
                "sub_questions": sub_questions,
                "selected": selected,
            },
            available_keys,
            plan,
        )
        if missing:
            selected = _merge_metric_keys(selected, missing, max_keys)
    if not selected:
        return [], []
    ids = _chunk_ids_for_keys(chunks, selected)
    return selected, ids
async def _validate_metric_keys(
    call_llm: Callable[..., Awaitable[str]],
    ctx: dict[str, Any],
    available: list[str],
    plan: ModePlan,
) -> list[str]:
    """Ask the fast model which *available* keys the current selection missed.

    Only keys present in the (capped) available list are returned; anything
    else the model invents is discarded.
    """
    if not available:
        return []
    question = str(ctx.get("question") or "")
    sub_questions = ctx.get("sub_questions") if isinstance(ctx.get("sub_questions"), list) else []
    selected = ctx.get("selected") if isinstance(ctx.get("selected"), list) else []
    # Cap how many keys we show the model so the prompt stays bounded.
    cap = max(12, plan.max_subquestions * 4)
    available_list = available[:cap]
    prompt = prompts.METRIC_KEYS_VALIDATE_PROMPT.format(
        question=question,
        sub_questions=json.dumps(sub_questions),
        selected=json.dumps(selected),
        available="\n".join(available_list),
    )
    raw = await call_llm(
        prompts.METRIC_KEYS_VALIDATE_SYSTEM,
        prompt,
        model=plan.fast_model,
        tag="metric_keys_validate",
    )
    parsed = _parse_json_block(raw, fallback={})
    items = parsed.get("missing") if isinstance(parsed, dict) else []
    if not isinstance(items, list):
        return []
    available_set = set(available_list)
    out: list[str] = []
    for item in items:
        # Keep only real, deduplicated keys from the offered list.
        if isinstance(item, str) and item in available_set and item not in out:
            out.append(item)
    return out
async def _gather_limited(coros: list[Awaitable[Any]], limit: int) -> list[Any]:
if not coros:
return []
semaphore = asyncio.Semaphore(max(1, limit))
async def runner(coro: Awaitable[Any]) -> Any:
async with semaphore:
return await coro
return await asyncio.gather(*(runner(coro) for coro in coros))
def _metric_ctx_values(ctx: dict[str, Any]) -> tuple[list[str], str, list[str], list[str], set[str]]:
    """Unpack (summary_lines, question, sub_questions, keywords, token_set) from ctx.

    Returns empty defaults when ctx lacks a summary_lines list.  The token set
    combines explicit keyword tokens with tokens mined from the question, then
    expands them with naive singular/plural variants.
    """
    summary_lines = ctx.get("summary_lines") if isinstance(ctx, dict) else None
    if not isinstance(summary_lines, list):
        return [], "", [], [], set()
    question = ctx.get("question") if isinstance(ctx, dict) else ""
    sub_questions = ctx.get("sub_questions") if isinstance(ctx.get("sub_questions"), list) else []
    keywords = ctx.get("keywords") if isinstance(ctx.get("keywords"), list) else []
    keyword_tokens = ctx.get("keyword_tokens") if isinstance(ctx.get("keyword_tokens"), list) else []
    token_set = {str(token).lower() for token in keyword_tokens if token}
    # NOTE(review): a missing "question" key reaches str(question) as "None" —
    # confirm callers always provide it.
    token_set |= {token.lower() for token in _extract_keywords(str(question), str(question), sub_questions=sub_questions, keywords=keywords)}
    token_set = _token_variants(token_set)
    return summary_lines, str(question), sub_questions, keywords, token_set
def _extract_metric_keys(lines: list[str]) -> list[str]:
keys: list[str] = []
for line in lines:
if ":" not in line:
continue
key = line.split(":", 1)[0].strip()
if not key or " " in key:
continue
if key not in keys:
keys.append(key)
return keys
def _token_variants(tokens: set[str]) -> set[str]:
    """Augment tokens with naive singular forms (ies→y, es→'', s→'')."""
    if not tokens:
        return set()
    variants = set(tokens)
    for token in tokens:
        if len(token) <= TOKEN_MIN_LEN:
            continue
        if token.endswith("ies"):
            variants.add(token[:-3] + "y")
        if token.endswith("es"):
            variants.add(token[:-2])
        if token.endswith("s"):
            variants.add(token[:-1])
    return variants
def _parse_key_list(raw: str, allowed: list[str], max_keys: int) -> list[str]:
    """Extract up to *max_keys* allowed key names from the model's JSON reply."""
    parsed = _parse_json_block(raw, fallback={})
    if isinstance(parsed, list):
        items = parsed
    else:
        items = parsed.get("keys") if isinstance(parsed, dict) else []
    if not isinstance(items, list):
        return []
    allowed_set = set(allowed)
    selected: list[str] = []
    for item in items:
        if not isinstance(item, str):
            continue
        if item in allowed_set and item not in selected:
            selected.append(item)
        if len(selected) >= max_keys:
            break
    return selected
def _chunk_ids_for_keys(chunks: list[dict[str, Any]], keys: list[str]) -> list[str]:
if not keys:
return []
ids: list[str] = []
key_set = {f"{key}:" for key in keys}
for chunk in chunks:
text = str(chunk.get("text") or "")
if not text:
continue
for line in text.splitlines():
for key in key_set:
if line.startswith(key):
cid = chunk.get("id")
if cid and cid not in ids:
ids.append(cid)
break
return ids
def _filter_metric_keys(keys: list[str], tokens: set[str]) -> list[str]:
    """Rank keys by how many question tokens appear among their word parts."""
    if not keys or not tokens:
        return []
    usable = {tok.lower() for tok in tokens if tok and len(tok) >= TOKEN_MIN_LEN}
    scored: list[tuple[int, str]] = []
    for key in keys:
        words = {part for part in re.split(r"[_\W]+", key.lower()) if part}
        if not words:
            continue
        overlap = len(words & usable)
        if overlap:
            scored.append((overlap, key))
    scored.sort(key=lambda entry: (-entry[0], entry[1]))
    return [key for _, key in scored]
def _metric_key_overlap(keys: list[str], tokens: set[str]) -> bool:
    """Return True when any key shares a word part with the question tokens."""
    if not keys or not tokens:
        return False
    usable = {tok.lower() for tok in tokens if tok and len(tok) >= TOKEN_MIN_LEN}
    return any(
        {part for part in re.split(r"[_\W]+", key.lower()) if part} & usable
        for key in keys
    )
def _lines_for_metric_keys(lines: list[str], keys: list[str], max_lines: int = 0) -> list[str]:
if not lines or not keys:
return []
prefixes = {f"{key}:" for key in keys}
selected: list[str] = []
for line in lines:
for prefix in prefixes:
if prefix in line:
selected.append(line)
break
if max_lines and len(selected) >= max_lines:
break
return selected
def _merge_metric_keys(current: list[str], candidates: list[str], max_keys: int) -> list[str]:
merged: list[str] = []
seen = set()
for key in current:
if key and key not in seen:
merged.append(key)
seen.add(key)
for key in candidates:
if key and key not in seen:
merged.append(key)
seen.add(key)
if len(merged) >= max_keys:
break
return merged[:max_keys]
def _merge_fact_lines(primary: list[str], fallback: list[str]) -> list[str]:
seen = set()
merged: list[str] = []
for line in primary + fallback:
if line in seen:
continue
seen.add(line)
merged.append(line)
return merged
def _expand_hottest_line(line: str) -> list[str]:
if not line:
return []
if not line.lower().startswith("hottest:"):
return []
expanded: list[str] = []
payload = line.split("hottest:", 1)[1]
for part in payload.split(";"):
part = part.strip()
if not part or "=" not in part:
continue
metric, rest = part.split("=", 1)
metric = metric.strip()
match = re.search(r"(?P<node>[^\s\[]+).*\((?P<value>[^)]+)\)", rest)
if not match:
continue
node = match.group("node").strip()
value = match.group("value").strip()
class_match = re.search(r"\[(?P<class>[^\]]+)\]", rest)
node_class = class_match.group("class").strip() if class_match else ""
if node_class:
expanded.append(f"hottest_{metric}_node: {node} [{node_class}] ({value})")
else:
expanded.append(f"hottest_{metric}_node: {node} ({value})")
return expanded
def _has_token(text: str, token: str) -> bool:
if not text or not token:
return False
if token == "io":
return "i/o" in text or re.search(r"\bio\b", text) is not None
return re.search(rf"\b{re.escape(token)}\b", text) is not None
def _hotspot_evidence(summary: dict[str, Any]) -> list[str]:
    """Render 'hottest' metric entries as evidence lines with class/namespace context.

    Produces lines like ``hotspot.cpu: node=titan-a class=rpi5 value=0.93
    namespaces_top=ns=3`` from the summary's ``hottest``, ``hardware_by_node``
    and ``node_pods_top`` sections.
    """
    hottest = summary.get("hottest") if isinstance(summary.get("hottest"), dict) else {}
    if not hottest:
        return []
    hardware_by_node = summary.get("hardware_by_node") if isinstance(summary.get("hardware_by_node"), dict) else {}
    node_pods_top = summary.get("node_pods_top") if isinstance(summary.get("node_pods_top"), list) else []
    ns_map = {}
    # Index top namespaces per node so hotspot lines can show what runs there.
    for item in node_pods_top:
        if not isinstance(item, dict):
            continue
        node = item.get("node")
        namespaces_top = item.get("namespaces_top") if isinstance(item.get("namespaces_top"), list) else []
        ns_map[node] = namespaces_top
    lines: list[str] = []
    for metric, info in hottest.items():
        if not isinstance(info, dict):
            continue
        node = info.get("node")
        value = info.get("value")
        if not node:
            continue
        node_class = hardware_by_node.get(node)
        ns_parts = []
        # Show at most three namespace=count pairs for the hot node.
        for entry in ns_map.get(node, [])[:3]:
            if isinstance(entry, (list, tuple)) and len(entry) >= NS_ENTRY_MIN_LEN:
                ns_parts.append(f"{entry[0]}={entry[1]}")
        ns_text = ", ".join(ns_parts)
        # Numbers get two decimals; anything else is stringified as-is.
        value_text = f"{value:.2f}" if isinstance(value, (int, float)) else str(value)
        line = f"hotspot.{metric}: node={node} class={node_class or 'unknown'} value={value_text}"
        if ns_text:
            line += f" namespaces_top={ns_text}"
        lines.append(line)
    return lines
# Export every single-underscore helper defined above; dunder names stay private.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]

View File

@ -0,0 +1,197 @@
from __future__ import annotations
import re
from collections.abc import Callable
from typing import Any
from atlasbot.llm import prompts
from atlasbot.llm.client import parse_json
from ._base import *
def _parse_json_block(text: str, *, fallback: dict[str, Any]) -> dict[str, Any]:
    """Find and parse the first JSON object embedded in *text*, else the whole text."""
    body = text.strip()
    embedded = re.search(r"\{.*\}", body, flags=re.S)
    if embedded is not None:
        return parse_json(embedded.group(0), fallback=fallback)
    return parse_json(body, fallback=fallback)
def _metric_key_tokens(summary_lines: list[str]) -> set[str]:
tokens: set[str] = set()
for line in summary_lines:
if not isinstance(line, str) or ":" not in line:
continue
key = line.split(":", 1)[0].strip().lower()
if not key:
continue
tokens.add(key)
for part in re.split(r"[_\s]+", key):
if part:
tokens.add(part)
return tokens
async def _select_best_candidate(call_llm: Callable[..., Any], question: str, candidates: list[str], plan: ModePlan, tag: str) -> int:
    """Return the index of the best candidate answer as judged by the LLM.

    Defaults to index 0 for a single candidate or any unusable model output.
    """
    if len(candidates) <= 1:
        return 0
    prompt = (
        prompts.CANDIDATE_SELECT_PROMPT
        + "\nQuestion: "
        + question
        + "\nCandidates:\n"
        + "\n".join([f"{idx+1}) {cand}" for idx, cand in enumerate(candidates)])
    )
    raw = await call_llm(prompts.CANDIDATE_SELECT_SYSTEM, prompt, model=plan.model, tag=tag)
    data = _parse_json_block(raw, fallback={})
    best = data.get("best") if isinstance(data, dict) else None
    # The model answers 1-based; convert and bounds-check before trusting it.
    if isinstance(best, int) and 1 <= best <= len(candidates):
        return best - 1
    return 0
def _dedupe_lines(lines: list[str], limit: int | None = None) -> list[str]:
seen: set[str] = set()
cleaned: list[str] = []
for line in lines:
value = (line or "").strip()
if not value or value in seen:
continue
if value.lower().startswith("lexicon_") or value.lower().startswith("units:"):
continue
cleaned.append(value)
seen.add(value)
if limit and len(cleaned) >= limit:
break
return cleaned
def _collect_fact_candidates(selected: list[dict[str, Any]], limit: int) -> list[str]:
    """Flatten the chosen chunks into unique, non-empty fact lines."""
    gathered: list[str] = []
    for chunk in selected:
        text = chunk.get("text") if isinstance(chunk, dict) else None
        if isinstance(text, str):
            gathered.extend(line for line in text.splitlines() if line.strip())
    return _dedupe_lines(gathered, limit=limit)
async def _select_best_list(call_llm: Callable[..., Any], question: str, candidates: list[list[str]], plan: ModePlan, tag: str) -> list[str]:
    """Pick the best candidate list via the LLM; merge everything when the pick is empty."""
    if not candidates:
        return []
    if len(candidates) == 1:
        return candidates[0]
    rendered = ["; ".join(entry) for entry in candidates]
    winner_idx = await _select_best_candidate(call_llm, question, rendered, plan, tag)
    winner = candidates[winner_idx] if 0 <= winner_idx < len(candidates) else candidates[0]
    if winner:
        return winner
    merged: list[str] = []
    for entry in candidates:
        for item in entry:
            if item not in merged:
                merged.append(item)
    return merged
async def _extract_fact_types(call_llm: Callable[..., Any], question: str, keywords: list[str], plan: ModePlan) -> list[str]:
    """Extract up to 10 fact types for *question*, retrying and voting on the result."""
    prompt = prompts.FACT_TYPES_PROMPT + "\nQuestion: " + question
    if keywords:
        prompt += "\nKeywords: " + ", ".join(keywords)
    proposals: list[list[str]] = []
    for _ in range(max(plan.metric_retries, 1)):
        raw = await call_llm(prompts.FACT_TYPES_SYSTEM, prompt, model=plan.fast_model, tag="fact_types")
        parsed = _parse_json_block(raw, fallback={})
        entries = parsed.get("fact_types") if isinstance(parsed, dict) else None
        if not isinstance(entries, list):
            continue
        lines = _dedupe_lines([str(entry) for entry in entries if isinstance(entry, (str, int, float))], limit=10)
        if lines:
            proposals.append(lines)
    best = await _select_best_list(call_llm, question, proposals, plan, "fact_types_select")
    return best[:10]
async def _derive_signals(call_llm: Callable[..., Any], question: str, fact_types: list[str], plan: ModePlan) -> list[str]:
    """Derive up to 12 retrieval signals from the fact types, retrying and voting."""
    if not fact_types:
        return []
    prompt = prompts.SIGNAL_PROMPT.format(question=question, fact_types="; ".join(fact_types))
    proposals: list[list[str]] = []
    for _ in range(max(plan.metric_retries, 1)):
        raw = await call_llm(prompts.SIGNAL_SYSTEM, prompt, model=plan.fast_model, tag="signals")
        parsed = _parse_json_block(raw, fallback={})
        entries = parsed.get("signals") if isinstance(parsed, dict) else None
        if not isinstance(entries, list):
            continue
        lines = _dedupe_lines([str(entry) for entry in entries if isinstance(entry, (str, int, float))], limit=12)
        if lines:
            proposals.append(lines)
    best = await _select_best_list(call_llm, question, proposals, plan, "signals_select")
    return best[:12]
async def _scan_chunk_for_signals(call_llm: Callable[..., Any], question: str, signals: list[str], chunk_lines: list[str], plan: ModePlan) -> list[str]:
    """Ask the LLM which chunk lines match the signals; keep at most 15 verbatim lines."""
    if not signals or not chunk_lines:
        return []
    prompt = prompts.CHUNK_SCAN_PROMPT.format(
        signals="; ".join(signals),
        lines="\n".join(chunk_lines),
    )
    proposals: list[list[str]] = []
    for _ in range(max(1, min(plan.metric_retries, 2))):
        raw = await call_llm(prompts.CHUNK_SCAN_SYSTEM, prompt, model=plan.fast_model, tag="chunk_scan")
        parsed = _parse_json_block(raw, fallback={})
        entries = parsed.get("lines") if isinstance(parsed, dict) else None
        if not isinstance(entries, list):
            continue
        # Only accept lines that exist verbatim in the chunk, so the LLM
        # cannot introduce fabricated facts.
        verbatim = _dedupe_lines([line for line in chunk_lines if line in entries], limit=15)
        if verbatim:
            proposals.append(verbatim)
    best = await _select_best_list(call_llm, question, proposals, plan, "chunk_scan_select")
    return best[:15]
async def _prune_metric_candidates(call_llm: Callable[..., Any], question: str, candidates: list[str], plan: ModePlan, attempts: int) -> list[str]:
    """Prune candidate metric lines down to at most 6 via repeated LLM votes."""
    if not candidates:
        return []
    prompt = prompts.FACT_PRUNE_PROMPT.format(question=question, candidates="\n".join(candidates), max_lines=6)
    proposals: list[list[str]] = []
    for _ in range(max(attempts, 1)):
        raw = await call_llm(prompts.FACT_PRUNE_SYSTEM, prompt, model=plan.fast_model, tag="fact_prune")
        parsed = _parse_json_block(raw, fallback={})
        entries = parsed.get("lines") if isinstance(parsed, dict) else None
        if not isinstance(entries, list):
            continue
        # Restrict the pick to lines present verbatim in the candidates.
        verbatim = _dedupe_lines([line for line in candidates if line in entries], limit=6)
        if verbatim:
            proposals.append(verbatim)
    best = await _select_best_list(call_llm, question, proposals, plan, "fact_prune_select")
    return best[:6]
async def _select_fact_lines(call_llm: Callable[..., Any], question: str, candidates: list[str], plan: ModePlan, max_lines: int) -> list[str]:
    """Select up to *max_lines* fact lines from candidates via repeated LLM votes."""
    if not candidates:
        return []
    prompt = prompts.FACT_PRUNE_PROMPT.format(question=question, candidates="\n".join(candidates), max_lines=max_lines)
    proposals: list[list[str]] = []
    for _ in range(max(plan.metric_retries, 1)):
        raw = await call_llm(prompts.FACT_PRUNE_SYSTEM, prompt, model=plan.fast_model, tag="fact_select")
        parsed = _parse_json_block(raw, fallback={})
        entries = parsed.get("lines") if isinstance(parsed, dict) else None
        if not isinstance(entries, list):
            continue
        # Restrict the pick to lines present verbatim in the candidates.
        verbatim = _dedupe_lines([line for line in candidates if line in entries], limit=max_lines)
        if verbatim:
            proposals.append(verbatim)
    best = await _select_best_list(call_llm, question, proposals, plan, "fact_select_best")
    return best[:max_lines]
# NOTE: __all__ deliberately exports the underscore-prefixed helpers; without
# it a star-import (presumably how the workflow package consumes this module)
# would skip every private name.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]

View File

@ -0,0 +1,404 @@
from __future__ import annotations
import re
from typing import Any
from atlasbot.engine.intent_router import IntentMatch
from atlasbot.snapshot.builder import summary_text
from ._base import *
def _join_context(parts: list[str]) -> str:
text = "\n".join([part for part in parts if part])
return text.strip()
def _format_metric_value(value: Any) -> str:
if isinstance(value, bool):
return str(value).lower()
if isinstance(value, int):
return str(value)
if isinstance(value, float):
return f"{value:.1f}".rstrip("0").rstrip(".")
return str(value)
def _format_history(history: list[dict[str, str]] | None) -> str:
if not history:
return ""
lines = ["Recent conversation (non-authoritative):"]
for entry in history[-4:]:
if not isinstance(entry, dict):
continue
question = entry.get("q")
answer = entry.get("a")
role = entry.get("role")
content = entry.get("content")
if question:
lines.append(f"Q: {question}")
if answer:
lines.append(f"A: {answer}")
if role and content:
prefix = "Q" if role == "user" else "A"
lines.append(f"{prefix}: {content}")
return "\n".join(lines)
def _summary_lines(snapshot: dict[str, Any] | None) -> list[str]:
    """Split the rendered snapshot summary into its non-blank lines."""
    rendered = summary_text(snapshot)
    if not rendered:
        return []
    return [row for row in rendered.splitlines() if row.strip()]
def _line_starting_with(lines: list[str], prefix: str) -> str | None:
if not lines:
return None
for line in lines:
if line.lower().startswith(prefix.lower()):
return line
return None
def _spine_lines(lines: list[str]) -> dict[str, str]:
    """Assemble the spine fact map by running every section extractor in order."""
    spine: dict[str, str] = {}
    extractors = (
        _spine_nodes,
        _spine_hardware,
        _spine_hottest,
        _spine_postgres,
        _spine_namespaces,
        _spine_pressure,
    )
    for extract in extractors:
        extract(lines, spine)
    return spine
def _spine_nodes(lines: list[str], spine: dict[str, str]) -> None:
    """Record node count/readiness lines, preferring the combined "nodes:" line."""
    combined = _line_starting_with(lines, "nodes:")
    if combined:
        spine["nodes_count"] = combined
        spine["nodes_ready"] = combined
        return
    total_line = _line_starting_with(lines, "nodes_total:")
    if total_line:
        spine["nodes_count"] = total_line
    ready_line = _line_starting_with(lines, "nodes_ready:")
    if ready_line:
        spine["nodes_ready"] = ready_line
def _spine_hardware(lines: list[str], spine: dict[str, str]) -> None:
    """Record the hardware grouping line, trying "hardware_nodes:" then "hardware:"."""
    found = _line_starting_with(lines, "hardware_nodes:") or _line_starting_with(lines, "hardware:")
    if found:
        spine["nodes_non_rpi"] = found
def _spine_hottest(lines: list[str], spine: dict[str, str]) -> None:
    """Map the single "hottest:" line onto every hottest_* spine key."""
    line = _line_starting_with(lines, "hottest:")
    if not line:
        return
    for slot in ("hottest_cpu", "hottest_ram", "hottest_net", "hottest_io", "hottest_disk"):
        spine[slot] = line
def _spine_postgres(lines: list[str], spine: dict[str, str]) -> None:
    """Record postgres connection totals and the per-database hottest line."""
    total = _line_starting_with(lines, "postgres_connections_total:")
    if total:
        spine["postgres_connections"] = total
    per_db = _line_starting_with(lines, "postgres:")
    if per_db:
        spine["postgres_hottest"] = per_db
def _spine_namespaces(lines: list[str], spine: dict[str, str]) -> None:
    """Record the "namespaces_top:" line as the most-pods namespace fact."""
    top_line = _line_starting_with(lines, "namespaces_top:")
    if top_line:
        spine["namespace_most_pods"] = top_line
def _spine_pressure(lines: list[str], spine: dict[str, str]) -> None:
    """Record the pressure summary, falling back to the node-load line."""
    for prefix in ("pressure_nodes:", "node_load_top:"):
        line = _line_starting_with(lines, prefix)
        if line:
            spine["pressure_summary"] = line
            return
def _parse_group_line(line: str) -> dict[str, list[str]]:
groups: dict[str, list[str]] = {}
if not line:
return groups
payload = line.split(":", 1)[1] if ":" in line else line
for part in payload.split(";"):
part = part.strip()
if not part or "=" not in part:
continue
key, value = part.split("=", 1)
value = value.strip()
nodes: list[str] = []
if "(" in value and ")" in value:
inner = value[value.find("(") + 1 : value.rfind(")")]
nodes = [item.strip() for item in inner.split(",") if item.strip()]
if not nodes:
cleaned = re.sub(r"^[0-9]+", "", value).strip()
nodes = [item.strip() for item in cleaned.split(",") if item.strip()]
groups[key.strip()] = nodes
return groups
def _parse_hottest(line: str, metric: str) -> str | None:
if not line:
return None
payload = line.split(":", 1)[1] if ":" in line else line
for part in payload.split(";"):
part = part.strip()
if part.startswith(f"{metric}="):
return part
return None
def _spine_answer(intent: IntentMatch, spine_line: str | None) -> str | None:
    """Turn a spine line into a direct answer for the routed intent kind.

    Returns None when there is no spine line, and echoes the raw line when
    the kind has no dedicated formatter.
    """
    if not spine_line:
        return None
    kind = intent.kind
    if kind.startswith("hottest_"):
        return _spine_hottest_answer(kind, spine_line)
    dispatch = {
        "nodes_count": _spine_nodes_answer,
        "nodes_ready": _spine_nodes_answer,
        "nodes_non_rpi": _spine_non_rpi_answer,
        "hardware_mix": _spine_hardware_answer,
        "postgres_connections": _spine_postgres_answer,
        "postgres_hottest": _spine_postgres_answer,
        "namespace_most_pods": _spine_namespace_answer,
        "pressure_summary": _spine_pressure_answer,
    }
    handler = dispatch.get(kind)
    return handler(spine_line) if handler else spine_line
def _spine_nodes_answer(line: str) -> str:
return line
def _spine_non_rpi_answer(line: str) -> str:
    """List nodes from every non-rpi hardware group, or echo the raw line."""
    grouped = _parse_group_line(line)
    others = [
        node
        for label, nodes in grouped.items()
        if not label.lower().startswith("rpi")
        for node in nodes
    ]
    if others:
        return "Non-Raspberry Pi nodes: " + ", ".join(others) + "."
    return line
def _spine_hardware_answer(line: str) -> str:
return line
def _spine_hottest_answer(kind: str, line: str) -> str:
    """Extract the segment for a hottest_<metric> intent; echo the line if absent."""
    metric = kind.split("_", 1)[1]
    segment = _parse_hottest(line, metric)
    return segment or line
def _spine_postgres_answer(line: str) -> str:
return line
def _spine_namespace_answer(line: str) -> str:
payload = line.split(":", 1)[1] if ":" in line else line
top = payload.split(";")[0].strip()
if top:
return f"Namespace with most pods: {top}."
return line
def _spine_pressure_answer(line: str) -> str:
return line
def _spine_from_summary(summary: dict[str, Any]) -> dict[str, str]:
    """Build the spine fact map straight from the structured summary dict."""
    if not isinstance(summary, dict) or not summary:
        return {}
    spine: dict[str, str] = {}
    builders = (
        _spine_from_counts,
        _spine_from_hardware,
        _spine_from_hottest,
        _spine_from_postgres,
        _spine_from_namespace_pods,
        _spine_from_pressure,
    )
    for build in builders:
        spine.update(build(summary))
    return spine
def _spine_from_counts(summary: dict[str, Any]) -> dict[str, str]:
counts = summary.get("counts") if isinstance(summary.get("counts"), dict) else {}
inventory = summary.get("inventory") if isinstance(summary.get("inventory"), dict) else {}
nodes = summary.get("nodes") if isinstance(summary.get("nodes"), dict) else {}
workers = inventory.get("workers") if isinstance(inventory.get("workers"), dict) else {}
total = nodes.get("total")
ready = nodes.get("ready")
not_ready = nodes.get("not_ready")
if total is None:
total = counts.get("nodes_total")
if ready is None:
ready = counts.get("nodes_ready")
if not_ready is None and isinstance(inventory.get("not_ready_names"), list):
not_ready = len(inventory.get("not_ready_names") or [])
workers_ready = workers.get("ready")
workers_total = workers.get("total")
if total is None and ready is None and not_ready is None:
return {}
parts = []
if total is not None:
parts.append(f"total={int(total)}")
if ready is not None:
parts.append(f"ready={int(ready)}")
if not_ready is not None:
parts.append(f"not_ready={int(not_ready)}")
if workers_total is not None and workers_ready is not None:
parts.append(f"workers_ready={int(workers_ready)}/{int(workers_total)}")
line = "nodes: " + ", ".join(parts)
return {"nodes_count": line, "nodes_ready": line}
def _spine_from_hardware(summary: dict[str, Any]) -> dict[str, str]:
hardware = summary.get("hardware") if isinstance(summary.get("hardware"), dict) else {}
if not hardware:
return {}
parts = []
for key, nodes in hardware.items():
if not isinstance(nodes, list):
continue
node_list = ", ".join(str(n) for n in nodes if n)
if node_list:
parts.append(f"{key}=({node_list})")
if not parts:
return {}
return {"nodes_non_rpi": "hardware: " + "; ".join(parts)}
def _spine_from_hottest(summary: dict[str, Any]) -> dict[str, str]:
    """Build hottest_<metric> spine entries, merging top.node_hottest as fallback."""
    hottest = summary.get("hottest") if isinstance(summary.get("hottest"), dict) else {}
    top = summary.get("top") if isinstance(summary.get("top"), dict) else {}
    top_hottest = top.get("node_hottest") if isinstance(top.get("node_hottest"), dict) else {}
    if not hottest and top_hottest:
        hottest = top_hottest
    elif top_hottest:
        # Fill only the metrics the primary section is missing.
        for metric, entry in top_hottest.items():
            if metric not in hottest and entry is not None:
                hottest[metric] = entry
    if not hottest:
        return {}
    facts: dict[str, str] = {}
    for metric in ("cpu", "ram", "net", "io", "disk"):
        entry = hottest.get(metric)
        if not isinstance(entry, dict):
            continue
        node = entry.get("node") or entry.get("label") or ""
        if node:
            facts[f"hottest_{metric}"] = f"{metric}={node} ({_format_metric_value(entry.get('value'))})"
    return facts
def _spine_from_postgres(summary: dict[str, Any]) -> dict[str, str]:
postgres = summary.get("postgres") if isinstance(summary.get("postgres"), dict) else {}
if not postgres:
top = summary.get("top") if isinstance(summary.get("top"), dict) else {}
postgres = top.get("postgres") if isinstance(top.get("postgres"), dict) else {}
if not postgres:
return {}
used = postgres.get("used")
max_conn = postgres.get("max")
hottest = postgres.get("hottest_db") if isinstance(postgres.get("hottest_db"), dict) else {}
hottest_label = hottest.get("label") or ""
facts: dict[str, str] = {}
if used is not None and max_conn is not None:
facts["postgres_connections"] = f"postgres_connections_total: used={int(used)}, max={int(max_conn)}"
if hottest_label:
facts["postgres_hottest"] = f"postgres_hottest_db: {hottest_label}"
return facts
def _spine_from_namespace_pods(summary: dict[str, Any]) -> dict[str, str]:
pods = summary.get("namespace_pods") if isinstance(summary.get("namespace_pods"), list) else []
if not pods:
top = summary.get("top") if isinstance(summary.get("top"), dict) else {}
pods = top.get("namespace_pods") if isinstance(top.get("namespace_pods"), list) else []
if not pods:
return {}
best_name = ""
best_value = None
for entry in pods:
if not isinstance(entry, dict):
continue
name = entry.get("namespace") or entry.get("name") or entry.get("label") or ""
value = entry.get("pods")
if value is None:
value = entry.get("pods_total")
if value is None:
value = entry.get("value")
try:
numeric = float(value)
except (TypeError, ValueError):
numeric = None
if name and numeric is not None and (best_value is None or numeric > best_value):
best_name = name
best_value = numeric
if best_name:
return {"namespace_most_pods": f"namespace_most_pods: {best_name} ({int(best_value or 0)} pods)"}
return {}
def _spine_from_pressure(summary: dict[str, Any]) -> dict[str, str]:
pressure = summary.get("pressure_summary") if isinstance(summary.get("pressure_summary"), dict) else {}
if not pressure:
pressure = summary.get("pressure_nodes") if isinstance(summary.get("pressure_nodes"), dict) else {}
if not pressure:
return {}
total = pressure.get("total")
unsched = pressure.get("unschedulable")
names = pressure.get("names") if isinstance(pressure.get("names"), list) else []
parts = []
if total is None and names:
total = len([name for name in names if name])
if total is not None:
parts.append(f"total={int(total)}")
if unsched is not None:
parts.append(f"unschedulable={int(unsched)}")
if parts:
return {"pressure_summary": "pressure_nodes: " + ", ".join(parts)}
return {}
def _spine_fallback(intent: IntentMatch, lines: list[str]) -> str | None:
if not lines:
return None
keywords = {
"nodes_count": ("nodes:", "nodes_total:"),
"nodes_ready": ("nodes:", "nodes_ready:"),
"postgres_hottest": ("postgres_hottest", "hottest_db", "postgres"),
"namespace_most_pods": ("namespace", "pods", "namespaces_top"),
"pressure_summary": ("pressure", "node_load_top"),
}
for token in keywords.get(intent.kind, ("",)):
if not token:
continue
for line in lines:
if token in line:
return line
return None
# NOTE: __all__ deliberately exports the underscore-prefixed helpers; without
# it a star-import (presumably how the workflow package consumes this module)
# would skip every private name.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]

View File

@ -0,0 +1,484 @@
from __future__ import annotations
import asyncio
import json
import math
import re
import time
from collections.abc import Callable
from typing import Any
from atlasbot.engine.intent_router import route_intent
from atlasbot.llm import prompts
from atlasbot.llm.client import build_messages
from atlasbot.snapshot.builder import build_summary
from ._base import *
from .common import *
from .factsheet import *
from .post import *
from .post_ext import *
from .retrieval import *
from .retrieval_ext import *
from .spine import *
from .workflow_post import finalize_answer
async def run_answer(engine: Any, question: str, *, mode: str, history: list[dict[str, str]] | None = None, observer: Callable[[str, str], None] | None = None, conversation_id: str | None = None, snapshot_pin: bool | None = None) -> AnswerResult: # noqa: C901
"""Answer a question using the staged reasoning pipeline."""
settings = engine._settings
question = (question or "").strip()
if not question:
return AnswerResult("I need a question to answer.", _default_scores(), {"mode": mode})
if mode == "stock":
return await engine._answer_stock(question)
limitless = "run limitless" in question.lower()
if limitless:
question = re.sub(r"(?i)run limitless", "", question).strip()
plan = _mode_plan(settings, mode)
call_limit = _llm_call_limit(settings, mode)
call_cap = math.ceil(call_limit * settings.llm_limit_multiplier)
call_count = 0
limit_hit = False
time_budget_hit = False
started = time.monotonic()
time_budget_sec = _mode_time_budget(settings, mode) if not limitless else 0.0
debug_tags = {
"route",
"decompose",
"chunk_score",
"chunk_select",
"fact_select",
"synth",
"subanswer",
"tool",
"followup",
"select_claims",
"evidence_fix",
}
async def call_llm(system: str, prompt: str, *, context: str | None = None, model: str | None = None, tag: str = "") -> str:
nonlocal call_count, limit_hit, time_budget_hit
if not limitless and call_count >= call_cap:
limit_hit = True
raise LLMLimitReached("llm_limit")
timeout_sec = None
if not limitless and time_budget_sec > 0:
time_left = time_budget_sec - (time.monotonic() - started)
if time_left <= 0:
time_budget_hit = True
raise LLMTimeBudgetExceeded("time_budget")
timeout_sec = min(settings.ollama_timeout_sec, time_left)
call_count += 1
messages = build_messages(system, prompt, context=context)
try:
llm_call = engine._llm.chat(messages, model=model or plan.model, timeout_sec=timeout_sec)
if timeout_sec is not None:
response = await asyncio.wait_for(llm_call, timeout=max(0.001, timeout_sec))
else:
response = await llm_call
except TimeoutError as exc:
time_budget_hit = True
raise LLMTimeBudgetExceeded("time_budget") from exc
log.info(
"atlasbot_llm_call",
extra={"extra": {"mode": mode, "tag": tag, "call": call_count, "limit": call_cap}},
)
if settings.debug_pipeline and tag in debug_tags:
_debug_pipeline_log(settings, f"llm_raw_{tag}", str(response)[:1200])
return response
state = engine._get_state(conversation_id)
pin_snapshot = bool(snapshot_pin) or settings.snapshot_pin_enabled
snapshot = engine._snapshot.get()
snapshot_used = state.snapshot if pin_snapshot and state and state.snapshot else snapshot
summary = build_summary(snapshot_used)
summary_lines = _summary_lines(snapshot_used)
allowed_nodes = _allowed_nodes(summary)
allowed_namespaces = _allowed_namespaces(summary)
spine = _spine_from_summary(summary) or _spine_lines(summary_lines)
metric_tokens = _metric_key_tokens(summary_lines)
global_facts = _global_facts(summary_lines)
kb_summary = engine._kb.summary()
runbooks = engine._kb.runbook_titles(limit=6)
runbook_paths = engine._kb.runbook_paths(limit=10)
history_ctx = _format_history(history)
lexicon_ctx = _lexicon_context(summary)
key_facts: list[str] = []
metric_facts: list[str] = []
facts_used: list[str] = []
reply = ""
scores = _default_scores()
claims: list[ClaimItem] = []
classify: dict[str, Any] = {}
tool_hint: dict[str, Any] | None = None
try:
if mode in {"quick", "fast", "smart", "genius"} and not limitless:
if observer:
observer("factsheet", "building fact sheet")
if _is_plain_math_question(question):
reply = (
"I focus on Titan cluster operations. Ask me about cluster health, nodes, workloads, "
"namespaces, storage, or alerts."
)
return AnswerResult(reply, _default_scores(), _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
kb_lines = (
engine._kb.chunk_lines(max_files=plan.kb_max_files, max_chars=_factsheet_kb_chars(mode, plan.kb_max_chars))
if engine._kb
else []
)
fact_lines = _quick_fact_sheet_lines(question, summary_lines, kb_lines, limit=_factsheet_line_limit(mode))
classify = {
"needs_snapshot": True,
"needs_kb": bool(kb_lines),
"question_type": f"{mode}_factsheet",
"answer_style": "direct" if mode in {"quick", "fast"} else "concise",
"follow_up": False,
}
heuristic_reply = _quick_fact_sheet_heuristic_answer(question, fact_lines)
if heuristic_reply:
return AnswerResult(heuristic_reply, _default_scores(), _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
if observer:
observer("quick", "answering from fact sheet")
quick_context = _quick_fact_sheet_text(fact_lines)
quick_prompt = "Question: " + question + "\nAnswer using only the Fact Sheet. " + _factsheet_instruction(mode)
reply = await call_llm(prompts.ANSWER_SYSTEM, quick_prompt, context=quick_context, model=_factsheet_model(mode, plan), tag=f"{mode}_factsheet")
reply = _strip_followup_meta(reply)
return AnswerResult(reply, _default_scores(), _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
if observer:
observer("normalize", "normalizing")
normalize_prompt = prompts.NORMALIZE_PROMPT + "\nQuestion: " + question
normalize_raw = await call_llm(prompts.NORMALIZE_SYSTEM, normalize_prompt, context=lexicon_ctx, model=plan.fast_model, tag="normalize")
normalize = _parse_json_block(normalize_raw, fallback={"normalized": question, "keywords": []})
normalized = str(normalize.get("normalized") or question).strip() or question
keywords = normalize.get("keywords") or []
_debug_pipeline_log(settings, "normalize_parsed", {"normalized": normalized, "keywords": keywords})
keyword_tokens = _extract_keywords(question, normalized, sub_questions=[], keywords=keywords)
question_tokens = _extract_question_tokens(normalized)
if observer:
observer("route", "routing")
route_prompt = prompts.ROUTE_PROMPT + "\nQuestion: " + normalized + "\nKeywords: " + json.dumps(keywords)
route_raw = await call_llm(prompts.ROUTE_SYSTEM, route_prompt, context=_join_context([kb_summary, lexicon_ctx]), model=plan.fast_model, tag="route")
classify = _parse_json_block(route_raw, fallback={})
classify.setdefault("needs_snapshot", True)
classify.setdefault("answer_style", "direct")
classify.setdefault("follow_up", False)
classify.setdefault("focus_entity", "unknown")
classify.setdefault("focus_metric", "unknown")
if metric_tokens and keyword_tokens and any(token in metric_tokens for token in keyword_tokens):
classify["needs_snapshot"] = True
intent = route_intent(normalized)
if intent:
classify["needs_snapshot"] = True
classify["question_type"] = "metric"
_debug_pipeline_log(settings, "route_parsed", {"classify": classify, "normalized": normalized})
lowered_question = f"{question} {normalized}".lower()
force_metric = bool(re.search(r"\bhow many\b|\bcount\b|\btotal\b", lowered_question))
if any(term in lowered_question for term in ("postgres", "connections", "pvc", "ready")):
force_metric = True
if intent:
spine_line = spine.get(intent.kind) if isinstance(spine, dict) else None
if not spine_line:
spine_line = _spine_fallback(intent, summary_lines)
spine_answer = _spine_answer(intent, spine_line)
if spine_line:
key_facts = _merge_fact_lines([spine_line], key_facts)
metric_facts = _merge_fact_lines([spine_line], metric_facts)
if spine_answer and mode in {"fast", "quick"}:
return AnswerResult(spine_answer, _default_scores(), _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
cluster_terms = (
"atlas",
"cluster",
"node",
"nodes",
"namespace",
"pod",
"workload",
"k8s",
"kubernetes",
"postgres",
"database",
"db",
"connections",
"cpu",
"ram",
"memory",
"network",
"io",
"disk",
"pvc",
"storage",
)
has_cluster_terms = any(term in lowered_question for term in cluster_terms)
if has_cluster_terms:
classify["needs_snapshot"] = True
lowered_norm = normalized.lower()
if ("namespace" in lowered_norm and ("pod" in lowered_norm or "pods" in lowered_norm)) or re.search(r"\bmost\s+pods\b", lowered_norm) or re.search(r"\bpods\s+running\b", lowered_norm):
classify["question_type"] = "metric"
classify["needs_snapshot"] = True
if re.search(r"\b(how many|count|number of|list)\b", lowered_question):
classify["question_type"] = "metric"
if any(term in lowered_question for term in ("postgres", "connections", "db")):
classify["question_type"] = "metric"
classify["needs_snapshot"] = True
if any(term in lowered_question for term in ("pvc", "persistentvolume", "persistent volume", "storage")):
if classify.get("question_type") not in {"metric", "diagnostic"}:
classify["question_type"] = "metric"
classify["needs_snapshot"] = True
if "ready" in lowered_question and classify.get("question_type") not in {"metric", "diagnostic"}:
classify["question_type"] = "diagnostic"
hottest_terms = ("hottest", "highest", "lowest", "most")
metric_terms = ("cpu", "ram", "memory", "net", "network", "io", "disk", "load", "usage", "pod", "pods", "namespace")
if any(term in lowered_question for term in hottest_terms) and any(term in lowered_question for term in metric_terms):
classify["question_type"] = "metric"
baseline_terms = ("baseline", "delta", "trend", "increase", "decrease", "drop", "spike", "regression", "change")
if any(term in lowered_question for term in baseline_terms) and any(term in lowered_question for term in metric_terms):
classify["question_type"] = "metric"
classify["needs_snapshot"] = True
if not classify.get("follow_up") and state and state.claims:
follow_terms = ("there", "that", "those", "these", "it", "them", "that one", "this", "former", "latter")
is_metric_query = force_metric or classify.get("question_type") in {"metric", "diagnostic"}
if not is_metric_query and (
any(term in lowered_question for term in follow_terms)
or (len(normalized.split()) <= FOLLOWUP_SHORT_WORDS and not has_cluster_terms)
):
classify["follow_up"] = True
if classify.get("follow_up") and state and state.claims:
if observer:
observer("followup", "answering follow-up")
reply = await engine._answer_followup(question, state, summary, classify, plan, call_llm)
scores = await engine._score_answer(question, reply, plan, call_llm)
return AnswerResult(reply, scores, _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
if observer:
observer("decompose", "decomposing")
decompose_prompt = prompts.DECOMPOSE_PROMPT.format(max_parts=plan.max_subquestions * 2)
decompose_raw = await call_llm(prompts.DECOMPOSE_SYSTEM, decompose_prompt + "\nQuestion: " + normalized, context=lexicon_ctx, model=plan.fast_model if mode == "quick" else plan.model, tag="decompose")
parts = _parse_json_list(decompose_raw)
sub_questions = _select_subquestions(parts, normalized, plan.max_subquestions)
_debug_pipeline_log(settings, "decompose_parsed", {"sub_questions": sub_questions})
keyword_tokens = _extract_keywords(question, normalized, sub_questions=sub_questions, keywords=keywords)
snapshot_context = ""
signal_tokens: list[str] = []
if classify.get("needs_snapshot"):
if observer:
observer("retrieve", "scoring chunks")
chunks = _chunk_lines(summary_lines, plan.chunk_lines)
if plan.use_raw_snapshot:
raw_chunks = _raw_snapshot_chunks(snapshot_used)
if raw_chunks:
chunks.extend(raw_chunks)
kb_lines = engine._kb.chunk_lines(max_files=plan.kb_max_files, max_chars=plan.kb_max_chars) if engine._kb else []
if kb_lines:
kb_chunks = _chunk_lines(kb_lines, plan.chunk_lines)
for idx, chunk in enumerate(kb_chunks):
chunk["id"] = f"k{idx}"
chunks.extend(kb_chunks)
metric_keys: list[str] = []
must_chunk_ids: list[str] = []
metric_task = None
if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and summary_lines:
metric_ctx = {"question": normalized, "sub_questions": sub_questions, "keywords": keywords, "keyword_tokens": keyword_tokens, "summary_lines": summary_lines}
metric_task = asyncio.create_task(_select_metric_chunks(call_llm, metric_ctx, chunks, plan))
scored_task = asyncio.create_task(_score_chunks(call_llm, chunks, normalized, sub_questions, plan))
if metric_task:
metric_keys, must_chunk_ids = await metric_task
scored = await scored_task
selected = _select_chunks(chunks, scored, plan, keyword_tokens, must_chunk_ids)
fact_candidates = _collect_fact_candidates(selected, limit=plan.max_subquestions * 12)
key_facts = await _select_fact_lines(call_llm, normalized, fact_candidates, plan, max_lines=max(4, plan.max_subquestions * 2))
metric_facts = []
if classify.get("question_type") in {"metric", "diagnostic"} or force_metric:
global_metric_facts: list[str] = []
if global_facts:
global_metric_facts = await _select_fact_lines(call_llm, normalized, global_facts, plan, max_lines=min(2, max(1, plan.max_subquestions)))
if not global_metric_facts and (keyword_tokens or question_tokens):
tokens = {tok for tok in (keyword_tokens or question_tokens) if tok and tok not in GENERIC_METRIC_TOKENS}
global_metric_facts = _rank_metric_lines(global_facts, tokens, max_lines=2)
if global_metric_facts:
key_facts = _merge_fact_lines(global_metric_facts, key_facts)
all_tokens = _merge_tokens(signal_tokens, keyword_tokens, question_tokens)
if plan.use_deep_retrieval:
if observer:
observer("retrieve", "extracting fact types")
fact_types = await _extract_fact_types(call_llm, normalized, keyword_tokens, plan)
if observer:
observer("retrieve", "deriving signals")
signals = await _derive_signals(call_llm, normalized, fact_types, plan)
if isinstance(signals, list):
signal_tokens = [str(item) for item in signals if item]
all_tokens = _merge_tokens(signal_tokens, keyword_tokens, question_tokens)
if observer:
observer("retrieve", "scanning chunks")
candidate_lines: list[str] = []
if signals:
for chunk in selected:
chunk_lines = chunk["text"].splitlines()
if not chunk_lines:
continue
hits = await _scan_chunk_for_signals(call_llm, normalized, signals, chunk_lines, plan)
if hits:
candidate_lines.extend(hits)
candidate_lines = list(dict.fromkeys(candidate_lines))
if candidate_lines:
if observer:
observer("retrieve", "pruning candidates")
metric_facts = await _prune_metric_candidates(call_llm, normalized, candidate_lines, plan, plan.metric_retries)
if metric_facts:
key_facts = _merge_fact_lines(metric_facts, key_facts)
if settings.debug_pipeline:
_debug_pipeline_log(settings, "metric_facts_selected", {"facts": metric_facts})
if not metric_facts:
if observer:
observer("retrieve", "fallback metric selection")
token_set = {tok for tok in all_tokens if tok and tok not in GENERIC_METRIC_TOKENS}
fallback_candidates = _rank_metric_lines(summary_lines, token_set, max_lines=200)
if fallback_candidates:
metric_facts = await _select_fact_lines(call_llm, normalized, fallback_candidates, plan, max_lines=max(2, plan.max_subquestions))
if not metric_facts and fallback_candidates:
metric_facts = fallback_candidates[: max(2, plan.max_subquestions)]
if metric_keys:
key_lines = _lines_for_metric_keys(summary_lines, metric_keys, max_lines=plan.max_subquestions * 3)
if key_lines:
metric_facts = _merge_fact_lines(key_lines, metric_facts)
if metric_facts:
metric_cover_tokens = [tok for tok in keyword_tokens if tok and tok not in GENERIC_METRIC_TOKENS]
if not metric_cover_tokens:
metric_cover_tokens = [tok for tok in question_tokens if tok and tok not in GENERIC_METRIC_TOKENS]
metric_facts = _ensure_token_coverage(metric_facts, metric_cover_tokens or all_tokens, summary_lines, max_add=plan.max_subquestions)
if metric_cover_tokens:
ranked_metric_lines = _rank_metric_lines(summary_lines, set(metric_cover_tokens), max_lines=max(1, plan.max_subquestions))
if ranked_metric_lines:
metric_facts = _merge_fact_lines(ranked_metric_lines, metric_facts)
if metric_facts and not _has_keyword_overlap(metric_facts, keyword_tokens):
best_line = _best_keyword_line(summary_lines, keyword_tokens)
if best_line:
metric_facts = _merge_fact_lines([best_line], metric_facts)
if metric_facts:
key_facts = _merge_fact_lines(metric_facts, key_facts)
if global_metric_facts:
metric_facts = _merge_fact_lines(global_metric_facts, metric_facts)
if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and not metric_facts and key_facts:
metric_facts = key_facts
if key_facts:
key_facts = _ensure_token_coverage(key_facts, _merge_tokens(keyword_tokens, question_tokens), summary_lines, max_add=plan.max_subquestions)
facts_used = list(dict.fromkeys(key_facts)) if key_facts else list(dict.fromkeys(metric_facts))
snapshot_context = "ClusterSnapshot:\n" + "\n".join([chunk["text"] for chunk in selected])
combined_facts = _merge_fact_lines(global_facts, key_facts) if global_facts else key_facts
if combined_facts:
snapshot_context = "KeyFacts:\n" + "\n".join(combined_facts) + "\n\n" + snapshot_context
context = _join_context([kb_summary, _format_runbooks(runbooks), snapshot_context, history_ctx if classify.get("follow_up") else ""])
if plan.use_tool and classify.get("needs_tool"):
if observer:
observer("tool", "suggesting tools")
tool_prompt = prompts.TOOL_PROMPT + "\nQuestion: " + normalized
tool_raw = await call_llm(prompts.TOOL_SYSTEM, tool_prompt, context=context, model=plan.fast_model, tag="tool")
tool_hint = _parse_json_block(tool_raw, fallback={})
if observer:
observer("subanswers", "drafting subanswers")
async def _subanswer_for(subq: str) -> str:
sub_prompt = prompts.SUBANSWER_PROMPT + "\nQuestion: " + subq
if plan.subanswer_retries > 1:
candidates = await _gather_limited(
[call_llm(prompts.ANSWER_SYSTEM, sub_prompt, context=context, model=plan.model, tag="subanswer") for _ in range(plan.subanswer_retries)],
plan.parallelism,
)
best_idx = await _select_best_candidate(call_llm, subq, candidates, plan, "subanswer_select")
return candidates[best_idx]
return await call_llm(prompts.ANSWER_SYSTEM, sub_prompt, context=context, model=plan.model, tag="subanswer")
subanswers: list[str] = []
if plan.parallelism > 1 and len(sub_questions) > 1:
subanswers = await _gather_limited([_subanswer_for(subq) for subq in sub_questions], plan.parallelism)
else:
for subq in sub_questions:
subanswers.append(await _subanswer_for(subq))
if observer:
observer("synthesize", "synthesizing")
reply, scores, claims = await finalize_answer(
engine=engine,
call_llm=call_llm,
normalized=normalized,
subanswers=subanswers,
context=context,
classify=classify,
plan=plan,
summary=summary,
summary_lines=summary_lines,
metric_facts=metric_facts,
key_facts=key_facts,
facts_used=facts_used,
allowed_nodes=allowed_nodes,
allowed_namespaces=allowed_namespaces,
runbook_paths=runbook_paths,
lowered_question=lowered_question,
force_metric=force_metric,
keyword_tokens=keyword_tokens,
question_tokens=question_tokens,
snapshot_context=snapshot_context,
observer=observer,
mode=mode,
metric_keys=metric_keys if 'metric_keys' in locals() else None,
)
except LLMTimeBudgetExceeded:
time_budget_hit = True
if not reply:
budget = max(1, round(time_budget_sec)) if time_budget_sec > 0 else 0
budget_text = f"{budget}s" if budget else "its configured"
if mode in {"quick", "fast"}:
reply = f"Quick mode hit {budget_text} time budget before finishing. Try atlas-smart for a deeper answer."
elif mode == "smart":
reply = f"Smart mode hit {budget_text} time budget before finishing. Try atlas-genius or ask a narrower follow-up."
else:
reply = "I ran out of time before I could finish this answer."
scores = _default_scores()
except LLMLimitReached:
if not reply:
reply = "I started working on this but hit my reasoning limit. Ask again with 'Run limitless' for a deeper pass."
scores = _default_scores()
finally:
elapsed = round(time.monotonic() - started, 2)
log.info(
"atlasbot_answer",
extra={
"extra": {
"mode": mode,
"seconds": elapsed,
"llm_calls": call_count,
"limit": call_cap,
"limit_hit": limit_hit,
"time_budget_sec": time_budget_sec,
"time_budget_hit": time_budget_hit,
}
},
)
if limit_hit and "run limitless" not in reply.lower():
reply = reply.rstrip() + "\n\nNote: I hit my reasoning limit. Ask again with 'Run limitless' for a deeper pass."
if conversation_id and claims:
engine._store_state(conversation_id, claims, summary, snapshot_used, pin_snapshot)
return AnswerResult(
reply,
scores,
_build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started),
)

View File

@ -0,0 +1,170 @@
from __future__ import annotations
import json
import re
from collections.abc import Callable
from typing import Any
from atlasbot.llm import prompts
from ._base import *
from .common import *
from .post import *
from .post_ext import *
from .retrieval import *
from .spine import *
async def finalize_answer(  # noqa: C901
    *,
    engine: Any,
    call_llm: Callable[..., Any],
    normalized: str,
    subanswers: list[str],
    context: str,
    classify: dict[str, Any],
    plan: ModePlan,
    summary: dict[str, Any],
    summary_lines: list[str],
    metric_facts: list[str],
    key_facts: list[str],
    facts_used: list[str],
    allowed_nodes: list[str],
    allowed_namespaces: list[str],
    runbook_paths: list[str],
    lowered_question: str,
    force_metric: bool,
    keyword_tokens: list[str],
    question_tokens: list[str],
    snapshot_context: str,
    observer: Callable[[str, str], None] | None,
    mode: str,
    metric_keys: list[str] | None = None,
) -> tuple[str, AnswerScores, list[ClaimItem]]:
    """Synthesize and post-process the final answer.

    Runs the post-synthesis repair pipeline in order: evidence fix,
    metric-fact enforcement, hardware/raspberry overrides, unknown
    node/namespace stripping, runbook-path enforcement, evidence guard,
    focus fix, insight guard, critic revision, gap note, dedup, and
    finally scoring plus claim extraction.

    Input:
    - `engine`: answer engine providing synthesis/dedup/score/claims helpers;
    - `call_llm`: awaitable LLM call wrapper shared across passes;
    - remaining arguments carry the question, retrieval context, grounded
      facts, and allow-lists computed by the caller.

    Output:
    - `(reply, scores, claims)` tuple for the finished answer.
    """
    reply = await engine._synthesize_answer(normalized, subanswers, context, classify, plan, call_llm)
    # Hoisted: the merged token set is reused by every metric-fact check below.
    merged_tokens = _merge_tokens(keyword_tokens, question_tokens)
    unknown_nodes = _find_unknown_nodes(reply, allowed_nodes)
    unknown_namespaces = _find_unknown_namespaces(reply, allowed_namespaces)
    runbook_fix = _needs_runbook_fix(reply, runbook_paths)
    runbook_needed = _needs_runbook_reference(normalized, runbook_paths, reply)
    needs_evidence = _needs_evidence_fix(reply, classify)
    hardware_terms = ("rpi", "raspberry", "jetson", "amd64", "arm64", "hardware")
    hardware_line = _line_starting_with(summary_lines, "hardware_nodes:")
    if any(term in lowered_question for term in hardware_terms) and hardware_line:
        needs_evidence = True
    if metric_facts and (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and not _reply_matches_metric_facts(reply, metric_facts, merged_tokens):
        needs_evidence = True
    if classify.get("question_type") in {"open_ended", "planning"} and metric_facts:
        needs_evidence = True
    resolved_runbook = None
    if runbook_paths and (runbook_fix or runbook_needed):
        resolver_prompt = prompts.RUNBOOK_SELECT_PROMPT + "\nQuestion: " + normalized
        resolver_raw = await call_llm(prompts.RUNBOOK_SELECT_SYSTEM, resolver_prompt, context="AllowedRunbooks:\n" + "\n".join(runbook_paths), model=plan.fast_model, tag="runbook_select")
        resolver = _parse_json_block(resolver_raw, fallback={})
        candidate = resolver.get("path") if isinstance(resolver.get("path"), str) else None
        if candidate and candidate in runbook_paths:
            resolved_runbook = candidate
    if (snapshot_context and needs_evidence) or unknown_nodes or unknown_namespaces or runbook_fix or runbook_needed:
        if observer:
            observer("evidence_fix", "repairing missing evidence")
        # Feed the fixer every allow-list and must-use fact as extra prompt lines.
        extra_bits = []
        if unknown_nodes:
            extra_bits.append("UnknownNodes: " + ", ".join(sorted(unknown_nodes)))
        if unknown_namespaces:
            extra_bits.append("UnknownNamespaces: " + ", ".join(sorted(unknown_namespaces)))
        if runbook_paths:
            extra_bits.append("AllowedRunbooks: " + ", ".join(runbook_paths))
        if resolved_runbook:
            extra_bits.append("ResolvedRunbook: " + resolved_runbook)
        if metric_facts:
            extra_bits.append("MustUseFacts: " + "; ".join(metric_facts[:4]))
        if hardware_line:
            extra_bits.append("HardwareNodes: " + hardware_line)
        if allowed_nodes:
            extra_bits.append("AllowedNodes: " + ", ".join(allowed_nodes))
        if allowed_namespaces:
            extra_bits.append("AllowedNamespaces: " + ", ".join(allowed_namespaces))
        fix_prompt = prompts.EVIDENCE_FIX_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply + ("\n" + "\n".join(extra_bits) if extra_bits else "")
        reply = await call_llm(prompts.EVIDENCE_FIX_SYSTEM, fix_prompt, context=context, model=plan.model, tag="evidence_fix")
    if metric_facts and not _reply_matches_metric_facts(reply, metric_facts, merged_tokens):
        # Second pass: explicitly instruct the model to include every fact.
        enforce_prompt = prompts.EVIDENCE_FIX_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply + "\nMustIncludeFacts: " + "; ".join(metric_facts[:6]) + "\nInstruction: The answer must include all MustIncludeFacts items."
        reply = await call_llm(prompts.EVIDENCE_FIX_SYSTEM, enforce_prompt, context=context, model=plan.model, tag="evidence_fix_enforce")
    if metric_facts and not _reply_matches_metric_facts(reply, metric_facts, merged_tokens):
        # Last resort: answer directly from a single metric line.
        # Fix: `metric_keys` is a parameter, so the previous
        # `'metric_keys' in locals()` guard was always true — dropped.
        direct_candidates = _lines_for_metric_keys(summary_lines, metric_keys, max_lines=plan.max_subquestions * 3) if metric_keys else summary_lines
        direct_line = _select_metric_line(direct_candidates, normalized, merged_tokens)
        if direct_line:
            direct_prompt = f"Question: {normalized}\nFact: {direct_line}\nAnswer using the fact."
            reply = await call_llm(prompts.ANSWER_SYSTEM, direct_prompt, context="", model=plan.fast_model, tag="metric_direct")
            if (mode == "quick" and any(term in normalized.lower() for term in ("how many", "count", "total"))) or not _reply_matches_metric_facts(reply, [direct_line], merged_tokens):
                reply = _format_direct_metric_line(direct_line)
    if "raspberry" in lowered_question and "not" in lowered_question:
        # Deterministic override for "non-Raspberry-Pi nodes" questions.
        non_rpi = _non_rpi_nodes(summary)
        if non_rpi:
            reply = _format_hardware_groups(non_rpi, "Non-Raspberry Pi nodes")
    if unknown_nodes or unknown_namespaces:
        # Re-check after repairs: the fixer may have removed (or reintroduced) them.
        refreshed_nodes = _find_unknown_nodes(reply, allowed_nodes)
        refreshed_namespaces = _find_unknown_namespaces(reply, allowed_namespaces)
        if refreshed_nodes or refreshed_namespaces:
            reply = _strip_unknown_entities(reply, refreshed_nodes, refreshed_namespaces)
    if runbook_paths and resolved_runbook and _needs_runbook_reference(normalized, runbook_paths, reply):
        if observer:
            observer("runbook_enforce", "enforcing runbook path")
        enforce_prompt = prompts.RUNBOOK_ENFORCE_PROMPT.format(path=resolved_runbook)
        reply = await call_llm(prompts.RUNBOOK_ENFORCE_SYSTEM, enforce_prompt + "\nAnswer: " + reply, context=context, model=plan.model, tag="runbook_enforce")
    if runbook_paths:
        # Replace any hallucinated runbook path with a resolved/best-match one.
        invalid = [token for token in re.findall(r"runbooks/[A-Za-z0-9._-]+", reply) if token.lower() not in {p.lower() for p in runbook_paths}]
        if invalid:
            if observer:
                observer("runbook_enforce", "replacing invalid runbook path")
            resolver_prompt = prompts.RUNBOOK_SELECT_PROMPT + "\nQuestion: " + normalized
            resolver_raw = await call_llm(prompts.RUNBOOK_SELECT_SYSTEM, resolver_prompt, context="AllowedRunbooks:\n" + "\n".join(runbook_paths), model=plan.fast_model, tag="runbook_select")
            resolver = _parse_json_block(resolver_raw, fallback={})
            candidate = resolver.get("path") if isinstance(resolver.get("path"), str) else None
            if not (candidate and candidate in runbook_paths):
                candidate = _best_runbook_match(invalid[0], runbook_paths)
            if candidate and candidate in runbook_paths:
                enforce_prompt = prompts.RUNBOOK_ENFORCE_PROMPT.format(path=candidate)
                reply = await call_llm(prompts.RUNBOOK_ENFORCE_SYSTEM, enforce_prompt + "\nAnswer: " + reply, context=context, model=plan.model, tag="runbook_enforce")
    reply = _strip_unknown_entities(reply, unknown_nodes, unknown_namespaces)
    if facts_used and _needs_evidence_guard(reply, facts_used):
        if observer:
            observer("evidence_guard", "tightening unsupported claims")
        use_guard = True
        if mode in {"smart", "genius"}:
            # Smart/genius modes may skip the guard when the draft is judged consistent.
            decision = await _contradiction_decision(ContradictionContext(call_llm, normalized, reply, facts_used, plan), attempts=3 if mode == "genius" else 1)
            use_guard = decision.get("use_facts", True)
        if use_guard:
            guard_prompt = prompts.EVIDENCE_GUARD_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply + "\nFactsUsed:\n" + "\n".join(facts_used)
            reply = await call_llm(prompts.EVIDENCE_GUARD_SYSTEM, guard_prompt, context=context, model=plan.model, tag="evidence_guard")
    if _needs_focus_fix(normalized, reply, classify):
        if observer:
            observer("focus_fix", "tightening answer")
        reply = await call_llm(prompts.EVIDENCE_FIX_SYSTEM, prompts.FOCUS_FIX_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply, context=context, model=plan.model, tag="focus_fix")
        # NOTE(review): fallback assumed to apply only after a focus fix — confirm nesting.
        if not metric_facts or not _has_keyword_overlap(metric_facts, keyword_tokens):
            best_line = _best_keyword_line(summary_lines, keyword_tokens)
            if best_line:
                reply = f"Latest metrics: {best_line}."
    if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and metric_facts:
        # If the reply carries no number from the facts, fall back to the best fact line.
        best_line = None
        lowered_keywords = [kw.lower() for kw in keyword_tokens if kw]
        for line in metric_facts:
            if any(kw in line.lower() for kw in lowered_keywords):
                best_line = line
                break
        best_line = best_line or metric_facts[0]
        reply_numbers = set(re.findall(r"\d+(?:\.\d+)?", reply))
        fact_numbers = set(re.findall(r"\d+(?:\.\d+)?", " ".join(metric_facts)))
        if not reply_numbers or (fact_numbers and not (reply_numbers & fact_numbers)):
            reply = f"Latest metrics: {best_line}."
    if _should_use_insight_guard(classify):
        if observer:
            observer("insight_guard", "checking for concrete signals")
        reply = await _apply_insight_guard(InsightGuardInput(question=normalized, reply=reply, classify=classify, context=context, plan=plan, call_llm=call_llm, facts=metric_facts or key_facts))
    if plan.use_critic:
        if observer:
            observer("critic", "reviewing")
        critic_prompt = prompts.CRITIC_PROMPT + "\nQuestion: " + normalized + "\nAnswer: " + reply
        critic_raw = await call_llm(prompts.CRITIC_SYSTEM, critic_prompt, context=context, model=plan.model, tag="critic")
        critic = _parse_json_block(critic_raw, fallback={})
        if critic.get("issues"):
            revise_prompt = prompts.REVISION_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply + "\nCritique: " + json.dumps(critic)
            reply = await call_llm(prompts.REVISION_SYSTEM, revise_prompt, context=context, model=plan.model, tag="revise")
    if plan.use_gap:
        if observer:
            observer("gap", "checking gaps")
        gap_prompt = prompts.EVIDENCE_GAP_PROMPT + "\nQuestion: " + normalized + "\nAnswer: " + reply
        gap_raw = await call_llm(prompts.GAP_SYSTEM, gap_prompt, context=context, model=plan.fast_model, tag="gap")
        gap = _parse_json_block(gap_raw, fallback={})
        note = str(gap.get("note") or "").strip()
        if note:
            reply = f"{reply}\n\n{note}"
    reply = await engine._dedup_reply(reply, plan, call_llm, tag="dedup")
    scores = await engine._score_answer(normalized, reply, plan, call_llm)
    claims = await engine._extract_claims(normalized, reply, summary, facts_used, call_llm)
    return reply, scores, claims

View File

@ -1,35 +1,46 @@
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass
import re import re
from dataclasses import dataclass
@dataclass(frozen=True) @dataclass(frozen=True)
class IntentMatch: class IntentMatch:
"""Describe the best cluster intent match for a user question."""
kind: str kind: str
score: int score: int
_COUNT_TERMS = r"(how\\s+many|count|number\\s+of|total|totals|tally|amount\\s+of|quantity|sum\\s+of|overall|in\\s+total|all\\s+up)" _COUNT_TERMS = r"(how\s+many|count|number\s+of|total|totals|tally|amount\s+of|quantity|sum\s+of|overall|in\s+total|all\s+up)"
_NODE_TERMS = r"(nodes?|workers?|worker\\s+nodes?|cluster\\s+nodes?|machines?|hosts?|members?|instances?|servers?|agents?|control[-\\s]?plane|control\\s+plane)" _NODE_TERMS = r"(nodes?|workers?|worker\s+nodes?|cluster\s+nodes?|machines?|hosts?|members?|instances?|servers?|agents?|control[-\s]?plane|control\s+plane)"
_READY_TERMS = r"(ready|unready|not\\s+ready|down|offline|not\\s+responding|missing|lost|gone|drain(?:ed|ing)?|cordon(?:ed|ing)?)" _READY_TERMS = r"(ready|unready|not\s+ready|down|offline|not\s+responding|missing|lost|gone|drain(?:ed|ing)?|cordon(?:ed|ing)?)"
_HOTTEST_TERMS = r"(hottest|hot|highest|max(?:imum)?|peak|top|most|worst|spikiest|heaviest|largest|biggest|noisiest|loudest)" _HOTTEST_TERMS = r"(hottest|hot|highest|max(?:imum)?|peak|top|most|worst|spikiest|heaviest|largest|biggest|noisiest|loudest)"
_CPU_TERMS = r"(cpu|processor|processors|compute|core|cores|load|load\\s+avg|load\\s+average|util(?:ization)?|usage)" _CPU_TERMS = r"(cpu|processor|processors|compute|core|cores|load|load\s+avg|load\s+average|util(?:ization)?|usage)"
_RAM_TERMS = r"(ram|memory|mem|heap|rss|resident|swap)" _RAM_TERMS = r"(ram|memory|mem|heap|rss|resident|swap)"
_NET_TERMS = r"(net|network|bandwidth|throughput|traffic|rx|tx|ingress|egress|bits|bytes|packets|pps|bps)" _NET_TERMS = r"(net|network|bandwidth|throughput|traffic|rx|tx|ingress|egress|bits|bytes|packets|pps|bps)"
_IO_TERMS = r"(\\bio\\b|i/o|disk\\s+io|disk\\s+activity|read/?write|storage\\s+io|iops|latency)" _IO_TERMS = r"(\bio\b|i/o|disk\s+io|disk\s+activity|read/?write|storage\s+io|iops|latency)"
_DISK_TERMS = r"(disk|storage|volume|pvc|filesystem|fs|capacity|\\bspace\\b|full|usage)" _DISK_TERMS = r"(disk|storage|volume|pvc|filesystem|fs|capacity|\bspace\b|full|usage)"
_PG_TERMS = r"(postgres|postgresql|pg\\b|database|db|sql|psql)" _PG_TERMS = r"(postgres|postgresql|pg\b|database|db|sql|psql)"
_CONN_TERMS = r"(connections?|conn|pool|sessions?|clients?|active\\s+connections?|open\\s+connections?)" _CONN_TERMS = r"(connections?|conn|pool|sessions?|clients?|active\s+connections?|open\s+connections?)"
_DB_HOT_TERMS = r"(hottest|busiest|most|largest|top|heaviest|noisiest|highest\\s+load)" _DB_HOT_TERMS = r"(hottest|busiest|most|largest|top|heaviest|noisiest|highest\s+load)"
_NAMESPACE_TERMS = r"(namespace|namespaces|ns\\b|tenant|workload\\s+namespace)" _NAMESPACE_TERMS = r"(namespace|namespaces|ns\b|tenant|workload\s+namespace)"
_PODS_TERMS = r"(pods?|workloads?|tasks?|containers?|deployments?|jobs?|cronjobs?|daemonsets?|statefulsets?)" _PODS_TERMS = r"(pods?|workloads?|tasks?|containers?|deployments?|jobs?|cronjobs?|daemonsets?|statefulsets?)"
_NON_RPI_TERMS = r"(non[-\\s]?raspberry|not\\s+raspberry|non[-\\s]?rpi|not\\s+rpi|amd64|x86|x86_64|intel|ryzen|jetson|arm64\\b(?!.*rpi))" _NON_RPI_TERMS = r"(non[-\s]?raspberry|not\s+raspberry|non[-\s]?rpi|not\s+rpi|amd64|x86|x86_64|intel|ryzen|jetson|arm64\b(?!.*rpi))"
_PRESSURE_TERMS = r"(pressure|overload|hotspot|bottleneck|saturation|headroom|strain|stress|critical|warning|at\\s+capacity|near\\s+limit)" _PRESSURE_TERMS = r"(pressure|overload|hotspot|bottleneck|saturation|headroom|strain|stress|critical|warning|at\s+capacity|near\s+limit)"
_HARDWARE_TERMS = r"(hardware|arch(?:itecture)?|platform|mix|profile|node\\s+types?)" _HARDWARE_TERMS = r"(hardware|arch(?:itecture)?|platform|mix|profile|node\s+types?)"
def route_intent(question: str) -> IntentMatch | None: def route_intent(question: str) -> IntentMatch | None:
"""Classify a question into a deterministic cluster intent.
Input:
- `question`: user text to inspect.
Output:
- the highest-confidence `IntentMatch`, or `None` when no intent fits.
"""
text = (question or "").lower() text = (question or "").lower()
if not text: if not text:
return None return None
@ -44,13 +55,13 @@ def route_intent(question: str) -> IntentMatch | None:
return any(_has(pat) for pat in patterns) return any(_has(pat) for pat in patterns)
intents = [ intents = [
(lambda: _all(_COUNT_TERMS) and (_has(_NODE_TERMS) or "cluster" in text), IntentMatch("nodes_count", 90)),
( (
lambda: _all(_READY_TERMS) and (_any(_NODE_TERMS) or "cluster" in text or "workers" in text), lambda: _all(_READY_TERMS) and (_any(_NODE_TERMS) or "cluster" in text or "workers" in text),
IntentMatch("nodes_ready", 85), IntentMatch("nodes_ready", 85),
), ),
(lambda: _all(_COUNT_TERMS) and (_has(_NODE_TERMS) or "cluster" in text), IntentMatch("nodes_count", 90)),
(lambda: _all(_NON_RPI_TERMS) and (_any(_NODE_TERMS) or "cluster" in text), IntentMatch("nodes_non_rpi", 80)), (lambda: _all(_NON_RPI_TERMS) and (_any(_NODE_TERMS) or "cluster" in text), IntentMatch("nodes_non_rpi", 80)),
(lambda: _all(_HARDWARE_TERMS) and (_has(_NODE_TERMS) or "cluster" in text), IntentMatch("hardware_mix", 75)), (lambda: _all(_HARDWARE_TERMS) and (_has(_NODE_TERMS) or "cluster" in text or "mix" in text), IntentMatch("hardware_mix", 75)),
(lambda: _all(_HOTTEST_TERMS, _CPU_TERMS), IntentMatch("hottest_cpu", 80)), (lambda: _all(_HOTTEST_TERMS, _CPU_TERMS), IntentMatch("hottest_cpu", 80)),
(lambda: _all(_HOTTEST_TERMS, _RAM_TERMS), IntentMatch("hottest_ram", 80)), (lambda: _all(_HOTTEST_TERMS, _RAM_TERMS), IntentMatch("hottest_ram", 80)),
(lambda: _all(_HOTTEST_TERMS, _NET_TERMS), IntentMatch("hottest_net", 80)), (lambda: _all(_HOTTEST_TERMS, _NET_TERMS), IntentMatch("hottest_net", 80)),

View File

@ -7,6 +7,8 @@ log = logging.getLogger(__name__)
class KnowledgeBase: class KnowledgeBase:
"""Load Atlas knowledge-base files and expose summary snippets."""
def __init__(self, base_dir: str) -> None: def __init__(self, base_dir: str) -> None:
self._base = Path(base_dir) if base_dir else None self._base = Path(base_dir) if base_dir else None
self._atlas: dict[str, Any] = {} self._atlas: dict[str, Any] = {}
@ -14,6 +16,8 @@ class KnowledgeBase:
self._loaded = False self._loaded = False
def load(self) -> None: def load(self) -> None:
"""Load catalog files once so subsequent reads stay cheap."""
if self._loaded or not self._base: if self._loaded or not self._base:
return return
self._atlas = self._read_json(self._base / "catalog" / "atlas.json") self._atlas = self._read_json(self._base / "catalog" / "atlas.json")
@ -30,6 +34,8 @@ class KnowledgeBase:
return {} return {}
def summary(self) -> str: def summary(self) -> str:
"""Return a short human-readable KB summary for prompt context."""
self.load() self.load()
if not self._atlas: if not self._atlas:
return "" return ""
@ -42,12 +48,14 @@ class KnowledgeBase:
if services: if services:
parts.append(f"Services indexed: {len(services)}.") parts.append(f"Services indexed: {len(services)}.")
if isinstance(self._atlas, dict): if isinstance(self._atlas, dict):
keys = [key for key in self._atlas.keys() if key not in {"sources"}] keys = [key for key in self._atlas if key not in {"sources"}]
if keys: if keys:
parts.append(f"Atlas keys: {', '.join(sorted(keys)[:8])}.") parts.append(f"Atlas keys: {', '.join(sorted(keys)[:8])}.")
return " ".join(parts) return " ".join(parts)
def runbook_titles(self, *, limit: int = 5) -> str: def runbook_titles(self, *, limit: int = 5) -> str:
"""Render the top runbook titles for prompt context."""
self.load() self.load()
if not self._runbooks: if not self._runbooks:
return "" return ""
@ -64,6 +72,8 @@ class KnowledgeBase:
return "Relevant runbooks:\n" + "\n".join(titles[:limit]) return "Relevant runbooks:\n" + "\n".join(titles[:limit])
def runbook_paths(self, *, limit: int = 10) -> list[str]: def runbook_paths(self, *, limit: int = 10) -> list[str]:
"""Return the runbook paths used for exact-path enforcement."""
self.load() self.load()
if not self._runbooks: if not self._runbooks:
return [] return []
@ -77,6 +87,8 @@ class KnowledgeBase:
return paths[:limit] return paths[:limit]
def chunk_lines(self, *, max_files: int = 20, max_chars: int = 6000) -> list[str]: def chunk_lines(self, *, max_files: int = 20, max_chars: int = 6000) -> list[str]:
"""Collect KB excerpts into prompt-sized chunks."""
self.load() self.load()
if not self._base: if not self._base:
return [] return []

View File

@ -17,6 +17,8 @@ class LLMError(RuntimeError):
class LLMClient: class LLMClient:
"""Wrap the Ollama chat endpoint with retries and fallback-model support."""
def __init__(self, settings: Settings) -> None: def __init__(self, settings: Settings) -> None:
self._settings = settings self._settings = settings
self._timeout = settings.ollama_timeout_sec self._timeout = settings.ollama_timeout_sec
@ -37,6 +39,8 @@ class LLMClient:
model: str | None = None, model: str | None = None,
timeout_sec: float | None = None, timeout_sec: float | None = None,
) -> str: ) -> str:
"""Send a chat request and return the model content text."""
payload = { payload = {
"model": model or self._settings.ollama_model, "model": model or self._settings.ollama_model,
"messages": messages, "messages": messages,
@ -77,6 +81,8 @@ class LLMClient:
def build_messages(system: str, prompt: str, *, context: str | None = None) -> list[dict[str, str]]: def build_messages(system: str, prompt: str, *, context: str | None = None) -> list[dict[str, str]]:
"""Assemble the minimal chat message list used by the answer pipeline."""
messages: list[dict[str, str]] = [{"role": "system", "content": system}] messages: list[dict[str, str]] = [{"role": "system", "content": system}]
if context: if context:
messages.append({"role": "user", "content": "Context (grounded facts):\n" + context}) messages.append({"role": "user", "content": "Context (grounded facts):\n" + context})
@ -85,6 +91,8 @@ def build_messages(system: str, prompt: str, *, context: str | None = None) -> l
def parse_json(text: str, *, fallback: dict[str, Any] | None = None) -> dict[str, Any]: def parse_json(text: str, *, fallback: dict[str, Any] | None = None) -> dict[str, Any]:
"""Parse a JSON blob from model output and fall back to a safe default."""
try: try:
raw = text.strip() raw = text.strip()
if raw.startswith("`"): if raw.startswith("`"):

View File

@ -253,7 +253,7 @@ CONTRADICTION_PROMPT = (
"Question: {question}\n" "Question: {question}\n"
"Draft: {draft}\n" "Draft: {draft}\n"
"FactsUsed:\n{facts}\n\n" "FactsUsed:\n{facts}\n\n"
"Return JSON: {\"use_facts\": true|false, \"confidence\": 0-100, \"reason\": \"...\"}" "Return JSON: {{\"use_facts\": true|false, \"confidence\": 0-100, \"reason\": \"...\"}}"
) )
CANDIDATE_SELECT_SYSTEM = ( CANDIDATE_SELECT_SYSTEM = (

View File

@ -1,13 +1,17 @@
import json import json
import logging import logging
import sys import sys
from datetime import datetime, timezone from datetime import UTC, datetime
class JsonFormatter(logging.Formatter): class JsonFormatter(logging.Formatter):
"""Emit structured log records for the atlasbot services."""
def format(self, record: logging.LogRecord) -> str: def format(self, record: logging.LogRecord) -> str:
"""Render a log record as JSON for downstream ingestion."""
payload = { payload = {
"timestamp": datetime.now(timezone.utc).isoformat(), "timestamp": datetime.now(UTC).isoformat(),
"level": record.levelname.lower(), "level": record.levelname.lower(),
"logger": record.name, "logger": record.name,
"message": record.getMessage(), "message": record.getMessage(),
@ -21,6 +25,8 @@ class JsonFormatter(logging.Formatter):
def configure_logging(level: str = "INFO") -> None: def configure_logging(level: str = "INFO") -> None:
"""Install JSON logging on the process root logger."""
root = logging.getLogger() root = logging.getLogger()
root.setLevel(level.upper()) root.setLevel(level.upper())
handler = logging.StreamHandler(sys.stdout) handler = logging.StreamHandler(sys.stdout)

View File

@ -17,6 +17,8 @@ log = logging.getLogger(__name__)
def _build_engine(settings) -> AnswerEngine: def _build_engine(settings) -> AnswerEngine:
"""Construct the answer engine from the configured backends."""
kb = KnowledgeBase(settings.kb_dir) kb = KnowledgeBase(settings.kb_dir)
snapshot = SnapshotProvider(settings) snapshot = SnapshotProvider(settings)
llm = LLMClient(settings) llm = LLMClient(settings)
@ -24,6 +26,8 @@ def _build_engine(settings) -> AnswerEngine:
async def main() -> None: async def main() -> None:
"""Start the HTTP API, Matrix bots, and queue worker."""
settings = load_settings() settings = load_settings()
configure_logging("INFO") configure_logging("INFO")
@ -45,14 +49,7 @@ async def main() -> None:
queue = QueueManager(settings, handler) queue = QueueManager(settings, handler)
await queue.start() await queue.start()
async def answer_handler( # noqa: PLR0913 async def answer_handler(question: str, mode: str, history=None, conversation_id=None, snapshot_pin: bool | None = None, observer=None) -> AnswerResult:
question: str,
mode: str,
history=None,
conversation_id=None,
snapshot_pin: bool | None = None,
observer=None,
) -> AnswerResult:
if settings.queue_enabled: if settings.queue_enabled:
payload = await queue.submit( payload = await queue.submit(
{ {
@ -86,6 +83,8 @@ async def main() -> None:
def result_scores(payload: dict[str, object]) -> AnswerScores: def result_scores(payload: dict[str, object]) -> AnswerScores:
"""Coerce a queue payload into the public `AnswerScores` shape."""
scores = payload.get("scores") if isinstance(payload, dict) else None scores = payload.get("scores") if isinstance(payload, dict) else None
if isinstance(scores, dict): if isinstance(scores, dict):
try: try:

View File

@ -15,11 +15,15 @@ log = logging.getLogger(__name__)
class MatrixClient: class MatrixClient:
"""Wrap the Matrix client endpoints used by the bot runtime."""
def __init__(self, settings: Settings, bot: MatrixBotConfig) -> None: def __init__(self, settings: Settings, bot: MatrixBotConfig) -> None:
self._settings = settings self._settings = settings
self._bot = bot self._bot = bot
async def login(self) -> str: async def login(self) -> str:
"""Exchange bot credentials for a Matrix access token."""
payload = { payload = {
"type": "m.login.password", "type": "m.login.password",
"identifier": {"type": "m.id.user", "user": self._bot.username}, "identifier": {"type": "m.id.user", "user": self._bot.username},
@ -33,6 +37,8 @@ class MatrixClient:
return data.get("access_token", "") return data.get("access_token", "")
async def resolve_room(self, token: str) -> str: async def resolve_room(self, token: str) -> str:
"""Resolve the configured room alias into a room id."""
alias = quote(self._settings.room_alias, safe="") alias = quote(self._settings.room_alias, safe="")
url = f"{self._settings.matrix_base}/_matrix/client/v3/directory/room/{alias}" url = f"{self._settings.matrix_base}/_matrix/client/v3/directory/room/{alias}"
headers = {"Authorization": f"Bearer {token}"} headers = {"Authorization": f"Bearer {token}"}
@ -50,12 +56,16 @@ class MatrixClient:
return data.get("room_id", "") return data.get("room_id", "")
async def join_room(self, token: str, room_id: str) -> None: async def join_room(self, token: str, room_id: str) -> None:
"""Join the target room if the bot is not already present."""
url = f"{self._settings.matrix_base}/_matrix/client/v3/rooms/{room_id}/join" url = f"{self._settings.matrix_base}/_matrix/client/v3/rooms/{room_id}/join"
headers = {"Authorization": f"Bearer {token}"} headers = {"Authorization": f"Bearer {token}"}
async with httpx.AsyncClient(timeout=15.0) as client: async with httpx.AsyncClient(timeout=15.0) as client:
await client.post(url, headers=headers) await client.post(url, headers=headers)
async def send_message(self, token: str, room_id: str, text: str) -> None: async def send_message(self, token: str, room_id: str, text: str) -> None:
"""Send a plain text message to the Matrix room."""
url = f"{self._settings.matrix_base}/_matrix/client/v3/rooms/{room_id}/send/m.room.message" url = f"{self._settings.matrix_base}/_matrix/client/v3/rooms/{room_id}/send/m.room.message"
headers = {"Authorization": f"Bearer {token}"} headers = {"Authorization": f"Bearer {token}"}
payload = {"msgtype": "m.text", "body": text} payload = {"msgtype": "m.text", "body": text}
@ -63,6 +73,8 @@ class MatrixClient:
await client.post(url, json=payload, headers=headers) await client.post(url, json=payload, headers=headers)
async def sync(self, token: str, since: str | None) -> dict[str, Any]: async def sync(self, token: str, since: str | None) -> dict[str, Any]:
"""Fetch the incremental Matrix sync payload."""
base = f"{self._settings.matrix_base}/_matrix/client/v3/sync" base = f"{self._settings.matrix_base}/_matrix/client/v3/sync"
params = {"timeout": 30000} params = {"timeout": 30000}
if since: if since:
@ -75,17 +87,9 @@ class MatrixClient:
class MatrixBot: class MatrixBot:
def __init__( """Drive Matrix conversation handling and heartbeat replies."""
self,
settings: Settings, def __init__(self, settings: Settings, bot: MatrixBotConfig, engine: AnswerEngine, answer_handler: Callable[[str, str, list[dict[str, str]] | None, str | None, Callable[[str, str], None] | None], Awaitable[AnswerResult]] | None = None) -> None:
bot: MatrixBotConfig,
engine: AnswerEngine,
answer_handler: Callable[
[str, str, list[dict[str, str]] | None, str | None, Callable[[str, str], None] | None],
Awaitable[AnswerResult],
]
| None = None,
) -> None:
self._settings = settings self._settings = settings
self._bot = bot self._bot = bot
self._engine = engine self._engine = engine
@ -94,6 +98,8 @@ class MatrixBot:
self._history: dict[str, list[dict[str, str]]] = {} self._history: dict[str, list[dict[str, str]]] = {}
async def run(self) -> None: async def run(self) -> None:
"""Continuously bootstrap, sync, and answer Matrix events."""
while True: while True:
try: try:
token = await self._client.login() token = await self._client.login()

View File

@ -1,7 +1,8 @@
import asyncio import asyncio
import json import json
import logging import logging
from typing import Any, Awaitable, Callable from collections.abc import Awaitable, Callable
from typing import Any
from nats.aio.client import Client as NATS from nats.aio.client import Client as NATS
from nats.js.errors import NotFoundError from nats.js.errors import NotFoundError
@ -12,6 +13,8 @@ log = logging.getLogger(__name__)
class QueueManager: class QueueManager:
"""Manage optional NATS-backed work queue processing."""
def __init__(self, settings: Settings, handler: Callable[[dict[str, Any]], Awaitable[dict[str, Any]]]) -> None: def __init__(self, settings: Settings, handler: Callable[[dict[str, Any]], Awaitable[dict[str, Any]]]) -> None:
self._settings = settings self._settings = settings
self._handler = handler self._handler = handler
@ -20,6 +23,8 @@ class QueueManager:
self._worker_task: asyncio.Task | None = None self._worker_task: asyncio.Task | None = None
async def start(self) -> None: async def start(self) -> None:
"""Connect to NATS and start the worker loop when queueing is enabled."""
if not self._settings.queue_enabled: if not self._settings.queue_enabled:
return return
self._nc = NATS() self._nc = NATS()
@ -29,12 +34,16 @@ class QueueManager:
self._worker_task = asyncio.create_task(self._worker_loop()) self._worker_task = asyncio.create_task(self._worker_loop())
async def stop(self) -> None: async def stop(self) -> None:
"""Drain the NATS connection and cancel background work."""
if self._worker_task: if self._worker_task:
self._worker_task.cancel() self._worker_task.cancel()
if self._nc: if self._nc:
await self._nc.drain() await self._nc.drain()
async def submit(self, payload: dict[str, Any]) -> dict[str, Any]: async def submit(self, payload: dict[str, Any]) -> dict[str, Any]:
"""Submit work to NATS or fall back to direct handling."""
if not self._settings.queue_enabled: if not self._settings.queue_enabled:
return await self._handler(payload) return await self._handler(payload)
if not self._nc or not self._js: if not self._nc or not self._js:

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,8 @@
"""Snapshot summary builder and text render helpers."""
from .core_a import *
from .core_b import *
from .format_a import *
from .format_b import *
from .format_c import *
from .summary_text import *

View File

@ -0,0 +1,492 @@
from __future__ import annotations

import logging
import time
from collections import Counter
from typing import Any

import httpx

from atlasbot.config import Settings
log = logging.getLogger(__name__)
PVC_USAGE_CRITICAL = 90
_BYTES_KB = 1024
_BYTES_MB = 1024 * 1024
_BYTES_GB = 1024 * 1024 * 1024
_VALUE_PAIR_LEN = 2
class SnapshotProvider:
    """Fetch and cache the Ariadne snapshot used by the answer engine.

    The provider keeps the last good payload and serves it whenever the
    cache is fresh or a refresh fails, so callers degrade gracefully.
    """

    def __init__(self, settings: Settings) -> None:
        self._settings = settings
        # Last successfully fetched snapshot and its fetch timestamp.
        self._cache: dict[str, Any] = {}
        self._cache_ts = 0.0

    def _cache_valid(self) -> bool:
        # TTL is clamped to a five-second floor to avoid hammering Ariadne.
        ttl = max(5, self._settings.snapshot_ttl_sec)
        age = time.monotonic() - self._cache_ts
        return age < ttl

    def get(self) -> dict[str, Any] | None:
        """Return the cached snapshot or refresh it from Ariadne."""
        if self._cache and self._cache_valid():
            return self._cache
        url = self._settings.ariadne_state_url
        if not url:
            return self._cache or None
        token = self._settings.ariadne_state_token
        headers = {"x-internal-token": token} if token else {}
        try:
            response = httpx.get(url, headers=headers, timeout=10.0)
            response.raise_for_status()
            payload = response.json()
        except Exception as exc:  # network boundary: fall back to stale cache
            log.warning("snapshot fetch failed", extra={"extra": {"error": str(exc)}})
            return self._cache or None
        if isinstance(payload, dict):
            self._cache = payload
            self._cache_ts = time.monotonic()
            return payload
        return self._cache or None
def _node_usage_top(series: list[dict[str, Any]]) -> dict[str, Any] | None:
best = None
for entry in series or []:
if not isinstance(entry, dict):
continue
node = entry.get("node")
value = entry.get("value")
try:
numeric = float(value)
except (TypeError, ValueError):
continue
if best is None or numeric > best["value"]:
best = {"node": node, "value": numeric}
return best
def build_summary(snapshot: dict[str, Any] | None) -> dict[str, Any]:
    """Condense a raw snapshot into the summary shape used for prompts.

    Args:
        snapshot: raw Ariadne snapshot payload, or ``None``/empty.

    Returns:
        Flat dict of summary sections; ``{}`` when there is no snapshot.
    """
    if not snapshot:
        return {}
    # Imported lazily: these builders live in sibling modules of the same
    # package, so a module-level import would risk a circular dependency.
    from .core_b import (
        _build_flux,
        _build_hottest,
        _build_namespace_capacity,
        _build_namespace_capacity_summary,
        _build_node_load_summary,
        _build_pvc,
        _build_workloads,
    )
    from .format_c import _build_cluster_watchlist
    nodes_detail = _nodes_detail(snapshot)
    metrics = _metrics(snapshot)
    summary: dict[str, Any] = {}
    # Pass selected raw sections straight through when well-formed.
    if isinstance(snapshot.get("nodes_summary"), dict):
        summary["nodes_summary"] = snapshot.get("nodes_summary")
    if metrics:
        summary["metrics"] = metrics
    if isinstance(snapshot.get("jobs"), dict):
        summary["jobs"] = snapshot.get("jobs")
    # NOTE: update order matters — later builders read keys written earlier
    # (e.g. _build_hardware_usage consumes "hardware_by_node", and
    # _build_cluster_watchlist reads the partially built summary).
    summary.update(_build_nodes(snapshot))
    summary.update(_build_pressure(snapshot))
    summary.update(_build_hardware(nodes_detail))
    summary.update(_build_hardware_by_node(nodes_detail))
    summary.update(_build_hardware_usage(metrics, summary.get("hardware_by_node")))
    summary.update(_build_node_facts(nodes_detail))
    summary.update(_build_node_ages(nodes_detail))
    summary.update(_build_node_taints(nodes_detail))
    summary.update(_build_capacity(metrics))
    summary.update(_build_pods(metrics))
    summary.update(_build_namespace_pods(snapshot))
    summary.update(_build_namespace_nodes(snapshot))
    summary.update(_build_node_pods(snapshot))
    summary.update(_build_node_pods_top(metrics))
    summary.update(_build_pod_issues(snapshot))
    summary.update(_build_workload_health(snapshot))
    summary.update(_build_events(snapshot))
    summary.update(_build_event_summary(snapshot))
    summary.update(_build_postgres(metrics))
    summary.update(_build_hottest(metrics))
    summary.update(_build_pvc(metrics))
    summary.update(_build_namespace_capacity(metrics))
    summary.update(_build_namespace_capacity_summary(metrics))
    summary.update(_build_longhorn(snapshot))
    summary.update(_build_root_disk_headroom(metrics))
    summary.update(_build_node_load(metrics))
    summary.update(_build_node_load_summary(metrics))
    summary.update(_build_cluster_watchlist(summary))
    summary.update(_build_workloads(snapshot))
    summary.update(_build_flux(snapshot))
    # Cluster-level summary fields and lexicon entries are merged last so
    # they can override/augment the derived sections above.
    _merge_cluster_summary(snapshot, summary)
    _augment_lexicon(summary)
    return summary
def _merge_cluster_summary(snapshot: dict[str, Any], summary: dict[str, Any]) -> None:
    """Copy typed fields from the snapshot-level summary into *summary*."""
    raw = snapshot.get("summary")
    cluster_summary = raw if isinstance(raw, dict) else {}
    if not cluster_summary:
        return
    expected_types: dict[str, type] = {
        "signals": list,
        "profiles": dict,
        "inventory": dict,
        "topology": dict,
        "lexicon": dict,
        "cross_stats": dict,
        "baseline_deltas": dict,
        "pod_issue_summary": dict,
        "trend_requests": dict,
        "pod_waiting_trends": dict,
        "pod_terminated_trends": dict,
    }
    _merge_cluster_fields(summary, cluster_summary, expected_types)
def _merge_cluster_fields(summary: dict[str, Any], cluster_summary: dict[str, Any], field_types: dict[str, type]) -> None:
for key, expected in field_types.items():
value = cluster_summary.get(key)
if isinstance(value, expected):
summary[key] = value
def _augment_lexicon(summary: dict[str, Any]) -> None:
lexicon = summary.get("lexicon")
if not isinstance(lexicon, dict):
lexicon = {"terms": [], "aliases": {}}
terms = list(lexicon.get("terms") or [])
aliases = dict(lexicon.get("aliases") or {})
hardware = summary.get("hardware") if isinstance(summary.get("hardware"), dict) else {}
hardware_map = {
"rpi5": "Raspberry Pi 5 nodes",
"rpi4": "Raspberry Pi 4 nodes",
"rpi": "Raspberry Pi nodes",
"jetson": "NVIDIA Jetson nodes",
"amd64": "AMD64 nodes",
}
existing_terms = {entry.get("term") for entry in terms if isinstance(entry, dict)}
for key, meaning in hardware_map.items():
if key not in hardware:
continue
if key not in existing_terms:
terms.append({"term": key, "meaning": meaning})
if key not in aliases:
aliases[key] = meaning
if "raspberry pi 5" not in aliases and "rpi5" in hardware:
aliases["raspberry pi 5"] = "rpi5"
if "raspberry pi 4" not in aliases and "rpi4" in hardware:
aliases["raspberry pi 4"] = "rpi4"
lexicon["terms"] = terms
lexicon["aliases"] = aliases
summary["lexicon"] = lexicon
def _nodes_detail(snapshot: dict[str, Any]) -> list[dict[str, Any]]:
items = snapshot.get("nodes_detail")
return items if isinstance(items, list) else []
def _metrics(snapshot: dict[str, Any]) -> dict[str, Any]:
metrics = snapshot.get("metrics")
return metrics if isinstance(metrics, dict) else {}
def _build_nodes(snapshot: dict[str, Any]) -> dict[str, Any]:
nodes_summary = snapshot.get("nodes_summary") if isinstance(snapshot.get("nodes_summary"), dict) else {}
if not nodes_summary:
return {}
return {
"nodes": {
"total": nodes_summary.get("total"),
"ready": nodes_summary.get("ready"),
"not_ready": nodes_summary.get("not_ready"),
}
}
def _build_pressure(snapshot: dict[str, Any]) -> dict[str, Any]:
nodes_summary = snapshot.get("nodes_summary") if isinstance(snapshot.get("nodes_summary"), dict) else {}
pressure = nodes_summary.get("pressure_nodes") if isinstance(nodes_summary.get("pressure_nodes"), dict) else {}
if not pressure:
return {}
return {"pressure_nodes": pressure}
def _build_hardware(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
hardware: dict[str, list[str]] = {}
for node in nodes_detail or []:
if not isinstance(node, dict):
continue
name = node.get("name")
hardware_class = node.get("hardware") or "unknown"
if name:
hardware.setdefault(hardware_class, []).append(name)
if not hardware:
return {}
return {"hardware": {key: sorted(value) for key, value in hardware.items()}}
def _build_hardware_by_node(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
mapping: dict[str, str] = {}
for node in nodes_detail or []:
if not isinstance(node, dict):
continue
name = node.get("name")
if isinstance(name, str) and name:
hardware = node.get("hardware") or "unknown"
mapping[name] = str(hardware)
return {"hardware_by_node": mapping} if mapping else {}
def _build_hardware_usage(metrics: dict[str, Any], hardware_by_node: dict[str, Any] | None) -> dict[str, Any]: # noqa: C901
if not isinstance(hardware_by_node, dict) or not hardware_by_node:
return {}
node_load = metrics.get("node_load") if isinstance(metrics.get("node_load"), list) else []
if not node_load:
return {}
buckets: dict[str, dict[str, list[float]]] = {}
for entry in node_load:
if not isinstance(entry, dict):
continue
node = entry.get("node")
if not isinstance(node, str) or not node:
continue
hardware = hardware_by_node.get(node, "unknown")
bucket = buckets.setdefault(str(hardware), {"load_index": [], "cpu": [], "ram": [], "net": [], "io": []})
for key in ("load_index", "cpu", "ram", "net", "io"):
value = entry.get(key)
if isinstance(value, (int, float)):
bucket[key].append(float(value))
output: list[dict[str, Any]] = []
for hardware, metrics_bucket in buckets.items():
row: dict[str, Any] = {"hardware": hardware}
for key, values in metrics_bucket.items():
if values:
row[key] = sum(values) / len(values)
output.append(row)
output.sort(key=lambda item: (-(item.get("load_index") or 0), item.get("hardware") or ""))
return {"hardware_usage_avg": output}
def _build_node_ages(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
ages: list[dict[str, Any]] = []
for node in nodes_detail or []:
if not isinstance(node, dict):
continue
name = node.get("name")
age = node.get("age_hours")
if name and isinstance(age, (int, float)):
ages.append({"name": name, "age_hours": age})
ages.sort(key=lambda item: -(item.get("age_hours") or 0))
return {"node_ages": ages[:5]} if ages else {}
def _count_values(nodes_detail: list[dict[str, Any]], key: str) -> dict[str, int]:
counts: dict[str, int] = {}
for node in nodes_detail or []:
if not isinstance(node, dict):
continue
value = node.get(key)
if isinstance(value, str) and value:
counts[value] = counts.get(value, 0) + 1
return counts
def _build_node_facts(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
    """Summarize per-node platform facts (arch, OS, versions, roles)."""
    if not nodes_detail:
        return {}
    role_counts: dict[str, int] = {}
    for entry in nodes_detail:
        if not isinstance(entry, dict):
            continue
        # Worker nodes are counted both via the flag and any explicit roles.
        if entry.get("is_worker"):
            role_counts["worker"] = role_counts.get("worker", 0) + 1
        raw_roles = entry.get("roles")
        for role in raw_roles if isinstance(raw_roles, list) else []:
            if isinstance(role, str) and role:
                role_counts[role] = role_counts.get(role, 0) + 1
    return {
        "node_arch_counts": _count_values(nodes_detail, "arch"),
        "node_os_counts": _count_values(nodes_detail, "os"),
        "node_kubelet_versions": _count_values(nodes_detail, "kubelet"),
        "node_kernel_versions": _count_values(nodes_detail, "kernel"),
        "node_runtime_versions": _count_values(nodes_detail, "container_runtime"),
        "node_role_counts": role_counts,
    }
def _build_node_taints(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
taints: dict[str, list[str]] = {}
for node in nodes_detail or []:
if not isinstance(node, dict):
continue
name = node.get("name")
if not name:
continue
entries = node.get("taints") if isinstance(node.get("taints"), list) else []
for entry in entries:
if not isinstance(entry, dict):
continue
key = entry.get("key")
effect = entry.get("effect")
if isinstance(key, str) and isinstance(effect, str):
label = f"{key}:{effect}"
taints.setdefault(label, []).append(name)
if not taints:
return {}
return {"node_taints": {key: sorted(names) for key, names in taints.items()}}
def _build_root_disk_headroom(metrics: dict[str, Any]) -> dict[str, Any]:
node_usage = metrics.get("node_usage") if isinstance(metrics.get("node_usage"), dict) else {}
disk = node_usage.get("disk") if isinstance(node_usage.get("disk"), list) else []
if not disk:
return {}
entries = []
for entry in disk:
if not isinstance(entry, dict):
continue
node = entry.get("node")
try:
used_pct = float(entry.get("value"))
except (TypeError, ValueError):
continue
headroom = max(0.0, 100.0 - used_pct)
if node:
entries.append({"node": node, "headroom_pct": headroom, "used_pct": used_pct})
entries.sort(key=lambda item: (item.get("headroom_pct") or 0.0, item.get("node") or ""))
return {"root_disk_low_headroom": entries[:5]} if entries else {}
def _build_longhorn(snapshot: dict[str, Any]) -> dict[str, Any]:
longhorn = snapshot.get("longhorn")
return {"longhorn": longhorn} if isinstance(longhorn, dict) and longhorn else {}
def _build_node_load(metrics: dict[str, Any]) -> dict[str, Any]:
node_load = metrics.get("node_load")
if not isinstance(node_load, list) or not node_load:
return {}
return {"node_load": node_load}
def _build_pods(metrics: dict[str, Any]) -> dict[str, Any]:
pods = {
"running": metrics.get("pods_running"),
"pending": metrics.get("pods_pending"),
"failed": metrics.get("pods_failed"),
"succeeded": metrics.get("pods_succeeded"),
}
if not any(value is not None for value in pods.values()):
return {}
return {"pods": pods}
def _build_capacity(metrics: dict[str, Any]) -> dict[str, Any]:
if not metrics:
return {}
capacity = {
"cpu": metrics.get("capacity_cpu"),
"allocatable_cpu": metrics.get("allocatable_cpu"),
"mem_bytes": metrics.get("capacity_mem_bytes"),
"allocatable_mem_bytes": metrics.get("allocatable_mem_bytes"),
"pods": metrics.get("capacity_pods"),
"allocatable_pods": metrics.get("allocatable_pods"),
}
if not any(value is not None for value in capacity.values()):
return {}
return {"capacity": capacity}
def _build_namespace_pods(snapshot: dict[str, Any]) -> dict[str, Any]:
namespaces = snapshot.get("namespace_pods")
if not isinstance(namespaces, list) or not namespaces:
return {}
return {"namespace_pods": namespaces}
def _build_namespace_nodes(snapshot: dict[str, Any]) -> dict[str, Any]:
namespace_nodes = snapshot.get("namespace_nodes")
if not isinstance(namespace_nodes, list) or not namespace_nodes:
return {}
return {"namespace_nodes": namespace_nodes}
def _build_node_pods(snapshot: dict[str, Any]) -> dict[str, Any]:
node_pods = snapshot.get("node_pods")
if not isinstance(node_pods, list) or not node_pods:
return {}
return {"node_pods": node_pods}
def _build_node_pods_top(metrics: dict[str, Any]) -> dict[str, Any]:
top = metrics.get("node_pods_top")
if not isinstance(top, list) or not top:
return {}
return {"node_pods_top": top}
def _build_pod_issues(snapshot: dict[str, Any]) -> dict[str, Any]:
pod_issues = snapshot.get("pod_issues")
if not isinstance(pod_issues, dict) or not pod_issues:
return {}
return {"pod_issues": pod_issues}
def _build_workload_health(snapshot: dict[str, Any]) -> dict[str, Any]:
health = snapshot.get("workloads_health")
if not isinstance(health, dict) or not health:
return {}
deployments = health.get("deployments")
statefulsets = health.get("statefulsets")
daemonsets = health.get("daemonsets")
if not isinstance(deployments, dict) or not isinstance(statefulsets, dict) or not isinstance(daemonsets, dict):
return {}
return {
"workloads_health": {
"deployments": deployments,
"statefulsets": statefulsets,
"daemonsets": daemonsets,
}
}
def _build_events(snapshot: dict[str, Any]) -> dict[str, Any]:
events = snapshot.get("events")
if not isinstance(events, dict) or not events:
return {}
return {"events": events}
def _build_event_summary(snapshot: dict[str, Any]) -> dict[str, Any]:
events = snapshot.get("events")
if not isinstance(events, dict) or not events:
return {}
summary = {}
if isinstance(events.get("warnings_top_reason"), dict):
summary["warnings_top_reason"] = events.get("warnings_top_reason")
if events.get("warnings_latest"):
summary["warnings_latest"] = events.get("warnings_latest")
return {"event_summary": summary} if summary else {}
def _build_postgres(metrics: dict[str, Any]) -> dict[str, Any]:
postgres = metrics.get("postgres_connections") if isinstance(metrics.get("postgres_connections"), dict) else {}
if not postgres:
return {}
return {
"postgres": {
"used": postgres.get("used"),
"max": postgres.get("max"),
"hottest_db": postgres.get("hottest_db"),
"by_db": postgres.get("by_db"),
}
}

View File

@ -0,0 +1,57 @@
from __future__ import annotations
from typing import Any
from .core_a import _node_usage_top
def _build_hottest(metrics: dict[str, Any]) -> dict[str, Any]:
    """Pick the busiest node per resource dimension."""
    raw = metrics.get("node_usage")
    node_usage = raw if isinstance(raw, dict) else {}
    hottest = {
        dim: top
        for dim in ("cpu", "ram", "net", "io", "disk")
        if (top := _node_usage_top(node_usage.get(dim, [])))
    }
    return {"hottest": hottest} if hottest else {}
def _build_pvc(metrics: dict[str, Any]) -> dict[str, Any]:
pvc_usage = metrics.get("pvc_usage_top") if isinstance(metrics.get("pvc_usage_top"), list) else []
if not pvc_usage:
return {}
return {"pvc_usage_top": pvc_usage}
def _build_namespace_capacity(metrics: dict[str, Any]) -> dict[str, Any]:
capacity = metrics.get("namespace_capacity")
if not isinstance(capacity, list) or not capacity:
return {}
return {"namespace_capacity": capacity}
def _build_namespace_capacity_summary(metrics: dict[str, Any]) -> dict[str, Any]:
summary = metrics.get("namespace_capacity_summary")
if not isinstance(summary, dict) or not summary:
return {}
return {"namespace_capacity_summary": summary}
def _build_node_load_summary(metrics: dict[str, Any]) -> dict[str, Any]:
summary = metrics.get("node_load_summary")
if not isinstance(summary, dict) or not summary:
return {}
return {"node_load_summary": summary}
def _build_workloads(snapshot: dict[str, Any]) -> dict[str, Any]:
workloads = snapshot.get("workloads") if isinstance(snapshot.get("workloads"), list) else []
return {"workloads": workloads}
def _build_flux(snapshot: dict[str, Any]) -> dict[str, Any]:
flux = snapshot.get("flux") if isinstance(snapshot.get("flux"), dict) else {}
return {"flux": flux}
# Export every non-dunder module-level name so the package __init__ can pull
# these underscore-prefixed builders in via ``from .core_b import *`` (star
# import normally skips ``_``-prefixed names unless __all__ lists them).
# NOTE(review): this also exports imported names (e.g. ``Any``,
# ``_node_usage_top``) — presumably harmless; confirm intended.
__all__ = [name for name in globals() if not name.startswith("__")]

View File

@ -0,0 +1,497 @@
from __future__ import annotations
from typing import Any
from .core_a import _BYTES_GB, _BYTES_KB, _BYTES_MB
from .core_b import *
def _format_float(value: Any) -> str:
try:
numeric = float(value)
except (TypeError, ValueError):
return str(value)
return f"{numeric:.2f}".rstrip("0").rstrip(".")
def _format_rate_bytes(value: Any) -> str:
    """Render a bytes-per-second rate in B/s, KB/s, or MB/s."""
    try:
        rate = float(value)
    except (TypeError, ValueError):
        return str(value)
    for threshold, unit in ((_BYTES_MB, "MB/s"), (_BYTES_KB, "KB/s")):
        if rate >= threshold:
            return f"{rate / threshold:.2f} {unit}"
    return f"{rate:.2f} B/s"
def _format_bytes(value: Any) -> str:
    """Render a byte count in B, KB, MB, or GB."""
    try:
        size = float(value)
    except (TypeError, ValueError):
        return str(value)
    for threshold, unit in ((_BYTES_GB, "GB"), (_BYTES_MB, "MB"), (_BYTES_KB, "KB")):
        if size >= threshold:
            return f"{size / threshold:.2f} {unit}"
    return f"{size:.2f} B"
def _format_kv_map(values: dict[str, Any]) -> str:
parts = []
for key, value in values.items():
parts.append(f"{key}={value}")
return ", ".join(parts)
def _format_names(names: list[str]) -> str:
if not names:
return ""
return ", ".join(sorted(names))
def _append_nodes(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append node-count lines (totals, readiness, arch/role breakdowns)."""
    nodes = summary.get("nodes") if isinstance(summary.get("nodes"), dict) else {}
    if not nodes:
        return
    # Worker readiness is appended only when both counters exist.
    workers = {}
    if isinstance(summary.get("nodes_summary"), dict):
        workers = summary["nodes_summary"].get("workers") or {}
    workers_total = workers.get("total")
    workers_ready = workers.get("ready")
    workers_str = ""
    if workers_total is not None and workers_ready is not None:
        workers_str = f", workers_ready={workers_ready}/{workers_total}"
    total = nodes.get("total")
    ready = nodes.get("ready")
    not_ready = nodes.get("not_ready")
    if not_ready is None:
        not_ready = 0
    lines.append(f"nodes: total={total}, ready={ready}, not_ready={not_ready}{workers_str}")
    # Duplicate the individual counters as standalone lookup lines.
    if total is not None:
        lines.append(f"nodes_total: {total}")
    if ready is not None:
        lines.append(f"nodes_ready: {ready}")
    # not_ready was defaulted to 0 above, so this branch always fires.
    if not_ready is not None:
        lines.append(f"nodes_not_ready_count: {not_ready}")
    # Remaining breakdowns require the richer nodes_summary section.
    if not isinstance(summary.get("nodes_summary"), dict):
        return
    not_ready_names = summary["nodes_summary"].get("not_ready_names") or []
    if not_ready_names:
        lines.append("nodes_not_ready: " + _format_names(not_ready_names))
    by_arch = summary["nodes_summary"].get("by_arch") or {}
    if isinstance(by_arch, dict) and by_arch:
        lines.append("archs: " + _format_kv_map(by_arch))
    by_role = summary["nodes_summary"].get("by_role") or {}
    if isinstance(by_role, dict) and by_role:
        lines.append("roles: " + _format_kv_map(by_role))
def _append_hardware(lines: list[str], summary: dict[str, Any]) -> None:
    """Append one line counting nodes per hardware class, with names."""
    hardware = summary.get("hardware")
    if not isinstance(hardware, dict) or not hardware:
        return
    rendered = []
    for hw_class, members in hardware.items():
        if not isinstance(members, list):
            continue
        entry = f"{hw_class}={len(members)}"
        names = _format_names([str(member) for member in members if member])
        if names:
            entry = f"{entry} ({names})"
        rendered.append(entry)
    if rendered:
        lines.append("hardware: " + "; ".join(sorted(rendered)))
def _append_hardware_groups(lines: list[str], summary: dict[str, Any]) -> None:
    """Append a line listing node names under each hardware class."""
    hardware = summary.get("hardware")
    if not isinstance(hardware, dict) or not hardware:
        return
    rendered = []
    for hw_class, members in hardware.items():
        if not isinstance(members, list):
            continue
        names = _format_names([str(member) for member in members if member])
        if names:
            rendered.append(f"{hw_class}={names}")
    if rendered:
        lines.append("hardware_nodes: " + "; ".join(sorted(rendered)))
def _append_node_ages(lines: list[str], summary: dict[str, Any]) -> None:
    """Append the top-three node ages as ``name=XX.Xh`` entries."""
    raw = summary.get("node_ages")
    ages = raw if isinstance(raw, list) else []
    rendered = []
    for entry in ages[:3]:
        if not isinstance(entry, dict):
            continue
        name = entry.get("name")
        age = entry.get("age_hours")
        if name and isinstance(age, (int, float)):
            rendered.append(f"{name}={_format_float(age)}h")
    if rendered:
        lines.append("node_age_top: " + "; ".join(rendered))
def _append_node_taints(lines: list[str], summary: dict[str, Any]) -> None:
    """Append taint labels with their node counts and member names."""
    taints = summary.get("node_taints")
    if not isinstance(taints, dict) or not taints:
        return
    rendered = []
    for label, members in taints.items():
        if not isinstance(members, list):
            continue
        names = _format_names([str(member) for member in members if member])
        entry = f"{label}={len(members)}"
        if names:
            entry = f"{entry} ({names})"
        rendered.append(entry)
    if rendered:
        lines.append("node_taints: " + "; ".join(sorted(rendered)))
def _append_node_facts(lines: list[str], summary: dict[str, Any]) -> None:
def top_counts(label: str, counts: dict[str, int], limit: int = 4) -> None:
if not counts:
return
top = sorted(counts.items(), key=lambda item: (-item[1], item[0]))[:limit]
rendered = "; ".join([f"{name}={count}" for name, count in top])
if rendered:
lines.append(f"{label}: {rendered}")
top_counts("node_arch", summary.get("node_arch_counts") or {})
top_counts("node_os", summary.get("node_os_counts") or {})
top_counts("node_kubelet_versions", summary.get("node_kubelet_versions") or {})
top_counts("node_kernel_versions", summary.get("node_kernel_versions") or {})
top_counts("node_runtime_versions", summary.get("node_runtime_versions") or {})
top_counts("node_roles", summary.get("node_role_counts") or {})
def _append_pressure(lines: list[str], summary: dict[str, Any]) -> None:
    """Append pressure conditions with affected node counts and names."""
    pressure = summary.get("pressure_nodes")
    if not isinstance(pressure, dict) or not pressure:
        return
    rendered = []
    for condition, members in sorted(pressure.items()):
        if not members:
            continue
        names = _format_names([str(member) for member in members if member])
        entry = f"{condition}={len(members)}"
        if names:
            entry = f"{entry} ({names})"
        rendered.append(entry)
    if rendered:
        lines.append("node_pressure: " + "; ".join(rendered))
def _append_pods(lines: list[str], summary: dict[str, Any]) -> None:
pods = summary.get("pods") if isinstance(summary.get("pods"), dict) else {}
if not pods:
return
lines.append(
"pods: running={running}, pending={pending}, failed={failed}, succeeded={succeeded}".format(
running=pods.get("running"),
pending=pods.get("pending"),
failed=pods.get("failed"),
succeeded=pods.get("succeeded"),
)
)
def _append_capacity(lines: list[str], summary: dict[str, Any]) -> None:
    """Append a capacity line mixing count and byte formatting."""
    capacity = summary.get("capacity")
    if not isinstance(capacity, dict) or not capacity:
        return
    # (source key, output label, formatter) — order fixed for stable output.
    renderers = (
        ("cpu", "cpu", _format_float),
        ("allocatable_cpu", "alloc_cpu", _format_float),
        ("mem_bytes", "mem", _format_bytes),
        ("allocatable_mem_bytes", "alloc_mem", _format_bytes),
        ("pods", "pods", _format_float),
        ("allocatable_pods", "alloc_pods", _format_float),
    )
    rendered = [
        f"{label}={render(capacity.get(key))}"
        for key, label, render in renderers
        if capacity.get(key) is not None
    ]
    if rendered:
        lines.append("capacity: " + "; ".join(rendered))
def _append_namespace_pods(lines: list[str], summary: dict[str, Any]) -> None:
namespaces = summary.get("namespace_pods")
if not isinstance(namespaces, list) or not namespaces:
return
top = sorted(
(item for item in namespaces if isinstance(item, dict)),
key=lambda item: (-int(item.get("pods_total") or 0), item.get("namespace") or ""),
)[:8]
parts = []
for item in top:
name = item.get("namespace")
total = item.get("pods_total")
running = item.get("pods_running")
if not name:
continue
label = f"{name}={total}"
if running is not None:
label = f"{label} (running={running})"
parts.append(label)
if parts:
lines.append("namespaces_top: " + "; ".join(parts))
def _append_namespace_nodes(lines: list[str], summary: dict[str, Any]) -> None:
namespace_nodes = summary.get("namespace_nodes")
if not isinstance(namespace_nodes, list) or not namespace_nodes:
return
top = sorted(
(item for item in namespace_nodes if isinstance(item, dict)),
key=lambda item: (-int(item.get("pods_total") or 0), item.get("namespace") or ""),
)[:8]
parts = []
for item in top:
namespace = item.get("namespace")
pods_total = item.get("pods_total")
primary = item.get("primary_node")
if namespace:
label = f"{namespace}={pods_total}"
if primary:
label = f"{label} (primary={primary})"
parts.append(label)
if parts:
lines.append("namespace_nodes_top: " + "; ".join(parts))
def _append_node_pods(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append per-node pod leaderboards: top-8 list, single max, namespaces."""
    node_pods = summary.get("node_pods")
    if not isinstance(node_pods, list) or not node_pods:
        return
    # Normalize pods_total to int (entries with non-numeric totals are dropped
    # from the leaderboard) while keeping the remaining entry fields intact.
    sortable: list[dict[str, Any]] = []
    for item in node_pods:
        if not isinstance(item, dict):
            continue
        try:
            pods_value = int(item.get("pods_total") or 0)
        except (TypeError, ValueError):
            continue
        sortable.append({**item, "pods_total": pods_value})
    top = sorted(sortable, key=lambda item: (-int(item.get("pods_total") or 0), item.get("node") or ""))[:8]
    # Max entry is computed separately from the raw list; note int(None)
    # raises TypeError here, so entries without a pods_total are skipped
    # (unlike above, where a missing total defaults to 0).
    max_entry = None
    for entry in node_pods:
        if not isinstance(entry, dict):
            continue
        pods_total = entry.get("pods_total")
        try:
            pods_value = int(pods_total)
        except (TypeError, ValueError):
            continue
        if max_entry is None or pods_value > max_entry["pods_total"]:
            max_entry = {
                "node": entry.get("node"),
                "pods_total": pods_value,
                "namespaces_top": entry.get("namespaces_top") or [],
            }
    # Render the top-8 leaderboard line.
    parts = []
    for item in top:
        node = item.get("node")
        pods_total = item.get("pods_total")
        namespaces = item.get("namespaces_top") or []
        ns_label = ""
        if namespaces:
            # namespaces_top entries are (name, count) pairs.
            ns_label = ", ".join([f"{name}={count}" for name, count in namespaces])
        if node:
            label = f"{node}={pods_total}"
            if ns_label:
                label = f"{label} ({ns_label})"
            parts.append(label)
    if parts:
        lines.append("node_pods_top: " + "; ".join(parts))
    # Render the single busiest node, if it has a string node name.
    if max_entry and isinstance(max_entry.get("node"), str):
        ns_label = ""
        namespaces = max_entry.get("namespaces_top") or []
        if namespaces:
            ns_label = ", ".join([f"{name}={count}" for name, count in namespaces])
        label = f"{max_entry.get('node')}={max_entry.get('pods_total')}"
        if ns_label:
            label = f"{label} ({ns_label})"
        lines.append("node_pods_max: " + label)
    # One per-node namespace breakdown line for each top entry that has one.
    for item in top:
        node = item.get("node")
        namespaces = item.get("namespaces_top") or []
        if not node or not namespaces:
            continue
        ns_label = ", ".join([f"{name}={count}" for name, count in namespaces])
        lines.append(f"node_namespaces_top: {node} ({ns_label})")
def _append_pod_issues(lines: list[str], summary: dict[str, Any]) -> None:
    """Append every available pod-issue line in a fixed order."""
    pod_issues = summary.get("pod_issues")
    if not isinstance(pod_issues, dict) or not pod_issues:
        return
    renderers = (
        _format_pod_issue_counts,
        _format_pod_issue_top,
        _format_pod_pending_oldest,
        _format_pod_pending_over_15m,
        _format_pod_waiting_reasons,
    )
    for render in renderers:
        line = render(pod_issues)
        if line:
            lines.append(line)
def _format_pod_issue_counts(pod_issues: dict[str, Any]) -> str:
counts = pod_issues.get("counts") if isinstance(pod_issues.get("counts"), dict) else {}
if not counts:
return ""
parts = []
for key in ("Failed", "Pending", "Unknown"):
if key in counts:
parts.append(f"{key}={counts.get(key)}")
return "pod_issues: " + "; ".join(parts) if parts else ""
def _format_pod_issue_top(pod_issues: dict[str, Any]) -> str:
items = pod_issues.get("items") if isinstance(pod_issues.get("items"), list) else []
if not items:
return ""
top = []
for item in items[:5]:
if not isinstance(item, dict):
continue
namespace = item.get("namespace")
pod = item.get("pod")
if not namespace or not pod:
continue
phase = item.get("phase") or ""
restarts = item.get("restarts") or 0
top.append(f"{namespace}/{pod}({phase},r={restarts})")
return "pod_issues_top: " + "; ".join(top) if top else ""
def _format_pod_pending_oldest(pod_issues: dict[str, Any]) -> str:
    """Render the five oldest pending pods with age and optional reason."""
    raw = pod_issues.get("pending_oldest")
    pending = raw if isinstance(raw, list) else []
    rendered = []
    for item in pending[:5]:
        if not isinstance(item, dict):
            continue
        namespace = item.get("namespace")
        pod = item.get("pod")
        age = item.get("age_hours")
        if not (namespace and pod) or age is None:
            continue
        label = f"{namespace}/{pod}={_format_float(age)}h"
        reason = item.get("reason") or ""
        if reason:
            label = f"{label} ({reason})"
        rendered.append(label)
    return "pods_pending_oldest: " + "; ".join(rendered) if rendered else ""
def _format_pod_waiting_reasons(pod_issues: dict[str, Any]) -> str:
reasons = pod_issues.get("waiting_reasons") if isinstance(pod_issues.get("waiting_reasons"), dict) else {}
if not reasons:
return ""
pairs = sorted(reasons.items(), key=lambda item: (-item[1], item[0]))[:5]
return "pod_waiting_reasons: " + "; ".join([f"{key}={val}" for key, val in pairs])
def _format_pod_pending_over_15m(pod_issues: dict[str, Any]) -> str:
count = pod_issues.get("pending_over_15m")
if count is None:
return ""
try:
count_val = int(count)
except (TypeError, ValueError):
return ""
return f"pods_pending_over_15m: {count_val}"
def _append_workload_health(lines: list[str], summary: dict[str, Any]) -> None:
health = summary.get("workloads_health") if isinstance(summary.get("workloads_health"), dict) else {}
if not health:
return
deployments = health.get("deployments") if isinstance(health.get("deployments"), dict) else {}
statefulsets = health.get("statefulsets") if isinstance(health.get("statefulsets"), dict) else {}
daemonsets = health.get("daemonsets") if isinstance(health.get("daemonsets"), dict) else {}
total_not_ready = 0
for entry in (deployments, statefulsets, daemonsets):
total_not_ready += int(entry.get("not_ready") or 0)
lines.append(
"workloads_not_ready: "
f"deployments={deployments.get('not_ready', 0)}, "
f"statefulsets={statefulsets.get('not_ready', 0)}, "
f"daemonsets={daemonsets.get('not_ready', 0)} "
f"(total={total_not_ready})"
)
def _append_node_usage_stats(lines: list[str], summary: dict[str, Any]) -> None:
    """Append cluster-average node usage for cpu/ram/net/io/disk."""
    metrics = summary.get("metrics")
    metrics = metrics if isinstance(metrics, dict) else {}
    stats = metrics.get("node_usage_stats")
    if not isinstance(stats, dict) or not stats:
        return
    rendered: list[str] = []
    for name in ("cpu", "ram", "net", "io", "disk"):
        block = stats.get(name)
        if not isinstance(block, dict):
            continue
        avg = block.get("avg")
        if avg is None:
            continue
        # net/io are byte-rate values; the rest are plain numbers.
        text = _format_rate_bytes(avg) if name in {"net", "io"} else _format_float(avg)
        rendered.append(f"{name}={text}")
    if rendered:
        lines.append("node_usage_avg: " + "; ".join(rendered))
def _append_events(lines: list[str], summary: dict[str, Any]) -> None:
events = summary.get("events") if isinstance(summary.get("events"), dict) else {}
if not events:
return
total = events.get("warnings_total")
by_reason = events.get("warnings_by_reason") if isinstance(events.get("warnings_by_reason"), dict) else {}
if total is None:
return
if by_reason:
top = sorted(by_reason.items(), key=lambda item: (-item[1], item[0]))[:3]
reasons = "; ".join([f"{reason}={count}" for reason, count in top])
lines.append(f"warnings: total={total}; top={reasons}")
else:
lines.append(f"warnings: total={total}")
def _append_pvc_usage(lines: list[str], summary: dict[str, Any]) -> None:
    """Append the fullest PVCs as `namespace/claim=usage%`."""
    entries = summary.get("pvc_usage_top")
    if not isinstance(entries, list) or not entries:
        return
    rendered: list[str] = []
    for item in entries:
        if not isinstance(item, dict):
            continue
        meta = item.get("metric")
        meta = meta if isinstance(meta, dict) else {}
        ns = meta.get("namespace")
        claim = meta.get("persistentvolumeclaim")
        if ns and claim:
            rendered.append(f"{ns}/{claim}={_format_float(item.get('value'))}%")
    if rendered:
        lines.append("pvc_usage_top: " + "; ".join(rendered))
def _append_root_disk_headroom(lines: list[str], summary: dict[str, Any]) -> None:
    """Append nodes whose root disk headroom is running low, as `node=pct%`."""
    entries = summary.get("root_disk_low_headroom")
    if not isinstance(entries, list) or not entries:
        return
    rendered: list[str] = []
    for item in entries:
        if not isinstance(item, dict):
            continue
        node = item.get("node")
        pct = item.get("headroom_pct")
        if node and pct is not None:
            rendered.append(f"{node}={_format_float(pct)}%")
    if rendered:
        lines.append("root_disk_low_headroom: " + "; ".join(rendered))
# Export every module-level name except dunders. Private "_" helpers are
# intentionally included so companion formatter modules can pull them in
# via `from <this module> import *`.
__all__ = [name for name in globals() if not name.startswith("__")]

View File

@ -0,0 +1,435 @@
from __future__ import annotations
from typing import Any
from .core_a import _VALUE_PAIR_LEN
from .format_a import *
def _append_namespace_metric_series(
lines: list[str],
label: str,
entries: list[Any],
formatter: Any,
) -> None:
parts = []
for entry in entries:
if not isinstance(entry, dict):
continue
metric = entry.get("metric") if isinstance(entry.get("metric"), dict) else {}
namespace = metric.get("namespace")
value = entry.get("value")
if namespace:
parts.append(f"{namespace}={formatter(value)}")
if parts:
lines.append(f"{label}: " + "; ".join(parts))
def _append_longhorn(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append Longhorn volume totals, state/robustness maps, and unhealthy volumes."""
    longhorn = summary.get("longhorn")
    if not isinstance(longhorn, dict) or not longhorn:
        return
    total = longhorn.get("total")
    attached = longhorn.get("attached_count")
    detached = longhorn.get("detached_count")
    degraded = longhorn.get("degraded_count")
    if total is not None:
        if attached is None and detached is None and degraded is None:
            # Only an aggregate unhealthy count is present in this shape.
            unhealthy_count = longhorn.get("unhealthy_count")
            lines.append(f"longhorn: total={total}, unhealthy={unhealthy_count if unhealthy_count is not None else 0}")
        else:
            lines.append(
                f"longhorn: total={total}, attached={attached if attached is not None else 0}, "
                f"detached={detached if detached is not None else 0}, "
                f"degraded={degraded if degraded is not None else 0}"
            )
    for label, key in (("longhorn_state", "by_state"), ("longhorn_robustness", "by_robustness")):
        mapping = longhorn.get(key)
        if isinstance(mapping, dict) and mapping:
            lines.append(f"{label}: " + _format_kv_map(mapping))
    unhealthy = longhorn.get("unhealthy")
    if not isinstance(unhealthy, list) or not unhealthy:
        return
    rendered: list[str] = []
    for item in unhealthy[:5]:
        if not isinstance(item, dict):
            continue
        name = item.get("name")
        if not name:
            continue
        state = item.get("state")
        robustness = item.get("robustness")
        rendered.append(f"{name}({state},{robustness})" if state or robustness else name)
    if rendered:
        lines.append("longhorn_unhealthy_top: " + "; ".join(rendered))
def _append_namespace_usage(lines: list[str], summary: dict[str, Any]) -> None:
    """Append top namespace CPU and memory usage lines."""
    metrics = summary.get("metrics")
    metrics = metrics if isinstance(metrics, dict) else {}
    for key, formatter in (("namespace_cpu_top", _format_float), ("namespace_mem_top", _format_bytes)):
        entries = metrics.get(key)
        _append_namespace_metric_series(lines, key, entries if isinstance(entries, list) else [], formatter)
def _append_namespace_requests(lines: list[str], summary: dict[str, Any]) -> None:
    """Append top namespace CPU and memory request lines."""
    metrics = summary.get("metrics")
    metrics = metrics if isinstance(metrics, dict) else {}
    for key, formatter in (
        ("namespace_cpu_requests_top", _format_float),
        ("namespace_mem_requests_top", _format_bytes),
    ):
        entries = metrics.get(key)
        _append_namespace_metric_series(lines, key, entries if isinstance(entries, list) else [], formatter)
def _append_namespace_io_net(lines: list[str], summary: dict[str, Any]) -> None:
    """Append top namespace network and disk-I/O rate lines."""
    metrics = summary.get("metrics")
    metrics = metrics if isinstance(metrics, dict) else {}
    for key in ("namespace_net_top", "namespace_io_top"):
        entries = metrics.get(key)
        _append_namespace_metric_series(lines, key, entries if isinstance(entries, list) else [], _format_rate_bytes)
def _append_pod_usage(lines: list[str], summary: dict[str, Any]) -> None:
    """Append top pod CPU/memory usage lines, cluster-wide and per node.

    Reads the `pod_cpu_top`, `pod_cpu_top_node`, `pod_mem_top` and
    `pod_mem_top_node` series from `summary["metrics"]`; malformed entries
    are skipped and empty series produce no line.
    """
    metrics = summary.get("metrics") if isinstance(summary.get("metrics"), dict) else {}

    def series(key: str) -> list[Any]:
        # Normalize missing/malformed series to an empty list.
        value = metrics.get(key)
        return value if isinstance(value, list) else []

    # (line label, series entries, value formatter, node-qualified?)
    specs = (
        ("pod_cpu_top", series("pod_cpu_top"), _format_float, False),
        ("pod_cpu_top_node", series("pod_cpu_top_node"), _format_float, True),
        ("pod_mem_top", series("pod_mem_top"), _format_bytes, False),
        ("pod_mem_top_node", series("pod_mem_top_node"), _format_bytes, True),
    )
    for label, entries, formatter, with_node in specs:
        parts = _pod_usage_parts(entries, formatter, with_node)
        if parts:
            lines.append(f"{label}: " + "; ".join(parts))


def _pod_usage_parts(entries: list[Any], formatter: Any, with_node: bool) -> list[str]:
    """Render `ns/pod=value` (or `node:ns/pod=value`) parts for one series."""
    parts: list[str] = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        metric = entry.get("metric") if isinstance(entry.get("metric"), dict) else {}
        namespace = metric.get("namespace")
        pod = metric.get("pod")
        value = entry.get("value")
        if not namespace or not pod or value is None:
            continue
        if with_node:
            node = metric.get("node")
            if not node:
                continue
            parts.append(f"{node}:{namespace}/{pod}={formatter(value)}")
        else:
            parts.append(f"{namespace}/{pod}={formatter(value)}")
    return parts
def _append_restarts(lines: list[str], summary: dict[str, Any]) -> None:
    """Append 1h restart leaders per pod and per namespace.

    Both lines fall back to an explicit "none" when nothing qualifies,
    except the namespace line which is omitted when entries exist but none
    render.
    """
    metrics = summary.get("metrics")
    metrics = metrics if isinstance(metrics, dict) else {}
    raw_pods = metrics.get("top_restarts_1h") or []
    if not isinstance(raw_pods, list):
        raw_pods = []
    pod_parts: list[str] = []
    for entry in raw_pods:
        metric = entry.get("metric") if isinstance(entry, dict) else {}
        value = entry.get("value") if isinstance(entry, dict) else []
        if not isinstance(metric, dict) or not isinstance(value, list) or len(value) < _VALUE_PAIR_LEN:
            continue
        ns = metric.get("namespace")
        pod = metric.get("pod")
        if ns and pod:
            pod_parts.append(f"{ns}/{pod}={_format_float(value[1])}")
    lines.append("restarts_1h_top: " + "; ".join(pod_parts) if pod_parts else "restarts_1h_top: none")
    raw_ns = metrics.get("restart_namespace_top") or []
    if isinstance(raw_ns, list) and raw_ns:
        ns_parts: list[str] = []
        for entry in raw_ns:
            metric = entry.get("metric") if isinstance(entry, dict) else {}
            value = entry.get("value")
            ns = metric.get("namespace") if isinstance(metric, dict) else None
            if ns and value is not None:
                ns_parts.append(f"{ns}={_format_float(value)}")
        if ns_parts:
            lines.append("restarts_1h_namespace_top: " + "; ".join(ns_parts))
    else:
        lines.append("restarts_1h_namespace_top: none")
def _append_job_failures(lines: list[str], summary: dict[str, Any]) -> None:
    """Append per-job failure counts over the last 24h."""
    metrics = summary.get("metrics")
    metrics = metrics if isinstance(metrics, dict) else {}
    entries = metrics.get("job_failures_24h")
    if not isinstance(entries, list) or not entries:
        return
    rendered: list[str] = []
    for item in entries:
        if not isinstance(item, dict):
            continue
        meta = item.get("metric")
        meta = meta if isinstance(meta, dict) else {}
        ns = meta.get("namespace")
        # Some series label the job "job_name", others plain "job".
        job = meta.get("job_name") or meta.get("job")
        value = item.get("value")
        if ns and job and value is not None:
            rendered.append(f"{ns}/{job}={_format_float(value)}")
    if rendered:
        lines.append("job_failures_24h: " + "; ".join(rendered))
def _append_jobs(lines: list[str], summary: dict[str, Any]) -> None:
    """Append job totals plus failing and oldest-active job lines."""
    jobs = summary.get("jobs")
    if not isinstance(jobs, dict) or not jobs:
        return
    # Each renderer returns "" when its section has nothing to report.
    for render in (_format_jobs_totals, _format_jobs_failing, _format_jobs_active_oldest):
        line = render(jobs)
        if line:
            lines.append(line)
def _format_jobs_totals(jobs: dict[str, Any]) -> str:
totals = jobs.get("totals") if isinstance(jobs.get("totals"), dict) else {}
if not totals:
return ""
return "jobs: total={total}, active={active}, failed={failed}, succeeded={succeeded}".format(
total=totals.get("total"),
active=totals.get("active"),
failed=totals.get("failed"),
succeeded=totals.get("succeeded"),
)
def _format_jobs_failing(jobs: dict[str, Any]) -> str:
    """Render up to five failing jobs with failure counts and optional age."""
    failing = jobs.get("failing")
    if not isinstance(failing, list) or not failing:
        return ""
    rendered: list[str] = []
    for entry in failing[:5]:
        if not isinstance(entry, dict):
            continue
        ns = entry.get("namespace")
        job = entry.get("job")
        failed = entry.get("failed")
        if not ns or not job or failed is None:
            continue
        text = f"{ns}/{job}={failed}"
        age = entry.get("age_hours")
        if age is not None:
            text = f"{text} ({_format_float(age)}h)"
        rendered.append(text)
    return "jobs_failing_top: " + "; ".join(rendered) if rendered else ""
def _format_jobs_active_oldest(jobs: dict[str, Any]) -> str:
    """Render the longest-running active jobs with their age in hours."""
    entries = jobs.get("active_oldest")
    if not isinstance(entries, list) or not entries:
        return ""
    rendered = [
        f"{entry.get('namespace')}/{entry.get('job')}={_format_float(entry.get('age_hours'))}h"
        for entry in entries[:5]
        if isinstance(entry, dict)
        and entry.get("namespace")
        and entry.get("job")
        and entry.get("age_hours") is not None
    ]
    return "jobs_active_oldest: " + "; ".join(rendered) if rendered else ""
def _append_postgres(lines: list[str], summary: dict[str, Any]) -> None:
    """Append PostgreSQL connection usage, overall and per database."""
    postgres = summary.get("postgres")
    if not isinstance(postgres, dict) or not postgres:
        return
    used = postgres.get("used")
    max_conn = postgres.get("max")
    lines.append(f"postgres: used={used}, max={max_conn}, hottest_db={postgres.get('hottest_db') or ''}")
    if used is not None or max_conn is not None:
        lines.append(f"postgres_connections_total: used={_format_float(used)}, max={_format_float(max_conn)}")
    by_db = postgres.get("by_db")
    if not isinstance(by_db, list) or not by_db:
        return
    rendered: list[str] = []
    for entry in by_db:
        if not isinstance(entry, dict):
            continue
        meta = entry.get("metric")
        meta = meta if isinstance(meta, dict) else {}
        value = entry.get("value")
        # Instant-vector values arrive as [timestamp, value] pairs.
        if isinstance(value, list) and len(value) >= _VALUE_PAIR_LEN:
            value = value[1]
        name = meta.get("datname")
        if name and value is not None:
            rendered.append(f"{name}={_format_float(value)}")
    if rendered:
        lines.append("postgres_connections_by_db: " + "; ".join(rendered))
def _append_hottest(lines: list[str], summary: dict[str, Any]) -> None:
    """Append the hottest node per metric, with hardware labels when known."""
    hottest = summary.get("hottest")
    if not isinstance(hottest, dict) or not hottest:
        return
    hardware_map = summary.get("hardware_by_node")
    if not isinstance(hardware_map, dict):
        hardware_map = {}
    rendered: list[str] = []
    for metric, entry in hottest.items():
        if not isinstance(entry, dict):
            continue
        node = entry.get("node")
        if not node:
            continue
        if metric in {"net", "io"}:
            value = _format_rate_bytes(entry.get("value"))
        else:
            value = _format_float(entry.get("value"))
            # Percent-style metrics get a unit suffix.
            if value and metric in {"cpu", "ram", "disk"}:
                value = f"{value}%"
        hardware = hardware_map.get(node)
        label = f"{node} [{hardware}]" if hardware else node
        rendered.append(f"{metric}={label} ({value})")
    if rendered:
        lines.append("hottest: " + "; ".join(rendered))
def _append_workloads(lines: list[str], summary: dict[str, Any]) -> None:
workloads = summary.get("workloads")
if not isinstance(workloads, list) or not workloads:
return
lines.append(f"workloads: total={len(workloads)}")
top_workloads = sorted(
(item for item in workloads if isinstance(item, dict)),
key=lambda item: (-int(item.get("pods_total") or 0), item.get("workload") or ""),
)[:5]
if not top_workloads:
return
parts = []
for item in top_workloads:
namespace = item.get("namespace")
name = item.get("workload")
pods_total = item.get("pods_total")
primary = item.get("primary_node")
if namespace and name:
label = f"{namespace}/{name}={pods_total}"
if primary:
label = f"{label} (primary={primary})"
parts.append(label)
if parts:
lines.append("workloads_top: " + "; ".join(parts))
def _append_topology(lines: list[str], summary: dict[str, Any]) -> None: # noqa: C901
topology = summary.get("topology") if isinstance(summary.get("topology"), dict) else {}
if not topology:
return
nodes = topology.get("nodes") if isinstance(topology.get("nodes"), list) else []
workloads = topology.get("workloads") if isinstance(topology.get("workloads"), list) else []
if nodes:
parts = []
for entry in nodes[:5]:
if not isinstance(entry, dict):
continue
node = entry.get("node")
top = entry.get("workloads_top") if isinstance(entry.get("workloads_top"), list) else []
if not node or not top:
continue
items = ", ".join([f"{name}({count})" for name, count in top if name and count is not None])
if items:
parts.append(f"{node}={items}")
if parts:
lines.append("node_workloads_top: " + "; ".join(parts))
if workloads:
parts = []
for entry in workloads[:5]:
if not isinstance(entry, dict):
continue
namespace = entry.get("namespace")
name = entry.get("workload")
nodes_top = entry.get("nodes_top") if isinstance(entry.get("nodes_top"), list) else []
if not namespace or not name:
continue
nodes_label = ", ".join([f"{node}:{count}" for node, count in nodes_top if node])
label = f"{namespace}/{name}"
if nodes_label:
label = f"{label} [{nodes_label}]"
parts.append(label)
if parts:
lines.append("workload_nodes_top: " + "; ".join(parts))
def _append_flux(lines: list[str], summary: dict[str, Any]) -> None:
flux = summary.get("flux") if isinstance(summary.get("flux"), dict) else {}
if not flux:
return
not_ready = flux.get("not_ready")
if not_ready is not None:
lines.append(f"flux_not_ready: {not_ready}")
items = flux.get("items")
if isinstance(items, list) and items:
parts = []
for item in items[:10]:
if not isinstance(item, dict):
continue
name = item.get("name") or ""
namespace = item.get("namespace") or ""
reason = item.get("reason") or ""
suspended = item.get("suspended")
label = f"{namespace}/{name}".strip("/")
if reason:
label = f"{label} ({reason})"
if suspended:
label = f"{label} [suspended]"
if label:
parts.append(label)
if parts:
lines.append("flux_not_ready_items: " + "; ".join(parts))
# Export every module-level name except dunders. Private "_" helpers are
# intentionally included so companion formatter modules can pull them in
# via `from <this module> import *`.
__all__ = [name for name in globals() if not name.startswith("__")]

View File

@ -0,0 +1,448 @@
from __future__ import annotations
from typing import Any
from .core_a import PVC_USAGE_CRITICAL
from .format_b import *
def _append_signals(lines: list[str], summary: dict[str, Any]) -> None:
signals = summary.get("signals") if isinstance(summary.get("signals"), list) else []
if not signals:
return
lines.append("signals:")
for entry in signals[:8]:
if not isinstance(entry, dict):
continue
scope = entry.get("scope") or ""
target = entry.get("target") or ""
metric = entry.get("metric") or ""
current = entry.get("current")
delta = entry.get("delta_pct")
severity = entry.get("severity") or ""
detail = f"{scope}:{target} {metric}={current}"
if delta is not None:
detail += f" delta={delta}%"
if severity:
detail += f" severity={severity}"
lines.append(f"- {detail}")
def _append_profiles(lines: list[str], summary: dict[str, Any]) -> None: # noqa: C901
profiles = summary.get("profiles") if isinstance(summary.get("profiles"), dict) else {}
if not profiles:
return
nodes = profiles.get("nodes") if isinstance(profiles.get("nodes"), list) else []
namespaces = profiles.get("namespaces") if isinstance(profiles.get("namespaces"), list) else []
workloads = profiles.get("workloads") if isinstance(profiles.get("workloads"), list) else []
if nodes:
lines.append("node_profiles:")
for entry in nodes[:3]:
if not isinstance(entry, dict):
continue
lines.append(
f"- {entry.get('node')}: load={entry.get('load_index')} cpu={entry.get('cpu')} ram={entry.get('ram')} "
f"pods={entry.get('pods_total')} hw={entry.get('hardware')}"
)
if namespaces:
lines.append("namespace_profiles:")
for entry in namespaces[:3]:
if not isinstance(entry, dict):
continue
lines.append(
f"- {entry.get('namespace')}: pods={entry.get('pods_total')} cpu={entry.get('cpu_usage')} "
f"mem={entry.get('mem_usage')} primary={entry.get('primary_node')}"
)
if workloads:
lines.append("workload_profiles:")
for entry in workloads[:3]:
if not isinstance(entry, dict):
continue
lines.append(
f"- {entry.get('namespace')}/{entry.get('workload')}: pods={entry.get('pods_total')} "
f"running={entry.get('pods_running')} node={entry.get('primary_node')}"
)
def _append_units_windows(lines: list[str], summary: dict[str, Any]) -> None:
    """Append the units and time windows that apply to the metric lines."""
    metrics = summary.get("metrics")
    metrics = metrics if isinstance(metrics, dict) else {}
    units = metrics.get("units")
    units = units if isinstance(units, dict) else {}
    windows = metrics.get("windows")
    windows = windows if isinstance(windows, dict) else {}
    # Hard-coded defaults keep the output self-describing when the summary
    # omits either block.
    lines.append("units: " + _format_kv_map(units) if units else "units: cpu_pct, ram_pct, net=bytes_per_sec, io=bytes_per_sec")
    lines.append("windows: " + _format_kv_map(windows) if windows else "windows: rates=5m, restarts=1h")
def _append_node_load_summary(lines: list[str], summary: dict[str, Any]) -> None:
    """Append the busiest nodes by load index plus any load outliers."""
    node_load = summary.get("node_load_summary")
    if not isinstance(node_load, dict) or not node_load:
        return
    hardware_by_node = summary.get("hardware_by_node")
    if not isinstance(hardware_by_node, dict):
        hardware_by_node = {}
    top = node_load.get("top")
    if isinstance(top, list) and top:
        rendered: list[str] = []
        for entry in top[:5]:
            if not isinstance(entry, dict):
                continue
            node = entry.get("node") or ""
            text = f"{node} idx={_format_float(entry.get('load_index'))}"
            if node and node in hardware_by_node:
                text += f" hw={hardware_by_node.get(node)}"
            pods_total = entry.get("pods_total")
            if isinstance(pods_total, (int, float)):
                text += f" pods={int(pods_total)}"
            text += f" cpu={_format_float(entry.get('cpu'))} ram={_format_float(entry.get('ram'))}"
            text += f" io={_format_rate_bytes(entry.get('io'))} net={_format_rate_bytes(entry.get('net'))}"
            rendered.append(text)
        if rendered:
            lines.append("node_load_top: " + "; ".join(rendered))
    outliers = node_load.get("outliers")
    if isinstance(outliers, list) and outliers:
        names = [
            entry.get("node")
            for entry in outliers
            if isinstance(entry, dict) and isinstance(entry.get("node"), str) and entry.get("node")
        ]
        if names:
            lines.append("node_load_outliers: " + _format_names(names))
def _append_hardware_usage(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append average usage per hardware class plus the hottest class per metric."""
    usage = summary.get("hardware_usage_avg")
    if not isinstance(usage, list) or not usage:
        return
    rendered: list[str] = []
    # metric -> (hardware label, numeric value) of the current leader
    leaders: dict[str, tuple[str, float]] = {}
    for entry in usage[:5]:
        if not isinstance(entry, dict):
            continue
        hardware = entry.get("hardware")
        if not hardware:
            continue
        load = entry.get("load_index")
        cpu = entry.get("cpu")
        ram = entry.get("ram")
        io = entry.get("io")
        net = entry.get("net")
        rendered.append(
            f"{hardware} idx={_format_float(load)}"
            f" cpu={_format_float(cpu)} ram={_format_float(ram)}"
            f" io={_format_rate_bytes(io)} net={_format_rate_bytes(net)}"
        )
        for metric, value in (("cpu", cpu), ("ram", ram), ("io", io), ("net", net), ("load", load)):
            if not isinstance(value, (int, float)):
                continue
            best = leaders.get(metric)
            if best is None or float(value) > best[1]:
                leaders[metric] = (hardware, float(value))
    if rendered:
        lines.append("hardware_usage_avg: " + "; ".join(rendered))
    if leaders:
        top_parts: list[str] = []
        for metric in ("cpu", "ram", "io", "net", "load"):
            best = leaders.get(metric)
            if not best:
                continue
            hardware, value = best
            shown = _format_rate_bytes(value) if metric in {"io", "net"} else _format_float(value)
            top_parts.append(f"{metric}={hardware} ({shown})")
        if top_parts:
            lines.append("hardware_usage_top: " + "; ".join(top_parts))
def _append_cluster_watchlist(lines: list[str], summary: dict[str, Any]) -> None:
watchlist = summary.get("cluster_watchlist")
if not isinstance(watchlist, list) or not watchlist:
return
lines.append("cluster_watchlist: " + "; ".join(watchlist))
def _append_baseline_deltas(lines: list[str], summary: dict[str, Any]) -> None:
    """Append per-metric baseline deltas for nodes and namespaces."""
    deltas = summary.get("baseline_deltas")
    deltas = deltas if isinstance(deltas, dict) else {}
    for scope in ("nodes", "namespaces"):
        block = deltas.get(scope)
        if not isinstance(block, dict):
            continue
        name_key = "node" if scope == "nodes" else "namespace"
        for metric, entries in block.items():
            if not isinstance(entries, list) or not entries:
                continue
            rendered: list[str] = []
            for entry in entries[:5]:
                if not isinstance(entry, dict):
                    continue
                name = entry.get(name_key)
                delta = entry.get("delta")
                if not isinstance(name, str) or not name or not isinstance(delta, (int, float)):
                    continue
                severity = entry.get("severity")
                suffix = f" ({severity})" if isinstance(severity, str) and severity else ""
                rendered.append(f"{name}={_format_float(delta)}%{suffix}")
            if rendered:
                lines.append(f"{scope}_baseline_delta_{metric}: " + "; ".join(rendered))
def _append_pod_issue_summary(lines: list[str], summary: dict[str, Any]) -> None:
    """Append aggregated pod-issue reasons and per-namespace issue leaders."""
    issues = summary.get("pod_issue_summary")
    issues = issues if isinstance(issues, dict) else {}
    for key, label in (
        ("waiting_reasons_top", "pod_waiting_reasons_top"),
        ("phase_reasons_top", "pod_phase_reasons_top"),
    ):
        entries = issues.get(key)
        line = _reason_line(entries if isinstance(entries, list) else [], label)
        if line:
            lines.append(line)
    namespace_issue = issues.get("namespace_issue_top")
    if isinstance(namespace_issue, dict) and namespace_issue:
        _append_namespace_issue_lines(lines, namespace_issue)
def _reason_line(entries: list[dict[str, Any]], label: str) -> str:
parts = []
for entry in entries[:5]:
if not isinstance(entry, dict):
continue
reason = entry.get("reason")
count = entry.get("count")
if reason:
parts.append(f"{reason}={count}")
if parts:
return f"{label}: " + "; ".join(parts)
return ""
def _append_namespace_issue_lines(lines: list[str], namespace_issue: dict[str, Any]) -> None:
for key, entries in namespace_issue.items():
if not isinstance(entries, list) or not entries:
continue
parts: list[str] = []
for entry in entries[:5]:
if not isinstance(entry, dict):
continue
ns = entry.get("namespace")
value = entry.get("value")
if ns:
parts.append(f"{ns}={value}")
if parts:
lines.append(f"namespace_issue_top_{key}: " + "; ".join(parts))
def _build_cluster_watchlist(summary: dict[str, Any]) -> dict[str, Any]:
    """Build the cluster_watchlist block from the summary's health signals.

    Returns {"cluster_watchlist": [...]} when anything is noteworthy,
    otherwise an empty dict.
    """
    items: list[str] = []

    def block(key: str) -> dict[str, Any]:
        # Normalize missing/malformed sections to an empty dict.
        value = summary.get(key)
        return value if isinstance(value, dict) else {}

    not_ready = int(block("nodes_summary").get("not_ready") or 0)
    if not_ready > 0:
        items.append(f"not_ready_nodes={not_ready}")
    pressure_names = block("pressure_nodes").get("names")
    if isinstance(pressure_names, list) and pressure_names:
        items.append(f"pressure_nodes={len(pressure_names)}")
    pending_over = int(block("pod_issues").get("pending_over_15m") or 0)
    if pending_over > 0:
        items.append(f"pods_pending_over_15m={pending_over}")
    workloads = block("workloads_health")
    workloads_not_ready = 0
    for kind in ("deployments", "statefulsets", "daemonsets"):
        sub = workloads.get(kind)
        sub = sub if isinstance(sub, dict) else {}
        workloads_not_ready += int(sub.get("not_ready") or 0)
    if workloads_not_ready > 0:
        items.append(f"workloads_not_ready={workloads_not_ready}")
    flux_not_ready = int(block("flux").get("not_ready") or 0)
    if flux_not_ready > 0:
        items.append(f"flux_not_ready={flux_not_ready}")
    pvc_usage = summary.get("pvc_usage_top")
    pvc_usage = pvc_usage if isinstance(pvc_usage, list) else []
    if any(isinstance(e, dict) and (e.get("value") or 0) >= PVC_USAGE_CRITICAL for e in pvc_usage):
        items.append(f"pvc_usage>={PVC_USAGE_CRITICAL}%")
    return {"cluster_watchlist": items} if items else {}
def _capacity_ratio_parts(entries: list[dict[str, Any]], ratio_key: str, usage_key: str, req_key: str) -> list[str]:
    """Render `ns=ratio (usage=.. req=..)` parts for the first five entries."""
    rendered: list[str] = []
    for entry in entries[:5]:
        if not isinstance(entry, dict):
            continue
        ns = entry.get("namespace") or ""
        if not ns:
            continue
        rendered.append(
            f"{ns}={_format_float(entry.get(ratio_key))} "
            f"(usage={_format_float(entry.get(usage_key))} req={_format_float(entry.get(req_key))})"
        )
    return rendered
def _capacity_headroom_parts(entries: list[dict[str, Any]]) -> list[str]:
    """Render `ns=headroom` parts for the first five entries."""
    return [
        f"{entry.get('namespace')}={_format_float(entry.get('headroom'))}"
        for entry in entries[:5]
        if isinstance(entry, dict) and (entry.get("namespace") or "")
    ]
def _append_namespace_capacity_summary(
    lines: list[str],
    summary: dict[str, Any],
) -> None:
    """Append namespace capacity ratios, low headroom, and overcommit lines."""
    cap = summary.get("namespace_capacity_summary")
    if not isinstance(cap, dict) or not cap:
        return
    for key, label, ratio_key, usage_key, req_key in (
        ("cpu_ratio_top", "namespace_cpu_ratio_top", "cpu_usage_ratio", "cpu_usage", "cpu_requests"),
        ("mem_ratio_top", "namespace_mem_ratio_top", "mem_usage_ratio", "mem_usage", "mem_requests"),
    ):
        entries = cap.get(key)
        if isinstance(entries, list):
            parts = _capacity_ratio_parts(entries, ratio_key, usage_key, req_key)
            if parts:
                lines.append(f"{label}: " + "; ".join(parts))
    for key, label in (
        ("cpu_headroom_low", "namespace_cpu_headroom_low"),
        ("mem_headroom_low", "namespace_mem_headroom_low"),
    ):
        entries = cap.get(key)
        if isinstance(entries, list):
            parts = _capacity_headroom_parts(entries)
            if parts:
                lines.append(f"{label}: " + "; ".join(parts))
    cpu_over = cap.get("cpu_overcommitted")
    mem_over = cap.get("mem_overcommitted")
    if cpu_over is not None or mem_over is not None:
        lines.append(f"namespace_overcommitted: cpu={cpu_over} mem={mem_over}")
    for key, label in (
        ("cpu_overcommitted_names", "namespace_cpu_overcommitted_names"),
        ("mem_overcommitted_names", "namespace_mem_overcommitted_names"),
    ):
        raw_names = cap.get(key)
        if isinstance(raw_names, list) and raw_names:
            names = [name for name in raw_names if isinstance(name, str) and name]
            if names:
                lines.append(f"{label}: " + _format_names(names))
def _append_workloads_by_namespace(lines: list[str], summary: dict[str, Any]) -> None:
workloads = summary.get("workloads")
if not isinstance(workloads, list) or not workloads:
return
by_ns: dict[str, list[dict[str, Any]]] = {}
for item in workloads:
if not isinstance(item, dict):
continue
ns = item.get("namespace") or ""
name = item.get("workload") or ""
if not ns or not name:
continue
by_ns.setdefault(ns, []).append(item)
for ns, items in sorted(by_ns.items()):
items.sort(
key=lambda item: (-int(item.get("pods_total") or 0), item.get("workload") or "")
)
parts = []
for entry in items[:2]:
name = entry.get("workload") or ""
pods = entry.get("pods_total")
primary = entry.get("primary_node")
label = f"{name}({pods})" if pods is not None else name
if primary:
label = f"{label}@{primary}"
if label:
parts.append(label)
if parts:
lines.append(f"workloads_top_{ns}: " + "; ".join(parts))
def _append_lexicon(lines: list[str], summary: dict[str, Any]) -> None:
    """Append lexicon term and alias lines (capped at 8 terms / 6 aliases).

    Malformed containers degrade to empty collections; entries with a falsy
    term/meaning or key/value are silently dropped.
    """
    lexicon = summary.get("lexicon")
    if not isinstance(lexicon, dict):
        return
    raw_terms = lexicon.get("terms")
    raw_aliases = lexicon.get("aliases")
    terms = raw_terms if isinstance(raw_terms, list) else []
    aliases = raw_aliases if isinstance(raw_aliases, dict) else {}
    for entry in terms[:8]:
        if isinstance(entry, dict):
            term = entry.get("term")
            meaning = entry.get("meaning")
            if term and meaning:
                lines.append(f"lexicon_term: {term} => {meaning}")
    for key, value in list(aliases.items())[:6]:
        if key and value:
            lines.append(f"lexicon_alias: {key} => {value}")
def _cross_node_lines(lines: list[str], cross_stats: dict[str, Any]) -> None:
    """Append `cross_node_<metric>` leader lines from `node_metric_top` (max 10)."""
    raw = cross_stats.get("node_metric_top")
    entries = raw if isinstance(raw, list) else []
    for entry in entries[:10]:
        if not isinstance(entry, dict):
            continue
        metric = entry.get("metric")
        node = entry.get("node")
        if not metric or not node:
            continue
        parts = [
            f"value={_format_float(entry.get('value'))}",
            f"cpu={_format_float(entry.get('cpu'))}",
            f"ram={_format_float(entry.get('ram'))}",
            f"net={_format_float(entry.get('net'))}",
            f"io={_format_float(entry.get('io'))}",
        ]
        pods = entry.get("pods_total")
        if pods is not None:
            parts.append(f"pods={pods}")
        lines.append(f"cross_node_{metric}: {node} " + " ".join(parts))


def _cross_namespace_lines(lines: list[str], cross_stats: dict[str, Any]) -> None:
    """Append `cross_namespace_<metric>` leader lines from `namespace_metric_top` (max 10)."""
    raw = cross_stats.get("namespace_metric_top")
    entries = raw if isinstance(raw, list) else []
    for entry in entries[:10]:
        if not isinstance(entry, dict):
            continue
        metric = entry.get("metric")
        namespace = entry.get("namespace")
        if not metric or not namespace:
            continue
        parts = [
            f"value={_format_float(entry.get('value'))}",
            f"cpu_ratio={_format_float(entry.get('cpu_ratio'))}",
            f"mem_ratio={_format_float(entry.get('mem_ratio'))}",
        ]
        pods = entry.get("pods_total")
        if pods is not None:
            parts.append(f"pods={pods}")
        lines.append(f"cross_namespace_{metric}: {namespace} " + " ".join(parts))


def _cross_pvc_lines(lines: list[str], cross_stats: dict[str, Any]) -> None:
    """Append the busiest PVC usage lines from `pvc_top` (max 5)."""
    raw = cross_stats.get("pvc_top")
    entries = raw if isinstance(raw, list) else []
    for entry in entries[:5]:
        if not isinstance(entry, dict):
            continue
        namespace = entry.get("namespace")
        pvc = entry.get("pvc")
        if namespace and pvc:
            used = entry.get("used_percent")
            lines.append(f"cross_pvc_usage: {namespace}/{pvc} used={_format_float(used)}")


def _append_cross_stats(lines: list[str], summary: dict[str, Any]) -> None:
    """Append cross-cutting leaders (node metrics, namespace metrics, PVCs).

    Delegates to one helper per section; each helper is independently
    bounded and skips malformed entries. Output is identical to the
    previous single-function implementation, but the split removes the
    need for its `# noqa: C901` complexity waiver.
    """
    cross_stats = summary.get("cross_stats")
    if not isinstance(cross_stats, dict):
        return
    _cross_node_lines(lines, cross_stats)
    _cross_namespace_lines(lines, cross_stats)
    _cross_pvc_lines(lines, cross_stats)
__all__ = [name for name in globals() if not name.startswith("__")]

View File

@ -0,0 +1,72 @@
from __future__ import annotations
from typing import Any
from .core_a import *
from .core_b import *
from .format_a import *
from .format_b import *
from .format_c import *
def summary_text(snapshot: dict[str, Any] | None) -> str:
    """Render the snapshot summary into deterministic prompt text.

    Returns an empty string when no summary can be built; otherwise the
    header lines followed by every section in a fixed order.
    """
    summary = build_summary(snapshot)
    if not summary:
        return ""
    lines: list[str] = ["atlas_cluster: Titan Lab Atlas Kubernetes cluster (internal)."]
    meta = snapshot if isinstance(snapshot, dict) else {}
    bits = [
        f"{label}={value}"
        for label, value in (
            ("collected_at", meta.get("collected_at")),
            ("version", meta.get("snapshot_version")),
        )
        if value
    ]
    if bits:
        lines.append("snapshot: " + ", ".join(bits))
    # Section order is part of the contract: the rendered text must stay
    # deterministic so prompts (and their caches) are reproducible.
    sections = (
        _append_nodes,
        _append_hardware,
        _append_hardware_groups,
        _append_lexicon,
        _append_pressure,
        _append_node_facts,
        _append_node_ages,
        _append_node_taints,
        _append_capacity,
        _append_pods,
        _append_namespace_pods,
        _append_namespace_nodes,
        _append_node_pods,
        _append_pod_issues,
        _append_pod_issue_summary,
        _append_workload_health,
        _append_events,
        _append_node_usage_stats,
        _append_namespace_usage,
        _append_namespace_requests,
        _append_namespace_io_net,
        _append_pod_usage,
        _append_restarts,
        _append_job_failures,
        _append_jobs,
        _append_postgres,
        _append_hottest,
        _append_pvc_usage,
        _append_root_disk_headroom,
        _append_namespace_capacity_summary,
        _append_baseline_deltas,
        _append_longhorn,
        _append_workloads,
        _append_topology,
        _append_workloads_by_namespace,
        _append_node_load_summary,
        _append_cluster_watchlist,
        _append_hardware_usage,
        _append_cross_stats,
        _append_flux,
        _append_signals,
        _append_profiles,
        _append_units_windows,
    )
    for append_section in sections:
        append_section(lines, summary)
    return "\n".join(lines)

View File

@ -6,6 +6,17 @@ from typing import Any
class ClaimStore: class ClaimStore:
"""Persist conversation claims for follow-up answers.
Why:
- keep short-lived conversation state durable across turns without
forcing the answer engine to own storage mechanics.
Input/Output:
- accepts a SQLite path and TTL, stores claim payloads, and returns
normalized payload dictionaries when queried.
"""
def __init__(self, path: str, ttl_sec: int) -> None: def __init__(self, path: str, ttl_sec: int) -> None:
self._path = path or ":memory:" self._path = path or ":memory:"
self._ttl = max(60, ttl_sec) self._ttl = max(60, ttl_sec)

21
pyproject.toml Normal file
View File

@ -0,0 +1,21 @@
# Pytest discovers both the regression suite (tests/) and the shared helper
# package (testing/); pythonpath keeps repo-root imports resolvable.
[tool.pytest.ini_options]
testpaths = ["tests", "testing"]
pythonpath = ["."]
[tool.ruff]
line-length = 100
target-version = "py312"
[tool.ruff.lint]
select = ["E", "F", "W", "B", "C90", "I", "PLR", "RUF", "SIM", "UP", "ARG"]
# E501 is ignored in favour of the formatter's line-length setting above.
ignore = ["E501"]
# Per-file waivers: F403/F405 cover the star-import aggregator packages;
# the remaining lists relax complexity/argument rules for legacy hotspots,
# tests, and scripts.
[tool.ruff.lint.per-file-ignores]
"atlasbot/engine/answerer/*.py" = ["F403", "F405", "I001"]
"atlasbot/engine/answerer/__init__.py" = ["C90", "PLR", "SIM", "ARG", "RUF", "UP", "I001"]
"atlasbot/matrix/bot.py" = ["C90", "PLR", "SIM", "ARG", "RUF", "UP", "I001"]
"atlasbot/snapshot/builder/__init__.py" = ["F403", "F405", "I001"]
"atlasbot/snapshot/builder/*.py" = ["F403", "F405", "I001"]
"testing/*.py" = ["PLR0911", "ARG002", "PLR2004"]
"tests/*.py" = ["PLR2004", "I001", "ARG001", "ARG002", "ARG005", "C901", "PLR0915", "UP037"]
"scripts/*.py" = ["PLR0911", "PLR2004"]

42
scripts/check_coverage.py Executable file
View File

@ -0,0 +1,42 @@
#!/usr/bin/env python3
"""Enforce per-file coverage thresholds from SlipCover JSON output."""
from __future__ import annotations
import argparse
import json
from pathlib import Path
def main() -> int:
    """Check each production file against a minimum coverage percentage.

    Reads a SlipCover-style JSON report (``{"files": {path: {"summary":
    {"percent_covered": float}}}}``), keeps only files under ``--root``,
    and prints every file below ``--threshold``.

    Returns:
        0 when all measured files meet the threshold, 1 otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("coverage_json")
    parser.add_argument("--root", default="atlasbot")
    parser.add_argument("--threshold", type=float, default=95.0)
    args = parser.parse_args()
    data = json.loads(Path(args.coverage_json).read_text(encoding="utf-8"))
    files = data.get("files") if isinstance(data, dict) else {}
    # Reports without a "files" mapping previously yielded None here and
    # crashed on `.items()`; treat anything non-dict as "nothing measured".
    if not isinstance(files, dict):
        files = {}
    violations: list[tuple[float, str]] = []
    prefix = f"{args.root}/"
    for path, payload in sorted(files.items()):
        if not path.startswith(prefix):
            continue
        summary = payload.get("summary") if isinstance(payload, dict) else {}
        percent = summary.get("percent_covered") if isinstance(summary, dict) else None
        # bool is a subclass of int; exclude it so a stray true/false in the
        # report is not read as 1%/0% coverage.
        if isinstance(percent, bool) or not isinstance(percent, (int, float)):
            continue
        if float(percent) < args.threshold:
            violations.append((float(percent), path))
    if violations:
        for percent, path in sorted(violations):
            print(f"{path}: {percent:.2f}% < {args.threshold:.2f}%")
        return 1
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

83
scripts/check_docstrings.py Executable file
View File

@ -0,0 +1,83 @@
#!/usr/bin/env python3
"""Require docstrings on public production APIs."""
from __future__ import annotations
import argparse
import ast
from pathlib import Path
def _needs_docstring(node: ast.AST, *, parent_class: str | None = None) -> bool:
"""Decide whether `node` should carry a contract docstring."""
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
name = node.name
if name.startswith("_") and name != "__init__":
return False
return not (parent_class and name.startswith("_"))
if isinstance(node, ast.ClassDef):
if node.name.startswith("_"):
return False
if any(
(isinstance(dec, ast.Name) and dec.id == "dataclass")
or (isinstance(dec, ast.Call) and isinstance(dec.func, ast.Name) and dec.func.id == "dataclass")
for dec in node.decorator_list
):
return False
if any(
isinstance(base, ast.Name) and base.id in {"Exception", "RuntimeError", "BaseException"}
for base in node.bases
):
return False
return not any(isinstance(base, ast.Name) and base.id == "BaseModel" for base in node.bases)
return False
def _iter_nodes(tree: ast.AST) -> list[tuple[ast.AST, str | None]]:
"""Yield top-level public nodes only.
The gate focuses on the module surface area rather than every internal
method so we can keep contracts on the actual API seams.
"""
items: list[tuple[ast.AST, str | None]] = []
for node in getattr(tree, "body", []):
items.append((node, None))
return items
def main() -> int:
    """Check modules under the production package and report missing contracts.

    Walks every Python file under ``--root`` (skipping caches and venvs),
    prints one line per public class/function without a docstring, and
    returns 1 when any violation was found, else 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--root", default="atlasbot")
    args = parser.parse_args()
    violations: list[str] = []
    for path in sorted(Path(args.root).rglob("*.py")):
        skip_dirs = {"__pycache__", ".venv"}
        if skip_dirs.intersection(path.parts):
            continue
        tree = ast.parse(path.read_text(encoding="utf-8"))
        for node, parent_class in _iter_nodes(tree):
            if not _needs_docstring(node, parent_class=parent_class):
                continue
            if ast.get_docstring(node):
                continue
            if isinstance(node, ast.ClassDef):
                violations.append(f"{path}: class {node.name} is missing a docstring")
            elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                owner = f"{parent_class}." if parent_class else ""
                violations.append(f"{path}: {owner}{node.name} is missing a docstring")
    for item in violations:
        print(item)
    return 1 if violations else 0


if __name__ == "__main__":
    raise SystemExit(main())

70
scripts/check_file_sizes.py Executable file
View File

@ -0,0 +1,70 @@
#!/usr/bin/env python3
"""Fail when production Python files exceed the configured line budget.
The gate is intentionally narrow:
- it only checks the `atlasbot/` package tree;
- it treats each file independently;
- it keeps the threshold explicit so CI can ratchet without guesswork.
"""
from __future__ import annotations
import argparse
from pathlib import Path
def _count_lines(path: Path) -> int:
    """Return the physical line count of the file at `path`.

    The file is read as UTF-8 and counted with `str.splitlines`, so a
    trailing newline does not add an extra (empty) line.
    """
    content = path.read_text(encoding="utf-8")
    return len(content.splitlines())
def _iter_python_files(root: Path) -> list[Path]:
    """List production Python files under `root` in sorted order.

    Directories named `__pycache__` or `.venv` anywhere in a file's path
    exclude it, as do non-file matches (e.g. odd directory names).
    """
    excluded = {"__pycache__", ".venv"}
    found = [
        path
        for path in root.rglob("*.py")
        if path.is_file() and not excluded.intersection(path.parts)
    ]
    return sorted(found)
def main() -> int:
    """Run the size gate and return a process exit code.

    Prints every file over ``--max-lines`` (largest first) and returns 1,
    or returns 0 silently when every file fits the budget.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--root", default="atlasbot")
    parser.add_argument("--max-lines", type=int, default=500)
    args = parser.parse_args()
    oversized = [
        (count, path)
        for path in _iter_python_files(Path(args.root))
        if (count := _count_lines(path)) > args.max_lines
    ]
    if not oversized:
        return 0
    for count, path in sorted(oversized, reverse=True):
        print(f"{path}: {count} lines (limit {args.max_lines})")
    return 1


if __name__ == "__main__":
    raise SystemExit(main())

2
testing/__init__.py Normal file
View File

@ -0,0 +1,2 @@
"""Shared testing helpers for atlasbot."""

108
testing/fakes.py Normal file
View File

@ -0,0 +1,108 @@
"""Reusable test doubles and settings factories."""
from __future__ import annotations
import asyncio
from atlasbot.config import Settings
class FakeLLM:
    """Deterministic LLM double for pipeline tests.

    Why:
    - keeps the answer engine tests fast and predictable.
    Input/Output:
    - accepts the same `chat()` signature as the real client;
    - returns canned JSON or text snippets based on the prompt content.
    """

    # Ordered first-match-wins rules: every substring in a rule's needle
    # tuple must appear in the prompt for its response to be returned.
    _RULES: tuple[tuple[tuple[str, ...], str], ...] = (
        (("normalized", "keywords"), '{"normalized":"What is Atlas?","keywords":["atlas"]}'),
        (("needs_snapshot",), '{"needs_snapshot": true, "answer_style": "direct"}'),
        (("sub-questions",), '[{"id":"q1","question":"What is Atlas?","priority":1}]'),
        (("sub-question",), "Atlas has 22 nodes."),
        (("Answer using only the Fact Sheet",), "Atlas has 22 nodes."),
        (("final response",), "Atlas has 22 nodes."),
        (
            ("Score response quality",),
            '{"confidence":80,"relevance":90,"satisfaction":85,"hallucination_risk":"low"}',
        ),
        (("claims list",), '{"claims": []}'),
    )

    def __init__(self) -> None:
        self.calls: list[str] = []

    async def chat(self, messages, *, model=None, timeout_sec=None):
        """Return a prompt-shaped response and remember the last user prompt."""
        prompt = messages[-1]["content"]
        self.calls.append(prompt)
        for needles, response in self._RULES:
            if all(needle in prompt for needle in needles):
                return response
        return "{}"
class SlowFakeLLM(FakeLLM):
    """Variant that sleeps briefly so timeout guards can be exercised."""

    async def chat(self, messages, *, model=None, timeout_sec=None):
        """Delay before delegating so budget handling stays deterministic."""
        await asyncio.sleep(0.02)
        response = await super().chat(messages, model=model, timeout_sec=timeout_sec)
        return response
def build_test_settings() -> Settings:
    """Create a fully populated `Settings` instance for unit tests.

    Every field is pinned to a deterministic, offline value (blank
    endpoints, tiny budgets and caps) so tests never reach real services.
    """
    field_values: dict[str, object] = {
        # Matrix / auth wiring: blanked so no client is constructed.
        "matrix_base": "",
        "auth_base": "",
        "bot_user": "",
        "bot_pass": "",
        "room_alias": "",
        "server_name": "",
        "bot_mentions": (),
        "matrix_bots": (),
        # Model routing: one distinct name per tier.
        "ollama_url": "",
        "ollama_model": "base",
        "ollama_model_fast": "fast",
        "ollama_model_smart": "smart",
        "ollama_model_genius": "genius",
        "ollama_fallback_model": "",
        "ollama_timeout_sec": 1.0,
        "ollama_retries": 0,
        "ollama_api_key": "",
        "http_port": 8090,
        "internal_token": "",
        "kb_dir": "",
        "vm_url": "",
        "ariadne_state_url": "",
        "ariadne_state_token": "",
        "snapshot_ttl_sec": 30,
        "thinking_interval_sec": 30,
        # Per-mode time budgets.
        "quick_time_budget_sec": 15.0,
        "smart_time_budget_sec": 45.0,
        "genius_time_budget_sec": 180.0,
        "conversation_ttl_sec": 300,
        "snapshot_pin_enabled": False,
        "queue_enabled": False,
        "nats_url": "",
        "nats_stream": "",
        "nats_subject": "",
        "nats_result_bucket": "",
        # Per-mode workload caps.
        "fast_max_angles": 1,
        "smart_max_angles": 1,
        "genius_max_angles": 1,
        "fast_max_candidates": 1,
        "smart_max_candidates": 1,
        "genius_max_candidates": 1,
        "fast_llm_calls_max": 9,
        "smart_llm_calls_max": 17,
        "genius_llm_calls_max": 32,
        "llm_limit_multiplier": 1.5,
        "state_db_path": "/tmp/atlasbot_test_state.db",
    }
    return Settings(**field_values)

View File

@ -1,98 +1,21 @@
"""Answer-engine regression tests."""
from __future__ import annotations
import asyncio import asyncio
from dataclasses import replace from dataclasses import replace
from atlasbot.engine.answerer import AnswerEngine from atlasbot.engine.answerer import AnswerEngine
from atlasbot.knowledge.loader import KnowledgeBase from atlasbot.knowledge.loader import KnowledgeBase
from atlasbot.snapshot.builder import SnapshotProvider from atlasbot.snapshot.builder import SnapshotProvider
from atlasbot.config import Settings from testing.fakes import FakeLLM, SlowFakeLLM, build_test_settings
class FakeLLM: def test_engine_answer_basic() -> None:
def __init__(self) -> None: """The quick path should answer from the fact sheet."""
self.calls: list[str] = []
async def chat(self, messages, *, model=None, timeout_sec=None):
prompt = messages[-1]["content"]
self.calls.append(prompt)
if "normalized" in prompt and "keywords" in prompt:
return '{"normalized":"What is Atlas?","keywords":["atlas"]}'
if "needs_snapshot" in prompt:
return '{"needs_snapshot": true, "answer_style": "direct"}'
if "sub-questions" in prompt:
return '[{"id":"q1","question":"What is Atlas?","priority":1}]'
if "sub-question" in prompt:
return "Atlas has 22 nodes."
if "Answer using only the Fact Sheet" in prompt:
return "Atlas has 22 nodes."
if "final response" in prompt:
return "Atlas has 22 nodes."
if "Score response quality" in prompt:
return '{"confidence":80,"relevance":90,"satisfaction":85,"hallucination_risk":"low"}'
if "claims list" in prompt:
return '{"claims": []}'
return "{}"
class SlowFakeLLM(FakeLLM):
async def chat(self, messages, *, model=None, timeout_sec=None):
await asyncio.sleep(0.02)
return await super().chat(messages, model=model, timeout_sec=timeout_sec)
def _settings() -> Settings:
return Settings(
matrix_base="",
auth_base="",
bot_user="",
bot_pass="",
room_alias="",
server_name="",
bot_mentions=(),
matrix_bots=(),
ollama_url="",
ollama_model="base",
ollama_model_fast="fast",
ollama_model_smart="smart",
ollama_model_genius="genius",
ollama_fallback_model="",
ollama_timeout_sec=1.0,
ollama_retries=0,
ollama_api_key="",
http_port=8090,
internal_token="",
kb_dir="",
vm_url="",
ariadne_state_url="",
ariadne_state_token="",
snapshot_ttl_sec=30,
thinking_interval_sec=30,
quick_time_budget_sec=15.0,
smart_time_budget_sec=45.0,
genius_time_budget_sec=180.0,
conversation_ttl_sec=300,
snapshot_pin_enabled=False,
queue_enabled=False,
nats_url="",
nats_stream="",
nats_subject="",
nats_result_bucket="",
fast_max_angles=1,
smart_max_angles=1,
genius_max_angles=1,
fast_max_candidates=1,
smart_max_candidates=1,
genius_max_candidates=1,
fast_llm_calls_max=9,
smart_llm_calls_max=17,
genius_llm_calls_max=32,
llm_limit_multiplier=1.5,
state_db_path="/tmp/atlasbot_test_state.db",
)
def test_engine_answer_basic():
llm = FakeLLM() llm = FakeLLM()
settings = _settings() settings = build_test_settings()
kb = KnowledgeBase("") kb = KnowledgeBase("")
snapshot = SnapshotProvider(settings) snapshot = SnapshotProvider(settings)
engine = AnswerEngine(settings, llm, kb, snapshot) engine = AnswerEngine(settings, llm, kb, snapshot)
@ -101,9 +24,11 @@ def test_engine_answer_basic():
assert "Atlas has 22 nodes" in result.reply assert "Atlas has 22 nodes" in result.reply
def test_smart_mode_uses_factsheet_path(): def test_smart_mode_uses_factsheet_path() -> None:
"""Smart mode should stay on the factsheet branch for direct cluster questions."""
llm = FakeLLM() llm = FakeLLM()
settings = _settings() settings = build_test_settings()
kb = KnowledgeBase("") kb = KnowledgeBase("")
snapshot = SnapshotProvider(settings) snapshot = SnapshotProvider(settings)
engine = AnswerEngine(settings, llm, kb, snapshot) engine = AnswerEngine(settings, llm, kb, snapshot)
@ -113,9 +38,11 @@ def test_smart_mode_uses_factsheet_path():
assert "time budget" not in result.reply.lower() assert "time budget" not in result.reply.lower()
def test_genius_mode_uses_factsheet_path(): def test_genius_mode_uses_factsheet_path() -> None:
"""Genius mode should also return the factsheet answer for the same query."""
llm = FakeLLM() llm = FakeLLM()
settings = _settings() settings = build_test_settings()
kb = KnowledgeBase("") kb = KnowledgeBase("")
snapshot = SnapshotProvider(settings) snapshot = SnapshotProvider(settings)
engine = AnswerEngine(settings, llm, kb, snapshot) engine = AnswerEngine(settings, llm, kb, snapshot)
@ -125,9 +52,11 @@ def test_genius_mode_uses_factsheet_path():
assert "time budget" not in result.reply.lower() assert "time budget" not in result.reply.lower()
def test_plain_math_question_is_rejected_for_cluster_modes(): def test_plain_math_question_is_rejected_for_cluster_modes() -> None:
"""The bot should keep users on cluster questions instead of generic math."""
llm = FakeLLM() llm = FakeLLM()
settings = _settings() settings = build_test_settings()
kb = KnowledgeBase("") kb = KnowledgeBase("")
snapshot = SnapshotProvider(settings) snapshot = SnapshotProvider(settings)
engine = AnswerEngine(settings, llm, kb, snapshot) engine = AnswerEngine(settings, llm, kb, snapshot)
@ -136,9 +65,11 @@ def test_plain_math_question_is_rejected_for_cluster_modes():
assert "focus on Titan cluster operations" in result.reply assert "focus on Titan cluster operations" in result.reply
def test_quick_mode_time_budget_guard(): def test_quick_mode_time_budget_guard() -> None:
"""A slow model call should trip the quick-mode budget guard."""
llm = SlowFakeLLM() llm = SlowFakeLLM()
settings = replace(_settings(), quick_time_budget_sec=0.01) settings = replace(build_test_settings(), quick_time_budget_sec=0.01)
kb = KnowledgeBase("") kb = KnowledgeBase("")
snapshot = SnapshotProvider(settings) snapshot = SnapshotProvider(settings)
engine = AnswerEngine(settings, llm, kb, snapshot) engine = AnswerEngine(settings, llm, kb, snapshot)

View File

@ -0,0 +1,810 @@
"""Targeted quality-gate coverage for runtime and answerer orchestration."""
from __future__ import annotations
import asyncio
import json
from dataclasses import replace
from pathlib import Path
from types import SimpleNamespace
from typing import Any
import httpx
import pytest
from atlasbot.api.http import Api, AnswerRequest
from atlasbot.config import MatrixBotConfig
from atlasbot.engine.answerer import (
AnswerEngine,
AnswerResult,
AnswerScores,
ClaimItem,
EvidenceItem,
ModePlan,
)
from atlasbot.engine.answerer.common import _mode_plan
from atlasbot.engine.answerer.engine import AnswerEngine as EngineClass
from atlasbot.engine.answerer.workflow import run_answer
from atlasbot.engine.answerer.workflow_post import finalize_answer
from atlasbot.knowledge.loader import KnowledgeBase
from atlasbot.llm.client import LLMClient, LLMError, parse_json
from atlasbot.main import result_scores
from atlasbot.matrix.bot import MatrixBot, MatrixClient
from atlasbot.queue.nats import QueueManager
from atlasbot.snapshot.builder import SnapshotProvider, build_summary
from testing.fakes import build_test_settings
from tests.test_support_modules import _rich_snapshot
class StaticSnapshot:
    """Serve one pre-built snapshot payload to answer-engine tests."""

    def __init__(self, payload: dict[str, Any]) -> None:
        # Stored as-is; get() hands back the same object, not a copy.
        self._payload = payload

    def get(self) -> dict[str, Any]:
        """Return the stored snapshot payload unchanged."""
        return self._payload
class PromptLLM:
    """Map prompt fragments to canned responses for workflow tests.

    Why:
    - lets workflow tests script an entire multi-step answer run without a
      real model, while still failing loudly on any unscripted prompt.
    Input/Output:
    - same `chat()` signature as the real client; records `(model, prompt)`
      pairs in `self.calls`; raises AssertionError for unhandled prompts.
    """
    def __init__(self) -> None:
        # One (model, prompt) tuple per chat() call, in call order.
        self.calls: list[tuple[str, str]] = []
    async def chat(
        self,
        messages: list[dict[str, str]],
        *,
        model: str | None = None,
        timeout_sec: float | None = None,
    ) -> str:
        """Return the scripted response for the latest user prompt."""
        del timeout_sec
        system = messages[0]["content"]
        prompt = messages[-1]["content"]
        self.calls.append((model or "", prompt))
        # Chunk-relevance scoring is synthesized from the prompt itself so
        # any chunk list can be scored deterministically: chunks mentioning
        # cpu/synapse score 95, the rest 80.
        if "Given chunk summaries, score relevance" in prompt:
            items = []
            for line in prompt.splitlines():
                if line.startswith("- c"):
                    chunk_id = line.split()[1].rstrip(":")
                    score = 95 if "cpu" in line.lower() or "synapse" in line.lower() else 80
                    items.append({"id": chunk_id, "score": score, "reason": "relevant"})
            return json.dumps(items or [{"id": "c0", "score": 90, "reason": "relevant"}])
        # Direct (plain-string) families are tried before the JSON lookup
        # table; an unmatched prompt is a test bug, hence the hard failure.
        direct = self._direct_response(prompt)
        if direct is not None:
            return direct
        response = self._lookup_response(system, prompt)
        if response is not None:
            return response
        raise AssertionError(f"Unhandled prompt:\nSYSTEM={system}\nPROMPT={prompt}")
    def _direct_response(self, prompt: str) -> str | None:
        """Return direct string responses for a few prompt families."""
        if "Answer the sub-question using the context" in prompt:
            return "The best runbook path is runbooks/fix.md." if "runbook" in prompt.lower() else "synapse is hottest with cpu 95 on titan-01."
        # First match wins; "Draft:" only fires for the facts-bearing prompt
        # variant (guarded below), other markers match unconditionally.
        markers = [
            ("Write a final response to the user", "titan-99 is hottest and the runbook is runbooks/wrong.md."),
            ("Draft:", "synapse is hottest at cpu 95 on titan-01, and amd64 nodes remain separate from raspberry hardware."),
            ("Return JSON with fields: issues", '{"issues":["mention the exact runbook"],"missing_data":[],"risky_claims":[]}'),
            ("command (string), rationale", '{"command":"kubectl top pods -n synapse","rationale":"verify namespace cpu"}'),
            ("confidence (0-100)", '{"confidence":88,"relevance":91,"satisfaction":86,"hallucination_risk":"low"}'),
        ]
        for marker, response in markers:
            if marker in prompt:
                if marker == "Draft:" and "If Facts are provided" not in prompt:
                    continue
                return response
        return None
    def _lookup_response(self, system: str, prompt: str) -> str | None:
        """Return canned responses for prompt markers."""
        del system
        # Ordered, first-match-wins table: more specific markers must stay
        # ahead of substrings they contain, so keep the ordering intact when
        # adding entries.
        markers = [
            (
                "normalized (string), keywords",
                '{"normalized":"Which namespace is hottest on raspberry hardware and which runbook should I use?","keywords":["namespace","hottest","cpu","raspberry","runbook"]}',
            ),
            (
                "needs_snapshot (bool)",
                '{"needs_snapshot":true,"needs_kb":true,"needs_tool":true,"answer_style":"insightful","follow_up":false,"question_type":"open_ended","focus_entity":"namespace","focus_metric":"cpu"}',
            ),
            (
                "Generate up to",
                '[{"id":"q1","question":"Which namespace is hottest?","priority":5,"kind":"metric"},{"id":"q2","question":"Which runbook applies?","priority":4,"kind":"context"}]',
            ),
            ("Choose the run that best aligns", '{"selected_index": 1}'),
            ("AvailableKeys:", '{"keys":["namespace_cpu_top","namespace_pods","hardware_nodes"]}'),
            ("Return JSON with field: missing", '{"missing":[]}'),
            ("Return JSON with fields: prefixes", '{"prefixes":["namespace","hottest"]}'),
            ("fact_types", '{"fact_types":["namespace_cpu_top","hardware_nodes"]}'),
            ("Return JSON with field: signals", '{"signals":["cpu","synapse","raspberry"]}'),
            (
                "Signals:",
                '{"lines":["namespace_cpu_top: synapse=95","hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"]}',
            ),
            (
                "Return JSON with field: lines",
                '{"lines":["namespace_cpu_top: synapse=95","hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"]}',
            ),
            (
                "CandidateFacts:",
                '{"lines":["namespace_cpu_top: synapse=95","hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"]}',
            ),
            (
                "FactCandidates:",
                '{"lines":["namespace_cpu_top: synapse=95","hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"]}',
            ),
            (
                "Suggest a safe, read-only command",
                '{"command":"kubectl top pods -n synapse","rationale":"verify namespace cpu"}',
            ),
            ("Pick the best candidate for accuracy and grounding", '{"best": 1}'),
            ("Pick the best draft for accuracy", '{"best": 1}'),
            ("Pick the best runbook path", '{"path":"runbooks/fix.md"}'),
            ("Check the draft against the context", "synapse is hottest on titan-01, but see runbooks/wrong.md."),
            ("Answer using the fact", "Latest metrics: namespace_cpu_top: synapse=95."),
            ("Rewrite the draft to only include claims supported by FactsUsed", "synapse is hottest on titan-01."),
            ("Check if an open-ended answer includes at least two concrete signals", '{"ok": false, "reason": "needs more detail"}'),
            ("ok (bool), reason (string)", '{"ok": false, "reason": "needs more detail"}'),
            ("Rewrite the answer using the critique", "synapse is hottest at cpu 95 on titan-01. Use runbooks/fix.md."),
            ("Return JSON with field: note", '{"note":"The answer would benefit from per-pod CPU samples."}'),
            ("Score response quality", '{"confidence":88,"relevance":91,"satisfaction":86,"hallucination_risk":"low"}'),
            (
                "Return JSON with fields: confidence (0-100), relevance (0-100), satisfaction (0-100), hallucination_risk (low|medium|high).",
                '{"confidence":88,"relevance":91,"satisfaction":86,"hallucination_risk":"low"}',
            ),
            (
                "claims list",
                '{"claims":[{"id":"c1","claim":"synapse is hottest","evidence":[{"path":"hottest.cpu.node","reason":"snapshot"}]}]}',
            ),
            ("Select the claims most relevant", '{"claim_ids":["c1"]}'),
            ("Follow-up:", "titan-99 is still hottest."),
            ("Rewrite the answer to be concise and directly answer the question", "Latest metrics: namespace_cpu_top: synapse=95."),
            ("Deduplicate repeated statements", "Latest metrics: namespace_cpu_top: synapse=95."),
            ("Answer using only the Fact Sheet", "Fact sheet answer: namespace_cpu_top: synapse=95. Use runbooks/fix.md."),
        ]
        for marker, response in markers:
            if marker in prompt:
                return response
        return None
class TimeoutLLM:
"""Raise a timeout as soon as the workflow makes an LLM call."""
async def chat(
self,
messages: list[dict[str, str]],
*,
model: str | None = None,
timeout_sec: float | None = None,
) -> str:
"""Trigger the workflow timeout handling branch."""
del messages, model, timeout_sec
raise TimeoutError("boom")
class LimitLLM(PromptLLM):
    """Reuse prompt handling while allowing the workflow to hit call caps.

    No behavior is overridden; the separate type exists so call-cap tests
    can use the scripted PromptLLM responses under a distinct name.
    """
def _settings(tmp_path: Path, **overrides: Any):
    """Build settings with an isolated claim-store path.

    Passing `state_db_path` in `overrides` raises (duplicate keyword), which
    keeps the per-test isolation guarantee explicit.
    """
    db_path = str(tmp_path / "state.db")
    base = build_test_settings()
    return replace(base, state_db_path=db_path, **overrides)
def _make_engine(tmp_path: Path, llm: Any, **setting_overrides: Any) -> AnswerEngine:
    """Construct a real engine with static snapshot and KB doubles."""
    settings = _settings(tmp_path, **setting_overrides)

    def fake_summary() -> str:
        return "KB summary."

    def fake_runbook_titles(limit=5) -> str:
        return "Relevant runbooks:\n- Fix (runbooks/fix.md)"

    def fake_runbook_paths(limit=10) -> list[str]:
        return ["runbooks/fix.md"]

    def fake_chunk_lines(max_files=20, max_chars=6000) -> list[str]:
        return [
            "runbooks/fix.md",
            "namespace_cpu_top: synapse=95",
            "hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)",
        ]

    kb = KnowledgeBase("")
    kb.summary = fake_summary  # type: ignore[method-assign]
    kb.runbook_titles = fake_runbook_titles  # type: ignore[method-assign]
    kb.runbook_paths = fake_runbook_paths  # type: ignore[method-assign]
    kb.chunk_lines = fake_chunk_lines  # type: ignore[method-assign]
    return AnswerEngine(settings, llm, kb, StaticSnapshot(_rich_snapshot()))  # type: ignore[arg-type]
def test_engine_helper_methods_cover_state_and_followup(tmp_path: Path) -> None:
    """Cover answer-engine helper branches outside the main workflow."""
    settings = _settings(tmp_path)
    # Minimal LLM double for the stock-answer path only.
    class StockLLM:
        async def chat(self, messages, *, model=None, timeout_sec=None):
            del messages, model, timeout_sec
            return "stock reply"
    engine = EngineClass(settings, StockLLM(), KnowledgeBase(""), StaticSnapshot(_rich_snapshot()))
    # Tag-routed call_llm double: "synth" answers depend on the draft index,
    # everything else comes from the static table; unknown tags fail loudly.
    async def call_llm(_system: str, _prompt: str, *, context: str | None = None, model: str | None = None, tag: str = "") -> str:
        del _system, context, model
        static = {
            "draft_select": '{"best": 2}',
            "score": '{"confidence":90,"relevance":91,"satisfaction":92,"hallucination_risk":"low"}',
            "claim_map": '{"claims":[{"id":"c1","claim":"cpu is high","evidence":[{"path":"hottest.cpu.node","reason":"why"},{"path":"","reason":"skip"}]},"bad"]}',
            "select_claims": '{"claim_ids":["c1"]}',
            "followup": "titan-99 is hottest. The draft is correct.",
            "followup_fix": "titan-01 is hottest.",
            "dedup_followup": "The draft is correct. titan-01 is hottest.",
            "dedup": "deduped",
        }
        if tag == "synth":
            return "draft one" if "DraftIndex: 1" in _prompt else "draft two"
        if tag in static:
            return static[tag]
        raise AssertionError(tag)
    # Stock path: reply comes straight from the LLM double.
    stock = asyncio.run(engine._answer_stock("hello"))
    assert stock.reply == "stock reply"
    plan = replace(_mode_plan(settings, "smart"), drafts=2, parallelism=2)
    # Synthesis: draft_select picks index 2 ("draft two"), both with
    # pre-supplied drafts and with an empty draft list.
    synth = asyncio.run(
        engine._synthesize_answer(
            "Which node is hottest?",
            ["draft one", "draft two"],
            "ctx",
            {"question_type": "metric", "answer_style": "direct"},
            plan,
            call_llm,
        )
    )
    synth_empty = asyncio.run(
        engine._synthesize_answer(
            "Which node is hottest?",
            [],
            "ctx",
            {"question_type": "metric", "answer_style": "direct"},
            replace(plan, drafts=1, parallelism=1),
            call_llm,
        )
    )
    assert synth == "draft two"
    assert synth_empty == "draft two"
    # Scoring: LLM-backed when use_scores is on, fixed 60 confidence when off.
    scored = asyncio.run(engine._score_answer("q", "a", plan, call_llm))
    assert scored.hallucination_risk == "low"
    assert asyncio.run(engine._score_answer("q", "a", replace(plan, use_scores=False), call_llm)).confidence == 60
    summary = build_summary(_rich_snapshot())
    # Claim extraction: malformed entries ("bad", empty evidence path) are
    # dropped; an empty reply yields no claims at all.
    claims = asyncio.run(engine._extract_claims("q", "a", summary, ["fact"], call_llm))
    assert claims and claims[0].evidence[0].path == "hottest.cpu.node"
    assert asyncio.run(engine._extract_claims("q", "", summary, [], call_llm)) == []
    # Dedup only triggers when the reply repeats itself.
    assert asyncio.run(engine._dedup_reply("one. one. one.", plan, call_llm, "dedup")) == "deduped"
    assert asyncio.run(engine._dedup_reply("single answer", plan, call_llm, "dedup")) == "single answer"
    # Conversation state round-trip plus cleanup; None conversation id has no state.
    engine._store_state("conv-1", claims, summary, _rich_snapshot(), True)
    state = engine._get_state("conv-1")
    assert state and state.snapshot
    assert engine._get_state(None) is None
    engine._cleanup_state()
    # Follow-up path: the scripted fix rewrites titan-99 back to titan-01.
    followup = asyncio.run(
        engine._answer_followup(
            "Which hardware hotspot is there?",
            state,
            summary,
            {"question_type": "diagnostic"},
            plan,
            call_llm,
        )
    )
    assert "titan-01" in followup
    assert asyncio.run(engine._select_claims("what about that?", claims, plan, call_llm)) == ["c1"]
    assert asyncio.run(engine._select_claims("what about that?", [], plan, call_llm)) == []
def test_finalize_answer_covers_post_processing_branches(tmp_path: Path) -> None:
    """Exercise evidence-fix, runbook, guard, critic, and gap paths.

    Drives ``finalize_answer`` with a stub engine whose synthesized draft
    cites a node (``titan-99``) and a runbook (``runbooks/wrong.md``) that
    are absent from the allowed lists, forcing each post-processing repair
    stage to fire in turn.
    """
    settings = _settings(tmp_path)
    # Enable the optional critic and gap stages on top of the smart-mode plan.
    plan = replace(_mode_plan(settings, "smart"), use_gap=True, use_critic=True)
    summary = build_summary(_rich_snapshot())
    summary_lines = [
        "namespace_cpu_top: synapse=95",
        "hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)",
        "runbooks/fix.md",
    ]
    # Collects (stage, note) pairs emitted through the observer callback.
    observed: list[tuple[str, str]] = []
    async def call_llm(_system: str, _prompt: str, *, context: str | None = None, model: str | None = None, tag: str = "") -> str:
        # Canned per-tag responses; an unexpected tag fails loudly with the
        # offending prompt so a new pipeline stage cannot pass unnoticed.
        del _system, context, model
        responses = {
            "runbook_select": '{"path":"runbooks/fix.md"}',
            # evidence_fix deliberately repeats the bad node/runbook so the
            # enforce/guard stages must keep repairing downstream.
            "evidence_fix": "titan-99 is hottest and see runbooks/wrong.md.",
            "evidence_fix_enforce": "titan-99 is hottest and see runbooks/wrong.md.",
            "metric_direct": "no numbers here",
            "runbook_enforce": "Non-Raspberry Pi nodes: amd64 (titan-02). Use runbooks/fix.md.",
            "evidence_guard": "Non-Raspberry Pi nodes: amd64 (titan-02). Use runbooks/fix.md.",
            "focus_fix": "Latest metrics: namespace_cpu_top: synapse=95.",
            # insight_guard rejects the draft, triggering insight_fix.
            "insight_guard": '{"ok": false, "reason": "needs more detail"}',
            "insight_fix": "Latest metrics: namespace_cpu_top: synapse=95. Use runbooks/fix.md.",
            # critic reports an issue, triggering the revise pass.
            "critic": '{"issues":["too vague"]}',
            "revise": "Latest metrics: namespace_cpu_top: synapse=95. Use runbooks/fix.md.",
            "gap": '{"note":"The answer would benefit from per-pod CPU samples."}',
        }
        if tag not in responses:
            raise AssertionError(_prompt)
        return responses[tag]
    class FinalizeEngine:
        # Minimal engine stub exposing only the callbacks finalize_answer uses.
        async def _synthesize_answer(self, *args: Any) -> str:
            # Deliberately wrong draft so the evidence-fix path must repair it.
            return "titan-99 is hottest and see runbooks/wrong.md."
        async def _dedup_reply(self, reply: str, _plan: ModePlan, _call_llm, tag: str) -> str:
            # Pass-through dedup; just verify the expected tag is used.
            assert tag == "dedup"
            return reply
        async def _score_answer(self, _question: str, _reply: str, _plan: ModePlan, _call_llm) -> AnswerScores:
            return AnswerScores(80, 81, 82, "low")
        async def _extract_claims(self, _question: str, _reply: str, _summary: dict[str, Any], _facts_used: list[str], _call_llm) -> list[ClaimItem]:
            return [ClaimItem(id="c1", claim="cpu high", evidence=[EvidenceItem(path="hottest.cpu.node", reason="snapshot")])]
    reply, scores, claims = asyncio.run(
        finalize_answer(
            engine=FinalizeEngine(),
            call_llm=call_llm,
            normalized="Which namespace is hottest on raspberry hardware and which runbook should I use?",
            subanswers=["synapse is hottest"],
            context="ctx",
            classify={"question_type": "open_ended", "answer_style": "direct"},
            plan=plan,
            summary=summary,
            summary_lines=summary_lines,
            metric_facts=["namespace_cpu_top: synapse=95"],
            key_facts=["namespace_cpu_top: synapse=95"],
            facts_used=["hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"],
            allowed_nodes=["titan-01", "titan-02"],
            allowed_namespaces=["synapse"],
            runbook_paths=["runbooks/fix.md"],
            lowered_question="which namespace is hottest on raspberry hardware and which runbook should i use?",
            force_metric=True,
            keyword_tokens=["namespace", "cpu", "raspberry"],
            question_tokens=["namespace", "cpu", "raspberry"],
            snapshot_context="ClusterSnapshot:\nnamespace_cpu_top: synapse=95",
            observer=lambda stage, note: observed.append((stage, note)),
            mode="smart",
            metric_keys=["namespace_cpu_top"],
        )
    )
    # The final reply must carry the repaired runbook path and the metric.
    assert "runbooks/fix.md" in reply
    assert "synapse=95" in reply
    assert scores.confidence == 80
    assert claims and claims[0].id == "c1"
    # Each post-processing branch reported progress through the observer.
    assert ("evidence_fix", "repairing missing evidence") in observed
    assert ("critic", "reviewing") in observed
    assert ("gap", "checking gaps") in observed
def test_run_answer_deep_workflow_persists_state(tmp_path: Path) -> None:
    """Drive the full smart workflow through retrieval, synthesis, and post-processing."""
    engine = _make_engine(tmp_path, PromptLLM())
    progress: list[tuple[str, str]] = []

    def record(stage: str, note: str) -> None:
        # Capture every (stage, note) pair the workflow reports.
        progress.append((stage, note))

    question = "Run limitless Which namespace is hottest on raspberry hardware and which runbook should I use?"
    result = asyncio.run(
        run_answer(
            engine,
            question,
            mode="smart",
            history=[{"q": "before", "a": "earlier"}],
            observer=record,
            conversation_id="room-1",
            snapshot_pin=True,
        )
    )
    # The reply must cite the runbook and surface the kubectl tool hint.
    assert "runbooks/fix.md" in result.reply
    assert result.meta["tool_hint"]["command"] == "kubectl top pods -n synapse"
    # Conversation state (claims + pinned snapshot) is persisted for follow-ups.
    state = engine._get_state("room-1")
    assert state and state.claims and state.snapshot
    # Every major pipeline stage reported progress via the observer.
    seen_stages = {stage for stage, _note in progress}
    assert {"normalize", "route", "retrieve", "tool", "subanswers", "synthesize"} <= seen_stages
def test_run_answer_followup_and_limits(tmp_path: Path) -> None:
    """Cover follow-up routing, reasoning limit, and timeout fallbacks."""

    class FollowupLLM(PromptLLM):
        # Ordered (needle, response) table; first matching needle wins, which
        # preserves the original if-chain's precedence.
        _CANNED = (
            ("normalized (string), keywords", '{"normalized":"What about that?","keywords":["that"]}'),
            ("needs_snapshot (bool)", '{"needs_snapshot":true,"needs_kb":false,"needs_tool":false,"answer_style":"direct","follow_up":false,"question_type":"open_ended","focus_entity":"unknown","focus_metric":"unknown"}'),
            ("Select the claims most relevant", '{"claim_ids":["c1"]}'),
            ("Follow-up:", "titan-99 is still hottest."),
        )

        def _lookup_response(self, system: str, prompt: str) -> str | None:
            for needle, canned in self._CANNED:
                if needle in prompt:
                    return canned
            return super()._lookup_response(system, prompt)

    engine = _make_engine(tmp_path, FollowupLLM())
    summary = build_summary(_rich_snapshot())
    # Seed conversation state so the follow-up path has claims to pull from.
    stored_claims = [ClaimItem(id="c1", claim="synapse is hottest", evidence=[EvidenceItem(path="hottest.cpu.node", reason="snapshot", value_at_claim="titan-01")])]
    engine._store_state("conv-1", stored_claims, summary, _rich_snapshot(), True)
    followup_result = asyncio.run(
        run_answer(
            engine,
            "Run limitless What about that?",
            mode="smart",
            conversation_id="conv-1",
            snapshot_pin=True,
        )
    )
    # The stored evidence value (titan-01) must surface in the follow-up reply.
    assert "titan-01" in followup_result.reply

    # A tight LLM-call budget trips the reasoning limit fallback.
    limit_engine = _make_engine(
        tmp_path / "limit",
        LimitLLM(),
        fast_llm_calls_max=1,
        llm_limit_multiplier=1.0,
    )
    limited = asyncio.run(run_answer(limit_engine, "tell me about cpu and runbooks", mode="custom"))
    assert "reasoning limit" in limited.reply
    assert limited.meta["llm_limit_hit"] is True

    # A tiny time budget with a slow LLM trips the timeout fallback.
    timeout_engine = _make_engine(
        tmp_path / "timeout",
        TimeoutLLM(),
        smart_time_budget_sec=0.1,
        ollama_timeout_sec=0.1,
    )
    timed_out = asyncio.run(run_answer(timeout_engine, "Run limitless tell me about cpu and runbooks", mode="smart"))
    assert "time budget" in timed_out.reply.lower()
    assert timed_out.meta["time_budget_hit"] is True
def test_api_matrix_queue_main_and_store_edge_paths(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """Exercise remaining API, Matrix, queue, main, and store branches.

    One broad integration-style test covering: HTTP API validation, Matrix
    login/sync/heartbeat flows, the NATS-backed queue manager (direct and
    queued modes, bad payloads, handler failures), and score parsing.
    """
    settings = _settings(
        tmp_path,
        internal_token="secret",
        queue_enabled=True,
        matrix_bots=(MatrixBotConfig("bot", "pw", ("atlas",), "quick"),),
    )
    async def handler(
        question: str,
        mode: str,
        history: list[dict[str, str]] | None,
        conversation_id: str | None,
        snapshot_pin: bool | None,
    ) -> AnswerResult:
        # Echo handler: reply is "<question>:<mode>" so routing is observable.
        del history, conversation_id, snapshot_pin
        return AnswerResult(question + ":" + mode, AnswerScores(1, 2, 3, "low"), {"mode": mode})
    api = Api(settings, handler)
    from fastapi.testclient import TestClient
    client = TestClient(api.app)
    # Empty body, valid content, and blank question all hit validation paths.
    assert client.post("/v1/answer", headers={"X-Internal-Token": "secret"}, json={}).status_code == 400
    assert client.post("/v1/answer", headers={"X-Internal-Token": "secret"}, json={"content": "hi"}).json()["reply"] == "hi:quick"
    assert client.post("/v1/answer", headers={"X-Internal-Token": "secret"}, json={"question": " "}).status_code == 400
    # The request model must not strip surrounding whitespace itself.
    assert AnswerRequest(message=" hello ").message == " hello "
    class FakeResp:
        # Minimal httpx-response stand-in for the Matrix client.
        def __init__(self, payload: dict[str, Any], *, status_code: int = 200) -> None:
            self._payload = payload
            self.status_code = status_code
        def raise_for_status(self) -> None:
            if self.status_code >= 400:
                raise httpx.HTTPStatusError("bad", request=httpx.Request("GET", "http://x"), response=httpx.Response(self.status_code))
        def json(self) -> dict[str, Any]:
            return self._payload
    class MatrixAsyncClient:
        # Fake async HTTP client: login succeeds, room directory 404s,
        # everything else syncs with an empty join map.
        async def __aenter__(self) -> "MatrixAsyncClient":
            return self
        async def __aexit__(self, *exc: object) -> None:
            return None
        async def post(self, url: str, json: dict[str, Any] | None = None, headers: dict[str, str] | None = None) -> FakeResp:
            del json, headers
            if "login" in url:
                return FakeResp({"access_token": "tok"})
            return FakeResp({})
        async def get(self, url: str, headers: dict[str, str] | None = None, params: dict[str, Any] | None = None) -> FakeResp:
            del headers, params
            if "directory/room" in url:
                return FakeResp({}, status_code=404)
            return FakeResp({"next_batch": "n1", "rooms": {"join": {}}})
    monkeypatch.setattr("atlasbot.matrix.bot.httpx.AsyncClient", lambda timeout=None: MatrixAsyncClient())
    matrix_client = MatrixClient(settings, settings.matrix_bots[0])
    assert asyncio.run(matrix_client.login()) == "tok"
    # A 404 from the room directory resolves to an empty room id.
    assert asyncio.run(matrix_client.resolve_room("tok")) == ""
    bot = MatrixBot(settings, settings.matrix_bots[0], SimpleNamespace(answer=None), handler)
    class BotClient:
        # Scripted Matrix client: first sync delivers one addressed message
        # (plus events the bot must ignore), second sync aborts the loop.
        def __init__(self) -> None:
            self.sent: list[str] = []
            self.sync_calls = 0
        async def login(self) -> str:
            return "tok"
        async def resolve_room(self, token: str) -> str:
            del token
            return "!room"
        async def join_room(self, token: str, room_id: str) -> None:
            del token, room_id
        async def send_message(self, token: str, room_id: str, text: str) -> None:
            del token, room_id
            self.sent.append(text)
        async def sync(self, token: str, since: str | None) -> dict[str, Any]:
            del token, since
            self.sync_calls += 1
            if self.sync_calls == 1:
                return {
                    "next_batch": "n1",
                    "rooms": {
                        "join": {
                            "!room": {
                                "timeline": {
                                    "events": [
                                        {"type": "m.room.member", "sender": "user"},
                                        {"type": "m.room.message", "sender": "bot", "content": {"body": "ignore"}},
                                        {"type": "m.room.message", "sender": "user", "content": {"body": "atlas quick hi"}},
                                    ]
                                }
                            }
                        }
                    },
                }
            raise RuntimeError("stop")
    bot._client = BotClient()
    async def run_bot_once() -> None:
        # Let the bot process one sync cycle, then cancel its run loop.
        task = asyncio.create_task(bot.run())
        await asyncio.sleep(0.01)
        task.cancel()
        with pytest.raises(asyncio.CancelledError):
            await task
    asyncio.run(run_bot_once())
    # The bot should have posted a "Thinking" heartbeat while answering.
    assert any("Thinking" in msg for msg in bot._client.sent)
    # Heartbeat path: a handler slower than the quick budget triggers the
    # time-budget message instead of an answer.
    timeout_bot = MatrixBot(replace(settings, thinking_interval_sec=0.001, quick_time_budget_sec=0.01), settings.matrix_bots[0], SimpleNamespace(answer=None), None)
    timeout_bot._client = SimpleNamespace(
        sent=[],
        send_message=lambda token, room_id, text: asyncio.sleep(0, result=timeout_bot._client.sent.append(text)),
    )
    async def sleepy_handler(question: str, mode: str, history, conversation_id, observer):
        del question, mode, history, conversation_id, observer
        await asyncio.sleep(1.2)
        return AnswerResult("late", AnswerScores(1, 2, 3, "low"), {})
    timeout_bot._answer_handler = sleepy_handler
    asyncio.run(timeout_bot._answer_with_heartbeat("tok", "!room", "q", "quick"))
    assert any("time budget" in msg for msg in timeout_bot._client.sent)
    # A handler that raises produces the internal-error message.
    error_bot = MatrixBot(replace(settings, thinking_interval_sec=0.001), settings.matrix_bots[0], SimpleNamespace(answer=None), None)
    error_bot._client = SimpleNamespace(
        sent=[],
        send_message=lambda token, room_id, text: asyncio.sleep(0, result=error_bot._client.sent.append(text)),
    )
    async def failing_handler(question: str, mode: str, history, conversation_id, observer):
        del question, mode, history, conversation_id, observer
        raise RuntimeError("boom")
    error_bot._answer_handler = failing_handler
    asyncio.run(error_bot._answer_with_heartbeat("tok", "!room", "q", "smart"))
    assert any("internal error" in msg for msg in error_bot._client.sent)
    class DirectQueue:
        # Handler used when queueing is disabled: answers inline.
        async def __call__(self, payload: dict[str, Any]) -> dict[str, Any]:
            return {"reply": payload["question"]}
    direct_qm = QueueManager(replace(settings, queue_enabled=False), DirectQueue())
    assert asyncio.run(direct_qm.submit({"question": "direct"})) == {"reply": "direct"}
    class FakeSub:
        # Reply-inbox subscription that immediately yields a queued answer.
        async def next_msg(self, timeout: float) -> Any:
            del timeout
            return SimpleNamespace(data=json.dumps({"reply": "queued"}).encode())
        async def unsubscribe(self) -> None:
            return None
    class FakeMsg:
        # Inbound queue message recording whether it was acked.
        def __init__(self, raw: bytes, reply: str = "reply") -> None:
            self.data = raw
            self.reply = reply
            self.acked = False
        async def ack(self) -> None:
            self.acked = True
    published: list[tuple[str, bytes]] = []
    class ExistingStreamJS:
        # JetStream stub whose stream already exists (stream_info succeeds).
        async def stream_info(self, stream: str) -> None:
            assert stream == settings.nats_stream
        async def publish(self, subject: str, data: bytes) -> None:
            published.append((subject, data))
        async def pull_subscribe(self, subject: str, durable: str):
            del subject, durable
            class Pull:
                # First fetch fails (retry path), second cancels the worker.
                def __init__(self) -> None:
                    self.calls = 0
                async def fetch(self, count: int, timeout: float) -> list[FakeMsg]:
                    del count, timeout
                    self.calls += 1
                    if self.calls == 1:
                        raise RuntimeError("retry")
                    raise asyncio.CancelledError
            return Pull()
    class FakeNats:
        def __init__(self) -> None:
            self.drained = False
        async def connect(self, url: str) -> None:
            assert url == settings.nats_url
        def jetstream(self) -> ExistingStreamJS:
            return ExistingStreamJS()
        def new_inbox(self) -> str:
            return "inbox"
        async def subscribe(self, reply: str) -> FakeSub:
            assert reply == "inbox"
            return FakeSub()
        async def publish(self, reply: str, data: bytes) -> None:
            published.append((reply, data))
        async def drain(self) -> None:
            self.drained = True
    monkeypatch.setattr("atlasbot.queue.nats.NATS", FakeNats)
    queue = QueueManager(settings, DirectQueue())
    asyncio.run(queue.start())
    assert asyncio.run(queue.submit({"question": "queued", "mode": "smart"})) == {"reply": "queued"}
    # Malformed JSON payloads must still be acked (poison-message handling).
    invalid_msg = FakeMsg(b"not-json")
    asyncio.run(queue._handle_message(invalid_msg))
    assert invalid_msg.acked is True
    handled_msg = FakeMsg(json.dumps({"payload": {"question": "x"}, "reply": "reply"}).encode())
    asyncio.run(queue._handle_message(handled_msg))
    assert handled_msg.acked is True
    failing_queue = QueueManager(settings, lambda payload: (_ for _ in ()).throw(RuntimeError("boom")))
    failing_queue._nc = FakeNats()
    failing_queue._js = ExistingStreamJS()
    failure_msg = FakeMsg(json.dumps({"payload": {"question": "x"}}).encode())
    # NOTE: intentionally shadows the earlier failing_handler (different
    # signature); the matrix bot already holds its own reference.
    async def failing_handler(payload: dict[str, Any]) -> dict[str, Any]:
        del payload
        raise RuntimeError("boom")
    failing_queue._handler = failing_handler
    asyncio.run(failing_queue._handle_message(failure_msg))
    # Even a crashing handler must ack so the message is not redelivered forever.
    assert failure_msg.acked is True
    asyncio.run(queue.stop())
    # Score parsing: numeric strings coerce; junk falls back to defaults.
    assert result_scores({"scores": {"confidence": "9", "relevance": "8", "satisfaction": "7", "hallucination_risk": "low"}}).confidence == 9
    assert result_scores({"scores": "bad"}).confidence == 60
def test_kb_llm_snapshot_and_json_edge_paths(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """Cover remaining KB, LLM, snapshot, and JSON parsing branches.

    Builds a tiny on-disk knowledge base, then fakes httpx to walk the LLM
    client's response-shape fallbacks, retry exhaustion, and the snapshot
    provider's fetch/cache paths.
    """
    # --- KnowledgeBase: catalog parsing, chunking limits, empty base ---
    base = tmp_path / "kb"
    catalog = base / "catalog"
    catalog.mkdir(parents=True)
    # "sources": ["bad"] and the path-less runbook entry exercise skip branches.
    (catalog / "atlas.json").write_text(json.dumps({"cluster": "atlas", "sources": ["bad"]}), encoding="utf-8")
    (catalog / "runbooks.json").write_text(json.dumps([{"title": "Fix", "path": "runbooks/fix.md"}, {"title": "No path"}]), encoding="utf-8")
    (base / "docs.md").write_text("x" * 120, encoding="utf-8")
    kb = KnowledgeBase(str(base))
    assert kb.runbook_titles(limit=1).count("runbooks/fix.md") == 1
    assert kb.chunk_lines(max_files=1, max_chars=60)
    # A line longer than the remaining budget reports that the limit was hit.
    assert kb._extend_with_limit([], ["abcdef"], 3) is False
    empty_kb = KnowledgeBase("")
    assert empty_kb.chunk_lines() == []
    # --- LLMClient: endpoint, auth header, JSON fence parsing ---
    settings = _settings(tmp_path, ollama_url="http://example/api/chat", ollama_api_key="secret", ollama_retries=0, ollama_fallback_model="")
    client = LLMClient(settings)
    assert client._endpoint() == "http://example/api/chat"
    assert client._headers["x-api-key"] == "secret"
    assert parse_json("```{\"ok\": true}```") == {"ok": True}
    assert parse_json("not-json", fallback={"fallback": True}) == {"fallback": True}
    class FakeResponse:
        def __init__(self, status_code: int, payload: Any) -> None:
            self.status_code = status_code
            self._payload = payload
        def raise_for_status(self) -> None:
            if self.status_code >= 400:
                raise httpx.HTTPStatusError("bad", request=httpx.Request("POST", "http://example"), response=httpx.Response(self.status_code))
        def json(self) -> Any:
            return self._payload
    # Three response shapes, consumed in order by successive chat() calls:
    # "response" key, "reply" fallback key, then an empty message -> error.
    responses = iter([FakeResponse(200, {"response": "plain"}), FakeResponse(200, {"reply": "fallback"}), FakeResponse(200, {"message": {}})])
    class FakeAsyncClient:
        def __init__(self, timeout: float | None = None) -> None:
            self.timeout = timeout
        async def __aenter__(self) -> "FakeAsyncClient":
            return self
        async def __aexit__(self, *exc: object) -> None:
            return None
        async def post(self, _url: str, *, json: dict[str, Any], headers: dict[str, str]) -> FakeResponse:
            del _url, json, headers
            item = next(responses)
            # Exception items are raised rather than returned (used by subclasses).
            if isinstance(item, Exception):
                raise item
            return item
    monkeypatch.setattr(httpx, "AsyncClient", FakeAsyncClient)
    assert asyncio.run(client.chat([{"role": "user", "content": "a"}], timeout_sec=1.0)) == "plain"
    assert asyncio.run(client.chat([{"role": "user", "content": "b"}], timeout_sec=1.0)) == "fallback"
    with pytest.raises(LLMError, match="empty response"):
        asyncio.run(client.chat([{"role": "user", "content": "c"}], timeout_sec=1.0))
    # --- retry exhaustion: one retry, two connect errors -> LLMError ---
    error_settings = replace(settings, ollama_retries=1)
    error_client = LLMClient(error_settings)
    error_responses = iter([httpx.ConnectError("nope"), httpx.ConnectError("still nope")])
    class ErrorAsyncClient(FakeAsyncClient):
        async def post(self, _url: str, *, json: dict[str, Any], headers: dict[str, str]) -> FakeResponse:
            del _url, json, headers
            raise next(error_responses)
    monkeypatch.setattr(httpx, "AsyncClient", ErrorAsyncClient)
    with pytest.raises(LLMError):
        asyncio.run(error_client.chat([{"role": "user", "content": "d"}], timeout_sec=1.0))
    # --- SnapshotProvider: remote fetch, then cache hit within the TTL ---
    provider = SnapshotProvider(replace(settings, ariadne_state_url="http://snapshot", ariadne_state_token="tok"))
    class SnapshotResp:
        def raise_for_status(self) -> None:
            return None
        def json(self) -> dict[str, Any]:
            return {"snapshot_id": "snap-1"}
    monkeypatch.setattr("atlasbot.snapshot.builder.httpx.get", lambda url, headers, timeout: SnapshotResp())
    assert provider.get() == {"snapshot_id": "snap-1"}
    # A fresh cache timestamp (1s old) must short-circuit the HTTP fetch.
    provider._cache = {"snapshot_id": "cached"}
    provider._cache_ts = 10_000.0
    monkeypatch.setattr("atlasbot.snapshot.builder.time.monotonic", lambda: 10_001.0)
    assert provider.get() == {"snapshot_id": "cached"}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff