# atlasbot/tests/test_support_modules.py
"""Coverage-oriented tests for support modules and render helpers."""
from __future__ import annotations
import asyncio
import importlib
import json
import logging as pylogging
from dataclasses import replace
from pathlib import Path
from types import SimpleNamespace
from typing import Any
import httpx
import pytest
from fastapi.testclient import TestClient
from nats.js.errors import NotFoundError
import atlasbot
import atlasbot.api
import atlasbot.engine
import atlasbot.engine.answerer
import atlasbot.knowledge
import atlasbot.llm
import atlasbot.matrix
import atlasbot.queue
import atlasbot.snapshot
from atlasbot.api.http import Api, AnswerRequest, _extract_question
from atlasbot.config import (
MatrixBotConfig,
Settings,
_env_bool,
_env_float,
_env_int,
_load_matrix_bots,
load_settings,
)
import atlasbot.engine.answerer.common as answer_common
import atlasbot.engine.answerer.factsheet as answer_factsheet
import atlasbot.engine.answerer.post as answer_post
import atlasbot.engine.answerer.post_ext as answer_post_ext
import atlasbot.engine.answerer.retrieval as answer_retrieval
import atlasbot.engine.answerer.retrieval_ext as answer_retrieval_ext
import atlasbot.engine.answerer.spine as answer_spine
from atlasbot.engine.answerer import AnswerResult, AnswerScores
from atlasbot.engine.answerer._base import ClaimItem, ContradictionContext, EvidenceItem, InsightGuardInput, ScoreContext
from atlasbot.engine.intent_router import route_intent
from atlasbot.knowledge.loader import KnowledgeBase
from atlasbot.llm.client import LLMClient, build_messages, parse_json
from atlasbot.logging import JsonFormatter, configure_logging
from atlasbot.main import result_scores
from atlasbot.matrix.bot import MatrixBot, MatrixClient, _extract_mode, _mode_timeout_sec
from atlasbot.queue.nats import QueueManager
from atlasbot.snapshot.builder import core_a, core_b, format_a, format_b, format_c, summary_text
from atlasbot.state.store import ClaimStore, _safe_json
from testing.fakes import build_test_settings


def _rich_snapshot() -> dict[str, Any]:
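    """Return a hand-built snapshot payload dense enough to drive every render and answer branch below."""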
return {
"collected_at": "2026-04-10T12:00:00Z",
"snapshot_version": "v1",
"summary": {
"signals": [
{"scope": "node", "target": "titan-01", "metric": "cpu", "current": 90, "delta_pct": 15, "severity": "warn"}
],
"profiles": {
"nodes": [
{"node": "titan-01", "load_index": 0.9, "cpu": 90, "ram": 80, "pods_total": 10, "hardware": "rpi5"}
],
"namespaces": [
{"namespace": "synapse", "pods_total": 5, "cpu_usage": 40, "mem_usage": 50, "primary_node": "titan-01"}
],
"workloads": [
{"namespace": "synapse", "workload": "matrix", "pods_total": 3, "pods_running": 3, "primary_node": "titan-01"}
],
},
"inventory": {"workers": {"total": 2, "ready": 1}},
"topology": {
"nodes": [{"name": "titan-01", "role": "worker"}],
"workloads": [{"name": "matrix", "node": "titan-01"}],
"namespaces": [{"name": "synapse", "pods": 5}],
},
"lexicon": {
"terms": [{"term": "atlas", "meaning": "Atlas cluster"}],
"aliases": {"atlasbot": "atlas"},
},
"cross_stats": {
"node_metric_top": [
{
"metric": "cpu",
"node": "titan-01",
"value": 90,
"cpu": 90,
"ram": 80,
"net": 2.5,
"io": 1.5,
"pods_total": 10,
}
],
"namespace_metric_top": [
{
"metric": "cpu",
"namespace": "synapse",
"value": 40,
"cpu": 40,
"ram": 50,
"net": 1.5,
"io": 1.0,
"pods_total": 5,
}
],
"pvc_top": [{"metric": "usage", "namespace": "synapse", "pvc": "data", "value": 95}],
},
"baseline_deltas": {
"nodes": {
"cpu": [{"node": "titan-01", "delta": 10, "severity": "warn"}],
"ram": [{"node": "titan-01", "delta": 5}],
},
"namespaces": {
"pods": [{"namespace": "synapse", "delta": 8, "severity": "high"}],
},
},
"pod_issue_summary": {
"waiting_reasons_top": [{"reason": "ImagePullBackOff", "count": 3}],
"phase_reasons_top": [{"reason": "Pending", "count": 2}],
"namespace_issue_top": {"waiting": [{"namespace": "synapse", "value": 2}]},
},
"trend_requests": {},
"pod_waiting_trends": {},
"pod_terminated_trends": {},
},
"nodes_summary": {
"total": 2,
"ready": 1,
"not_ready": 1,
"not_ready_names": ["titan-02"],
"by_arch": {"rpi5": 1, "amd64": 1},
"by_role": {"worker": 2},
"workers": {"total": 2, "ready": 1},
"pressure_nodes": {"names": ["titan-02"]},
},
"nodes_detail": [
{
"name": "titan-01",
"hardware": "rpi5",
"arch": "arm64",
"os": "linux",
"kubelet": "1.30",
"kernel": "6.8",
"container_runtime": "containerd",
"is_worker": True,
"roles": ["worker"],
"age_hours": 12,
"taints": [{"key": "dedicated", "effect": "NoSchedule"}],
},
{
"name": "titan-02",
"hardware": "amd64",
"arch": "amd64",
"os": "linux",
"kubelet": "1.30",
"kernel": "6.8",
"container_runtime": "containerd",
"is_worker": True,
"roles": ["worker"],
"age_hours": 24,
"taints": [{"key": "pressure", "effect": "NoExecute"}],
},
],
"metrics": {
"node_load": [
{"node": "titan-01", "load_index": 0.9, "cpu": 90, "ram": 80, "net": 100, "io": 50},
{"node": "titan-02", "load_index": 0.4, "cpu": 30, "ram": 20, "net": 10, "io": 5},
],
"pods_running": 12,
"pods_pending": 1,
"pods_failed": 2,
"pods_succeeded": 3,
"capacity_cpu": 8,
"allocatable_cpu": 7,
"capacity_mem_bytes": 8 * 1024 * 1024 * 1024,
"allocatable_mem_bytes": 6 * 1024 * 1024 * 1024,
"capacity_pods": 110,
"allocatable_pods": 100,
"namespace_cpu_top": [{"metric": {"namespace": "synapse"}, "value": 95}],
"namespace_mem_top": [{"metric": {"namespace": "synapse"}, "value": 1024 * 1024}],
"namespace_cpu_requests_top": [{"metric": {"namespace": "synapse"}, "value": 50}],
"namespace_mem_requests_top": [{"metric": {"namespace": "synapse"}, "value": 2 * 1024 * 1024}],
"namespace_net_top": [{"metric": {"namespace": "synapse"}, "value": 1024}],
"namespace_io_top": [{"metric": {"namespace": "synapse"}, "value": 2048}],
"pod_cpu_top": [{"metric": {"namespace": "synapse", "pod": "matrix"}, "value": 3.3}],
"pod_cpu_top_node": [{"metric": {"namespace": "synapse", "pod": "matrix", "node": "titan-01"}, "value": 3.3}],
"pod_mem_top": [{"metric": {"namespace": "synapse", "pod": "matrix"}, "value": 4096}],
"pod_mem_top_node": [{"metric": {"namespace": "synapse", "pod": "matrix", "node": "titan-01"}, "value": 4096}],
"top_restarts_1h": [{"metric": {"namespace": "synapse", "pod": "matrix"}, "value": [0, 4]}],
"restart_namespace_top": [{"metric": {"namespace": "synapse"}, "value": 4}],
"job_failures_24h": [{"metric": {"namespace": "synapse", "job_name": "backup"}, "value": 2}],
"node_pods_top": [{"node": "titan-01", "pods_total": 5, "namespaces": [{"name": "synapse", "count": 3}]}],
"postgres_connections": {"used": 5, "max": 10, "hottest_db": {"label": "synapse", "value": 3}},
"node_usage": {
"cpu": [{"node": "titan-01", "value": 90}],
"ram": [{"node": "titan-02", "value": 70}],
"net": [{"node": "titan-02", "value": 2}],
"io": [{"node": "titan-01", "value": 0.5}],
"disk": [{"node": "titan-01", "value": 80}],
},
"node_load_summary": {
"top": [{"node": "titan-01", "load_index": 0.9, "cpu": 90, "ram": 80, "io": 1.5, "net": 2.5, "pods_total": 10}],
"outliers": [{"node": "titan-02"}],
},
"hardware_usage_avg": [
{"hardware": "rpi5", "load_index": 0.9, "cpu": 90, "ram": 80, "io": 1.5, "net": 2.5},
],
"namespace_capacity_summary": {
"cpu_ratio_top": [
{"namespace": "synapse", "cpu_usage_ratio": 0.8, "cpu_usage": 40, "cpu_requests": 50}
],
"mem_ratio_top": [
{"namespace": "synapse", "mem_usage_ratio": 0.7, "mem_usage": 70, "mem_requests": 100}
],
"cpu_headroom_low": [{"namespace": "synapse", "headroom": 0.2}],
"mem_headroom_low": [{"namespace": "synapse", "headroom": 0.3}],
"cpu_overcommitted": 1,
"mem_overcommitted": 0,
"cpu_overcommitted_names": ["synapse"],
"mem_overcommitted_names": [],
},
"namespace_capacity": [{"namespace": "synapse", "cpu": 1, "mem": 2}],
"units": {"cpu_pct": "%", "ram_pct": "%", "net": "bytes/s"},
"windows": {"rates": "5m", "restarts": "1h"},
},
"namespace_pods": [{"namespace": "synapse", "pods_total": 5, "pods_running": 4}],
"namespace_nodes": [{"namespace": "synapse", "pods_total": 5, "primary_node": "titan-01"}],
"node_pods": [{"node": "titan-01", "pods_total": 5, "namespaces": [{"name": "synapse", "count": 3}]}],
"pod_issues": {
"counts": {"Failed": 2, "Pending": 1, "Unknown": 0},
"top": [{"namespace": "synapse", "pod": "matrix", "phase": "Pending", "age_hours": 2}],
"pending_oldest": [{"namespace": "synapse", "pod": "matrix", "age_hours": 2}],
"waiting_reasons_top": [{"reason": "ImagePullBackOff", "count": 3}],
"pending_over_15m": 1,
"waiting_reasons": {"ImagePullBackOff": 3},
},
"workloads_health": {
"deployments": {"ready": 2, "not_ready": 1, "desired": 3},
"statefulsets": {"ready": 1, "not_ready": 0, "desired": 1},
"daemonsets": {"ready": 1, "not_ready": 0, "desired": 1},
},
"events": {
"warnings_top_reason": {"ImagePullBackOff": 3},
"warnings_latest": [{"reason": "FailedScheduling", "count": 2}],
"warnings_total": 5,
},
"jobs": {
"totals": {"total": 4, "active": 1, "failed": 1, "succeeded": 2},
"failing": [{"namespace": "synapse", "job_name": "backup", "failed": 1}],
"active_oldest": [{"namespace": "synapse", "job_name": "backup", "age_minutes": 30}],
},
"postgres": {
"used": 5,
"max": 10,
"hottest_db": {"label": "synapse", "value": 3},
"by_db": [{"label": "synapse", "value": 3}],
},
"hottest": {
"cpu": {"node": "titan-01", "value": 90},
"ram": {"node": "titan-02", "value": 70},
"net": {"node": "titan-02", "value": 2},
"io": {"node": "titan-01", "value": 0.5},
"disk": {"node": "titan-01", "value": 80},
},
"pvc_usage_top": [{"namespace": "synapse", "pvc": "data", "value": 95}],
"root_disk_low_headroom": [{"node": "titan-01", "headroom_pct": 20, "used_pct": 80}],
"longhorn": {
"total": 2,
"attached_count": 1,
"detached_count": 1,
"degraded_count": 0,
"by_state": {"attached": 1, "detached": 1},
"by_robustness": {"healthy": 1, "degraded": 1},
"unhealthy": [{"name": "vol1", "state": "detached", "robustness": "degraded"}],
},
"workloads": [{"namespace": "synapse", "name": "matrix", "pods_total": 3, "pods_running": 3}],
"flux": {
"ready": 1,
"not_ready": 1,
"items": [{"kind": "HelmRelease", "name": "matrix", "status": "Ready"}],
},
}


def test_package_imports() -> None:
"""Import package shims so their `__init__` modules stay covered."""
importlib.import_module("atlasbot")
importlib.import_module("atlasbot.api")
importlib.import_module("atlasbot.engine")
importlib.import_module("atlasbot.engine.answerer")
importlib.import_module("atlasbot.knowledge")
importlib.import_module("atlasbot.llm")
importlib.import_module("atlasbot.matrix")
importlib.import_module("atlasbot.queue")
importlib.import_module("atlasbot.snapshot")
assert atlasbot.snapshot.__name__ == "atlasbot.snapshot"


def test_config_helpers_and_load_settings(monkeypatch: pytest.MonkeyPatch) -> None:
"""Exercise config parsing branches and matrix bot loading."""
monkeypatch.setenv("BOOL_ONE", "yes")
monkeypatch.setenv("INT_BAD", "nope")
monkeypatch.setenv("FLOAT_BAD", "nope")
assert _env_bool("BOOL_ONE")
assert _env_int("INT_BAD", "7") == 7
assert _env_float("FLOAT_BAD", "2.5") == 2.5
monkeypatch.setenv("BOT_USER_QUICK", "quick")
monkeypatch.setenv("BOT_PASS_QUICK", "pw")
monkeypatch.setenv("BOT_USER_SMART", "smart")
monkeypatch.setenv("BOT_PASS_SMART", "pw")
settings = load_settings()
assert settings.matrix_bots[0].mode == "quick"
assert settings.matrix_bots[1].mode == "smart"
monkeypatch.delenv("BOT_USER_QUICK", raising=False)
monkeypatch.delenv("BOT_PASS_QUICK", raising=False)
monkeypatch.delenv("BOT_USER_SMART", raising=False)
monkeypatch.delenv("BOT_PASS_SMART", raising=False)
monkeypatch.setenv("BOT_USER", "atlasbot")
monkeypatch.setenv("BOT_PASS", "legacy")
legacy = _load_matrix_bots(("atlasbot",))
assert legacy and legacy[0].mode == ""


def test_knowledge_base_helpers(tmp_path: Path, caplog: pytest.LogCaptureFixture) -> None:
"""Read KB data, titles, paths, and prompt chunks from a temp catalog."""
base = tmp_path / "kb"
catalog = base / "catalog"
catalog.mkdir(parents=True)
(catalog / "atlas.json").write_text(
json.dumps({"cluster": "titan", "sources": [{"name": "docs"}], "extra": True}),
encoding="utf-8",
)
(catalog / "runbooks.json").write_text(json.dumps([{"title": "Fix", "path": "runbooks/fix.md"}]), encoding="utf-8")
(base / "notes.md").write_text("hello atlas", encoding="utf-8")
kb = KnowledgeBase(str(base))
assert "Cluster: titan." in kb.summary()
assert "Relevant runbooks" in kb.runbook_titles(limit=1)
assert kb.runbook_paths() == ["runbooks/fix.md"]
assert kb.chunk_lines(max_files=1, max_chars=200)
bad = base / "bad"
bad.mkdir()
(bad / "catalog").mkdir()
(bad / "catalog" / "atlas.json").write_text("{broken", encoding="utf-8")
broken = KnowledgeBase(str(bad))
with caplog.at_level(pylogging.WARNING):
assert broken.summary() == ""


def test_llm_client_helpers_and_fallback(monkeypatch: pytest.MonkeyPatch) -> None:
"""Exercise message building, JSON parsing, and fallback model logic."""
settings = replace(
build_test_settings(),
ollama_url="http://example",
ollama_model="base",
ollama_fallback_model="fallback",
ollama_retries=1,
)
client = LLMClient(settings)
assert client._endpoint().endswith("/api/chat")
assert build_messages("sys", "prompt", context="ctx")[1]["content"].startswith("Context")
assert parse_json("{\"ok\": true}", fallback={}) == {"ok": True}
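    # The stubbed httpx client below returns HTTP 404 for the primary model so the
    # client has to retry with the configured fallback model, which answers "hello".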
class FakeResponse:
def __init__(self, status_code: int, payload: dict[str, Any]):
self.status_code = status_code
self._payload = payload
def raise_for_status(self) -> None:
if self.status_code >= 400:
raise httpx.HTTPStatusError("bad", request=httpx.Request("POST", "http://example"), response=httpx.Response(self.status_code))
def json(self) -> dict[str, Any]:
return self._payload
class FakeAsyncClient:
def __init__(self, timeout: float | None = None):
self.timeout = timeout
async def __aenter__(self) -> FakeAsyncClient:
return self
async def __aexit__(self, *exc: object) -> None:
return None
async def post(
self,
_url: str,
*,
json: dict[str, Any],
headers: dict[str, str],
) -> FakeResponse:
model = json["model"]
assert headers["Content-Type"] == "application/json"
if model == "base":
return FakeResponse(404, {})
return FakeResponse(200, {"message": {"content": "hello"}})
monkeypatch.setattr(httpx, "AsyncClient", FakeAsyncClient)
reply = asyncio.run(client.chat([{"role": "user", "content": "hi"}], model=None, timeout_sec=1.0))
assert reply == "hello"


def test_logging_formatter_and_configure() -> None:
"""Format a structured record and install JSON logging on the root logger."""
formatter = JsonFormatter()
record = pylogging.LogRecord("atlasbot", pylogging.INFO, __file__, 1, "hello %s", ("world",), None)
record.extra = {"mode": "quick"}
payload = json.loads(formatter.format(record))
assert payload["message"] == "hello world"
assert payload["mode"] == "quick"
configure_logging("debug")
root = pylogging.getLogger()
assert root.handlers and isinstance(root.handlers[0].formatter, JsonFormatter)


def test_state_store_roundtrip_and_cleanup(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
"""Persist, read, and expire a claim payload."""
path = tmp_path / "state.db"
store = ClaimStore(str(path), 60)
store.set(
"conv",
{
"snapshot_id": "snap-1",
"claims": [{"id": "c1"}],
"snapshot": {"nodes": 1},
},
)
payload = store.get("conv")
assert payload and payload["snapshot_id"] == "snap-1"
assert payload["claims"] == [{"id": "c1"}]
assert _safe_json("{broken", []) == []
monkeypatch.setattr("atlasbot.state.store.time.monotonic", lambda: 1_000_000.0)
store.cleanup()
assert store.get("conv") is None


@pytest.mark.parametrize(
("question", "kind"),
[
("How many nodes are ready?", "nodes_ready"),
("How many cluster nodes do we have?", "nodes_count"),
("Which nodes are not rpi?", "nodes_non_rpi"),
("What hardware mix do we have?", "hardware_mix"),
("What is the hottest cpu?", "hottest_cpu"),
("What is the hottest ram?", "hottest_ram"),
("How many postgres connections?", "postgres_connections"),
("Which postgres db is hottest?", "postgres_hottest"),
("Which namespace has most pods?", "namespace_most_pods"),
("Is there pressure on the nodes?", "pressure_summary"),
],
)
def test_intent_router_patterns(question: str, kind: str) -> None:
"""Route the main cluster intents into deterministic matches."""
match = route_intent(question)
assert match and match.kind == kind


def test_api_routes_and_auth() -> None:
"""Exercise the HTTP wrapper, token check, and question extraction."""
settings = replace(build_test_settings(), internal_token="secret")
async def handler(
question: str,
mode: str,
_history: list[dict[str, str]] | None,
_conversation_id: str | None,
_snapshot_pin: bool | None,
) -> AnswerResult:
return AnswerResult(
reply=f"{question}:{mode}",
scores=AnswerScores(confidence=1, relevance=2, satisfaction=3, hallucination_risk="low"),
meta={"mode": mode},
)
api = Api(settings, handler)
client = TestClient(api.app)
assert client.get("/healthz").json() == {"ok": True}
assert client.post("/v1/answer", json={"question": "hi"}).status_code == 401
assert _extract_question(AnswerRequest(prompt=" hello ")).strip() == "hello"
response = client.post(
"/v1/answer",
headers={"X-Internal-Token": "secret"},
json={"prompt": "hello", "mode": "SMART", "conversation_id": "conv-1", "snapshot_pin": True},
)
assert response.status_code == 200
assert response.json()["reply"] == "hello:smart"


def test_main_and_queue_and_matrix(monkeypatch: pytest.MonkeyPatch) -> None:
"""Run the bootstrap path and queueing branch without external services."""
from atlasbot import main as main_mod
settings = replace(
build_test_settings(),
queue_enabled=True,
matrix_bots=(MatrixBotConfig("bot", "pw", ("bot",), "quick"),),
)
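    # Fakes stand in for the queue manager, Matrix bot, and uvicorn server so main()
    # can bootstrap without external services; FakeMatrixBot immediately invokes the
    # answer handler and checks that it receives the queued reply.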
class FakeQueue:
def __init__(self, settings: Settings, handler):
self.settings = settings
self.handler = handler
self.started = False
async def start(self) -> None:
self.started = True
async def submit(self, _payload: dict[str, Any]) -> dict[str, Any]:
return {
"reply": "queued",
"scores": {"confidence": 7, "relevance": 8, "satisfaction": 9, "hallucination_risk": "low"},
}
class FakeMatrixBot:
def __init__(self, _settings: Settings, _bot: MatrixBotConfig, _engine: Any, answer_handler):
self.answer_handler = answer_handler
async def run(self) -> None:
result = await self.answer_handler("what is atlas?", "quick", [], "room-1", None)
assert result.reply == "queued"
class FakeServer:
def __init__(self, config: Any):
self.config = config
async def serve(self) -> None:
return None
monkeypatch.setattr(main_mod, "load_settings", lambda: settings)
monkeypatch.setattr(main_mod, "configure_logging", lambda _level: None)
monkeypatch.setattr(main_mod, "QueueManager", FakeQueue)
monkeypatch.setattr(main_mod, "MatrixBot", FakeMatrixBot)
monkeypatch.setattr(main_mod.uvicorn, "Server", FakeServer)
asyncio.run(main_mod.main())
scores = result_scores({"scores": {"confidence": 10, "relevance": 20, "satisfaction": 30, "hallucination_risk": "low"}})
assert scores.confidence == 10


def test_matrix_and_queue_and_snapshot_helpers(monkeypatch: pytest.MonkeyPatch) -> None:
"""Drive the Matrix client, queue manager, and snapshot renderers."""
settings = replace(build_test_settings(), matrix_bots=())
bot_cfg = MatrixBotConfig("bot", "pw", ("bot",), "quick")
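    # Fake Matrix HTTP layer: login returns a token, the directory lookup resolves a
    # room alias to "!room", and sync returns an empty join map with a next_batch token.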
class FakeResp:
def __init__(self, payload: dict[str, Any], status_code: int = 200):
self._payload = payload
self.status_code = status_code
def raise_for_status(self) -> None:
if self.status_code >= 400:
raise httpx.HTTPError("bad")
def json(self) -> dict[str, Any]:
return self._payload
class FakeAsyncClient:
def __init__(self, timeout: float | None = None):
self.timeout = timeout
async def __aenter__(self) -> "FakeAsyncClient":
return self
async def __aexit__(self, *exc: object) -> None:
return None
async def post(self, url: str, json: dict[str, Any] | None = None, headers: dict[str, str] | None = None) -> FakeResp:
if "login" in url:
return FakeResp({"access_token": "tok"})
return FakeResp({})
async def get(self, url: str, headers: dict[str, str] | None = None, params: dict[str, Any] | None = None) -> FakeResp:
if "directory/room" in url:
return FakeResp({"room_id": "!room"})
return FakeResp({"next_batch": "n1", "rooms": {"join": {}}})
monkeypatch.setattr("atlasbot.matrix.bot.httpx.AsyncClient", FakeAsyncClient)
client = MatrixClient(settings, bot_cfg)
token = asyncio.run(client.login())
assert token == "tok"
assert asyncio.run(client.resolve_room(token)) == "!room"
asyncio.run(client.join_room(token, "!room"))
asyncio.run(client.send_message(token, "!room", "hello"))
assert asyncio.run(client.sync(token, None))["next_batch"] == "n1"
mode, cleaned = _extract_mode("atlas-smart hello", ("atlas",), "")
assert mode == "smart"
assert cleaned == "-smart hello"
assert _mode_timeout_sec(settings, "smart") == settings.smart_time_budget_sec
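    # Minimal NATS/JetStream fakes: submit() publishes a request and the reply
    # subscription immediately yields {"reply": "ok"}, while stream_info raises
    # NotFoundError so start() goes through the add_stream path.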
class FakeSub:
async def next_msg(self, timeout: float) -> Any:
return SimpleNamespace(data=json.dumps({"reply": "ok"}).encode(), reply="reply")
async def unsubscribe(self) -> None:
return None
class FakeMsg:
def __init__(self) -> None:
self.data = json.dumps({"payload": {"question": "q"}}).encode()
self.reply = "reply"
self.acked = False
async def ack(self) -> None:
self.acked = True
class FakeJS:
def __init__(self) -> None:
self.streams = []
async def stream_info(self, stream: str) -> None:
raise NotFoundError
async def add_stream(self, **kwargs: Any) -> None:
self.streams.append(kwargs)
async def publish(self, subject: str, data: bytes) -> None:
self.streams.append({"subject": subject, "data": data})
async def pull_subscribe(self, subject: str, durable: str) -> Any:
class Pull:
async def fetch(self, count: int, timeout: float) -> list[FakeMsg]:
raise RuntimeError("stop")
return Pull()
class FakeNATS:
def __init__(self) -> None:
self.published = []
async def connect(self, url: str) -> None:
return None
def jetstream(self) -> FakeJS:
return FakeJS()
def new_inbox(self) -> str:
return "inbox"
async def subscribe(self, reply: str) -> FakeSub:
return FakeSub()
async def publish(self, reply: str, data: bytes) -> None:
self.published.append((reply, data))
async def drain(self) -> None:
return None
monkeypatch.setattr("atlasbot.queue.nats.NATS", FakeNATS)
queue_settings = replace(settings, queue_enabled=True, nats_stream="atlasbot", nats_subject="atlasbot.requests")
qm = QueueManager(queue_settings, lambda payload: asyncio.sleep(0, result={"reply": "x"}))
asyncio.run(QueueManager(replace(queue_settings, queue_enabled=False), lambda payload: asyncio.sleep(0, result=payload)).start())
asyncio.run(qm.start())
assert asyncio.run(qm.submit({"mode": "quick"})) == {"reply": "ok"}
assert asyncio.run(qm.submit({"mode": "genius"})) == {"reply": "ok"}
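    # LoopPull drives one fetch failure, then one delivered message, then cancellation,
    # covering the worker loop's retry handling and shutdown path.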
class LoopPull:
def __init__(self) -> None:
self.calls = 0
async def fetch(self, count: int, timeout: float) -> list[FakeMsg]:
del count, timeout
self.calls += 1
if self.calls == 1:
raise RuntimeError("retry")
if self.calls == 2:
return [FakeMsg()]
raise asyncio.CancelledError
class LoopJS:
async def pull_subscribe(self, subject: str, durable: str) -> LoopPull:
del subject, durable
return LoopPull()
qm._js = LoopJS()
with pytest.raises(asyncio.CancelledError):
asyncio.run(qm._worker_loop())
asyncio.run(qm.stop())
snapshot = _rich_snapshot()
summary = core_a.build_summary(snapshot)
assert summary["nodes"]["total"] == 2
text = summary_text(snapshot)
assert "atlas_cluster:" in text
assert "hardware_usage_avg:" in text
assert "signals:" in text
assert "node_profiles:" in text
assert "flux:" in text or "flux" in text
lines: list[str] = []
format_a._append_nodes(lines, summary)
format_a._append_hardware(lines, summary)
format_a._append_hardware_groups(lines, summary)
format_a._append_node_ages(lines, summary)
format_a._append_node_taints(lines, summary)
format_a._append_node_facts(lines, summary)
format_a._append_pressure(lines, summary)
format_a._append_pods(lines, summary)
format_a._append_capacity(lines, summary)
format_a._append_namespace_pods(lines, summary)
format_a._append_namespace_nodes(lines, summary)
format_a._append_node_pods(lines, summary)
format_a._append_pod_issues(lines, summary)
format_a._append_workload_health(lines, summary)
format_a._append_node_usage_stats(lines, summary)
format_a._append_events(lines, summary)
format_a._append_pvc_usage(lines, summary)
format_a._append_root_disk_headroom(lines, summary)
format_b._append_longhorn(lines, summary)
format_b._append_namespace_usage(lines, summary)
format_b._append_namespace_requests(lines, summary)
format_b._append_namespace_io_net(lines, summary)
format_b._append_pod_usage(lines, summary)
format_b._append_restarts(lines, summary)
format_b._append_job_failures(lines, summary)
format_b._append_jobs(lines, summary)
format_b._append_postgres(lines, summary)
format_b._append_hottest(lines, summary)
format_b._append_workloads(lines, summary)
format_b._append_topology(lines, summary)
format_b._append_flux(lines, summary)
format_c._append_signals(lines, summary)
format_c._append_profiles(lines, summary)
format_c._append_units_windows(lines, summary)
format_c._append_node_load_summary(lines, summary)
format_c._append_hardware_usage(lines, summary)
format_c._append_cluster_watchlist(lines, summary)
format_c._append_baseline_deltas(lines, summary)
format_c._append_pod_issue_summary(lines, summary)
format_c._append_workloads_by_namespace(lines, summary)
format_c._append_lexicon(lines, summary)
format_c._append_cross_stats(lines, summary)
assert any(line.startswith("nodes:") for line in lines)
assert any(line.startswith("longhorn:") for line in lines)
assert any(line.startswith("signals:") for line in lines)
core_b_summary = core_b._build_hottest(snapshot["metrics"])
assert core_b_summary["hottest"]["cpu"]["node"] == "titan-01"


def test_matrix_bot_sync_and_heartbeat() -> None:
"""Drive the Matrix bot heartbeat and sync handlers with a fake client."""
settings = replace(build_test_settings(), thinking_interval_sec=0.001)
bot_cfg = MatrixBotConfig("bot", "pw", ("atlas",), "quick")
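    # FakeClient records outgoing messages; its sync() payload contains one user message
    # that mentions the bot and one message sent by the bot itself, which must be ignored.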
class FakeClient:
def __init__(self) -> None:
self.sent: list[str] = []
async def login(self) -> str:
return "tok"
async def resolve_room(self, token: str) -> str:
return "!room"
async def join_room(self, token: str, room_id: str) -> None:
return None
async def send_message(self, token: str, room_id: str, text: str) -> None:
self.sent.append(text)
async def sync(self, token: str, since: str | None) -> dict[str, Any]:
return {
"next_batch": "n1",
"rooms": {
"join": {
"!room": {
"timeline": {
"events": [
{"type": "m.room.message", "sender": "user", "content": {"body": "atlas quick what is atlas?"}},
{"type": "m.room.message", "sender": "bot", "content": {"body": "ignored"}},
]
}
}
}
},
}
async def answer_handler(question: str, mode: str, history, conversation_id, observer):
if observer:
observer("stage", "working")
return AnswerResult(
reply="Atlas has 22 nodes",
scores=AnswerScores(confidence=1, relevance=2, satisfaction=3, hallucination_risk="low"),
meta={"mode": mode},
)
bot = MatrixBot(settings, bot_cfg, SimpleNamespace(answer=lambda *args, **kwargs: None), answer_handler)
bot._client = FakeClient()
asyncio.run(bot._answer_with_heartbeat("tok", "!room", "What is Atlas?", "quick"))
payload = {
"rooms": {
"join": {
"!room": {
"timeline": {
"events": [
{"type": "m.room.message", "sender": "user", "content": {"body": "atlas smart hello"}}
]
}
}
}
}
}
asyncio.run(bot._handle_sync("tok", payload))
assert bot._client.sent


def test_answerer_helper_coverage_smoke() -> None:
"""Exercise the split answerer helpers with representative inputs."""
settings = build_test_settings()
plan = answer_common._mode_plan(settings, "smart")
fast_plan = replace(plan, parallelism=2, score_retries=2, chunk_group=1, chunk_top=2, max_subquestions=2)
snapshot = _rich_snapshot()
summary = core_a.build_summary(snapshot)
summary_lines = answer_spine._summary_lines(snapshot)
rich_lines = [
"nodes_total: 2",
"nodes_ready: 1",
"cluster_name: atlas",
"pods_total: 3",
"cpu: 90",
"ram: 80",
"runbooks/fix.md",
]
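    # ScriptedLLM answers each pipeline stage with canned JSON keyed by the call tag,
    # so every retrieval and post-processing helper receives a parseable reply.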
class ScriptedLLM:
async def __call__(
self,
_system: str,
_prompt: str,
*,
context: str | None = None,
model: str | None = None,
tag: str = "",
) -> str:
responses = {
"chunk_score": '[{"id":"c1","score":1},{"id":"c2","score":2}]',
"chunk_select": '{"selected_index": 1}',
"metric_keys": '{"keys":["nodes_total","pods_total"]}',
"metric_keys_validate": '{"missing":["pods_total"]}',
"fact_types": '{"fact_types":["nodes_total","pods_total"]}',
"fact_types_select": '{"best": 1}',
"signals": '{"signals":["cpu","ram"]}',
"signals_select": '{"best": 1}',
"chunk_scan": '{"lines":["cpu: 90"]}',
"chunk_scan_select": '{"best": 1}',
"fact_prune": '{"lines":["cpu: 90"]}',
"fact_prune_select": '{"best": 1}',
"fact_select": '{"lines":["cpu: 90"]}',
"fact_select_best": '{"best": 1}',
"contradiction": '{"use_facts": false, "confidence": 99}',
"insight_guard": '{"ok": false}',
"insight_fix": "fixed insight",
}
return responses.get(tag, "{}")
scripted_llm = ScriptedLLM()
chunks = [
{"id": "c1", "text": "nodes_total: 2\npods_total: 3", "summary": "nodes"},
{"id": "c2", "text": "cpu: 90\nram: 80", "summary": "cpu"},
]
groups = answer_common._build_chunk_groups(chunks, 1)
scores = asyncio.run(answer_common._score_chunks(scripted_llm, chunks, "How many nodes?", ["nodes"], fast_plan))
serial_ctx = ScoreContext(question="How many nodes?", sub_questions=["nodes"], retries=2, parallelism=1, select_best=True, fast_model="fast")
serial_scores = asyncio.run(answer_common._score_groups_serial(scripted_llm, groups, serial_ctx))
parallel_ctx = ScoreContext(question="How many nodes?", sub_questions=["nodes"], retries=2, parallelism=2, select_best=True, fast_model="fast")
parallel_scores = asyncio.run(answer_common._score_groups_parallel(scripted_llm, groups, parallel_ctx))
best_run = asyncio.run(answer_common._select_best_score_run(scripted_llm, groups[0], [{"c1": 1.0}, {"c1": 2.0}], serial_ctx))
selected = answer_common._select_chunks(chunks, {"c1": 0.2, "c2": 0.9}, replace(fast_plan, chunk_top=2), ["cpu"], ["c2"])
assert scores and serial_scores and parallel_scores and best_run and selected
assert answer_common._strip_followup_meta("The draft is correct. Atlas is healthy.") == "Atlas is healthy."
assert answer_common._llm_call_limit(settings, "smart") == settings.smart_llm_calls_max
assert answer_common._mode_time_budget(settings, "quick") == settings.quick_time_budget_sec
assert answer_common._select_subquestions([], "fallback", 2) == ["fallback"]
assert answer_common._chunk_lines(["a", "b", "c"], 2)
assert answer_common._raw_snapshot_chunks(snapshot)
assert answer_common._format_runbooks(["runbooks/fix.md"])
assert answer_common._keyword_hits([{"text": "cpu usage"}], {"text": "cpu usage"}, ["cpu"])
assert answer_factsheet._factsheet_kb_chars("quick", 10)
assert answer_factsheet._factsheet_line_limit("smart") >= 1
assert answer_factsheet._factsheet_instruction("quick")
assert answer_factsheet._factsheet_model("genius", fast_plan) == fast_plan.model
assert answer_factsheet._is_plain_math_question("2+2")
assert answer_factsheet._quick_fact_sheet_lines("How many nodes?", rich_lines, ["kb"], limit=4)
assert answer_factsheet._quick_fact_sheet_text(["nodes_total: 2"])
assert answer_factsheet._quick_fact_sheet_heuristic_answer("How many ready nodes?", ["nodes_total:2,ready:1,not_ready:0"])
assert answer_factsheet._json_excerpt(summary)
assert answer_post._strip_unknown_entities("node titan-99 is hot. Atlas is healthy.", ["titan-99"], []) == "Atlas is healthy."
assert answer_post._needs_evidence_guard("node titan-99 is hot.", ["node titan-01"]) is True
contradiction = asyncio.run(
answer_post._contradiction_decision(
ContradictionContext(scripted_llm, "why", "draft", ["fact"], fast_plan),
attempts=2,
)
)
assert contradiction["confidence"] == 99
assert answer_post._format_direct_metric_line("nodes_total: 2")
assert answer_post._global_facts(["nodes_total: 2", "other: 1"])
assert answer_post._has_keyword_overlap(["cpu usage"], ["cpu"])
assert answer_post._merge_tokens(["a"], ["b"], ["c"]) == ["a", "b", "c"]
assert answer_post._extract_question_tokens("How many nodes?")
assert answer_post._expand_tokens(["nodes_total"])
assert answer_post._ensure_token_coverage(["nodes_total: 2"], ["pods"], ["pods_total: 3"], max_add=1)
assert answer_post._best_keyword_line(["cpu: 90"], ["cpu"]) == "cpu: 90"
assert answer_post._line_starting_with(["cpu: 90"], "cpu")
assert answer_post._non_rpi_nodes({"hardware_by_node": {"titan-01": "rpi5", "titan-02": "amd64"}}) == {"amd64": ["titan-02"]}
assert answer_post._format_hardware_groups({"amd64": ["titan-02"]}, "Nodes")
assert answer_post._lexicon_context({"lexicon": {"terms": [{"term": "atlas", "meaning": "cluster"}], "aliases": {"bot": "atlas"}}})
assert answer_post._parse_json_block("{\"ok\": true}", fallback={}) == {"ok": True}
assert answer_post._parse_json_list("[{\"ok\": true}]") == [{"ok": True}]
assert answer_post._scores_from_json({"confidence": "1", "relevance": 2, "satisfaction": 3, "hallucination_risk": "low"}).confidence == 1
assert answer_post._coerce_int("4", 1) == 4
assert answer_post._default_scores().hallucination_risk == "medium"
assert answer_post._style_hint({"answer_style": "insightful"}) == "insightful"
assert answer_post._needs_evidence_fix("we don't know", {"needs_snapshot": True}) is True
assert answer_post._should_use_insight_guard({"answer_style": "insightful"})
insight_inputs = InsightGuardInput(
question="why",
reply="Insightful reply",
classify={"answer_style": "insightful", "question_type": "open_ended"},
context="",
plan=fast_plan,
call_llm=scripted_llm,
facts=["fact"],
)
assert asyncio.run(answer_post._apply_insight_guard(insight_inputs))
assert answer_post_ext._reply_matches_metric_facts("nodes_total: 2", ["nodes_total: 2"])
assert answer_post_ext._needs_dedup("one. one. one.")
answer_post_ext._needs_focus_fix("how many nodes", "For more details. Additional context.", {"question_type": "metric"})
assert answer_post_ext._extract_keywords("How many nodes?", "How many nodes?", ["pods"], ["nodes"])
assert answer_post_ext._allowed_nodes(summary)
assert answer_post_ext._allowed_namespaces(summary)
assert answer_post_ext._find_unknown_nodes("node titan-99", ["titan-01"]) == ["titan-99"]
assert answer_post_ext._find_unknown_namespaces("namespace rogue", ["synapse"]) == ["rogue"]
assert answer_post_ext._needs_runbook_fix("see runbooks/bad.md", ["runbooks/fix.md"])
assert answer_post_ext._needs_runbook_reference("where is the runbook", ["runbooks/fix.md"], "")
assert answer_post_ext._best_runbook_match("runbooks/fx.md", ["runbooks/fix.md"])
assert answer_post_ext._resolve_path({"a": [{"b": 3}]}, "a[0].b") == 3
assert answer_post_ext._snapshot_id({"snapshot_id": "snap-1"}) == "snap-1"
assert answer_post_ext._claims_to_payload([ClaimItem(id="c1", claim="atlas", evidence=[EvidenceItem(path="a.b", reason="r", value_at_claim=1)])])
assert answer_post_ext._state_from_payload({"updated_at": 1.0, "claims": [{"id": "c1", "claim": "atlas", "evidence": [{"path": "a.b", "reason": "r"}]}]})
assert answer_retrieval._metric_ctx_values({"summary_lines": summary_lines, "question": "cpu", "sub_questions": ["pods"], "keywords": ["cpu"], "keyword_tokens": ["cpu"]})
assert answer_retrieval._extract_metric_keys(rich_lines)
assert answer_retrieval._token_variants({"nodes"})
assert answer_retrieval._parse_key_list("{\"keys\":[\"nodes_total\"]}", ["nodes_total"], 1) == ["nodes_total"]
assert answer_retrieval._chunk_ids_for_keys([{"id": "c1", "text": "nodes_total: 2"}], ["nodes_total"]) == ["c1"]
assert answer_retrieval._filter_metric_keys(["nodes_total"], {"nodes"})
assert answer_retrieval._metric_key_overlap(["nodes_total"], {"nodes"})
assert answer_retrieval._lines_for_metric_keys(rich_lines, ["nodes_total"])
assert answer_retrieval._merge_metric_keys(["nodes_total"], ["pods_total"], 3)
assert answer_retrieval._merge_fact_lines(["a"], ["b"])
assert answer_retrieval._expand_hottest_line("hottest: cpu=titan-01 (90)")
answer_retrieval._has_token("hottest_cpu: titan-01=90", "cpu")
answer_retrieval._hotspot_evidence(snapshot)
assert asyncio.run(answer_retrieval._select_metric_chunks(scripted_llm, {"summary_lines": summary_lines, "question": "cpu", "sub_questions": ["pods"], "keywords": ["cpu"], "keyword_tokens": ["cpu"]}, chunks, fast_plan))
asyncio.run(answer_retrieval._validate_metric_keys(scripted_llm, {"question": "cpu", "sub_questions": ["pods"], "selected": ["nodes_total"]}, ["nodes_total"], fast_plan))
assert asyncio.run(answer_retrieval._gather_limited([asyncio.sleep(0, result=1), asyncio.sleep(0, result=2)], 1))
assert answer_retrieval_ext._metric_key_tokens(summary_lines)
asyncio.run(answer_retrieval_ext._select_best_candidate(scripted_llm, "question", ["a", "b"], fast_plan, "chunk_select"))
assert answer_retrieval_ext._dedupe_lines(["x", "x", "y"])
assert answer_retrieval_ext._collect_fact_candidates(chunks, 4)
assert asyncio.run(answer_retrieval_ext._select_best_list(scripted_llm, "question", [["a"], ["b"]], fast_plan, "chunk_select"))
assert asyncio.run(answer_retrieval_ext._extract_fact_types(scripted_llm, "question", ["cpu"], fast_plan))
assert asyncio.run(answer_retrieval_ext._derive_signals(scripted_llm, "question", ["cpu"], fast_plan))
assert asyncio.run(answer_retrieval_ext._scan_chunk_for_signals(scripted_llm, "question", ["cpu"], ["cpu: 90"], fast_plan))
assert asyncio.run(answer_retrieval_ext._prune_metric_candidates(scripted_llm, "question", ["cpu: 90"], fast_plan, 1))
assert asyncio.run(answer_retrieval_ext._select_fact_lines(scripted_llm, "question", ["cpu: 90"], fast_plan, 1))
assert answer_spine._join_context(["a", "", "b"]) == "a\nb"
assert answer_spine._format_history([{"q": "q", "a": "a"}])
assert answer_spine._summary_lines(snapshot)
assert answer_spine._line_starting_with(rich_lines, "nodes_total")
assert answer_spine._spine_lines(rich_lines)
spine_map: dict[str, str] = {}
answer_spine._spine_nodes(rich_lines, spine_map)
answer_spine._spine_hardware(rich_lines, spine_map)
answer_spine._spine_hottest(rich_lines, spine_map)
answer_spine._spine_postgres(rich_lines, spine_map)
answer_spine._spine_namespaces(rich_lines, spine_map)
answer_spine._spine_pressure(rich_lines, spine_map)
assert answer_spine._parse_group_line("hardware: rpi5=(titan-01)")
assert answer_spine._parse_hottest("hottest: cpu=titan-01 (90)", "cpu")
assert answer_spine._spine_answer(route_intent("How many nodes?"), "nodes_total: 2")
assert answer_spine._spine_nodes_answer("nodes_total: 2")
assert answer_spine._spine_non_rpi_answer("amd64 (titan-02)")
assert answer_spine._spine_hardware_answer("hardware: amd64=1")
assert answer_spine._spine_hottest_answer("hottest_cpu", "hottest: cpu=titan-01 (90)")
assert answer_spine._spine_postgres_answer("postgres_connections: used=5")
assert answer_spine._spine_namespace_answer("namespace_most_pods: synapse=5")
assert answer_spine._spine_pressure_answer("pressure_nodes: titan-02")
assert answer_spine._spine_from_summary(summary)
assert answer_spine._spine_from_counts(summary)
assert answer_spine._spine_from_hardware(summary)
assert answer_spine._spine_from_hottest(summary)
assert answer_spine._spine_from_postgres(summary)
assert answer_spine._spine_from_namespace_pods(summary)
assert answer_spine._spine_from_pressure(summary)
assert answer_spine._spine_fallback(route_intent("How many nodes?"), rich_lines)


def test_snapshot_builder_coverage_smoke() -> None:
"""Exercise the split snapshot render helpers end to end."""
snapshot = _rich_snapshot()
summary = core_a.build_summary(snapshot)
text = summary_text(snapshot)
assert summary and text
lines: list[str] = []
format_a._format_float(1.5)
format_a._format_rate_bytes(2048)
format_a._format_bytes(2048)
format_a._format_kv_map({"a": 1, "b": 2})
format_a._format_names(["b", "a"])
format_a._append_nodes(lines, summary)
format_a._append_hardware(lines, summary)
format_a._append_hardware_groups(lines, summary)
format_a._append_node_ages(lines, summary)
format_a._append_node_taints(lines, summary)
format_a._append_node_facts(lines, summary)
format_a._append_pressure(lines, summary)
format_a._append_pods(lines, summary)
format_a._append_capacity(lines, summary)
format_a._append_namespace_pods(lines, summary)
format_a._append_namespace_nodes(lines, summary)
format_a._append_node_pods(lines, summary)
format_a._append_pod_issues(lines, summary)
format_a._format_pod_issue_counts(summary["pod_issues"])
format_a._format_pod_issue_top(summary["pod_issues"])
format_a._format_pod_pending_oldest(summary["pod_issues"])
format_a._format_pod_waiting_reasons(summary["pod_issues"])
format_a._format_pod_pending_over_15m(summary["pod_issues"])
format_a._append_workload_health(lines, summary)
format_a._append_node_usage_stats(lines, summary)
format_a._append_events(lines, summary)
format_a._append_pvc_usage(lines, summary)
format_a._append_root_disk_headroom(lines, summary)
format_b._append_longhorn(lines, summary)
format_b._append_namespace_usage(lines, summary)
format_b._append_namespace_requests(lines, summary)
format_b._append_namespace_io_net(lines, summary)
format_b._append_pod_usage(lines, summary)
format_b._append_restarts(lines, summary)
format_b._append_job_failures(lines, summary)
format_b._append_jobs(lines, summary)
format_b._format_jobs_totals(summary["jobs"])
format_b._format_jobs_failing(summary["jobs"])
format_b._format_jobs_active_oldest(summary["jobs"])
format_b._append_postgres(lines, summary)
format_b._append_hottest(lines, summary)
format_b._append_workloads(lines, summary)
format_b._append_topology(lines, summary)
format_b._append_flux(lines, summary)
format_c._append_signals(lines, summary)
format_c._append_profiles(lines, summary)
format_c._append_units_windows(lines, summary)
format_c._append_node_load_summary(lines, summary)
format_c._append_hardware_usage(lines, summary)
format_c._append_cluster_watchlist(lines, summary)
format_c._append_baseline_deltas(lines, summary)
format_c._append_pod_issue_summary(lines, summary)
format_c._reason_line(summary["pod_issue_summary"]["waiting_reasons_top"], "waiting")
format_c._append_namespace_issue_lines(lines, summary["pod_issue_summary"]["namespace_issue_top"])
format_c._build_cluster_watchlist(summary)
format_c._capacity_ratio_parts(summary["namespace_capacity"], "cpu", "cpu", "mem")
format_c._capacity_headroom_parts(summary["namespace_capacity"])
format_c._append_namespace_capacity_summary(lines, summary)
format_c._append_workloads_by_namespace(lines, summary)
format_c._append_lexicon(lines, summary)
format_c._append_cross_stats(lines, summary)
assert lines


def test_answerer_helper_edge_branches(monkeypatch: pytest.MonkeyPatch) -> None:
"""Cover alternate branches in the split answerer helper modules."""
settings = replace(build_test_settings(), debug_pipeline=True)
logged: list[tuple[str, dict[str, Any]]] = []
monkeypatch.setattr(answer_common, "log", SimpleNamespace(info=lambda message, extra: logged.append((message, extra))))
meta = answer_common._build_meta("custom", 1, 2, True, False, 3.0, {"kind": "x"}, {"cmd": "echo"}, 10.0)
assert meta["llm_limit_hit"] is True
answer_common._debug_pipeline_log(settings, "edge", {"ok": True})
assert logged and logged[0][0] == "atlasbot_debug"
assert answer_common._mode_plan(settings, "genius").drafts == 2
assert answer_common._mode_plan(settings, "custom").use_tool is False
assert answer_common._select_subquestions([None, {"question": "", "priority": "x"}], "fallback", 2) == ["fallback"]
assert answer_common._chunk_lines([], 3) == []
assert answer_common._raw_snapshot_chunks({"ok": 1, "bad": {1, 2}})
assert answer_common._build_chunk_groups([{"id": "c1", "summary": "a"}], 2) == [[{"id": "c1", "summary": "a"}]]
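    # score_call returns malformed ids/scores and an out-of-range selection index to
    # hit the defensive parsing branches in the chunk-scoring helpers.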
async def score_call(_system: str, _prompt: str, *, model: str | None = None, tag: str = "", **_: Any) -> str:
if tag == "chunk_score":
return '[{"id":"c1","score":"bad"},{"id":"","score":5},"bad"]'
if tag == "chunk_select":
return '{"selected_index": 99}'
raise AssertionError(tag)
groups = [[{"id": "c1", "summary": "a"}]]
ctx = ScoreContext(question="q", sub_questions=[], retries=1, parallelism=1, select_best=True, fast_model="fast")
assert asyncio.run(answer_common._score_chunk_group(score_call, groups[0], "q", [])) == {"c1": 0.0}
assert asyncio.run(answer_common._score_chunk_group_run(score_call, 0, groups[0], "q", [])) == (0, {"c1": 0.0})
assert answer_common._merge_score_runs([]) == {}
assert asyncio.run(answer_common._select_best_score_run(score_call, groups[0], [{"c1": 1.0}, {"c1": 2.0}], ctx)) == {"c1": 1.0}
assert answer_common._keyword_hits([{"text": "cpu"}, {"text": "ram"}], {"text": "cpu"}, None) == []
assert answer_common._select_chunks([], {}, answer_common._mode_plan(settings, "custom")) == []
selected = [{"id": "c0", "text": "a"}]
assert answer_common._append_must_chunks([{"id": "c0"}, {"id": "c1"}], selected, ["c1"], 3) is False
assert answer_common._append_keyword_chunks([{"id": "c0", "text": "cpu"}], selected, ["cpu"], 2) is False
answer_common._append_ranked_chunks([{"id": "c1"}], selected, 2)
assert answer_common._format_runbooks([]) == ""
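    # retrieval_call mixes valid and invalid entries (non-string fact types, empty
    # signals, out-of-range "best" indexes) to exercise filtering and fallback logic.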
async def retrieval_call(_system: str, _prompt: str, *, model: str | None = None, tag: str = "", **_: Any) -> str:
responses = {
"fact_types": '{"fact_types":["cpu", 5, "cpu"]}',
"fact_types_select": '{"best": 99}',
"signals": '{"signals":["cpu", "", "ram"]}',
"signals_select": '{"best": 99}',
"chunk_scan": '{"lines":["cpu: 1", "missing: 2"]}',
"chunk_scan_select": '{"best": 99}',
"fact_prune": '{"lines":["cpu: 1", "ram: 2"]}',
"fact_prune_select": '{"best": 99}',
"fact_select": '{"lines":["cpu: 1"]}',
"fact_select_best": '{"best": 99}',
}
return responses[tag]
fast_plan = replace(answer_common._mode_plan(settings, "smart"), metric_retries=2)
assert answer_retrieval_ext._parse_json_block("plain", fallback={"ok": True}) == {"ok": True}
assert "nodes" in answer_retrieval_ext._metric_key_tokens(["nodes_total: 2"])
assert answer_retrieval_ext._metric_key_tokens([123, "invalid", ": empty"]) == set()
assert asyncio.run(answer_retrieval_ext._select_best_candidate(retrieval_call, "q", ["one"], fast_plan, "fact_types_select")) == 0
assert answer_retrieval_ext._dedupe_lines(["lexicon_term: a", "units: x", "cpu", "cpu"], limit=1) == ["cpu"]
assert answer_retrieval_ext._collect_fact_candidates([{"text": "cpu: 1\nram: 2"}, {"bad": True}], 3) == ["cpu: 1", "ram: 2"]
assert asyncio.run(answer_retrieval_ext._select_best_list(retrieval_call, "q", [[], ["cpu"]], fast_plan, "fact_types_select")) == ["cpu"]
assert asyncio.run(answer_retrieval_ext._extract_fact_types(retrieval_call, "q", [], fast_plan)) == ["cpu", "5"]
async def retrieval_bad(_system: str, _prompt: str, *, model: str | None = None, tag: str = "", **_: Any) -> str:
del _system, _prompt, model, tag
return '{"signals":"bad","fact_types":"bad","lines":"bad"}'
assert asyncio.run(answer_retrieval_ext._extract_fact_types(retrieval_bad, "q", [], fast_plan)) == []
assert asyncio.run(answer_retrieval_ext._derive_signals(retrieval_call, "q", [], fast_plan)) == []
assert asyncio.run(answer_retrieval_ext._derive_signals(retrieval_bad, "q", ["cpu"], fast_plan)) == []
assert asyncio.run(answer_retrieval_ext._derive_signals(retrieval_call, "q", ["cpu"], fast_plan)) == ["cpu", "ram"]
assert asyncio.run(answer_retrieval_ext._scan_chunk_for_signals(retrieval_call, "q", [], ["cpu: 1"], fast_plan)) == []
assert asyncio.run(answer_retrieval_ext._scan_chunk_for_signals(retrieval_bad, "q", ["cpu"], ["cpu: 1"], fast_plan)) == []
assert asyncio.run(answer_retrieval_ext._scan_chunk_for_signals(retrieval_call, "q", ["cpu"], ["cpu: 1", "ram: 2"], fast_plan)) == ["cpu: 1"]
assert asyncio.run(answer_retrieval_ext._prune_metric_candidates(retrieval_call, "q", [], fast_plan, 2)) == []
assert asyncio.run(answer_retrieval_ext._prune_metric_candidates(retrieval_bad, "q", ["cpu: 1"], fast_plan, 2)) == []
assert asyncio.run(answer_retrieval_ext._prune_metric_candidates(retrieval_call, "q", ["cpu: 1", "ram: 2"], fast_plan, 2)) == ["cpu: 1", "ram: 2"]
assert asyncio.run(answer_retrieval_ext._select_fact_lines(retrieval_call, "q", [], fast_plan, 1)) == []
assert asyncio.run(answer_retrieval_ext._select_fact_lines(retrieval_bad, "q", ["cpu: 1"], fast_plan, 1)) == []
assert asyncio.run(answer_retrieval_ext._select_fact_lines(retrieval_call, "q", ["cpu: 1", "ram: 2"], fast_plan, 1)) == ["cpu: 1"]
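    # post_call scripts the contradiction and insight-guard stages with fixed JSON replies.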
async def post_call(_system: str, _prompt: str, *, model: str | None = None, tag: str = "", **_: Any) -> str:
if tag == "contradiction":
return '{"use_facts": false, "confidence": 70}'
if tag == "insight_guard":
return '{"ok": true}'
if tag == "insight_fix":
return "fixed"
raise AssertionError(tag)
assert answer_post._strip_unknown_entities("", ["titan-99"], []) == ""
assert answer_post._strip_unknown_entities("Atlas is healthy.", [], []) == "Atlas is healthy."
assert answer_post._needs_evidence_guard("", ["fact"]) is False
assert answer_post._needs_evidence_guard("pressure is high", ["pressure"]) is False
contradiction = asyncio.run(
answer_post._contradiction_decision(
ContradictionContext(post_call, "q", "draft", ["fact"], fast_plan),
attempts=2,
)
)
assert contradiction["confidence"] == 70
assert answer_post._format_direct_metric_line("broken line") == "broken line"
assert answer_post._global_facts([]) == []
assert answer_post._has_keyword_overlap([], ["cpu"]) is False
assert answer_post._extract_question_tokens("") == []
assert answer_post._expand_tokens([]) == []
assert answer_post._ensure_token_coverage([], ["cpu"], ["cpu: 1"]) == []
assert answer_post._best_keyword_line(["ram: 1"], ["cpu"]) is None
assert answer_post._line_starting_with([], "cpu") is None
assert answer_post._non_rpi_nodes({"hardware_by_node": None}) == {}
assert answer_post._format_hardware_groups({}, "Nodes") == ""
assert answer_post._lexicon_context({"lexicon": []}) == ""
assert answer_post._parse_json_list("nope") == []
assert answer_post._scores_from_json({}).confidence == 60
assert answer_post._coerce_int("bad", 5) == 5
assert answer_post._style_hint({"question_type": "planning"}) == "insightful"
assert answer_post._needs_evidence_fix("", {"needs_snapshot": True}) is False
assert answer_post._should_use_insight_guard({"question_type": "planning"}) is True
insight = InsightGuardInput(
question="q",
reply="reply",
classify={"question_type": "planning"},
context="ctx",
plan=fast_plan,
call_llm=post_call,
facts=[],
)
assert asyncio.run(answer_post._apply_insight_guard(insight)) == "reply"
assert answer_post_ext._reply_matches_metric_facts("no numbers", ["cpu: 1"]) is False
assert answer_post_ext._needs_dedup("short.") is False
assert answer_post_ext._needs_focus_fix("why", "direct", {"question_type": "open_ended"}) is False
assert answer_post_ext._extract_keywords("Q", "Q", [], []) == []
assert answer_post_ext._allowed_nodes({}) == []
assert answer_post_ext._allowed_namespaces({}) == []
assert answer_post_ext._find_unknown_nodes("titan-01", ["titan-01"]) == []
assert answer_post_ext._find_unknown_namespaces("namespace synapse", ["synapse"]) == []
assert answer_post_ext._needs_runbook_fix("runbooks/fix.md", ["runbooks/fix.md"]) is False
assert answer_post_ext._needs_runbook_reference("status", ["runbooks/fix.md"], "ok") is False
assert answer_post_ext._best_runbook_match("x", []) is None
assert answer_post_ext._resolve_path({"a": []}, "a[1].b") is None
assert answer_post_ext._snapshot_id({"snapshot": {"id": "x"}}) is None
assert answer_post_ext._claims_to_payload([]) == []
assert answer_post_ext._state_from_payload({}) is None
assert answer_factsheet._factsheet_instruction("smart")
assert answer_factsheet._factsheet_model("quick", fast_plan) == fast_plan.fast_model
assert answer_factsheet._is_plain_math_question("2 + 2") is True
assert answer_factsheet._quick_fact_sheet_lines("where is runbook", ["runbooks/fix.md", "cpu: 1"], [], limit=1)
assert answer_factsheet._quick_fact_sheet_text([]) == "Fact Sheet:\n- No snapshot facts available."
assert "prefer rpi5 workers first" in answer_factsheet._quick_fact_sheet_heuristic_answer(
"what is the node placement last resort",
["runbooks/fix.md"],
)
assert "1 ready nodes out of 2 total" in answer_factsheet._quick_fact_sheet_heuristic_answer(
"how many ready nodes are there",
["nodes_total:2,ready:1,not_ready:1"],
)
assert answer_spine._join_context([]) == ""
assert answer_spine._format_history([]) == ""
assert answer_spine._line_starting_with([], "cpu") is None
assert answer_spine._spine_lines([]) == {}
extra_spine: dict[str, str] = {}
answer_spine._spine_nodes(["nodes: total=2 ready=1 not_ready=1"], extra_spine)
answer_spine._spine_hardware(["hardware: amd64=1 (titan-02)"], extra_spine)
answer_spine._spine_hottest(["hottest: cpu=titan-01 [rpi5] (90%)"], extra_spine)
answer_spine._spine_postgres(["postgres_connections_total: used=5, max=10"], extra_spine)
answer_spine._spine_namespaces(["namespace_pods_top: synapse=5"], extra_spine)
answer_spine._spine_pressure(["pressure: nodes=0"], extra_spine)
assert answer_spine._parse_group_line("invalid") == {}
assert answer_spine._parse_hottest("broken", "cpu") is None
assert answer_spine._spine_nodes_answer("nodes: total=2 ready=1 not_ready=1")
assert answer_spine._spine_pressure_answer("pressure: nodes=0")


def test_runtime_and_snapshot_edge_branches(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
"""Cover runtime wrappers and sparse snapshot builder branches."""
sparse_summary = {
"node_pods": [
{"node": "titan-01", "pods_total": "7", "namespaces_top": [("synapse", 3), ("vault", 2)]},
{"node": "titan-02", "pods_total": "x"},
],
"pod_issues": {
"counts": {"Failed": 1},
"items": [{"namespace": "synapse", "pod": "matrix", "phase": "Pending", "restarts": 1}],
"pending_oldest": [{"namespace": "synapse", "pod": "matrix", "age_hours": 2, "reason": "Waiting"}],
"waiting_reasons": {"ImagePullBackOff": 2},
"pending_over_15m": "2",
},
"workloads_health": {
"deployments": {"not_ready": 1},
"statefulsets": {"not_ready": 0},
"daemonsets": {"not_ready": 1},
},
"topology": {
"nodes": [{"node": "titan-01", "workloads_top": [("matrix", 3)]}],
"workloads": [{"namespace": "synapse", "workload": "matrix", "nodes_top": [("titan-01", 3)]}],
},
"flux": {
"not_ready": 2,
"items": [{"namespace": "flux-system", "name": "kustomization", "reason": "waiting", "suspended": True}],
},
"namespace_capacity_summary": {
"cpu_ratio_top": [{"namespace": "synapse", "cpu_usage_ratio": 0.8, "cpu_usage": 4, "cpu_requests": 5}],
"mem_ratio_top": [{"namespace": "synapse", "mem_usage_ratio": 0.7, "mem_usage": 7, "mem_requests": 10}],
"cpu_headroom_low": [{"namespace": "synapse", "headroom": 0.2}],
"mem_headroom_low": [{"namespace": "synapse", "headroom": 0.3}],
"cpu_overcommitted": 1,
"mem_overcommitted": 1,
"cpu_overcommitted_names": ["synapse"],
"mem_overcommitted_names": ["vault"],
},
"workloads": [{"namespace": "synapse", "workload": "matrix", "pods_total": 3, "primary_node": "titan-01"}],
"lexicon": {"terms": [{"term": "atlas", "meaning": "cluster"}], "aliases": {"bot": "atlas"}},
"cross_stats": {
"node_metric_top": [{"metric": "cpu", "node": "titan-01", "value": 90, "cpu": 90, "ram": 80, "net": 1.0, "io": 2.0, "pods_total": 3}],
"namespace_metric_top": [{"metric": "cpu", "namespace": "synapse", "value": 40, "cpu_ratio": 0.8, "mem_ratio": 0.7, "pods_total": 3}],
"pvc_top": [{"namespace": "synapse", "pvc": "data", "used_percent": 95}],
},
"events": {"warnings_total": 2},
}
lines: list[str] = []
format_a._append_node_pods(lines, sparse_summary)
format_a._append_pod_issues(lines, sparse_summary)
format_a._append_workload_health(lines, sparse_summary)
format_b._append_topology(lines, sparse_summary)
format_b._append_flux(lines, sparse_summary)
format_c._append_namespace_capacity_summary(lines, sparse_summary)
format_c._append_workloads_by_namespace(lines, sparse_summary)
format_c._append_lexicon(lines, sparse_summary)
format_c._append_cross_stats(lines, sparse_summary)
assert any("node_pods_max" in line for line in lines)
assert any("flux_not_ready_items" in line for line in lines)
assert any("cross_pvc_usage" in line for line in lines)
assert core_a._build_node_ages([{"name": "titan-01", "age_hours": 1}, "bad"])
assert core_a._build_node_facts([{"name": "titan-01", "is_worker": True, "roles": ["worker"], "arch": "arm64"}])
assert core_a._build_node_taints([{"name": "titan-01", "taints": [{"key": "dedicated", "effect": "NoSchedule"}]}])
assert core_a._build_root_disk_headroom({"node_usage": {"disk": [{"node": "titan-01", "value": 80}]}})
assert core_a._build_longhorn({"longhorn": {"total": 1}})
assert core_a._build_node_load({"node_load": [{"node": "titan-01"}]})
assert core_a._build_pods({"pods_running": 1})
assert core_a._build_capacity({"capacity_cpu": 4})
assert core_a._build_namespace_pods({"namespace_pods": [{"namespace": "synapse"}]})
assert core_a._build_namespace_nodes({"namespace_nodes": [{"namespace": "synapse"}]})
assert core_a._build_node_pods({"node_pods": [{"node": "titan-01"}]})
assert core_a._build_node_pods_top({"node_pods_top": [{"node": "titan-01"}]})
assert core_a._build_pod_issues({"pod_issues": {"counts": {}}})
assert core_a._build_events({"events": {"warnings_total": 1}})
assert core_a._build_event_summary({"events": {"warnings_top_reason": {"a": 1}, "warnings_latest": [{"reason": "x"}]}})
assert core_a._build_postgres({"postgres_connections": {"used": 1}})
settings = replace(build_test_settings(), queue_enabled=False)
store = ClaimStore(":memory:", 60)
assert store.get("") is None
store.set("", {"claims": []})
assert _safe_json(None, {}) == {}
kb_dir = tmp_path / "kb"
(kb_dir / "catalog").mkdir(parents=True)
(kb_dir / "catalog" / "runbooks.json").write_text(json.dumps([{"path": "runbooks/fix.md"}, {"title": "Missing path"}]), encoding="utf-8")
kb = KnowledgeBase(str(kb_dir))
assert kb.runbook_titles() == ""
assert kb.runbook_paths(limit=1) == ["runbooks/fix.md"]
from atlasbot.snapshot.builder import SnapshotProvider
provider = SnapshotProvider(replace(settings, ariadne_state_url="", snapshot_ttl_sec=1))
provider._cache = {"cached": True}
provider._cache_ts = 1.0
monkeypatch.setattr("atlasbot.snapshot.builder.time.monotonic", lambda: 100.0)
assert provider.get() == {"cached": True}
from atlasbot import main as main_mod
captured: dict[str, Any] = {}
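    # The probes capture the queue and API handlers that main() wires up, so the test
    # can invoke them directly after bootstrap completes.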
class QueueProbe:
def __init__(self, _settings: Settings, handler):
captured["handler"] = handler
async def start(self) -> None:
return None
async def submit(self, payload: dict[str, Any]) -> dict[str, Any]:
return {"reply": payload.get("question", ""), "scores": {}}
class ApiProbe:
def __init__(self, _settings: Settings, answer_handler):
captured["answer_handler"] = answer_handler
self.app = SimpleNamespace()
class ServerProbe:
def __init__(self, config: Any):
self.config = config
async def serve(self) -> None:
return None
class EngineProbe:
async def answer(
self,
question: str,
*,
mode: str,
history: list[dict[str, str]] | None = None,
observer: Any = None,
conversation_id: str | None = None,
snapshot_pin: bool | None = None,
) -> AnswerResult:
return AnswerResult(
reply=f"{question}:{mode}:{bool(history)}:{conversation_id}:{snapshot_pin}:{observer is not None}",
scores=AnswerScores(confidence=91, relevance=92, satisfaction=93, hallucination_risk="low"),
meta={},
)
monkeypatch.setattr(main_mod, "load_settings", lambda: replace(settings, matrix_bots=()))
monkeypatch.setattr(main_mod, "configure_logging", lambda _level: None)
monkeypatch.setattr(main_mod, "_build_engine", lambda _settings: EngineProbe())
monkeypatch.setattr(main_mod, "QueueManager", QueueProbe)
monkeypatch.setattr(main_mod, "Api", ApiProbe)
monkeypatch.setattr(main_mod.uvicorn, "Server", ServerProbe)
asyncio.run(main_mod.main())
handled = asyncio.run(captured["handler"]({"question": "hello", "mode": "smart", "history": "bad", "conversation_id": 7, "snapshot_pin": "bad"}))
assert handled["reply"]
answered = asyncio.run(captured["answer_handler"]("hello", "quick", None, None, None, None))
assert answered.reply
assert result_scores({"scores": {"confidence": "bad"}}).confidence == 60
qm = QueueManager(replace(settings, queue_enabled=True), lambda payload: asyncio.sleep(0, result=payload))
with pytest.raises(RuntimeError, match="queue not initialized"):
asyncio.run(qm.submit({"question": "x"}))
assert _mode_timeout_sec(settings, "genius") == settings.genius_time_budget_sec
assert _extract_mode("atlas hello", ("atlas",), "quick") == ("quick", "hello")