# atlasbot/tests/test_support_modules.py
"""Coverage-oriented tests for support modules and render helpers."""
from __future__ import annotations
import asyncio
import importlib
import json
import logging as pylogging
from dataclasses import replace
from pathlib import Path
from types import SimpleNamespace
from typing import Any
import httpx
import pytest
from fastapi.testclient import TestClient
from nats.js.errors import NotFoundError
import atlasbot
import atlasbot.api
import atlasbot.engine
import atlasbot.engine.answerer
import atlasbot.knowledge
import atlasbot.llm
import atlasbot.matrix
import atlasbot.queue
import atlasbot.snapshot
from atlasbot.api.http import Api, AnswerRequest, _extract_question
from atlasbot.config import (
MatrixBotConfig,
Settings,
_env_bool,
_env_float,
_env_int,
_load_matrix_bots,
load_settings,
)
import atlasbot.engine.answerer.common as answer_common
import atlasbot.engine.answerer.factsheet as answer_factsheet
import atlasbot.engine.answerer.post as answer_post
import atlasbot.engine.answerer.post_ext as answer_post_ext
import atlasbot.engine.answerer.retrieval as answer_retrieval
import atlasbot.engine.answerer.retrieval_ext as answer_retrieval_ext
import atlasbot.engine.answerer.spine as answer_spine
from atlasbot.engine.answerer import AnswerResult, AnswerScores
from atlasbot.engine.answerer._base import ClaimItem, ContradictionContext, EvidenceItem, InsightGuardInput, ScoreContext
from atlasbot.engine.intent_router import route_intent
from atlasbot.knowledge.loader import KnowledgeBase
from atlasbot.llm.client import LLMClient, build_messages, parse_json
from atlasbot.logging import JsonFormatter, configure_logging
from atlasbot.main import result_scores
from atlasbot.matrix.bot import MatrixBot, MatrixClient, _extract_mode, _mode_timeout_sec
from atlasbot.queue.nats import QueueManager
from atlasbot.snapshot.builder import core_a, core_b, format_a, format_b, format_c, summary_text
from atlasbot.state.store import ClaimStore, _safe_json
from testing.fakes import build_test_settings


def _rich_snapshot() -> dict[str, Any]:
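    """Return a hand-built snapshot payload dense enough to drive every render and answer branch below."""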
return {
"collected_at": "2026-04-10T12:00:00Z",
"snapshot_version": "v1",
"summary": {
"signals": [
{"scope": "node", "target": "titan-01", "metric": "cpu", "current": 90, "delta_pct": 15, "severity": "warn"}
],
"profiles": {
"nodes": [
{"node": "titan-01", "load_index": 0.9, "cpu": 90, "ram": 80, "pods_total": 10, "hardware": "rpi5"}
],
"namespaces": [
{"namespace": "synapse", "pods_total": 5, "cpu_usage": 40, "mem_usage": 50, "primary_node": "titan-01"}
],
"workloads": [
{"namespace": "synapse", "workload": "matrix", "pods_total": 3, "pods_running": 3, "primary_node": "titan-01"}
],
},
"inventory": {"workers": {"total": 2, "ready": 1}},
"topology": {
"nodes": [{"name": "titan-01", "role": "worker"}],
"workloads": [{"name": "matrix", "node": "titan-01"}],
"namespaces": [{"name": "synapse", "pods": 5}],
},
"lexicon": {
"terms": [{"term": "atlas", "meaning": "Atlas cluster"}],
"aliases": {"atlasbot": "atlas"},
},
"cross_stats": {
"node_metric_top": [
{
"metric": "cpu",
"node": "titan-01",
"value": 90,
"cpu": 90,
"ram": 80,
"net": 2.5,
"io": 1.5,
"pods_total": 10,
}
],
"namespace_metric_top": [
{
"metric": "cpu",
"namespace": "synapse",
"value": 40,
"cpu": 40,
"ram": 50,
"net": 1.5,
"io": 1.0,
"pods_total": 5,
}
],
"pvc_top": [{"metric": "usage", "namespace": "synapse", "pvc": "data", "value": 95}],
},
"baseline_deltas": {
"nodes": {
"cpu": [{"node": "titan-01", "delta": 10, "severity": "warn"}],
"ram": [{"node": "titan-01", "delta": 5}],
},
"namespaces": {
"pods": [{"namespace": "synapse", "delta": 8, "severity": "high"}],
},
},
"pod_issue_summary": {
"waiting_reasons_top": [{"reason": "ImagePullBackOff", "count": 3}],
"phase_reasons_top": [{"reason": "Pending", "count": 2}],
"namespace_issue_top": {"waiting": [{"namespace": "synapse", "value": 2}]},
},
"trend_requests": {},
"pod_waiting_trends": {},
"pod_terminated_trends": {},
},
"nodes_summary": {
"total": 2,
"ready": 1,
"not_ready": 1,
"not_ready_names": ["titan-02"],
"by_arch": {"rpi5": 1, "amd64": 1},
"by_role": {"worker": 2},
"workers": {"total": 2, "ready": 1},
"pressure_nodes": {"names": ["titan-02"]},
},
"nodes_detail": [
{
"name": "titan-01",
"hardware": "rpi5",
"arch": "arm64",
"os": "linux",
"kubelet": "1.30",
"kernel": "6.8",
"container_runtime": "containerd",
"is_worker": True,
"roles": ["worker"],
"age_hours": 12,
"taints": [{"key": "dedicated", "effect": "NoSchedule"}],
},
{
"name": "titan-02",
"hardware": "amd64",
"arch": "amd64",
"os": "linux",
"kubelet": "1.30",
"kernel": "6.8",
"container_runtime": "containerd",
"is_worker": True,
"roles": ["worker"],
"age_hours": 24,
"taints": [{"key": "pressure", "effect": "NoExecute"}],
},
],
"metrics": {
"node_load": [
{"node": "titan-01", "load_index": 0.9, "cpu": 90, "ram": 80, "net": 100, "io": 50},
{"node": "titan-02", "load_index": 0.4, "cpu": 30, "ram": 20, "net": 10, "io": 5},
],
"pods_running": 12,
"pods_pending": 1,
"pods_failed": 2,
"pods_succeeded": 3,
"capacity_cpu": 8,
"allocatable_cpu": 7,
"capacity_mem_bytes": 8 * 1024 * 1024 * 1024,
"allocatable_mem_bytes": 6 * 1024 * 1024 * 1024,
"capacity_pods": 110,
"allocatable_pods": 100,
"namespace_cpu_top": [{"metric": {"namespace": "synapse"}, "value": 95}],
"namespace_mem_top": [{"metric": {"namespace": "synapse"}, "value": 1024 * 1024}],
"namespace_cpu_requests_top": [{"metric": {"namespace": "synapse"}, "value": 50}],
"namespace_mem_requests_top": [{"metric": {"namespace": "synapse"}, "value": 2 * 1024 * 1024}],
"namespace_net_top": [{"metric": {"namespace": "synapse"}, "value": 1024}],
"namespace_io_top": [{"metric": {"namespace": "synapse"}, "value": 2048}],
"pod_cpu_top": [{"metric": {"namespace": "synapse", "pod": "matrix"}, "value": 3.3}],
"pod_cpu_top_node": [{"metric": {"namespace": "synapse", "pod": "matrix", "node": "titan-01"}, "value": 3.3}],
"pod_mem_top": [{"metric": {"namespace": "synapse", "pod": "matrix"}, "value": 4096}],
"pod_mem_top_node": [{"metric": {"namespace": "synapse", "pod": "matrix", "node": "titan-01"}, "value": 4096}],
"top_restarts_1h": [{"metric": {"namespace": "synapse", "pod": "matrix"}, "value": [0, 4]}],
"restart_namespace_top": [{"metric": {"namespace": "synapse"}, "value": 4}],
"job_failures_24h": [{"metric": {"namespace": "synapse", "job_name": "backup"}, "value": 2}],
"node_pods_top": [{"node": "titan-01", "pods_total": 5, "namespaces": [{"name": "synapse", "count": 3}]}],
"postgres_connections": {"used": 5, "max": 10, "hottest_db": {"label": "synapse", "value": 3}},
"node_usage": {
"cpu": [{"node": "titan-01", "value": 90}],
"ram": [{"node": "titan-02", "value": 70}],
"net": [{"node": "titan-02", "value": 2}],
"io": [{"node": "titan-01", "value": 0.5}],
"disk": [{"node": "titan-01", "value": 80}],
},
"node_load_summary": {
"top": [{"node": "titan-01", "load_index": 0.9, "cpu": 90, "ram": 80, "io": 1.5, "net": 2.5, "pods_total": 10}],
"outliers": [{"node": "titan-02"}],
},
"hardware_usage_avg": [
{"hardware": "rpi5", "load_index": 0.9, "cpu": 90, "ram": 80, "io": 1.5, "net": 2.5},
],
"namespace_capacity_summary": {
"cpu_ratio_top": [
{"namespace": "synapse", "cpu_usage_ratio": 0.8, "cpu_usage": 40, "cpu_requests": 50}
],
"mem_ratio_top": [
{"namespace": "synapse", "mem_usage_ratio": 0.7, "mem_usage": 70, "mem_requests": 100}
],
"cpu_headroom_low": [{"namespace": "synapse", "headroom": 0.2}],
"mem_headroom_low": [{"namespace": "synapse", "headroom": 0.3}],
"cpu_overcommitted": 1,
"mem_overcommitted": 0,
"cpu_overcommitted_names": ["synapse"],
"mem_overcommitted_names": [],
},
"namespace_capacity": [{"namespace": "synapse", "cpu": 1, "mem": 2}],
"units": {"cpu_pct": "%", "ram_pct": "%", "net": "bytes/s"},
"windows": {"rates": "5m", "restarts": "1h"},
},
"namespace_pods": [{"namespace": "synapse", "pods_total": 5, "pods_running": 4}],
"namespace_nodes": [{"namespace": "synapse", "pods_total": 5, "primary_node": "titan-01"}],
"node_pods": [{"node": "titan-01", "pods_total": 5, "namespaces": [{"name": "synapse", "count": 3}]}],
"pod_issues": {
"counts": {"Failed": 2, "Pending": 1, "Unknown": 0},
"top": [{"namespace": "synapse", "pod": "matrix", "phase": "Pending", "age_hours": 2}],
"pending_oldest": [{"namespace": "synapse", "pod": "matrix", "age_hours": 2}],
"waiting_reasons_top": [{"reason": "ImagePullBackOff", "count": 3}],
"pending_over_15m": 1,
"waiting_reasons": {"ImagePullBackOff": 3},
},
"workloads_health": {
"deployments": {"ready": 2, "not_ready": 1, "desired": 3},
"statefulsets": {"ready": 1, "not_ready": 0, "desired": 1},
"daemonsets": {"ready": 1, "not_ready": 0, "desired": 1},
},
"events": {
"warnings_top_reason": {"ImagePullBackOff": 3},
"warnings_latest": [{"reason": "FailedScheduling", "count": 2}],
"warnings_total": 5,
},
"jobs": {
"totals": {"total": 4, "active": 1, "failed": 1, "succeeded": 2},
"failing": [{"namespace": "synapse", "job_name": "backup", "failed": 1}],
"active_oldest": [{"namespace": "synapse", "job_name": "backup", "age_minutes": 30}],
},
"postgres": {
"used": 5,
"max": 10,
"hottest_db": {"label": "synapse", "value": 3},
"by_db": [{"label": "synapse", "value": 3}],
},
"hottest": {
"cpu": {"node": "titan-01", "value": 90},
"ram": {"node": "titan-02", "value": 70},
"net": {"node": "titan-02", "value": 2},
"io": {"node": "titan-01", "value": 0.5},
"disk": {"node": "titan-01", "value": 80},
},
"pvc_usage_top": [{"namespace": "synapse", "pvc": "data", "value": 95}],
"root_disk_low_headroom": [{"node": "titan-01", "headroom_pct": 20, "used_pct": 80}],
"longhorn": {
"total": 2,
"attached_count": 1,
"detached_count": 1,
"degraded_count": 0,
"by_state": {"attached": 1, "detached": 1},
"by_robustness": {"healthy": 1, "degraded": 1},
"unhealthy": [{"name": "vol1", "state": "detached", "robustness": "degraded"}],
},
"workloads": [{"namespace": "synapse", "name": "matrix", "pods_total": 3, "pods_running": 3}],
"flux": {
"ready": 1,
"not_ready": 1,
"items": [{"kind": "HelmRelease", "name": "matrix", "status": "Ready"}],
},
}


def test_package_imports() -> None:
"""Import package shims so their `__init__` modules stay covered."""
importlib.import_module("atlasbot")
importlib.import_module("atlasbot.api")
importlib.import_module("atlasbot.engine")
importlib.import_module("atlasbot.engine.answerer")
importlib.import_module("atlasbot.knowledge")
importlib.import_module("atlasbot.llm")
importlib.import_module("atlasbot.matrix")
importlib.import_module("atlasbot.queue")
importlib.import_module("atlasbot.snapshot")
assert atlasbot.snapshot.__name__ == "atlasbot.snapshot"


def test_config_helpers_and_load_settings(monkeypatch: pytest.MonkeyPatch) -> None:
"""Exercise config parsing branches and matrix bot loading."""
monkeypatch.setenv("BOOL_ONE", "yes")
monkeypatch.setenv("INT_BAD", "nope")
monkeypatch.setenv("FLOAT_BAD", "nope")
assert _env_bool("BOOL_ONE")
assert _env_int("INT_BAD", "7") == 7
assert _env_float("FLOAT_BAD", "2.5") == 2.5
monkeypatch.setenv("BOT_USER_QUICK", "quick")
monkeypatch.setenv("BOT_PASS_QUICK", "pw")
monkeypatch.setenv("BOT_USER_SMART", "smart")
monkeypatch.setenv("BOT_PASS_SMART", "pw")
settings = load_settings()
assert settings.matrix_bots[0].mode == "quick"
assert settings.matrix_bots[1].mode == "smart"
monkeypatch.delenv("BOT_USER_QUICK", raising=False)
monkeypatch.delenv("BOT_PASS_QUICK", raising=False)
monkeypatch.delenv("BOT_USER_SMART", raising=False)
monkeypatch.delenv("BOT_PASS_SMART", raising=False)
monkeypatch.setenv("BOT_USER", "atlasbot")
monkeypatch.setenv("BOT_PASS", "legacy")
legacy = _load_matrix_bots(("atlasbot",))
assert legacy and legacy[0].mode == ""


def test_knowledge_base_helpers(tmp_path: Path, caplog: pytest.LogCaptureFixture) -> None:
"""Read KB data, titles, paths, and prompt chunks from a temp catalog."""
base = tmp_path / "kb"
catalog = base / "catalog"
catalog.mkdir(parents=True)
(catalog / "atlas.json").write_text(
json.dumps({"cluster": "titan", "sources": [{"name": "docs"}], "extra": True}),
encoding="utf-8",
)
(catalog / "runbooks.json").write_text(json.dumps([{"title": "Fix", "path": "runbooks/fix.md"}]), encoding="utf-8")
(base / "notes.md").write_text("hello atlas", encoding="utf-8")
kb = KnowledgeBase(str(base))
assert "Cluster: titan." in kb.summary()
assert "Relevant runbooks" in kb.runbook_titles(limit=1)
assert kb.runbook_paths() == ["runbooks/fix.md"]
assert kb.chunk_lines(max_files=1, max_chars=200)
bad = base / "bad"
bad.mkdir()
(bad / "catalog").mkdir()
(bad / "catalog" / "atlas.json").write_text("{broken", encoding="utf-8")
broken = KnowledgeBase(str(bad))
with caplog.at_level(pylogging.WARNING):
assert broken.summary() == ""


def test_llm_client_helpers_and_fallback(monkeypatch: pytest.MonkeyPatch) -> None:
"""Exercise message building, JSON parsing, and fallback model logic."""
settings = replace(
build_test_settings(),
ollama_url="http://example",
ollama_model="base",
ollama_fallback_model="fallback",
ollama_retries=1,
)
client = LLMClient(settings)
assert client._endpoint().endswith("/api/chat")
assert build_messages("sys", "prompt", context="ctx")[1]["content"].startswith("Context")
assert parse_json("{\"ok\": true}", fallback={}) == {"ok": True}
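    # The stubbed httpx client below returns HTTP 404 for the primary model so the
    # client has to retry with the configured fallback model, which answers "hello".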
class FakeResponse:
def __init__(self, status_code: int, payload: dict[str, Any]):
self.status_code = status_code
self._payload = payload
def raise_for_status(self) -> None:
if self.status_code >= 400:
raise httpx.HTTPStatusError("bad", request=httpx.Request("POST", "http://example"), response=httpx.Response(self.status_code))
def json(self) -> dict[str, Any]:
return self._payload
class FakeAsyncClient:
def __init__(self, timeout: float | None = None):
self.timeout = timeout
async def __aenter__(self) -> FakeAsyncClient:
return self
async def __aexit__(self, *exc: object) -> None:
return None
async def post(
self,
_url: str,
*,
json: dict[str, Any],
headers: dict[str, str],
) -> FakeResponse:
model = json["model"]
assert headers["Content-Type"] == "application/json"
if model == "base":
return FakeResponse(404, {})
return FakeResponse(200, {"message": {"content": "hello"}})
monkeypatch.setattr(httpx, "AsyncClient", FakeAsyncClient)
reply = asyncio.run(client.chat([{"role": "user", "content": "hi"}], model=None, timeout_sec=1.0))
assert reply == "hello"


def test_logging_formatter_and_configure() -> None:
"""Format a structured record and install JSON logging on the root logger."""
formatter = JsonFormatter()
record = pylogging.LogRecord("atlasbot", pylogging.INFO, __file__, 1, "hello %s", ("world",), None)
record.extra = {"mode": "quick"}
payload = json.loads(formatter.format(record))
assert payload["message"] == "hello world"
assert payload["mode"] == "quick"
configure_logging("debug")
root = pylogging.getLogger()
assert root.handlers and isinstance(root.handlers[0].formatter, JsonFormatter)


def test_state_store_roundtrip_and_cleanup(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
"""Persist, read, and expire a claim payload."""
path = tmp_path / "state.db"
store = ClaimStore(str(path), 60)
store.set(
"conv",
{
"snapshot_id": "snap-1",
"claims": [{"id": "c1"}],
"snapshot": {"nodes": 1},
},
)
payload = store.get("conv")
assert payload and payload["snapshot_id"] == "snap-1"
assert payload["claims"] == [{"id": "c1"}]
assert _safe_json("{broken", []) == []
monkeypatch.setattr("atlasbot.state.store.time.monotonic", lambda: 1_000_000.0)
store.cleanup()
assert store.get("conv") is None


@pytest.mark.parametrize(
("question", "kind"),
[
("How many nodes are ready?", "nodes_ready"),
("How many cluster nodes do we have?", "nodes_count"),
("Which nodes are not rpi?", "nodes_non_rpi"),
("What hardware mix do we have?", "hardware_mix"),
("What is the hottest cpu?", "hottest_cpu"),
("What is the hottest ram?", "hottest_ram"),
("How many postgres connections?", "postgres_connections"),
("Which postgres db is hottest?", "postgres_hottest"),
("Which namespace has most pods?", "namespace_most_pods"),
("Is there pressure on the nodes?", "pressure_summary"),
],
)
def test_intent_router_patterns(question: str, kind: str) -> None:
"""Route the main cluster intents into deterministic matches."""
match = route_intent(question)
assert match and match.kind == kind


def test_api_routes_and_auth() -> None:
"""Exercise the HTTP wrapper, token check, and question extraction."""
settings = replace(build_test_settings(), internal_token="secret")
async def handler(
question: str,
mode: str,
_history: list[dict[str, str]] | None,
_conversation_id: str | None,
_snapshot_pin: bool | None,
) -> AnswerResult:
return AnswerResult(
reply=f"{question}:{mode}",
scores=AnswerScores(confidence=1, relevance=2, satisfaction=3, hallucination_risk="low"),
meta={"mode": mode},
)
api = Api(settings, handler)
client = TestClient(api.app)
assert client.get("/healthz").json() == {"ok": True}
assert client.post("/v1/answer", json={"question": "hi"}).status_code == 401
assert _extract_question(AnswerRequest(prompt=" hello ")).strip() == "hello"
response = client.post(
"/v1/answer",
headers={"X-Internal-Token": "secret"},
json={"prompt": "hello", "mode": "SMART", "conversation_id": "conv-1", "snapshot_pin": True},
)
assert response.status_code == 200
assert response.json()["reply"] == "hello:smart"


def test_main_and_queue_and_matrix(monkeypatch: pytest.MonkeyPatch) -> None:
"""Run the bootstrap path and queueing branch without external services."""
from atlasbot import main as main_mod
settings = replace(
build_test_settings(),
queue_enabled=True,
matrix_bots=(MatrixBotConfig("bot", "pw", ("bot",), "quick"),),
)
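    # Fakes stand in for the queue manager, Matrix bot, and uvicorn server so main()
    # can bootstrap without external services; FakeMatrixBot immediately invokes the
    # answer handler and checks that it receives the queued reply.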
class FakeQueue:
def __init__(self, settings: Settings, handler):
self.settings = settings
self.handler = handler
self.started = False
async def start(self) -> None:
self.started = True
async def submit(self, _payload: dict[str, Any]) -> dict[str, Any]:
return {
"reply": "queued",
"scores": {"confidence": 7, "relevance": 8, "satisfaction": 9, "hallucination_risk": "low"},
}
class FakeMatrixBot:
def __init__(self, _settings: Settings, _bot: MatrixBotConfig, _engine: Any, answer_handler):
self.answer_handler = answer_handler
async def run(self) -> None:
result = await self.answer_handler("what is atlas?", "quick", [], "room-1", None)
assert result.reply == "queued"
class FakeServer:
def __init__(self, config: Any):
self.config = config
async def serve(self) -> None:
return None
monkeypatch.setattr(main_mod, "load_settings", lambda: settings)
monkeypatch.setattr(main_mod, "configure_logging", lambda _level: None)
monkeypatch.setattr(main_mod, "QueueManager", FakeQueue)
monkeypatch.setattr(main_mod, "MatrixBot", FakeMatrixBot)
monkeypatch.setattr(main_mod.uvicorn, "Server", FakeServer)
asyncio.run(main_mod.main())
scores = result_scores({"scores": {"confidence": 10, "relevance": 20, "satisfaction": 30, "hallucination_risk": "low"}})
assert scores.confidence == 10


def test_matrix_and_queue_and_snapshot_helpers(monkeypatch: pytest.MonkeyPatch) -> None:
"""Drive the Matrix client, queue manager, and snapshot renderers."""
settings = replace(build_test_settings(), matrix_bots=())
bot_cfg = MatrixBotConfig("bot", "pw", ("bot",), "quick")
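    # Fake Matrix HTTP layer: login returns a token, the directory lookup resolves a
    # room alias to "!room", and sync returns an empty join map with a next_batch token.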
class FakeResp:
def __init__(self, payload: dict[str, Any], status_code: int = 200):
self._payload = payload
self.status_code = status_code
def raise_for_status(self) -> None:
if self.status_code >= 400:
raise httpx.HTTPError("bad")
def json(self) -> dict[str, Any]:
return self._payload
class FakeAsyncClient:
def __init__(self, timeout: float | None = None):
self.timeout = timeout
async def __aenter__(self) -> "FakeAsyncClient":
return self
async def __aexit__(self, *exc: object) -> None:
return None
async def post(self, url: str, json: dict[str, Any] | None = None, headers: dict[str, str] | None = None) -> FakeResp:
if "login" in url:
return FakeResp({"access_token": "tok"})
return FakeResp({})
async def get(self, url: str, headers: dict[str, str] | None = None, params: dict[str, Any] | None = None) -> FakeResp:
if "directory/room" in url:
return FakeResp({"room_id": "!room"})
return FakeResp({"next_batch": "n1", "rooms": {"join": {}}})
monkeypatch.setattr("atlasbot.matrix.bot.httpx.AsyncClient", FakeAsyncClient)
client = MatrixClient(settings, bot_cfg)
token = asyncio.run(client.login())
assert token == "tok"
assert asyncio.run(client.resolve_room(token)) == "!room"
asyncio.run(client.join_room(token, "!room"))
asyncio.run(client.send_message(token, "!room", "hello"))
assert asyncio.run(client.sync(token, None))["next_batch"] == "n1"
mode, cleaned = _extract_mode("atlas-smart hello", ("atlas",), "")
assert mode == "smart"
assert cleaned == "-smart hello"
assert _mode_timeout_sec(settings, "smart") == settings.smart_time_budget_sec
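    # Minimal NATS/JetStream fakes: submit() publishes a request and the reply
    # subscription immediately yields {"reply": "ok"}, while stream_info raises
    # NotFoundError so start() goes through the add_stream path.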
class FakeSub:
async def next_msg(self, timeout: float) -> Any:
return SimpleNamespace(data=json.dumps({"reply": "ok"}).encode(), reply="reply")
async def unsubscribe(self) -> None:
return None
class FakeMsg:
def __init__(self) -> None:
self.data = json.dumps({"payload": {"question": "q"}}).encode()
self.reply = "reply"
self.acked = False
async def ack(self) -> None:
self.acked = True
class FakeJS:
def __init__(self) -> None:
self.streams = []
async def stream_info(self, stream: str) -> None:
raise NotFoundError
async def add_stream(self, **kwargs: Any) -> None:
self.streams.append(kwargs)
async def publish(self, subject: str, data: bytes) -> None:
self.streams.append({"subject": subject, "data": data})
async def pull_subscribe(self, subject: str, durable: str) -> Any:
class Pull:
async def fetch(self, count: int, timeout: float) -> list[FakeMsg]:
raise RuntimeError("stop")
return Pull()
class FakeNATS:
def __init__(self) -> None:
self.published = []
async def connect(self, url: str) -> None:
return None
def jetstream(self) -> FakeJS:
return FakeJS()
def new_inbox(self) -> str:
return "inbox"
async def subscribe(self, reply: str) -> FakeSub:
return FakeSub()
async def publish(self, reply: str, data: bytes) -> None:
self.published.append((reply, data))
async def drain(self) -> None:
return None
monkeypatch.setattr("atlasbot.queue.nats.NATS", FakeNATS)
queue_settings = replace(settings, queue_enabled=True, nats_stream="atlasbot", nats_subject="atlasbot.requests")
qm = QueueManager(queue_settings, lambda payload: asyncio.sleep(0, result={"reply": "x"}))
asyncio.run(QueueManager(replace(queue_settings, queue_enabled=False), lambda payload: asyncio.sleep(0, result=payload)).start())
asyncio.run(qm.start())
assert asyncio.run(qm.submit({"mode": "quick"})) == {"reply": "ok"}
assert asyncio.run(qm.submit({"mode": "genius"})) == {"reply": "ok"}
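    # LoopPull drives one fetch failure, then one delivered message, then cancellation,
    # covering the worker loop's retry handling and shutdown path.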
class LoopPull:
def __init__(self) -> None:
self.calls = 0
async def fetch(self, count: int, timeout: float) -> list[FakeMsg]:
del count, timeout
self.calls += 1
if self.calls == 1:
raise RuntimeError("retry")
if self.calls == 2:
return [FakeMsg()]
raise asyncio.CancelledError
class LoopJS:
async def pull_subscribe(self, subject: str, durable: str) -> LoopPull:
del subject, durable
return LoopPull()
qm._js = LoopJS()
with pytest.raises(asyncio.CancelledError):
asyncio.run(qm._worker_loop())
asyncio.run(qm.stop())
snapshot = _rich_snapshot()
summary = core_a.build_summary(snapshot)
assert summary["nodes"]["total"] == 2
text = summary_text(snapshot)
assert "atlas_cluster:" in text
assert "hardware_usage_avg:" in text
assert "signals:" in text
assert "node_profiles:" in text
assert "flux:" in text or "flux" in text
lines: list[str] = []
format_a._append_nodes(lines, summary)
format_a._append_hardware(lines, summary)
format_a._append_hardware_groups(lines, summary)
format_a._append_node_ages(lines, summary)
format_a._append_node_taints(lines, summary)
format_a._append_node_facts(lines, summary)
format_a._append_pressure(lines, summary)
format_a._append_pods(lines, summary)
format_a._append_capacity(lines, summary)
format_a._append_namespace_pods(lines, summary)
format_a._append_namespace_nodes(lines, summary)
format_a._append_node_pods(lines, summary)
format_a._append_pod_issues(lines, summary)
format_a._append_workload_health(lines, summary)
format_a._append_node_usage_stats(lines, summary)
format_a._append_events(lines, summary)
format_a._append_pvc_usage(lines, summary)
format_a._append_root_disk_headroom(lines, summary)
format_b._append_longhorn(lines, summary)
format_b._append_namespace_usage(lines, summary)
format_b._append_namespace_requests(lines, summary)
format_b._append_namespace_io_net(lines, summary)
format_b._append_pod_usage(lines, summary)
format_b._append_restarts(lines, summary)
format_b._append_job_failures(lines, summary)
format_b._append_jobs(lines, summary)
format_b._append_postgres(lines, summary)
format_b._append_hottest(lines, summary)
format_b._append_workloads(lines, summary)
format_b._append_topology(lines, summary)
format_b._append_flux(lines, summary)
format_c._append_signals(lines, summary)
format_c._append_profiles(lines, summary)
format_c._append_units_windows(lines, summary)
format_c._append_node_load_summary(lines, summary)
format_c._append_hardware_usage(lines, summary)
format_c._append_cluster_watchlist(lines, summary)
format_c._append_baseline_deltas(lines, summary)
format_c._append_pod_issue_summary(lines, summary)
format_c._append_workloads_by_namespace(lines, summary)
format_c._append_lexicon(lines, summary)
format_c._append_cross_stats(lines, summary)
assert any(line.startswith("nodes:") for line in lines)
assert any(line.startswith("longhorn:") for line in lines)
assert any(line.startswith("signals:") for line in lines)
core_b_summary = core_b._build_hottest(snapshot["metrics"])
assert core_b_summary["hottest"]["cpu"]["node"] == "titan-01"


def test_matrix_bot_sync_and_heartbeat() -> None:
"""Drive the Matrix bot heartbeat and sync handlers with a fake client."""
settings = replace(build_test_settings(), thinking_interval_sec=0.001)
bot_cfg = MatrixBotConfig("bot", "pw", ("atlas",), "quick")
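    # FakeClient records outgoing messages; its sync() payload contains one user message
    # that mentions the bot and one message sent by the bot itself, which must be ignored.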
class FakeClient:
def __init__(self) -> None:
self.sent: list[str] = []
async def login(self) -> str:
return "tok"
async def resolve_room(self, token: str) -> str:
return "!room"
async def join_room(self, token: str, room_id: str) -> None:
return None
async def send_message(self, token: str, room_id: str, text: str) -> None:
self.sent.append(text)
async def sync(self, token: str, since: str | None) -> dict[str, Any]:
return {
"next_batch": "n1",
"rooms": {
"join": {
"!room": {
"timeline": {
"events": [
{"type": "m.room.message", "sender": "user", "content": {"body": "atlas quick what is atlas?"}},
{"type": "m.room.message", "sender": "bot", "content": {"body": "ignored"}},
]
}
}
}
},
}
async def answer_handler(question: str, mode: str, history, conversation_id, observer):
if observer:
observer("stage", "working")
return AnswerResult(
reply="Atlas has 22 nodes",
scores=AnswerScores(confidence=1, relevance=2, satisfaction=3, hallucination_risk="low"),
meta={"mode": mode},
)
bot = MatrixBot(settings, bot_cfg, SimpleNamespace(answer=lambda *args, **kwargs: None), answer_handler)
bot._client = FakeClient()
asyncio.run(bot._answer_with_heartbeat("tok", "!room", "What is Atlas?", "quick"))
payload = {
"rooms": {
"join": {
"!room": {
"timeline": {
"events": [
{"type": "m.room.message", "sender": "user", "content": {"body": "atlas smart hello"}}
]
}
}
}
}
}
asyncio.run(bot._handle_sync("tok", payload))
assert bot._client.sent


def test_answerer_helper_coverage_smoke() -> None:
"""Exercise the split answerer helpers with representative inputs."""
settings = build_test_settings()
plan = answer_common._mode_plan(settings, "smart")
fast_plan = replace(plan, parallelism=2, score_retries=2, chunk_group=1, chunk_top=2, max_subquestions=2)
snapshot = _rich_snapshot()
summary = core_a.build_summary(snapshot)
summary_lines = answer_spine._summary_lines(snapshot)
rich_lines = [
"nodes_total: 2",
"nodes_ready: 1",
"cluster_name: atlas",
"pods_total: 3",
"cpu: 90",
"ram: 80",
"runbooks/fix.md",
]
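    # ScriptedLLM answers each pipeline stage with canned JSON keyed by the call tag,
    # so every retrieval and post-processing helper receives a parseable reply.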
class ScriptedLLM:
async def __call__(
self,
_system: str,
_prompt: str,
*,
context: str | None = None,
model: str | None = None,
tag: str = "",
) -> str:
responses = {
"chunk_score": '[{"id":"c1","score":1},{"id":"c2","score":2}]',
"chunk_select": '{"selected_index": 1}',
"metric_keys": '{"keys":["nodes_total","pods_total"]}',
"metric_keys_validate": '{"missing":["pods_total"]}',
"fact_types": '{"fact_types":["nodes_total","pods_total"]}',
"fact_types_select": '{"best": 1}',
"signals": '{"signals":["cpu","ram"]}',
"signals_select": '{"best": 1}',
"chunk_scan": '{"lines":["cpu: 90"]}',
"chunk_scan_select": '{"best": 1}',
"fact_prune": '{"lines":["cpu: 90"]}',
"fact_prune_select": '{"best": 1}',
"fact_select": '{"lines":["cpu: 90"]}',
"fact_select_best": '{"best": 1}',
"contradiction": '{"use_facts": false, "confidence": 99}',
"insight_guard": '{"ok": false}',
"insight_fix": "fixed insight",
}
return responses.get(tag, "{}")
scripted_llm = ScriptedLLM()
chunks = [
{"id": "c1", "text": "nodes_total: 2\npods_total: 3", "summary": "nodes"},
{"id": "c2", "text": "cpu: 90\nram: 80", "summary": "cpu"},
]
groups = answer_common._build_chunk_groups(chunks, 1)
scores = asyncio.run(answer_common._score_chunks(scripted_llm, chunks, "How many nodes?", ["nodes"], fast_plan))
serial_ctx = ScoreContext(question="How many nodes?", sub_questions=["nodes"], retries=2, parallelism=1, select_best=True, fast_model="fast")
serial_scores = asyncio.run(answer_common._score_groups_serial(scripted_llm, groups, serial_ctx))
parallel_ctx = ScoreContext(question="How many nodes?", sub_questions=["nodes"], retries=2, parallelism=2, select_best=True, fast_model="fast")
parallel_scores = asyncio.run(answer_common._score_groups_parallel(scripted_llm, groups, parallel_ctx))
best_run = asyncio.run(answer_common._select_best_score_run(scripted_llm, groups[0], [{"c1": 1.0}, {"c1": 2.0}], serial_ctx))
selected = answer_common._select_chunks(chunks, {"c1": 0.2, "c2": 0.9}, replace(fast_plan, chunk_top=2), ["cpu"], ["c2"])
assert scores and serial_scores and parallel_scores and best_run and selected
assert answer_common._strip_followup_meta("The draft is correct. Atlas is healthy.") == "Atlas is healthy."
assert answer_common._llm_call_limit(settings, "smart") == settings.smart_llm_calls_max
assert answer_common._mode_time_budget(settings, "quick") == settings.quick_time_budget_sec
assert answer_common._select_subquestions([], "fallback", 2) == ["fallback"]
assert answer_common._chunk_lines(["a", "b", "c"], 2)
assert answer_common._raw_snapshot_chunks(snapshot)
assert answer_common._format_runbooks(["runbooks/fix.md"])
assert answer_common._keyword_hits([{"text": "cpu usage"}], {"text": "cpu usage"}, ["cpu"])
assert answer_factsheet._factsheet_kb_chars("quick", 10)
assert answer_factsheet._factsheet_line_limit("smart") >= 1
assert answer_factsheet._factsheet_instruction("quick")
assert answer_factsheet._factsheet_model("genius", fast_plan) == fast_plan.model
assert answer_factsheet._is_plain_math_question("2+2")
assert answer_factsheet._quick_fact_sheet_lines("How many nodes?", rich_lines, ["kb"], limit=4)
assert answer_factsheet._quick_fact_sheet_text(["nodes_total: 2"])
assert answer_factsheet._quick_fact_sheet_heuristic_answer("How many ready nodes?", ["nodes_total:2,ready:1,not_ready:0"])
assert answer_factsheet._json_excerpt(summary)
assert answer_post._strip_unknown_entities("node titan-99 is hot. Atlas is healthy.", ["titan-99"], []) == "Atlas is healthy."
assert answer_post._needs_evidence_guard("node titan-99 is hot.", ["node titan-01"]) is True
contradiction = asyncio.run(
answer_post._contradiction_decision(
ContradictionContext(scripted_llm, "why", "draft", ["fact"], fast_plan),
attempts=2,
)
)
assert contradiction["confidence"] == 99
assert answer_post._format_direct_metric_line("nodes_total: 2")
assert answer_post._global_facts(["nodes_total: 2", "other: 1"])
assert answer_post._has_keyword_overlap(["cpu usage"], ["cpu"])
assert answer_post._merge_tokens(["a"], ["b"], ["c"]) == ["a", "b", "c"]
assert answer_post._extract_question_tokens("How many nodes?")
assert answer_post._expand_tokens(["nodes_total"])
assert answer_post._ensure_token_coverage(["nodes_total: 2"], ["pods"], ["pods_total: 3"], max_add=1)
assert answer_post._best_keyword_line(["cpu: 90"], ["cpu"]) == "cpu: 90"
assert answer_post._line_starting_with(["cpu: 90"], "cpu")
assert answer_post._non_rpi_nodes({"hardware_by_node": {"titan-01": "rpi5", "titan-02": "amd64"}}) == {"amd64": ["titan-02"]}
assert answer_post._format_hardware_groups({"amd64": ["titan-02"]}, "Nodes")
assert answer_post._lexicon_context({"lexicon": {"terms": [{"term": "atlas", "meaning": "cluster"}], "aliases": {"bot": "atlas"}}})
assert answer_post._parse_json_block("{\"ok\": true}", fallback={}) == {"ok": True}
assert answer_post._parse_json_list("[{\"ok\": true}]") == [{"ok": True}]
assert answer_post._scores_from_json({"confidence": "1", "relevance": 2, "satisfaction": 3, "hallucination_risk": "low"}).confidence == 1
assert answer_post._coerce_int("4", 1) == 4
assert answer_post._default_scores().hallucination_risk == "medium"
assert answer_post._style_hint({"answer_style": "insightful"}) == "insightful"
assert answer_post._needs_evidence_fix("we don't know", {"needs_snapshot": True}) is True
assert answer_post._should_use_insight_guard({"answer_style": "insightful"})
insight_inputs = InsightGuardInput(
question="why",
reply="Insightful reply",
classify={"answer_style": "insightful", "question_type": "open_ended"},
context="",
plan=fast_plan,
call_llm=scripted_llm,
facts=["fact"],
)
assert asyncio.run(answer_post._apply_insight_guard(insight_inputs))
assert answer_post_ext._reply_matches_metric_facts("nodes_total: 2", ["nodes_total: 2"])
assert answer_post_ext._needs_dedup("one. one. one.")
answer_post_ext._needs_focus_fix("how many nodes", "For more details. Additional context.", {"question_type": "metric"})
assert answer_post_ext._extract_keywords("How many nodes?", "How many nodes?", ["pods"], ["nodes"])
assert answer_post_ext._allowed_nodes(summary)
assert answer_post_ext._allowed_namespaces(summary)
assert answer_post_ext._find_unknown_nodes("node titan-99", ["titan-01"]) == ["titan-99"]
assert answer_post_ext._find_unknown_namespaces("namespace rogue", ["synapse"]) == ["rogue"]
assert answer_post_ext._needs_runbook_fix("see runbooks/bad.md", ["runbooks/fix.md"])
assert answer_post_ext._needs_runbook_reference("where is the runbook", ["runbooks/fix.md"], "")
assert answer_post_ext._best_runbook_match("runbooks/fx.md", ["runbooks/fix.md"])
assert answer_post_ext._resolve_path({"a": [{"b": 3}]}, "a[0].b") == 3
assert answer_post_ext._snapshot_id({"snapshot_id": "snap-1"}) == "snap-1"
assert answer_post_ext._claims_to_payload([ClaimItem(id="c1", claim="atlas", evidence=[EvidenceItem(path="a.b", reason="r", value_at_claim=1)])])
assert answer_post_ext._state_from_payload({"updated_at": 1.0, "claims": [{"id": "c1", "claim": "atlas", "evidence": [{"path": "a.b", "reason": "r"}]}]})
assert answer_retrieval._metric_ctx_values({"summary_lines": summary_lines, "question": "cpu", "sub_questions": ["pods"], "keywords": ["cpu"], "keyword_tokens": ["cpu"]})
assert answer_retrieval._extract_metric_keys(rich_lines)
assert answer_retrieval._token_variants({"nodes"})
assert answer_retrieval._parse_key_list("{\"keys\":[\"nodes_total\"]}", ["nodes_total"], 1) == ["nodes_total"]
assert answer_retrieval._chunk_ids_for_keys([{"id": "c1", "text": "nodes_total: 2"}], ["nodes_total"]) == ["c1"]
assert answer_retrieval._filter_metric_keys(["nodes_total"], {"nodes"})
assert answer_retrieval._metric_key_overlap(["nodes_total"], {"nodes"})
assert answer_retrieval._lines_for_metric_keys(rich_lines, ["nodes_total"])
assert answer_retrieval._merge_metric_keys(["nodes_total"], ["pods_total"], 3)
assert answer_retrieval._merge_fact_lines(["a"], ["b"])
assert answer_retrieval._expand_hottest_line("hottest: cpu=titan-01 (90)")
answer_retrieval._has_token("hottest_cpu: titan-01=90", "cpu")
answer_retrieval._hotspot_evidence(snapshot)
assert asyncio.run(answer_retrieval._select_metric_chunks(scripted_llm, {"summary_lines": summary_lines, "question": "cpu", "sub_questions": ["pods"], "keywords": ["cpu"], "keyword_tokens": ["cpu"]}, chunks, fast_plan))
asyncio.run(answer_retrieval._validate_metric_keys(scripted_llm, {"question": "cpu", "sub_questions": ["pods"], "selected": ["nodes_total"]}, ["nodes_total"], fast_plan))
assert asyncio.run(answer_retrieval._gather_limited([asyncio.sleep(0, result=1), asyncio.sleep(0, result=2)], 1))
assert answer_retrieval_ext._metric_key_tokens(summary_lines)
asyncio.run(answer_retrieval_ext._select_best_candidate(scripted_llm, "question", ["a", "b"], fast_plan, "chunk_select"))
assert answer_retrieval_ext._dedupe_lines(["x", "x", "y"])
assert answer_retrieval_ext._collect_fact_candidates(chunks, 4)
assert asyncio.run(answer_retrieval_ext._select_best_list(scripted_llm, "question", [["a"], ["b"]], fast_plan, "chunk_select"))
assert asyncio.run(answer_retrieval_ext._extract_fact_types(scripted_llm, "question", ["cpu"], fast_plan))
assert asyncio.run(answer_retrieval_ext._derive_signals(scripted_llm, "question", ["cpu"], fast_plan))
assert asyncio.run(answer_retrieval_ext._scan_chunk_for_signals(scripted_llm, "question", ["cpu"], ["cpu: 90"], fast_plan))
assert asyncio.run(answer_retrieval_ext._prune_metric_candidates(scripted_llm, "question", ["cpu: 90"], fast_plan, 1))
assert asyncio.run(answer_retrieval_ext._select_fact_lines(scripted_llm, "question", ["cpu: 90"], fast_plan, 1))
assert answer_spine._join_context(["a", "", "b"]) == "a\nb"
assert answer_spine._format_history([{"q": "q", "a": "a"}])
assert answer_spine._summary_lines(snapshot)
assert answer_spine._line_starting_with(rich_lines, "nodes_total")
assert answer_spine._spine_lines(rich_lines)
spine_map: dict[str, str] = {}
answer_spine._spine_nodes(rich_lines, spine_map)
answer_spine._spine_hardware(rich_lines, spine_map)
answer_spine._spine_hottest(rich_lines, spine_map)
answer_spine._spine_postgres(rich_lines, spine_map)
answer_spine._spine_namespaces(rich_lines, spine_map)
answer_spine._spine_pressure(rich_lines, spine_map)
assert answer_spine._parse_group_line("hardware: rpi5=(titan-01)")
assert answer_spine._parse_hottest("hottest: cpu=titan-01 (90)", "cpu")
assert answer_spine._spine_answer(route_intent("How many nodes?"), "nodes_total: 2")
assert answer_spine._spine_nodes_answer("nodes_total: 2")
assert answer_spine._spine_non_rpi_answer("amd64 (titan-02)")
assert answer_spine._spine_hardware_answer("hardware: amd64=1")
assert answer_spine._spine_hottest_answer("hottest_cpu", "hottest: cpu=titan-01 (90)")
assert answer_spine._spine_postgres_answer("postgres_connections: used=5")
assert answer_spine._spine_namespace_answer("namespace_most_pods: synapse=5")
assert answer_spine._spine_pressure_answer("pressure_nodes: titan-02")
assert answer_spine._spine_from_summary(summary)
assert answer_spine._spine_from_counts(summary)
assert answer_spine._spine_from_hardware(summary)
assert answer_spine._spine_from_hottest(summary)
assert answer_spine._spine_from_postgres(summary)
assert answer_spine._spine_from_namespace_pods(summary)
assert answer_spine._spine_from_pressure(summary)
assert answer_spine._spine_fallback(route_intent("How many nodes?"), rich_lines)


def test_snapshot_builder_coverage_smoke() -> None:
"""Exercise the split snapshot render helpers end to end."""
snapshot = _rich_snapshot()
summary = core_a.build_summary(snapshot)
text = summary_text(snapshot)
assert summary and text
lines: list[str] = []
format_a._format_float(1.5)
format_a._format_rate_bytes(2048)
format_a._format_bytes(2048)
format_a._format_kv_map({"a": 1, "b": 2})
format_a._format_names(["b", "a"])
format_a._append_nodes(lines, summary)
format_a._append_hardware(lines, summary)
format_a._append_hardware_groups(lines, summary)
format_a._append_node_ages(lines, summary)
format_a._append_node_taints(lines, summary)
format_a._append_node_facts(lines, summary)
format_a._append_pressure(lines, summary)
format_a._append_pods(lines, summary)
format_a._append_capacity(lines, summary)
format_a._append_namespace_pods(lines, summary)
format_a._append_namespace_nodes(lines, summary)
format_a._append_node_pods(lines, summary)
format_a._append_pod_issues(lines, summary)
format_a._format_pod_issue_counts(summary["pod_issues"])
format_a._format_pod_issue_top(summary["pod_issues"])
format_a._format_pod_pending_oldest(summary["pod_issues"])
format_a._format_pod_waiting_reasons(summary["pod_issues"])
format_a._format_pod_pending_over_15m(summary["pod_issues"])
format_a._append_workload_health(lines, summary)
format_a._append_node_usage_stats(lines, summary)
format_a._append_events(lines, summary)
format_a._append_pvc_usage(lines, summary)
format_a._append_root_disk_headroom(lines, summary)
format_b._append_longhorn(lines, summary)
format_b._append_namespace_usage(lines, summary)
format_b._append_namespace_requests(lines, summary)
format_b._append_namespace_io_net(lines, summary)
format_b._append_pod_usage(lines, summary)
format_b._append_restarts(lines, summary)
format_b._append_job_failures(lines, summary)
format_b._append_jobs(lines, summary)
format_b._format_jobs_totals(summary["jobs"])
format_b._format_jobs_failing(summary["jobs"])
format_b._format_jobs_active_oldest(summary["jobs"])
format_b._append_postgres(lines, summary)
format_b._append_hottest(lines, summary)
format_b._append_workloads(lines, summary)
format_b._append_topology(lines, summary)
format_b._append_flux(lines, summary)
format_c._append_signals(lines, summary)
format_c._append_profiles(lines, summary)
format_c._append_units_windows(lines, summary)
format_c._append_node_load_summary(lines, summary)
format_c._append_hardware_usage(lines, summary)
format_c._append_cluster_watchlist(lines, summary)
format_c._append_baseline_deltas(lines, summary)
format_c._append_pod_issue_summary(lines, summary)
format_c._reason_line(summary["pod_issue_summary"]["waiting_reasons_top"], "waiting")
format_c._append_namespace_issue_lines(lines, summary["pod_issue_summary"]["namespace_issue_top"])
format_c._build_cluster_watchlist(summary)
format_c._capacity_ratio_parts(summary["namespace_capacity"], "cpu", "cpu", "mem")
format_c._capacity_headroom_parts(summary["namespace_capacity"])
format_c._append_namespace_capacity_summary(lines, summary)
format_c._append_workloads_by_namespace(lines, summary)
format_c._append_lexicon(lines, summary)
format_c._append_cross_stats(lines, summary)
assert lines


def test_answerer_helper_edge_branches(monkeypatch: pytest.MonkeyPatch) -> None:
"""Cover alternate branches in the split answerer helper modules."""
settings = replace(build_test_settings(), debug_pipeline=True)
logged: list[tuple[str, dict[str, Any]]] = []
monkeypatch.setattr(answer_common, "log", SimpleNamespace(info=lambda message, extra: logged.append((message, extra))))
meta = answer_common._build_meta("custom", 1, 2, True, False, 3.0, {"kind": "x"}, {"cmd": "echo"}, 10.0)
assert meta["llm_limit_hit"] is True
answer_common._debug_pipeline_log(settings, "edge", {"ok": True})
assert logged and logged[0][0] == "atlasbot_debug"
assert answer_common._mode_plan(settings, "genius").drafts == 2
assert answer_common._mode_plan(settings, "custom").use_tool is False
assert answer_common._select_subquestions([None, {"question": "", "priority": "x"}], "fallback", 2) == ["fallback"]
assert answer_common._chunk_lines([], 3) == []
assert answer_common._raw_snapshot_chunks({"ok": 1, "bad": {1, 2}})
assert answer_common._build_chunk_groups([{"id": "c1", "summary": "a"}], 2) == [[{"id": "c1", "summary": "a"}]]
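    # score_call returns malformed ids/scores and an out-of-range selection index to
    # hit the defensive parsing branches in the chunk-scoring helpers.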
async def score_call(_system: str, _prompt: str, *, model: str | None = None, tag: str = "", **_: Any) -> str:
if tag == "chunk_score":
return '[{"id":"c1","score":"bad"},{"id":"","score":5},"bad"]'
if tag == "chunk_select":
return '{"selected_index": 99}'
raise AssertionError(tag)
groups = [[{"id": "c1", "summary": "a"}]]
ctx = ScoreContext(question="q", sub_questions=[], retries=1, parallelism=1, select_best=True, fast_model="fast")
assert asyncio.run(answer_common._score_chunk_group(score_call, groups[0], "q", [])) == {"c1": 0.0}
assert asyncio.run(answer_common._score_chunk_group_run(score_call, 0, groups[0], "q", [])) == (0, {"c1": 0.0})
assert answer_common._merge_score_runs([]) == {}
assert asyncio.run(answer_common._select_best_score_run(score_call, groups[0], [{"c1": 1.0}, {"c1": 2.0}], ctx)) == {"c1": 1.0}
assert answer_common._keyword_hits([{"text": "cpu"}, {"text": "ram"}], {"text": "cpu"}, None) == []
assert answer_common._select_chunks([], {}, answer_common._mode_plan(settings, "custom")) == []
selected = [{"id": "c0", "text": "a"}]
assert answer_common._append_must_chunks([{"id": "c0"}, {"id": "c1"}], selected, ["c1"], 3) is False
assert answer_common._append_keyword_chunks([{"id": "c0", "text": "cpu"}], selected, ["cpu"], 2) is False
answer_common._append_ranked_chunks([{"id": "c1"}], selected, 2)
assert answer_common._format_runbooks([]) == ""
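    # retrieval_call mixes valid and invalid entries (non-string fact types, empty
    # signals, out-of-range "best" indexes) to exercise filtering and fallback logic.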
async def retrieval_call(_system: str, _prompt: str, *, model: str | None = None, tag: str = "", **_: Any) -> str:
responses = {
"fact_types": '{"fact_types":["cpu", 5, "cpu"]}',
"fact_types_select": '{"best": 99}',
"signals": '{"signals":["cpu", "", "ram"]}',
"signals_select": '{"best": 99}',
"chunk_scan": '{"lines":["cpu: 1", "missing: 2"]}',
"chunk_scan_select": '{"best": 99}',
"fact_prune": '{"lines":["cpu: 1", "ram: 2"]}',
"fact_prune_select": '{"best": 99}',
"fact_select": '{"lines":["cpu: 1"]}',
"fact_select_best": '{"best": 99}',
}
return responses[tag]
fast_plan = replace(answer_common._mode_plan(settings, "smart"), metric_retries=2)
assert answer_retrieval_ext._parse_json_block("plain", fallback={"ok": True}) == {"ok": True}
assert "nodes" in answer_retrieval_ext._metric_key_tokens(["nodes_total: 2"])
assert answer_retrieval_ext._metric_key_tokens([123, "invalid", ": empty"]) == set()
assert asyncio.run(answer_retrieval_ext._select_best_candidate(retrieval_call, "q", ["one"], fast_plan, "fact_types_select")) == 0
assert answer_retrieval_ext._dedupe_lines(["lexicon_term: a", "units: x", "cpu", "cpu"], limit=1) == ["cpu"]
assert answer_retrieval_ext._collect_fact_candidates([{"text": "cpu: 1\nram: 2"}, {"bad": True}], 3) == ["cpu: 1", "ram: 2"]
assert asyncio.run(answer_retrieval_ext._select_best_list(retrieval_call, "q", [[], ["cpu"]], fast_plan, "fact_types_select")) == ["cpu"]
assert asyncio.run(answer_retrieval_ext._extract_fact_types(retrieval_call, "q", [], fast_plan)) == ["cpu", "5"]
async def retrieval_bad(_system: str, _prompt: str, *, model: str | None = None, tag: str = "", **_: Any) -> str:
del _system, _prompt, model, tag
return '{"signals":"bad","fact_types":"bad","lines":"bad"}'
assert asyncio.run(answer_retrieval_ext._extract_fact_types(retrieval_bad, "q", [], fast_plan)) == []
assert asyncio.run(answer_retrieval_ext._derive_signals(retrieval_call, "q", [], fast_plan)) == []
assert asyncio.run(answer_retrieval_ext._derive_signals(retrieval_bad, "q", ["cpu"], fast_plan)) == []
assert asyncio.run(answer_retrieval_ext._derive_signals(retrieval_call, "q", ["cpu"], fast_plan)) == ["cpu", "ram"]
assert asyncio.run(answer_retrieval_ext._scan_chunk_for_signals(retrieval_call, "q", [], ["cpu: 1"], fast_plan)) == []
assert asyncio.run(answer_retrieval_ext._scan_chunk_for_signals(retrieval_bad, "q", ["cpu"], ["cpu: 1"], fast_plan)) == []
assert asyncio.run(answer_retrieval_ext._scan_chunk_for_signals(retrieval_call, "q", ["cpu"], ["cpu: 1", "ram: 2"], fast_plan)) == ["cpu: 1"]
assert asyncio.run(answer_retrieval_ext._prune_metric_candidates(retrieval_call, "q", [], fast_plan, 2)) == []
assert asyncio.run(answer_retrieval_ext._prune_metric_candidates(retrieval_bad, "q", ["cpu: 1"], fast_plan, 2)) == []
assert asyncio.run(answer_retrieval_ext._prune_metric_candidates(retrieval_call, "q", ["cpu: 1", "ram: 2"], fast_plan, 2)) == ["cpu: 1", "ram: 2"]
assert asyncio.run(answer_retrieval_ext._select_fact_lines(retrieval_call, "q", [], fast_plan, 1)) == []
assert asyncio.run(answer_retrieval_ext._select_fact_lines(retrieval_bad, "q", ["cpu: 1"], fast_plan, 1)) == []
assert asyncio.run(answer_retrieval_ext._select_fact_lines(retrieval_call, "q", ["cpu: 1", "ram: 2"], fast_plan, 1)) == ["cpu: 1"]
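    # post_call scripts the contradiction and insight-guard stages with fixed JSON replies.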
async def post_call(_system: str, _prompt: str, *, model: str | None = None, tag: str = "", **_: Any) -> str:
if tag == "contradiction":
return '{"use_facts": false, "confidence": 70}'
if tag == "insight_guard":
return '{"ok": true}'
if tag == "insight_fix":
return "fixed"
raise AssertionError(tag)
assert answer_post._strip_unknown_entities("", ["titan-99"], []) == ""
assert answer_post._strip_unknown_entities("Atlas is healthy.", [], []) == "Atlas is healthy."
assert answer_post._needs_evidence_guard("", ["fact"]) is False
assert answer_post._needs_evidence_guard("pressure is high", ["pressure"]) is False
contradiction = asyncio.run(
answer_post._contradiction_decision(
ContradictionContext(post_call, "q", "draft", ["fact"], fast_plan),
attempts=2,
)
)
assert contradiction["confidence"] == 70
assert answer_post._format_direct_metric_line("broken line") == "broken line"
assert answer_post._global_facts([]) == []
assert answer_post._has_keyword_overlap([], ["cpu"]) is False
assert answer_post._extract_question_tokens("") == []
assert answer_post._expand_tokens([]) == []
assert answer_post._ensure_token_coverage([], ["cpu"], ["cpu: 1"]) == []
assert answer_post._best_keyword_line(["ram: 1"], ["cpu"]) is None
assert answer_post._line_starting_with([], "cpu") is None
assert answer_post._non_rpi_nodes({"hardware_by_node": None}) == {}
assert answer_post._format_hardware_groups({}, "Nodes") == ""
assert answer_post._lexicon_context({"lexicon": []}) == ""
assert answer_post._parse_json_list("nope") == []
assert answer_post._scores_from_json({}).confidence == 60
assert answer_post._coerce_int("bad", 5) == 5
assert answer_post._style_hint({"question_type": "planning"}) == "insightful"
assert answer_post._needs_evidence_fix("", {"needs_snapshot": True}) is False
assert answer_post._should_use_insight_guard({"question_type": "planning"}) is True
insight = InsightGuardInput(
question="q",
reply="reply",
classify={"question_type": "planning"},
context="ctx",
plan=fast_plan,
call_llm=post_call,
facts=[],
)
assert asyncio.run(answer_post._apply_insight_guard(insight)) == "reply"
assert answer_post_ext._reply_matches_metric_facts("no numbers", ["cpu: 1"]) is False
assert answer_post_ext._needs_dedup("short.") is False
assert answer_post_ext._needs_focus_fix("why", "direct", {"question_type": "open_ended"}) is False
assert answer_post_ext._extract_keywords("Q", "Q", [], []) == []
assert answer_post_ext._allowed_nodes({}) == []
assert answer_post_ext._allowed_namespaces({}) == []
assert answer_post_ext._find_unknown_nodes("titan-01", ["titan-01"]) == []
assert answer_post_ext._find_unknown_namespaces("namespace synapse", ["synapse"]) == []
assert answer_post_ext._needs_runbook_fix("runbooks/fix.md", ["runbooks/fix.md"]) is False
assert answer_post_ext._needs_runbook_reference("status", ["runbooks/fix.md"], "ok") is False
assert answer_post_ext._best_runbook_match("x", []) is None
assert answer_post_ext._resolve_path({"a": []}, "a[1].b") is None
assert answer_post_ext._snapshot_id({"snapshot": {"id": "x"}}) is None
assert answer_post_ext._claims_to_payload([]) == []
assert answer_post_ext._state_from_payload({}) is None
assert answer_factsheet._factsheet_instruction("smart")
assert answer_factsheet._factsheet_model("quick", fast_plan) == fast_plan.fast_model
assert answer_factsheet._is_plain_math_question("2 + 2") is True
assert answer_factsheet._quick_fact_sheet_lines("where is runbook", ["runbooks/fix.md", "cpu: 1"], [], limit=1)
assert answer_factsheet._quick_fact_sheet_text([]) == "Fact Sheet:\n- No snapshot facts available."
assert "prefer rpi5 workers first" in answer_factsheet._quick_fact_sheet_heuristic_answer(
"what is the node placement last resort",
["runbooks/fix.md"],
)
assert "1 ready nodes out of 2 total" in answer_factsheet._quick_fact_sheet_heuristic_answer(
"how many ready nodes are there",
["nodes_total:2,ready:1,not_ready:1"],
)
assert answer_spine._join_context([]) == ""
assert answer_spine._format_history([]) == ""
assert answer_spine._line_starting_with([], "cpu") is None
assert answer_spine._spine_lines([]) == {}
extra_spine: dict[str, str] = {}
answer_spine._spine_nodes(["nodes: total=2 ready=1 not_ready=1"], extra_spine)
answer_spine._spine_hardware(["hardware: amd64=1 (titan-02)"], extra_spine)
answer_spine._spine_hottest(["hottest: cpu=titan-01 [rpi5] (90%)"], extra_spine)
answer_spine._spine_postgres(["postgres_connections_total: used=5, max=10"], extra_spine)
answer_spine._spine_namespaces(["namespace_pods_top: synapse=5"], extra_spine)
answer_spine._spine_pressure(["pressure: nodes=0"], extra_spine)
assert answer_spine._parse_group_line("invalid") == {}
assert answer_spine._parse_hottest("broken", "cpu") is None
assert answer_spine._spine_nodes_answer("nodes: total=2 ready=1 not_ready=1")
assert answer_spine._spine_pressure_answer("pressure: nodes=0")


def test_runtime_and_snapshot_edge_branches(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
"""Cover runtime wrappers and sparse snapshot builder branches."""
sparse_summary = {
"node_pods": [
{"node": "titan-01", "pods_total": "7", "namespaces_top": [("synapse", 3), ("vault", 2)]},
{"node": "titan-02", "pods_total": "x"},
],
"pod_issues": {
"counts": {"Failed": 1},
"items": [{"namespace": "synapse", "pod": "matrix", "phase": "Pending", "restarts": 1}],
"pending_oldest": [{"namespace": "synapse", "pod": "matrix", "age_hours": 2, "reason": "Waiting"}],
"waiting_reasons": {"ImagePullBackOff": 2},
"pending_over_15m": "2",
},
"workloads_health": {
"deployments": {"not_ready": 1},
"statefulsets": {"not_ready": 0},
"daemonsets": {"not_ready": 1},
},
"topology": {
"nodes": [{"node": "titan-01", "workloads_top": [("matrix", 3)]}],
"workloads": [{"namespace": "synapse", "workload": "matrix", "nodes_top": [("titan-01", 3)]}],
},
"flux": {
"not_ready": 2,
"items": [{"namespace": "flux-system", "name": "kustomization", "reason": "waiting", "suspended": True}],
},
"namespace_capacity_summary": {
"cpu_ratio_top": [{"namespace": "synapse", "cpu_usage_ratio": 0.8, "cpu_usage": 4, "cpu_requests": 5}],
"mem_ratio_top": [{"namespace": "synapse", "mem_usage_ratio": 0.7, "mem_usage": 7, "mem_requests": 10}],
"cpu_headroom_low": [{"namespace": "synapse", "headroom": 0.2}],
"mem_headroom_low": [{"namespace": "synapse", "headroom": 0.3}],
"cpu_overcommitted": 1,
"mem_overcommitted": 1,
"cpu_overcommitted_names": ["synapse"],
"mem_overcommitted_names": ["vault"],
},
"workloads": [{"namespace": "synapse", "workload": "matrix", "pods_total": 3, "primary_node": "titan-01"}],
"lexicon": {"terms": [{"term": "atlas", "meaning": "cluster"}], "aliases": {"bot": "atlas"}},
"cross_stats": {
"node_metric_top": [{"metric": "cpu", "node": "titan-01", "value": 90, "cpu": 90, "ram": 80, "net": 1.0, "io": 2.0, "pods_total": 3}],
"namespace_metric_top": [{"metric": "cpu", "namespace": "synapse", "value": 40, "cpu_ratio": 0.8, "mem_ratio": 0.7, "pods_total": 3}],
"pvc_top": [{"namespace": "synapse", "pvc": "data", "used_percent": 95}],
},
"events": {"warnings_total": 2},
}
lines: list[str] = []
format_a._append_node_pods(lines, sparse_summary)
format_a._append_pod_issues(lines, sparse_summary)
format_a._append_workload_health(lines, sparse_summary)
format_b._append_topology(lines, sparse_summary)
format_b._append_flux(lines, sparse_summary)
format_c._append_namespace_capacity_summary(lines, sparse_summary)
format_c._append_workloads_by_namespace(lines, sparse_summary)
format_c._append_lexicon(lines, sparse_summary)
format_c._append_cross_stats(lines, sparse_summary)
assert any("node_pods_max" in line for line in lines)
assert any("flux_not_ready_items" in line for line in lines)
assert any("cross_pvc_usage" in line for line in lines)
assert core_a._build_node_ages([{"name": "titan-01", "age_hours": 1}, "bad"])
assert core_a._build_node_facts([{"name": "titan-01", "is_worker": True, "roles": ["worker"], "arch": "arm64"}])
assert core_a._build_node_taints([{"name": "titan-01", "taints": [{"key": "dedicated", "effect": "NoSchedule"}]}])
assert core_a._build_root_disk_headroom({"node_usage": {"disk": [{"node": "titan-01", "value": 80}]}})
assert core_a._build_longhorn({"longhorn": {"total": 1}})
assert core_a._build_node_load({"node_load": [{"node": "titan-01"}]})
assert core_a._build_pods({"pods_running": 1})
assert core_a._build_capacity({"capacity_cpu": 4})
assert core_a._build_namespace_pods({"namespace_pods": [{"namespace": "synapse"}]})
assert core_a._build_namespace_nodes({"namespace_nodes": [{"namespace": "synapse"}]})
assert core_a._build_node_pods({"node_pods": [{"node": "titan-01"}]})
assert core_a._build_node_pods_top({"node_pods_top": [{"node": "titan-01"}]})
assert core_a._build_pod_issues({"pod_issues": {"counts": {}}})
assert core_a._build_events({"events": {"warnings_total": 1}})
assert core_a._build_event_summary({"events": {"warnings_top_reason": {"a": 1}, "warnings_latest": [{"reason": "x"}]}})
assert core_a._build_postgres({"postgres_connections": {"used": 1}})
settings = replace(build_test_settings(), queue_enabled=False)
store = ClaimStore(":memory:", 60)
assert store.get("") is None
store.set("", {"claims": []})
assert _safe_json(None, {}) == {}
kb_dir = tmp_path / "kb"
(kb_dir / "catalog").mkdir(parents=True)
(kb_dir / "catalog" / "runbooks.json").write_text(json.dumps([{"path": "runbooks/fix.md"}, {"title": "Missing path"}]), encoding="utf-8")
kb = KnowledgeBase(str(kb_dir))
assert kb.runbook_titles() == ""
assert kb.runbook_paths(limit=1) == ["runbooks/fix.md"]
from atlasbot.snapshot.builder import SnapshotProvider
provider = SnapshotProvider(replace(settings, ariadne_state_url="", snapshot_ttl_sec=1))
provider._cache = {"cached": True}
provider._cache_ts = 1.0
monkeypatch.setattr("atlasbot.snapshot.builder.time.monotonic", lambda: 100.0)
assert provider.get() == {"cached": True}
from atlasbot import main as main_mod
captured: dict[str, Any] = {}
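    # The probes capture the queue and API handlers that main() wires up, so the test
    # can invoke them directly after bootstrap completes.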
class QueueProbe:
def __init__(self, _settings: Settings, handler):
captured["handler"] = handler
async def start(self) -> None:
return None
async def submit(self, payload: dict[str, Any]) -> dict[str, Any]:
return {"reply": payload.get("question", ""), "scores": {}}
class ApiProbe:
def __init__(self, _settings: Settings, answer_handler):
captured["answer_handler"] = answer_handler
self.app = SimpleNamespace()
class ServerProbe:
def __init__(self, config: Any):
self.config = config
async def serve(self) -> None:
return None
class EngineProbe:
async def answer(
self,
question: str,
*,
mode: str,
history: list[dict[str, str]] | None = None,
observer: Any = None,
conversation_id: str | None = None,
snapshot_pin: bool | None = None,
) -> AnswerResult:
return AnswerResult(
reply=f"{question}:{mode}:{bool(history)}:{conversation_id}:{snapshot_pin}:{observer is not None}",
scores=AnswerScores(confidence=91, relevance=92, satisfaction=93, hallucination_risk="low"),
meta={},
)
monkeypatch.setattr(main_mod, "load_settings", lambda: replace(settings, matrix_bots=()))
monkeypatch.setattr(main_mod, "configure_logging", lambda _level: None)
monkeypatch.setattr(main_mod, "_build_engine", lambda _settings: EngineProbe())
monkeypatch.setattr(main_mod, "QueueManager", QueueProbe)
monkeypatch.setattr(main_mod, "Api", ApiProbe)
monkeypatch.setattr(main_mod.uvicorn, "Server", ServerProbe)
asyncio.run(main_mod.main())
handled = asyncio.run(captured["handler"]({"question": "hello", "mode": "smart", "history": "bad", "conversation_id": 7, "snapshot_pin": "bad"}))
assert handled["reply"]
answered = asyncio.run(captured["answer_handler"]("hello", "quick", None, None, None, None))
assert answered.reply
assert result_scores({"scores": {"confidence": "bad"}}).confidence == 60
qm = QueueManager(replace(settings, queue_enabled=True), lambda payload: asyncio.sleep(0, result=payload))
with pytest.raises(RuntimeError, match="queue not initialized"):
asyncio.run(qm.submit({"question": "x"}))
assert _mode_timeout_sec(settings, "genius") == settings.genius_time_budget_sec
assert _extract_mode("atlas hello", ("atlas",), "quick") == ("quick", "hello")