atlasbot: include KB chunks in retrieval

2026-02-04 15:39:15 -03:00 · 2026-02-04 15:39:15 -03:00 · 23e99712a5
commit 23e99712a5
parent 4daf9884fe
2 changed files with 64 additions and 0 deletions
--- a/atlasbot/engine/answerer.py
+++ b/atlasbot/engine/answerer.py
@ -335,6 +335,12 @@ class AnswerEngine:
                if observer:
                    observer("retrieve", "scoring chunks")
                chunks = _chunk_lines(summary_lines, plan.chunk_lines)
                kb_lines = self._kb.chunk_lines() if self._kb else []
                if kb_lines:
                    kb_chunks = _chunk_lines(kb_lines, plan.chunk_lines)
                    for idx, chunk in enumerate(kb_chunks):
                        chunk["id"] = f"k{idx}"
                    chunks.extend(kb_chunks)
                metric_keys: list[str] = []
                must_chunk_ids: list[str] = []
                metric_task = None
@ -2092,6 +2098,21 @@ def _format_direct_metric_line(line: str) -> str:
        key = key.strip().replace("_", " ")
        value = value.strip()
        if value:
            if key == "nodes":
                parts = [p.strip() for p in value.split(",") if p.strip()]
                total = None
                rest: list[str] = []
                for part in parts:
                    if part.startswith("total="):
                        total = part.split("=", 1)[1]
                    else:
                        rest.append(part.replace("_", " "))
                if total:
                    if rest:
                        return f"Atlas has {total} total nodes ({'; '.join(rest)})."
                    return f"Atlas has {total} total nodes."
            if key in {"nodes total", "nodes_total"}:
                return f"Atlas has {value} total nodes."
            return f"{key} is {value}."
    return line
--- a/atlasbot/knowledge/loader.py
+++ b/atlasbot/knowledge/loader.py
@ -71,3 +71,46 @@ class KnowledgeBase:
            if path:
                paths.append(str(path))
        return paths[:limit]
    def chunk_lines(self, *, max_files: int = 20, max_chars: int = 6000) -> list[str]:
        """Flatten the knowledge base into plain text lines for chunking.

        Calls ``self.load()`` first, then emits lines in this order:

        1. ``KB: atlas.json`` followed by the pretty-printed ``self._atlas``
           (skipped when it is empty or cannot be serialized to JSON).
        2. ``KB: runbooks.json`` followed by one ``- <title> (<path>)`` bullet
           for every dict entry in ``self._runbooks`` that has both values.
        3. For each ``*.md`` file, then each ``*.txt`` file, found recursively
           under ``self._base`` (each group sorted by path): a
           ``KB File: <path relative to base>`` header and the file's lines.

        Args:
            max_files: Maximum number of markdown/text files to include.
                Unreadable and empty files are skipped and do not count.
            max_chars: Soft character budget, measured as the summed length
                of the returned lines. It is checked once after the catalog
                section and again after every file, so the result can
                overshoot by the content that crossed the limit.

        Returns:
            The collected lines, or an empty list when ``self._base`` is
            falsy.
        """
        self.load()
        lines: list[str] = []
        if not self._base:
            return lines

        # Prefer curated catalog JSON if present.
        if self._atlas:
            try:
                atlas_json = json.dumps(self._atlas, indent=2)
            except (TypeError, ValueError, RecursionError):
                # Best effort: a catalog that cannot be serialized is left
                # out rather than failing the whole retrieval.
                pass
            else:
                lines.append("KB: atlas.json")
                lines.extend(atlas_json.splitlines())
        if self._runbooks:
            lines.append("KB: runbooks.json")
            for entry in self._runbooks:
                if not isinstance(entry, dict):
                    continue
                title = entry.get("title")
                path = entry.get("path")
                if title and path:
                    lines.append(f"- {title} ({path})")

        # Track the budget in characters (not in number of lines) and keep a
        # running total so it is not re-summed after every file.
        total_chars = sum(len(line) for line in lines)
        if total_chars >= max_chars:
            return lines

        # Include markdown/text sources as additional chunks.
        files = sorted(self._base.rglob("*.md")) + sorted(self._base.rglob("*.txt"))
        included = 0
        for path in files:
            if included >= max_files:
                break
            try:
                text = path.read_text(encoding="utf-8", errors="ignore")
            except OSError:
                # Unreadable entry (permissions, directory named *.md, ...).
                continue
            if not text:
                continue
            header = f"KB File: {path.relative_to(self._base)}"
            body = text.splitlines()
            lines.append(header)
            lines.extend(body)
            included += 1
            total_chars += len(header) + sum(len(line) for line in body)
            if total_chars >= max_chars:
                break
        return lines