atlasbot: include KB chunks in retrieval
This commit is contained in:
parent
4daf9884fe
commit
23e99712a5
@ -335,6 +335,12 @@ class AnswerEngine:
|
|||||||
if observer:
|
if observer:
|
||||||
observer("retrieve", "scoring chunks")
|
observer("retrieve", "scoring chunks")
|
||||||
chunks = _chunk_lines(summary_lines, plan.chunk_lines)
|
chunks = _chunk_lines(summary_lines, plan.chunk_lines)
|
||||||
|
kb_lines = self._kb.chunk_lines() if self._kb else []
|
||||||
|
if kb_lines:
|
||||||
|
kb_chunks = _chunk_lines(kb_lines, plan.chunk_lines)
|
||||||
|
for idx, chunk in enumerate(kb_chunks):
|
||||||
|
chunk["id"] = f"k{idx}"
|
||||||
|
chunks.extend(kb_chunks)
|
||||||
metric_keys: list[str] = []
|
metric_keys: list[str] = []
|
||||||
must_chunk_ids: list[str] = []
|
must_chunk_ids: list[str] = []
|
||||||
metric_task = None
|
metric_task = None
|
||||||
@ -2092,6 +2098,21 @@ def _format_direct_metric_line(line: str) -> str:
|
|||||||
key = key.strip().replace("_", " ")
|
key = key.strip().replace("_", " ")
|
||||||
value = value.strip()
|
value = value.strip()
|
||||||
if value:
|
if value:
|
||||||
|
if key == "nodes":
|
||||||
|
parts = [p.strip() for p in value.split(",") if p.strip()]
|
||||||
|
total = None
|
||||||
|
rest: list[str] = []
|
||||||
|
for part in parts:
|
||||||
|
if part.startswith("total="):
|
||||||
|
total = part.split("=", 1)[1]
|
||||||
|
else:
|
||||||
|
rest.append(part.replace("_", " "))
|
||||||
|
if total:
|
||||||
|
if rest:
|
||||||
|
return f"Atlas has {total} total nodes ({'; '.join(rest)})."
|
||||||
|
return f"Atlas has {total} total nodes."
|
||||||
|
if key in {"nodes total", "nodes_total"}:
|
||||||
|
return f"Atlas has {value} total nodes."
|
||||||
return f"{key} is {value}."
|
return f"{key} is {value}."
|
||||||
return line
|
return line
|
||||||
|
|
||||||
|
|||||||
@ -71,3 +71,46 @@ class KnowledgeBase:
|
|||||||
if path:
|
if path:
|
||||||
paths.append(str(path))
|
paths.append(str(path))
|
||||||
return paths[:limit]
|
return paths[:limit]
|
||||||
|
|
||||||
|
def chunk_lines(self, *, max_files: int = 20, max_chars: int = 6000) -> list[str]:
    """Return knowledge-base content as a flat list of text lines.

    Calls ``self.load()`` first, then emits, in order:

    1. ``"KB: atlas.json"`` followed by the pretty-printed JSON of
       ``self._atlas`` (when it is truthy and serializable).
    2. ``"KB: runbooks.json"`` followed by one ``"- title (path)"`` line per
       dict entry of ``self._runbooks`` that has both a title and a path.
    3. For each ``*.md`` and then each ``*.txt`` file found recursively under
       ``self._base`` (each group sorted), a ``"KB File: <relative path>"``
       header followed by the file's lines.

    Args:
        max_files: Maximum number of markdown/text files to include.
        max_chars: Soft character budget, measured as the sum of line
            lengths. No further file is started once the budget is reached;
            a file that has been started is always included in full, so the
            result may overshoot the budget.

    Returns:
        The collected lines; an empty list when ``self._base`` is falsy.
    """
    self.load()
    lines: list[str] = []
    if not self._base:
        return lines

    # Prefer curated catalog JSON if present.
    if self._atlas:
        try:
            atlas_json = json.dumps(self._atlas, indent=2)
        except (TypeError, ValueError):
            # Best effort: a non-serializable or circular catalog is skipped.
            atlas_json = ""
        if atlas_json:
            lines.append("KB: atlas.json")
            lines.extend(atlas_json.splitlines())

    if self._runbooks:
        lines.append("KB: runbooks.json")
        for entry in self._runbooks:
            if not isinstance(entry, dict):
                continue
            title = entry.get("title")
            path = entry.get("path")
            if title and path:
                lines.append(f"- {title} ({path})")

    # Track the budget in characters (not in number of lines) and keep a
    # running total so the whole list is not re-summed after every file.
    total_chars = sum(len(line) for line in lines)
    if total_chars >= max_chars:
        return lines

    # Include markdown/text sources as additional chunks.
    files = sorted(self._base.rglob("*.md")) + sorted(self._base.rglob("*.txt"))
    files_added = 0
    for path in files:
        if files_added >= max_files or total_chars >= max_chars:
            break
        try:
            text = path.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            # Unreadable entry (permissions, a directory named *.md, ...).
            continue
        if not text:
            continue
        file_lines = [f"KB File: {path.relative_to(self._base)}", *text.splitlines()]
        lines.extend(file_lines)
        total_chars += sum(len(line) for line in file_lines)
        files_added += 1
    return lines
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user