atlasbot: add raw snapshot chunks for genius

This commit is contained in:
Brad Stein 2026-02-04 19:12:58 -03:00
parent 2aff855ce9
commit 92bc8b642d

View File

@ -79,6 +79,7 @@ class ModePlan:
chunk_group: int chunk_group: int
kb_max_chars: int kb_max_chars: int
kb_max_files: int kb_max_files: int
use_raw_snapshot: bool
parallelism: int parallelism: int
score_retries: int score_retries: int
use_deep_retrieval: bool use_deep_retrieval: bool
@ -337,6 +338,10 @@ class AnswerEngine:
if observer: if observer:
observer("retrieve", "scoring chunks") observer("retrieve", "scoring chunks")
chunks = _chunk_lines(summary_lines, plan.chunk_lines) chunks = _chunk_lines(summary_lines, plan.chunk_lines)
if plan.use_raw_snapshot:
raw_chunks = _raw_snapshot_chunks(snapshot_used)
if raw_chunks:
chunks.extend(raw_chunks)
kb_lines = self._kb.chunk_lines(max_files=plan.kb_max_files, max_chars=plan.kb_max_chars) if self._kb else [] kb_lines = self._kb.chunk_lines(max_files=plan.kb_max_files, max_chars=plan.kb_max_chars) if self._kb else []
if kb_lines: if kb_lines:
kb_chunks = _chunk_lines(kb_lines, plan.chunk_lines) kb_chunks = _chunk_lines(kb_lines, plan.chunk_lines)
@ -1137,8 +1142,9 @@ def _mode_plan(settings: Settings, mode: str) -> ModePlan:
chunk_lines=6, chunk_lines=6,
chunk_top=10, chunk_top=10,
chunk_group=4, chunk_group=4,
kb_max_chars=5000, kb_max_chars=200000,
kb_max_files=18, kb_max_files=200,
use_raw_snapshot=True,
parallelism=4, parallelism=4,
score_retries=3, score_retries=3,
use_deep_retrieval=True, use_deep_retrieval=True,
@ -1160,6 +1166,7 @@ def _mode_plan(settings: Settings, mode: str) -> ModePlan:
chunk_group=4, chunk_group=4,
kb_max_chars=3000, kb_max_chars=3000,
kb_max_files=12, kb_max_files=12,
use_raw_snapshot=False,
parallelism=2, parallelism=2,
score_retries=2, score_retries=2,
use_deep_retrieval=True, use_deep_retrieval=True,
@ -1180,6 +1187,7 @@ def _mode_plan(settings: Settings, mode: str) -> ModePlan:
chunk_group=5, chunk_group=5,
kb_max_chars=1200, kb_max_chars=1200,
kb_max_files=6, kb_max_files=6,
use_raw_snapshot=False,
parallelism=1, parallelism=1,
score_retries=1, score_retries=1,
use_deep_retrieval=False, use_deep_retrieval=False,
@ -1234,6 +1242,20 @@ def _chunk_lines(lines: list[str], lines_per_chunk: int) -> list[dict[str, Any]]
return chunks return chunks
def _raw_snapshot_chunks(snapshot: dict[str, Any] | None) -> list[dict[str, Any]]:
if not isinstance(snapshot, dict) or not snapshot:
return []
chunks: list[dict[str, Any]] = []
for key, value in snapshot.items():
try:
payload = json.dumps({key: value}, indent=2)
except Exception:
continue
summary = f"raw:{key}"
chunks.append({"id": f"r{key}", "text": payload, "summary": summary})
return chunks
def _build_chunk_groups(chunks: list[dict[str, Any]], group_size: int) -> list[list[dict[str, Any]]]: def _build_chunk_groups(chunks: list[dict[str, Any]], group_size: int) -> list[list[dict[str, Any]]]:
groups: list[list[dict[str, Any]]] = [] groups: list[list[dict[str, Any]]] = []
group: list[dict[str, Any]] = [] group: list[dict[str, Any]] = []