atlasbot: add raw snapshot chunks for genius
This commit is contained in:
parent
2aff855ce9
commit
92bc8b642d
@ -79,6 +79,7 @@ class ModePlan:
|
|||||||
chunk_group: int
|
chunk_group: int
|
||||||
kb_max_chars: int
|
kb_max_chars: int
|
||||||
kb_max_files: int
|
kb_max_files: int
|
||||||
|
use_raw_snapshot: bool
|
||||||
parallelism: int
|
parallelism: int
|
||||||
score_retries: int
|
score_retries: int
|
||||||
use_deep_retrieval: bool
|
use_deep_retrieval: bool
|
||||||
@ -337,6 +338,10 @@ class AnswerEngine:
|
|||||||
if observer:
|
if observer:
|
||||||
observer("retrieve", "scoring chunks")
|
observer("retrieve", "scoring chunks")
|
||||||
chunks = _chunk_lines(summary_lines, plan.chunk_lines)
|
chunks = _chunk_lines(summary_lines, plan.chunk_lines)
|
||||||
|
if plan.use_raw_snapshot:
|
||||||
|
raw_chunks = _raw_snapshot_chunks(snapshot_used)
|
||||||
|
if raw_chunks:
|
||||||
|
chunks.extend(raw_chunks)
|
||||||
kb_lines = self._kb.chunk_lines(max_files=plan.kb_max_files, max_chars=plan.kb_max_chars) if self._kb else []
|
kb_lines = self._kb.chunk_lines(max_files=plan.kb_max_files, max_chars=plan.kb_max_chars) if self._kb else []
|
||||||
if kb_lines:
|
if kb_lines:
|
||||||
kb_chunks = _chunk_lines(kb_lines, plan.chunk_lines)
|
kb_chunks = _chunk_lines(kb_lines, plan.chunk_lines)
|
||||||
@ -1137,8 +1142,9 @@ def _mode_plan(settings: Settings, mode: str) -> ModePlan:
|
|||||||
chunk_lines=6,
|
chunk_lines=6,
|
||||||
chunk_top=10,
|
chunk_top=10,
|
||||||
chunk_group=4,
|
chunk_group=4,
|
||||||
kb_max_chars=5000,
|
kb_max_chars=200000,
|
||||||
kb_max_files=18,
|
kb_max_files=200,
|
||||||
|
use_raw_snapshot=True,
|
||||||
parallelism=4,
|
parallelism=4,
|
||||||
score_retries=3,
|
score_retries=3,
|
||||||
use_deep_retrieval=True,
|
use_deep_retrieval=True,
|
||||||
@ -1160,6 +1166,7 @@ def _mode_plan(settings: Settings, mode: str) -> ModePlan:
|
|||||||
chunk_group=4,
|
chunk_group=4,
|
||||||
kb_max_chars=3000,
|
kb_max_chars=3000,
|
||||||
kb_max_files=12,
|
kb_max_files=12,
|
||||||
|
use_raw_snapshot=False,
|
||||||
parallelism=2,
|
parallelism=2,
|
||||||
score_retries=2,
|
score_retries=2,
|
||||||
use_deep_retrieval=True,
|
use_deep_retrieval=True,
|
||||||
@ -1180,6 +1187,7 @@ def _mode_plan(settings: Settings, mode: str) -> ModePlan:
|
|||||||
chunk_group=5,
|
chunk_group=5,
|
||||||
kb_max_chars=1200,
|
kb_max_chars=1200,
|
||||||
kb_max_files=6,
|
kb_max_files=6,
|
||||||
|
use_raw_snapshot=False,
|
||||||
parallelism=1,
|
parallelism=1,
|
||||||
score_retries=1,
|
score_retries=1,
|
||||||
use_deep_retrieval=False,
|
use_deep_retrieval=False,
|
||||||
@ -1234,6 +1242,20 @@ def _chunk_lines(lines: list[str], lines_per_chunk: int) -> list[dict[str, Any]]
|
|||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
|
|
||||||
|
def _raw_snapshot_chunks(snapshot: dict[str, Any] | None) -> list[dict[str, Any]]:
|
||||||
|
if not isinstance(snapshot, dict) or not snapshot:
|
||||||
|
return []
|
||||||
|
chunks: list[dict[str, Any]] = []
|
||||||
|
for key, value in snapshot.items():
|
||||||
|
try:
|
||||||
|
payload = json.dumps({key: value}, indent=2)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
summary = f"raw:{key}"
|
||||||
|
chunks.append({"id": f"r{key}", "text": payload, "summary": summary})
|
||||||
|
return chunks
|
||||||
|
|
||||||
|
|
||||||
def _build_chunk_groups(chunks: list[dict[str, Any]], group_size: int) -> list[list[dict[str, Any]]]:
|
def _build_chunk_groups(chunks: list[dict[str, Any]], group_size: int) -> list[list[dict[str, Any]]]:
|
||||||
groups: list[list[dict[str, Any]]] = []
|
groups: list[list[dict[str, Any]]] = []
|
||||||
group: list[dict[str, Any]] = []
|
group: list[dict[str, Any]] = []
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user