retriever: prioritize keyword chunks
This commit is contained in:
parent
3d076fb223
commit
7e9b01915a
@@ -1094,10 +1094,28 @@ def _select_chunks(
|
||||
selected: list[dict[str, Any]] = []
|
||||
head = chunks[0]
|
||||
selected.append(head)
|
||||
|
||||
keyword_hits: list[dict[str, Any]] = []
|
||||
if keywords:
|
||||
lowered = [kw.lower() for kw in keywords if isinstance(kw, str) and kw.strip()]
|
||||
for item in ranked:
|
||||
if item is head:
|
||||
continue
|
||||
text = str(item.get("text") or "").lower()
|
||||
if any(kw in text for kw in lowered):
|
||||
keyword_hits.append(item)
|
||||
|
||||
for item in keyword_hits:
|
||||
if len(selected) >= plan.chunk_top:
|
||||
return selected
|
||||
if item in selected:
|
||||
continue
|
||||
selected.append(item)
|
||||
|
||||
for item in ranked:
|
||||
if len(selected) >= plan.chunk_top:
|
||||
break
|
||||
if item is head:
|
||||
if item in selected:
|
||||
continue
|
||||
selected.append(item)
|
||||
return selected
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user