retriever: split keyword hit selection
This commit is contained in:
parent
7e9b01915a
commit
46fdebbbfb
@ -1082,6 +1082,26 @@ async def _score_chunk_group(
|
|||||||
return scored
|
return scored
|
||||||
|
|
||||||
|
|
||||||
|
def _keyword_hits(
|
||||||
|
ranked: list[dict[str, Any]],
|
||||||
|
head: dict[str, Any],
|
||||||
|
keywords: list[str] | None,
|
||||||
|
) -> list[dict[str, Any]]:
|
||||||
|
if not keywords:
|
||||||
|
return []
|
||||||
|
lowered = [kw.lower() for kw in keywords if isinstance(kw, str) and kw.strip()]
|
||||||
|
if not lowered:
|
||||||
|
return []
|
||||||
|
hits: list[dict[str, Any]] = []
|
||||||
|
for item in ranked:
|
||||||
|
if item is head:
|
||||||
|
continue
|
||||||
|
text = str(item.get("text") or "").lower()
|
||||||
|
if any(kw in text for kw in lowered):
|
||||||
|
hits.append(item)
|
||||||
|
return hits
|
||||||
|
|
||||||
|
|
||||||
def _select_chunks(
|
def _select_chunks(
|
||||||
chunks: list[dict[str, Any]],
|
chunks: list[dict[str, Any]],
|
||||||
scores: dict[str, float],
|
scores: dict[str, float],
|
||||||
@ -1095,29 +1115,17 @@ def _select_chunks(
|
|||||||
head = chunks[0]
|
head = chunks[0]
|
||||||
selected.append(head)
|
selected.append(head)
|
||||||
|
|
||||||
keyword_hits: list[dict[str, Any]] = []
|
for item in _keyword_hits(ranked, head, keywords):
|
||||||
if keywords:
|
|
||||||
lowered = [kw.lower() for kw in keywords if isinstance(kw, str) and kw.strip()]
|
|
||||||
for item in ranked:
|
|
||||||
if item is head:
|
|
||||||
continue
|
|
||||||
text = str(item.get("text") or "").lower()
|
|
||||||
if any(kw in text for kw in lowered):
|
|
||||||
keyword_hits.append(item)
|
|
||||||
|
|
||||||
for item in keyword_hits:
|
|
||||||
if len(selected) >= plan.chunk_top:
|
if len(selected) >= plan.chunk_top:
|
||||||
return selected
|
return selected
|
||||||
if item in selected:
|
if item not in selected:
|
||||||
continue
|
selected.append(item)
|
||||||
selected.append(item)
|
|
||||||
|
|
||||||
for item in ranked:
|
for item in ranked:
|
||||||
if len(selected) >= plan.chunk_top:
|
if len(selected) >= plan.chunk_top:
|
||||||
break
|
break
|
||||||
if item in selected:
|
if item not in selected:
|
||||||
continue
|
selected.append(item)
|
||||||
selected.append(item)
|
|
||||||
return selected
|
return selected
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user