Compare commits
5 Commits
677e31ba2d
...
d8249bba37
| Author | SHA1 | Date | |
|---|---|---|---|
| d8249bba37 | |||
| 9827db8a49 | |||
| 71d2829d0f | |||
| 387eea1399 | |||
| a8e9bc53de |
@ -184,7 +184,7 @@ class AnswerEngine:
|
|||||||
normalized = str(normalize.get("normalized") or question).strip() or question
|
normalized = str(normalize.get("normalized") or question).strip() or question
|
||||||
keywords = normalize.get("keywords") or []
|
keywords = normalize.get("keywords") or []
|
||||||
_debug_log("normalize_parsed", {"normalized": normalized, "keywords": keywords})
|
_debug_log("normalize_parsed", {"normalized": normalized, "keywords": keywords})
|
||||||
keyword_tokens = _extract_keywords(normalized, sub_questions=[], keywords=keywords)
|
keyword_tokens = _extract_keywords(question, normalized, sub_questions=[], keywords=keywords)
|
||||||
|
|
||||||
if observer:
|
if observer:
|
||||||
observer("route", "routing")
|
observer("route", "routing")
|
||||||
@ -250,7 +250,7 @@ class AnswerEngine:
|
|||||||
parts = _parse_json_list(decompose_raw)
|
parts = _parse_json_list(decompose_raw)
|
||||||
sub_questions = _select_subquestions(parts, normalized, plan.max_subquestions)
|
sub_questions = _select_subquestions(parts, normalized, plan.max_subquestions)
|
||||||
_debug_log("decompose_parsed", {"sub_questions": sub_questions})
|
_debug_log("decompose_parsed", {"sub_questions": sub_questions})
|
||||||
keyword_tokens = _extract_keywords(normalized, sub_questions=sub_questions, keywords=keywords)
|
keyword_tokens = _extract_keywords(question, normalized, sub_questions=sub_questions, keywords=keywords)
|
||||||
|
|
||||||
snapshot_context = ""
|
snapshot_context = ""
|
||||||
if classify.get("needs_snapshot"):
|
if classify.get("needs_snapshot"):
|
||||||
@ -260,7 +260,7 @@ class AnswerEngine:
|
|||||||
scored = await _score_chunks(call_llm, chunks, normalized, sub_questions, plan)
|
scored = await _score_chunks(call_llm, chunks, normalized, sub_questions, plan)
|
||||||
selected = _select_chunks(chunks, scored, plan, keyword_tokens)
|
selected = _select_chunks(chunks, scored, plan, keyword_tokens)
|
||||||
key_facts = _key_fact_lines(summary_lines, keyword_tokens)
|
key_facts = _key_fact_lines(summary_lines, keyword_tokens)
|
||||||
metric_facts = [line for line in key_facts if re.search(r"\\d", line)]
|
metric_facts = [line for line in key_facts if re.search(r"\d", line)]
|
||||||
if self._settings.debug_pipeline:
|
if self._settings.debug_pipeline:
|
||||||
scored_preview = sorted(
|
scored_preview = sorted(
|
||||||
[{"id": c["id"], "score": scored.get(c["id"], 0.0), "summary": c["summary"]} for c in chunks],
|
[{"id": c["id"], "score": scored.get(c["id"], 0.0), "summary": c["summary"]} for c in chunks],
|
||||||
@ -405,15 +405,19 @@ class AnswerEngine:
|
|||||||
model=plan.model,
|
model=plan.model,
|
||||||
tag="focus_fix",
|
tag="focus_fix",
|
||||||
)
|
)
|
||||||
if classify.get("question_type") in {"metric", "diagnostic"} and metric_facts and not re.search(r"\\d", reply):
|
if classify.get("question_type") in {"metric", "diagnostic"} and metric_facts:
|
||||||
best_line = None
|
best_line = None
|
||||||
lowered = normalized.lower()
|
lowered_keywords = [kw.lower() for kw in keyword_tokens if kw]
|
||||||
for line in metric_facts:
|
for line in metric_facts:
|
||||||
if any(token in line.lower() for token in lowered.split()):
|
line_lower = line.lower()
|
||||||
|
if any(kw in line_lower for kw in lowered_keywords):
|
||||||
best_line = line
|
best_line = line
|
||||||
break
|
break
|
||||||
best_line = best_line or metric_facts[0]
|
best_line = best_line or metric_facts[0]
|
||||||
reply = f"From the latest snapshot: {best_line}."
|
reply_numbers = set(re.findall(r"\d+(?:\.\d+)?", reply))
|
||||||
|
fact_numbers = set(re.findall(r"\d+(?:\.\d+)?", " ".join(metric_facts)))
|
||||||
|
if not reply_numbers or (fact_numbers and not (reply_numbers & fact_numbers)):
|
||||||
|
reply = f"From the latest snapshot: {best_line}."
|
||||||
|
|
||||||
if plan.use_critic:
|
if plan.use_critic:
|
||||||
if observer:
|
if observer:
|
||||||
@ -443,6 +447,9 @@ class AnswerEngine:
|
|||||||
if note:
|
if note:
|
||||||
reply = f"{reply}\n\n{note}"
|
reply = f"{reply}\n\n{note}"
|
||||||
|
|
||||||
|
if classify.get("question_type") in {"metric", "diagnostic"} and metric_facts:
|
||||||
|
reply = _metric_fact_guard(reply, metric_facts, keyword_tokens)
|
||||||
|
|
||||||
scores = await self._score_answer(normalized, reply, plan, call_llm)
|
scores = await self._score_answer(normalized, reply, plan, call_llm)
|
||||||
claims = await self._extract_claims(normalized, reply, summary, call_llm)
|
claims = await self._extract_claims(normalized, reply, summary, call_llm)
|
||||||
except LLMLimitReached:
|
except LLMLimitReached:
|
||||||
@ -873,16 +880,45 @@ def _key_fact_lines(lines: list[str], keywords: list[str] | None, limit: int = 6
|
|||||||
lowered = [kw.lower() for kw in keywords if kw]
|
lowered = [kw.lower() for kw in keywords if kw]
|
||||||
if not lowered:
|
if not lowered:
|
||||||
return []
|
return []
|
||||||
|
focused = _focused_keywords(lowered)
|
||||||
|
primary = focused or lowered
|
||||||
matches: list[str] = []
|
matches: list[str] = []
|
||||||
for line in lines:
|
for line in lines:
|
||||||
line_lower = line.lower()
|
line_lower = line.lower()
|
||||||
if any(kw in line_lower for kw in lowered):
|
if any(kw in line_lower for kw in primary):
|
||||||
matches.append(line)
|
matches.append(line)
|
||||||
if len(matches) >= limit:
|
if len(matches) >= limit:
|
||||||
break
|
break
|
||||||
|
if len(matches) < limit and focused:
|
||||||
|
for line in lines:
|
||||||
|
if len(matches) >= limit:
|
||||||
|
break
|
||||||
|
if line in matches:
|
||||||
|
continue
|
||||||
|
line_lower = line.lower()
|
||||||
|
if any(kw in line_lower for kw in lowered):
|
||||||
|
matches.append(line)
|
||||||
return matches
|
return matches
|
||||||
|
|
||||||
|
|
||||||
|
def _metric_fact_guard(reply: str, metric_facts: list[str], keywords: list[str]) -> str:
|
||||||
|
if not metric_facts:
|
||||||
|
return reply
|
||||||
|
best_line = None
|
||||||
|
lowered_keywords = [kw.lower() for kw in keywords if kw]
|
||||||
|
for line in metric_facts:
|
||||||
|
line_lower = line.lower()
|
||||||
|
if any(kw in line_lower for kw in lowered_keywords):
|
||||||
|
best_line = line
|
||||||
|
break
|
||||||
|
best_line = best_line or metric_facts[0]
|
||||||
|
reply_numbers = set(re.findall(r"\d+(?:\.\d+)?", reply))
|
||||||
|
fact_numbers = set(re.findall(r"\d+(?:\.\d+)?", " ".join(metric_facts)))
|
||||||
|
if not reply_numbers or (fact_numbers and not (reply_numbers & fact_numbers)):
|
||||||
|
return f"From the latest snapshot: {best_line}."
|
||||||
|
return reply
|
||||||
|
|
||||||
|
|
||||||
def _lexicon_context(summary: dict[str, Any]) -> str:
|
def _lexicon_context(summary: dict[str, Any]) -> str:
|
||||||
if not isinstance(summary, dict):
|
if not isinstance(summary, dict):
|
||||||
return ""
|
return ""
|
||||||
@ -987,7 +1023,12 @@ def _needs_focus_fix(question: str, reply: str, classify: dict[str, Any]) -> boo
|
|||||||
return any(marker in reply.lower() for marker in extra_markers)
|
return any(marker in reply.lower() for marker in extra_markers)
|
||||||
|
|
||||||
|
|
||||||
def _extract_keywords(normalized: str, sub_questions: list[str], keywords: list[Any] | None) -> list[str]:
|
def _extract_keywords(
|
||||||
|
raw_question: str,
|
||||||
|
normalized: str,
|
||||||
|
sub_questions: list[str],
|
||||||
|
keywords: list[Any] | None,
|
||||||
|
) -> list[str]:
|
||||||
stopwords = {
|
stopwords = {
|
||||||
"the",
|
"the",
|
||||||
"and",
|
"and",
|
||||||
@ -1011,7 +1052,7 @@ def _extract_keywords(normalized: str, sub_questions: list[str], keywords: list[
|
|||||||
"now",
|
"now",
|
||||||
}
|
}
|
||||||
tokens: list[str] = []
|
tokens: list[str] = []
|
||||||
for source in [normalized, *sub_questions]:
|
for source in [raw_question, normalized, *sub_questions]:
|
||||||
for part in re.split(r"[^a-zA-Z0-9_-]+", source.lower()):
|
for part in re.split(r"[^a-zA-Z0-9_-]+", source.lower()):
|
||||||
if len(part) < 3 or part in stopwords:
|
if len(part) < 3 or part in stopwords:
|
||||||
continue
|
continue
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user