atlasbot: enforce evidence in answers
This commit is contained in:
parent
c1f1ef23a6
commit
81e2c65a21
@ -121,6 +121,7 @@ class AnswerEngine:
|
|||||||
"tool",
|
"tool",
|
||||||
"followup",
|
"followup",
|
||||||
"select_claims",
|
"select_claims",
|
||||||
|
"evidence_fix",
|
||||||
}
|
}
|
||||||
|
|
||||||
def _debug_log(name: str, payload: Any) -> None:
|
def _debug_log(name: str, payload: Any) -> None:
|
||||||
@ -274,6 +275,24 @@ class AnswerEngine:
|
|||||||
observer("synthesize", "synthesizing")
|
observer("synthesize", "synthesizing")
|
||||||
reply = await self._synthesize_answer(normalized, subanswers, context, classify, plan, call_llm)
|
reply = await self._synthesize_answer(normalized, subanswers, context, classify, plan, call_llm)
|
||||||
|
|
||||||
|
if snapshot_context and _needs_evidence_fix(reply, classify):
|
||||||
|
if observer:
|
||||||
|
observer("evidence_fix", "repairing missing evidence")
|
||||||
|
fix_prompt = (
|
||||||
|
prompts.EVIDENCE_FIX_PROMPT
|
||||||
|
+ "\nQuestion: "
|
||||||
|
+ normalized
|
||||||
|
+ "\nDraft: "
|
||||||
|
+ reply
|
||||||
|
)
|
||||||
|
reply = await call_llm(
|
||||||
|
prompts.EVIDENCE_FIX_SYSTEM,
|
||||||
|
fix_prompt,
|
||||||
|
context=context,
|
||||||
|
model=plan.model,
|
||||||
|
tag="evidence_fix",
|
||||||
|
)
|
||||||
|
|
||||||
if plan.use_critic:
|
if plan.use_critic:
|
||||||
if observer:
|
if observer:
|
||||||
observer("critic", "reviewing")
|
observer("critic", "reviewing")
|
||||||
@ -766,6 +785,29 @@ def _default_scores() -> AnswerScores:
|
|||||||
return AnswerScores(confidence=60, relevance=60, satisfaction=60, hallucination_risk="medium")
|
return AnswerScores(confidence=60, relevance=60, satisfaction=60, hallucination_risk="medium")
|
||||||
|
|
||||||
|
|
||||||
|
def _needs_evidence_fix(reply: str, classify: dict[str, Any]) -> bool:
|
||||||
|
if not reply:
|
||||||
|
return False
|
||||||
|
lowered = reply.lower()
|
||||||
|
missing_markers = (
|
||||||
|
"don't have",
|
||||||
|
"do not have",
|
||||||
|
"don't know",
|
||||||
|
"cannot",
|
||||||
|
"can't",
|
||||||
|
"need to",
|
||||||
|
"would need",
|
||||||
|
"not provided",
|
||||||
|
"missing",
|
||||||
|
"no specific",
|
||||||
|
)
|
||||||
|
if classify.get("needs_snapshot") and any(marker in lowered for marker in missing_markers):
|
||||||
|
return True
|
||||||
|
if classify.get("question_type") in {"metric", "diagnostic"} and not re.search(r"\d", reply):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def _resolve_path(data: Any, path: str) -> Any | None:
|
def _resolve_path(data: Any, path: str) -> Any | None:
|
||||||
cursor = data
|
cursor = data
|
||||||
for part in re.split(r"\.(?![^\[]*\])", path):
|
for part in re.split(r"\.(?![^\[]*\])", path):
|
||||||
|
|||||||
@ -68,12 +68,14 @@ TOOL_PROMPT = (
|
|||||||
ANSWER_SYSTEM = (
|
ANSWER_SYSTEM = (
|
||||||
CLUSTER_SYSTEM
|
CLUSTER_SYSTEM
|
||||||
+ " Answer a focused sub-question using the provided context. "
|
+ " Answer a focused sub-question using the provided context. "
|
||||||
+ "Be concise and grounded."
|
+ "Be concise and grounded. "
|
||||||
|
+ "If the context contains explicit values relevant to the question, you must use them."
|
||||||
)
|
)
|
||||||
|
|
||||||
SUBANSWER_PROMPT = (
|
SUBANSWER_PROMPT = (
|
||||||
"Answer the sub-question using the context. "
|
"Answer the sub-question using the context. "
|
||||||
"If context lacks the fact, say so."
|
"If the context includes the fact, state it explicitly. "
|
||||||
|
"Only say the fact is missing if it truly is not present."
|
||||||
)
|
)
|
||||||
|
|
||||||
SYNTHESIZE_SYSTEM = (
|
SYNTHESIZE_SYSTEM = (
|
||||||
@ -87,6 +89,19 @@ SYNTHESIZE_PROMPT = (
|
|||||||
"Use sub-answers as evidence, avoid raw metric dumps unless asked."
|
"Use sub-answers as evidence, avoid raw metric dumps unless asked."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# System prompt for the evidence-fix pass: the shared cluster preamble
# followed by the rewrite instruction.
EVIDENCE_FIX_SYSTEM = CLUSTER_SYSTEM + (
    " Rewrite the draft answer if it ignored facts present in the context. "
    "Only use facts in the provided context."
)
|
||||||
|
|
||||||
|
# User-prompt preamble for the evidence-fix pass; the caller appends the
# question and the draft after it.
EVIDENCE_FIX_PROMPT = " ".join(
    (
        "Check the draft against the context.",
        "If the draft says data is missing but the context includes relevant values,",
        "rewrite the answer to include those values.",
        "If data is truly missing, keep the draft concise and honest.",
    )
)
|
||||||
|
|
||||||
DRAFT_SELECT_PROMPT = (
|
DRAFT_SELECT_PROMPT = (
|
||||||
"Pick the best draft for accuracy, clarity, and helpfulness. "
|
"Pick the best draft for accuracy, clarity, and helpfulness. "
|
||||||
"Return JSON with field: best (1-based index)."
|
"Return JSON with field: best (1-based index)."
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user