atlasbot: improve multi-pass synthesis

commit 44c22e3d00
parent 2af817b9db
@@ -2559,8 +2559,13 @@ def _fact_pack_lines(
     return lines


-def _fact_pack_text(lines: list[str]) -> str:
-    labeled = [f"F{idx + 1}: {line}" for idx, line in enumerate(lines)]
+def _fact_pack_text(lines: list[str], fact_meta: dict[str, dict[str, Any]]) -> str:
+    labeled: list[str] = []
+    for idx, line in enumerate(lines):
+        fid = f"F{idx + 1}"
+        tags = fact_meta.get(fid, {}).get("tags") or []
+        tag_text = f" [tags: {', '.join(tags)}]" if tags else ""
+        labeled.append(f"{fid}{tag_text}: {line}")
     return "Fact pack:\n" + "\n".join(labeled)


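Note: the hunk above threads tag metadata into the fact-pack text so each fact line carries its tags. A minimal standalone sketch of the resulting format, with invented fact lines and tags (not taken from the repository):

    from typing import Any

    def fact_pack_text(lines: list[str], fact_meta: dict[str, dict[str, Any]]) -> str:
        # Mirrors the patched _fact_pack_text: "F<n> [tags: ...]: <line>" per fact.
        labeled: list[str] = []
        for idx, line in enumerate(lines):
            fid = f"F{idx + 1}"
            tags = fact_meta.get(fid, {}).get("tags") or []
            tag_text = " [tags: " + ", ".join(tags) + "]" if tags else ""
            labeled.append(f"{fid}{tag_text}: {line}")
        return "Fact pack:\n" + "\n".join(labeled)

    print(fact_pack_text(
        ["cpu load is 0.4", "3 nodes ready"],
        {"F1": {"tags": ["cpu"]}, "F2": {"tags": ["nodes"]}},
    ))
    # Fact pack:
    # F1 [tags: cpu]: cpu load is 0.4
    # F2 [tags: nodes]: 3 nodes ready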
@@ -2782,7 +2787,8 @@ def _open_ended_system() -> str:
         "Use ONLY the provided fact pack and recent chat as your evidence. "
         "You may draw light inferences if you label them as such. "
         "Write concise, human sentences with a helpful, calm tone (not a list). "
-        "If the question is subjective, share a light opinion grounded in facts and explain why it stands out. "
+        "If the question is subjective (cool/interesting/unconventional), pick a standout fact and explain why it stands out. "
+        "If the question asks for a list, embed the list inline in a sentence (comma-separated). "
         "If the question is ambiguous, pick a reasonable interpretation and state it briefly. "
         "Avoid repeating the exact same observation as the last response if possible; vary across metrics, workload, or hardware details. "
         "Do not invent numbers or facts. "
@@ -2938,6 +2944,67 @@ def _open_ended_plan(
     return cleaned


+def _sanitize_focus_tags(raw_tags: list[Any]) -> list[str]:
+    tags: list[str] = []
+    for tag in raw_tags:
+        if not isinstance(tag, str):
+            continue
+        tag = tag.strip()
+        if tag in _ALLOWED_INSIGHT_TAGS and tag not in tags:
+            tags.append(tag)
+    return tags
+
+
+def _open_ended_interpret(
+    prompt: str,
+    *,
+    fact_pack: str,
+    history_lines: list[str],
+    state: ThoughtState | None,
+    model: str | None,
+) -> dict[str, Any]:
+    if state:
+        state.update("interpreting", step=1, note="reading question")
+    allowed_tags = ", ".join(sorted(_ALLOWED_INSIGHT_TAGS))
+    prompt_text = (
+        "Classify how to answer the question using only the fact pack. "
+        "Return JSON: {\"style\":\"objective|subjective\","
+        "\"tone\":\"neutral|curious|enthusiastic\","
+        "\"focus_tags\":[\"tag\"],"
+        "\"focus_label\":\"short phrase\","
+        "\"allow_list\":true|false}. "
+        "Use allow_list=true only if the question explicitly asks for names or lists. "
+        f"Only use tags from: {allowed_tags}."
+    )
+    context = _append_history_context(fact_pack, history_lines)
+    result = _ollama_json_call(
+        prompt_text + f" Question: {prompt}",
+        context=context,
+        model=model,
+    )
+    if not isinstance(result, dict):
+        result = {}
+    style = str(result.get("style") or "").strip().lower()
+    if style not in ("objective", "subjective"):
+        style = "subjective" if _is_subjective_query(prompt) else "objective"
+    tone = str(result.get("tone") or "neutral").strip().lower()
+    if tone not in ("neutral", "curious", "enthusiastic"):
+        tone = "neutral"
+    focus_tags = _sanitize_focus_tags(result.get("focus_tags") or [])
+    focus_label = str(result.get("focus_label") or "").strip()
+    allow_list = result.get("allow_list")
+    if not isinstance(allow_list, bool):
+        q = normalize_query(prompt)
+        allow_list = any(phrase in q for phrase in ("list", "which", "what are", "names"))
+    return {
+        "style": style,
+        "tone": tone,
+        "focus_tags": focus_tags,
+        "focus_label": focus_label,
+        "allow_list": allow_list,
+    }
+
+
 def _preferred_tags_for_prompt(prompt: str) -> set[str]:
     q = normalize_query(prompt)
     tags: set[str] = set()
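Note: _open_ended_interpret validates every field of the model's JSON and falls back to safe defaults. A rough sketch of that fallback behaviour; the tag set and the keyword heuristic here are simplified stand-ins, not the repository's helpers:

    ALLOWED_TAGS = {"cpu", "memory", "nodes", "workload"}  # assumed tag set, for illustration only

    def coerce_interpretation(result: object, question: str) -> dict:
        # Same spirit as _open_ended_interpret: malformed output degrades to defaults.
        data = result if isinstance(result, dict) else {}
        style = str(data.get("style") or "").strip().lower()
        if style not in ("objective", "subjective"):
            style = "objective"  # the real code also consults _is_subjective_query(prompt)
        tone = str(data.get("tone") or "neutral").strip().lower()
        if tone not in ("neutral", "curious", "enthusiastic"):
            tone = "neutral"
        tags = [t.strip() for t in (data.get("focus_tags") or []) if isinstance(t, str)]
        tags = [t for t in tags if t in ALLOWED_TAGS]
        allow_list = data.get("allow_list")
        if not isinstance(allow_list, bool):
            allow_list = any(p in question.lower() for p in ("list", "which", "what are", "names"))
        return {"style": style, "tone": tone, "focus_tags": tags, "allow_list": allow_list}

    print(coerce_interpretation("not json", "Which nodes are busiest?"))
    # {'style': 'objective', 'tone': 'neutral', 'focus_tags': [], 'allow_list': True}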
@@ -3013,6 +3080,71 @@ def _open_ended_insights(
     return cleaned


+def _fallback_fact_ids(
+    fact_meta: dict[str, dict[str, Any]],
+    *,
+    focus_tags: set[str],
+    count: int,
+) -> list[str]:
+    if not fact_meta:
+        return []
+    if focus_tags:
+        tagged = [
+            fid
+            for fid, meta in fact_meta.items()
+            if focus_tags & set(meta.get("tags") or [])
+        ]
+        if tagged:
+            return tagged[:count]
+    return list(fact_meta.keys())[:count]
+
+
+def _open_ended_select_facts(
+    prompt: str,
+    *,
+    fact_pack: str,
+    fact_meta: dict[str, dict[str, Any]],
+    history_lines: list[str],
+    focus_tags: set[str],
+    avoid_fact_ids: list[str],
+    count: int,
+    subjective: bool,
+    state: ThoughtState | None,
+    step: int,
+    model: str | None,
+) -> list[str]:
+    if state:
+        state.update("selecting facts", step=step, note="picking evidence")
+    focus_hint = ", ".join(sorted(focus_tags)) if focus_tags else "any"
+    avoid_hint = ", ".join(avoid_fact_ids) if avoid_fact_ids else "none"
+    prompt_text = (
+        "Select the fact IDs that best answer the question. "
+        f"Pick up to {count} fact IDs. "
+        f"Focus tags: {focus_hint}. "
+        f"Avoid these fact IDs: {avoid_hint}. "
+        "If the question is subjective, pick standout or unusual facts; "
+        "if objective, pick the minimal facts needed. "
+        "Return JSON: {\"fact_ids\":[\"F1\"...],\"note\":\"...\"}."
+    )
+    context = _append_history_context(fact_pack, history_lines)
+    result = _ollama_json_call(
+        prompt_text + f" Question: {prompt}",
+        context=context,
+        model=model,
+    )
+    fact_ids = result.get("fact_ids") if isinstance(result, dict) else None
+    selected: list[str] = []
+    if isinstance(fact_ids, list):
+        for fid in fact_ids:
+            if isinstance(fid, str) and fid in fact_meta and fid not in selected:
+                selected.append(fid)
+            if len(selected) >= count:
+                break
+    if not selected:
+        selected = _fallback_fact_ids(fact_meta, focus_tags=focus_tags, count=count)
+    return selected
+
+
 def _normalize_score(value: Any, *, default: int = 60) -> int:
     if isinstance(value, (int, float)):
         return int(max(0, min(100, value)))
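Note: when the model returns no usable fact IDs, _fallback_fact_ids keeps the pipeline moving by preferring tag matches and otherwise taking the first N facts. A toy illustration with invented metadata:

    def fallback_fact_ids(fact_meta: dict, *, focus_tags: set, count: int) -> list:
        # Same shape as the new helper: tag-overlapping facts first, else the first N IDs.
        if not fact_meta:
            return []
        if focus_tags:
            tagged = [fid for fid, meta in fact_meta.items() if focus_tags & set(meta.get("tags") or [])]
            if tagged:
                return tagged[:count]
        return list(fact_meta.keys())[:count]

    meta = {"F1": {"tags": ["cpu"]}, "F2": {"tags": ["nodes"]}, "F3": {"tags": []}}  # hypothetical
    print(fallback_fact_ids(meta, focus_tags={"nodes"}, count=2))  # ['F2']
    print(fallback_fact_ids(meta, focus_tags=set(), count=2))      # ['F1', 'F2']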
@@ -3043,6 +3175,9 @@ def _open_ended_candidate(
     focus: str,
     fact_pack: str,
     history_lines: list[str],
+    subjective: bool,
+    tone: str,
+    allow_list: bool,
     state: ThoughtState | None,
     step: int,
     fact_hints: list[str] | None = None,
@@ -3053,10 +3188,23 @@ def _open_ended_candidate(
     hint_text = ""
     if fact_hints:
         hint_text = " Prioritize these fact IDs if relevant: " + ", ".join(fact_hints) + "."
+    style_hint = (
+        "Offer a brief opinion grounded in facts and explain why it stands out. "
+        if subjective
+        else "Answer directly and succinctly. "
+    )
+    list_hint = (
+        "If a list is requested, embed it inline in a sentence (comma-separated). "
+        if allow_list
+        else "Avoid bullet lists. "
+    )
     prompt_text = (
         "Using ONLY the fact pack, answer the question focusing on this angle: "
         f"{focus}. "
-        "Write 2-4 sentences in plain prose (not a list)."
+        f"Tone: {tone}. "
+        + style_hint
+        + list_hint
+        + "Write 2-4 sentences in plain prose."
         + hint_text
         + " "
         "If you infer, label it as inference. "
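Note: the candidate prompt is now assembled from tone, style, and list hints instead of a fixed sentence. A simplified echo of that concatenation, with invented argument values:

    def candidate_prompt(focus: str, tone: str, *, subjective: bool, allow_list: bool) -> str:
        # Condensed version of the string assembly in _open_ended_candidate (hints omitted).
        style_hint = (
            "Offer a brief opinion grounded in facts and explain why it stands out. "
            if subjective
            else "Answer directly and succinctly. "
        )
        list_hint = (
            "If a list is requested, embed it inline in a sentence (comma-separated). "
            if allow_list
            else "Avoid bullet lists. "
        )
        return (
            "Using ONLY the fact pack, answer the question focusing on this angle: "
            f"{focus}. "
            f"Tone: {tone}. "
            + style_hint
            + list_hint
            + "Write 2-4 sentences in plain prose."
        )

    print(candidate_prompt("memory pressure", "curious", subjective=True, allow_list=False))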
@@ -3125,6 +3273,9 @@ def _open_ended_synthesize(
     fact_pack: str,
     history_lines: list[str],
     candidates: list[dict[str, Any]],
+    subjective: bool,
+    tone: str,
+    allow_list: bool,
     state: ThoughtState | None,
     step: int,
     model: str | None,
@@ -3133,6 +3284,16 @@ def _open_ended_synthesize(
     if state:
         state.update("synthesizing", step=step, note="composing answer")
     critique_block = f"\nCritique guidance: {critique}\n" if critique else "\n"
+    style_hint = (
+        "If the question is subjective, share a light opinion grounded in facts and explain why it stands out. "
+        if subjective
+        else "Answer directly without extra caveats. "
+    )
+    list_hint = (
+        "If a list is requested, embed it inline in a sentence (comma-separated). "
+        if allow_list
+        else "Avoid bullet lists. "
+    )
     synth_prompt = (
         "Compose the final answer to the question using the candidate answers below. "
         "Select the best 1-2 candidates, blend them if helpful, and keep 2-4 sentences. "
@@ -3140,7 +3301,10 @@ def _open_ended_synthesize(
         "If you infer, label it as inference. "
         "Do not claim nodes are missing or not ready unless the fact pack explicitly lists "
         "nodes_not_ready or expected_workers_missing. "
-        "Keep the tone conversational and answer the user's intent directly. "
+        f"Tone: {tone}. "
+        + style_hint
+        + list_hint
+        + "Keep the tone conversational and answer the user's intent directly. "
         "Avoid repeating the last response if possible. "
         "End with lines: Confidence, Relevance (0-100), Satisfaction (0-100), "
         "HallucinationRisk (low|medium|high).\n"
@@ -3202,85 +3366,90 @@ def _open_ended_multi(
 ) -> str:
     model = _model_for_mode(mode)
     if mode == "fast":
-        angle_count = 1
-        insight_count = 1
-        total_steps = 2
+        total_steps = 4
     else:
-        angle_count = 4
-        insight_count = 4
-        total_steps = 2 + angle_count + 2 + 1
+        total_steps = 7
     if state:
         state.total_steps = total_steps

-    angles: list[dict[str, Any]] = []
-    insights: list[dict[str, Any]] = []
-    if mode != "fast":
-        angles = _open_ended_plan(
-            prompt,
-            fact_pack=fact_pack,
-            history_lines=history_lines,
-            count=angle_count,
-            state=state,
-            model=model,
-        )
-        insights = _open_ended_insights(
-            prompt,
-            fact_pack=fact_pack,
-            fact_meta=fact_meta,
-            history_lines=history_lines,
-            count=insight_count,
-            state=state,
-            model=model,
-        )
-    seeds = _seed_insights(fact_lines, fact_meta, limit=max(4, insight_count))
-    insight_candidates = insights + seeds
-    subjective = _is_subjective_query(prompt)
-    prefer_tags = _preferred_tags_for_prompt(prompt)
-    history_tags = _history_tags(history_lines)
-    avoid_tags = history_tags if subjective else set()
-    preference = "novelty" if subjective else "relevance"
-    selected_insights = _select_diverse_insights(
-        insight_candidates,
-        preference=preference,
-        prefer_tags=prefer_tags,
-        avoid_tags=avoid_tags,
-        history_tags=history_tags,
-        fact_meta=fact_meta,
-        count=1 if mode == "fast" else 2,
-    )
-    if state and selected_insights:
-        state.update("analyzing", note=_candidate_note(selected_insights[0]))
+    interpretation = _open_ended_interpret(
+        prompt,
+        fact_pack=fact_pack,
+        history_lines=history_lines,
+        state=state,
+        model=model,
+    )
+    style = interpretation.get("style") or "objective"
+    subjective = style == "subjective" or _is_subjective_query(prompt)
+    tone = str(interpretation.get("tone") or "").strip().lower()
+    if tone not in ("neutral", "curious", "enthusiastic"):
+        tone = "curious" if subjective else "neutral"
+    allow_list = bool(interpretation.get("allow_list"))
+    focus_tags = set(interpretation.get("focus_tags") or []) or _preferred_tags_for_prompt(prompt)
+    if not focus_tags and subjective:
+        focus_tags = set(_ALLOWED_INSIGHT_TAGS)

-    angle_inputs: list[dict[str, Any]] = []
-    for insight in selected_insights:
-        angle_inputs.append(
-            {
-                "focus": str(insight.get("summary") or "Direct answer"),
-                "fact_ids": insight.get("fact_ids") or [],
-            }
-        )
-    for angle in angles:
-        if len(angle_inputs) >= angle_count:
-            break
-        angle_inputs.append(
-            {
-                "focus": str(angle.get("focus") or "Direct answer"),
-                "fact_ids": [],
-            }
-        )
+    primary_ids = _open_ended_select_facts(
+        prompt,
+        fact_pack=fact_pack,
+        fact_meta=fact_meta,
+        history_lines=history_lines,
+        focus_tags=focus_tags,
+        avoid_fact_ids=[],
+        count=4 if mode == "deep" else 3,
+        subjective=subjective,
+        state=state,
+        step=2,
+        model=model,
+    )
+    alternate_ids: list[str] = []
+    if mode == "deep":
+        alternate_ids = _open_ended_select_facts(
+            prompt,
+            fact_pack=fact_pack,
+            fact_meta=fact_meta,
+            history_lines=history_lines,
+            focus_tags=focus_tags,
+            avoid_fact_ids=primary_ids,
+            count=4,
+            subjective=subjective,
+            state=state,
+            step=3,
+            model=model,
+        )

     candidates: list[dict[str, Any]] = []
-    step = 1 if mode == "fast" else 3
-    for angle in angle_inputs[:angle_count]:
-        candidates.append(
-            _open_ended_candidate(
-                prompt,
-                focus=str(angle.get("focus") or "Direct answer"),
-                fact_pack=fact_pack,
-                history_lines=history_lines,
-                state=state,
-                step=step,
-                fact_hints=angle.get("fact_ids") if isinstance(angle.get("fact_ids"), list) else None,
-                model=model,
-            )
-        )
+    focus_label = interpretation.get("focus_label") or "primary angle"
+    step = 3 if mode == "fast" else 4
+    candidates.append(
+        _open_ended_candidate(
+            prompt,
+            focus=str(focus_label),
+            fact_pack=fact_pack,
+            history_lines=history_lines,
+            subjective=subjective,
+            tone=str(tone),
+            allow_list=allow_list,
+            state=state,
+            step=step,
+            fact_hints=primary_ids,
+            model=model,
+        )
+    )
+    step += 1
+    if mode == "deep" and alternate_ids:
+        candidates.append(
+            _open_ended_candidate(
+                prompt,
+                focus="alternate angle",
+                fact_pack=fact_pack,
+                history_lines=history_lines,
+                subjective=subjective,
+                tone=str(tone),
+                allow_list=allow_list,
+                state=state,
+                step=step,
+                fact_hints=alternate_ids,
+                model=model,
+            )
+        )
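Note: the rework replaces the plan/insight passes with an interpret, select-facts, draft-candidates pipeline, adding an alternate fact selection and candidate only in deep mode. A stubbed outline of the new call order; every helper below is a placeholder, not the real implementation:

    def multi_pass(prompt: str, mode: str) -> list[str]:
        def interpret(question: str) -> dict:
            return {"focus_label": "primary angle"}              # ~ _open_ended_interpret (step 1)

        def select_facts(avoid: list[str]) -> list[str]:
            return ["F3"] if avoid else ["F1", "F2"]             # ~ _open_ended_select_facts (steps 2-3)

        def draft(focus: str, hints: list[str]) -> str:
            return f"candidate(focus={focus!r}, hints={hints})"  # ~ _open_ended_candidate

        interpretation = interpret(prompt)
        primary = select_facts(avoid=[])
        alternate = select_facts(avoid=primary) if mode == "deep" else []
        candidates = [draft(interpretation["focus_label"], primary)]
        if mode == "deep" and alternate:
            candidates.append(draft("alternate angle", alternate))
        return candidates  # the real code then critiques and synthesizes these into one answer

    print(multi_pass("what stands out about this cluster?", "deep"))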
@@ -3306,6 +3475,9 @@ def _open_ended_multi(
         fact_pack=fact_pack,
         history_lines=history_lines,
         candidates=selected or candidates,
+        subjective=subjective,
+        tone=str(tone),
+        allow_list=allow_list,
         state=state,
         step=step,
         model=model,
@@ -3318,9 +3490,8 @@


 def _open_ended_total_steps(mode: str) -> int:
     if mode == "fast":
-        return 2
-    angle_count = 4
-    return 2 + angle_count + 2 + 1
+        return 4
+    return 7


 def _open_ended_fast(
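Note: the step budget is now a fixed total per mode, matching the totals set in _open_ended_multi above. A trivial check of the new numbers; the per-step breakdown in the comment is an assumption, only the totals come from the diff:

    def open_ended_total_steps(mode: str) -> int:
        # Assumed breakdown: fast ~ interpret, select, candidate, synthesize;
        # deep adds an alternate selection, an alternate candidate, and a review pass.
        return 4 if mode == "fast" else 7

    assert open_ended_total_steps("fast") == 4
    assert open_ended_total_steps("deep") == 7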
@@ -3386,8 +3557,8 @@ def open_ended_answer(
         lines.extend(tool_lines)
     if not lines:
         return _ensure_scores("I don't have enough data to answer that.")
-    fact_pack = _fact_pack_text(lines)
     fact_meta = _fact_pack_meta(lines)
+    fact_pack = _fact_pack_text(lines, fact_meta)
     if mode == "fast":
        return _open_ended_fast(
             prompt,
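Note: with the new signature, _fact_pack_meta must run before _fact_pack_text so the tags exist when the pack is formatted. A compressed ordering sketch; both helpers below are fakes standing in for the real ones:

    def fact_pack_meta_stub(lines: list[str]) -> dict:
        # Fake metadata builder (~ _fact_pack_meta).
        return {f"F{i + 1}": {"tags": []} for i in range(len(lines))}

    def fact_pack_text_stub(lines: list[str], fact_meta: dict) -> str:
        # Fake formatter (~ the patched _fact_pack_text).
        return "Fact pack:\n" + "\n".join(f"F{i + 1}: {line}" for i, line in enumerate(lines))

    lines = ["cpu load is 0.4"]
    fact_meta = fact_pack_meta_stub(lines)                # meta first
    fact_pack = fact_pack_text_stub(lines, fact_meta)     # then the labeled text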