atlasbot: improve multi-pass synthesis
This commit is contained in:
parent
2af817b9db
commit
44c22e3d00
@@ -2559,8 +2559,13 @@ def _fact_pack_lines(
    return lines


def _fact_pack_text(lines: list[str]) -> str:
    labeled = [f"F{idx + 1}: {line}" for idx, line in enumerate(lines)]
def _fact_pack_text(lines: list[str], fact_meta: dict[str, dict[str, Any]]) -> str:
    labeled: list[str] = []
    for idx, line in enumerate(lines):
        fid = f"F{idx + 1}"
        tags = fact_meta.get(fid, {}).get("tags") or []
        tag_text = f" [tags: {', '.join(tags)}]" if tags else ""
        labeled.append(f"{fid}{tag_text}: {line}")
    return "Fact pack:\n" + "\n".join(labeled)
@@ -2782,7 +2787,8 @@ def _open_ended_system() -> str:
        "Use ONLY the provided fact pack and recent chat as your evidence. "
        "You may draw light inferences if you label them as such. "
        "Write concise, human sentences with a helpful, calm tone (not a list). "
        "If the question is subjective, share a light opinion grounded in facts and explain why it stands out. "
        "If the question is subjective (cool/interesting/unconventional), pick a standout fact and explain why it stands out. "
        "If the question asks for a list, embed the list inline in a sentence (comma-separated). "
        "If the question is ambiguous, pick a reasonable interpretation and state it briefly. "
        "Avoid repeating the exact same observation as the last response if possible; vary across metrics, workload, or hardware details. "
        "Do not invent numbers or facts. "
@@ -2938,6 +2944,67 @@ def _open_ended_plan(
    return cleaned


def _sanitize_focus_tags(raw_tags: list[Any]) -> list[str]:
    tags: list[str] = []
    for tag in raw_tags:
        if not isinstance(tag, str):
            continue
        tag = tag.strip()
        if tag in _ALLOWED_INSIGHT_TAGS and tag not in tags:
            tags.append(tag)
    return tags


def _open_ended_interpret(
    prompt: str,
    *,
    fact_pack: str,
    history_lines: list[str],
    state: ThoughtState | None,
    model: str | None,
) -> dict[str, Any]:
    if state:
        state.update("interpreting", step=1, note="reading question")
    allowed_tags = ", ".join(sorted(_ALLOWED_INSIGHT_TAGS))
    prompt_text = (
        "Classify how to answer the question using only the fact pack. "
        "Return JSON: {\"style\":\"objective|subjective\","
        "\"tone\":\"neutral|curious|enthusiastic\","
        "\"focus_tags\":[\"tag\"],"
        "\"focus_label\":\"short phrase\","
        "\"allow_list\":true|false}. "
        "Use allow_list=true only if the question explicitly asks for names or lists. "
        f"Only use tags from: {allowed_tags}."
    )
    context = _append_history_context(fact_pack, history_lines)
    result = _ollama_json_call(
        prompt_text + f" Question: {prompt}",
        context=context,
        model=model,
    )
    if not isinstance(result, dict):
        result = {}
    style = str(result.get("style") or "").strip().lower()
    if style not in ("objective", "subjective"):
        style = "subjective" if _is_subjective_query(prompt) else "objective"
    tone = str(result.get("tone") or "neutral").strip().lower()
    if tone not in ("neutral", "curious", "enthusiastic"):
        tone = "neutral"
    focus_tags = _sanitize_focus_tags(result.get("focus_tags") or [])
    focus_label = str(result.get("focus_label") or "").strip()
    allow_list = result.get("allow_list")
    if not isinstance(allow_list, bool):
        q = normalize_query(prompt)
        allow_list = any(phrase in q for phrase in ("list", "which", "what are", "names"))
    return {
        "style": style,
        "tone": tone,
        "focus_tags": focus_tags,
        "focus_label": focus_label,
        "allow_list": allow_list,
    }


def _preferred_tags_for_prompt(prompt: str) -> set[str]:
    q = normalize_query(prompt)
    tags: set[str] = set()
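A minimal sketch of what the new interpretation pass yields for a hypothetical question, assuming "hardware" is one of the entries in _ALLOWED_INSIGHT_TAGS (the allowed set itself is defined outside this diff):

    # _sanitize_focus_tags drops non-strings, unknown tags, and duplicates:
    _sanitize_focus_tags(["hardware", "hardware", "weather", 42])  # -> ["hardware"]

    # _open_ended_interpret then normalizes the model's JSON into a dict shaped like:
    # {"style": "subjective", "tone": "curious", "focus_tags": ["hardware"],
    #  "focus_label": "standout hardware", "allow_list": False}
    # with fallbacks (_is_subjective_query, keyword checks) when fields are missing or invalid.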
@@ -3013,6 +3080,71 @@ def _open_ended_insights(
    return cleaned


def _fallback_fact_ids(
    fact_meta: dict[str, dict[str, Any]],
    *,
    focus_tags: set[str],
    count: int,
) -> list[str]:
    if not fact_meta:
        return []
    if focus_tags:
        tagged = [
            fid
            for fid, meta in fact_meta.items()
            if focus_tags & set(meta.get("tags") or [])
        ]
        if tagged:
            return tagged[:count]
    return list(fact_meta.keys())[:count]


def _open_ended_select_facts(
    prompt: str,
    *,
    fact_pack: str,
    fact_meta: dict[str, dict[str, Any]],
    history_lines: list[str],
    focus_tags: set[str],
    avoid_fact_ids: list[str],
    count: int,
    subjective: bool,
    state: ThoughtState | None,
    step: int,
    model: str | None,
) -> list[str]:
    if state:
        state.update("selecting facts", step=step, note="picking evidence")
    focus_hint = ", ".join(sorted(focus_tags)) if focus_tags else "any"
    avoid_hint = ", ".join(avoid_fact_ids) if avoid_fact_ids else "none"
    prompt_text = (
        "Select the fact IDs that best answer the question. "
        f"Pick up to {count} fact IDs. "
        f"Focus tags: {focus_hint}. "
        f"Avoid these fact IDs: {avoid_hint}. "
        "If the question is subjective, pick standout or unusual facts; "
        "if objective, pick the minimal facts needed. "
        "Return JSON: {\"fact_ids\":[\"F1\"...],\"note\":\"...\"}."
    )
    context = _append_history_context(fact_pack, history_lines)
    result = _ollama_json_call(
        prompt_text + f" Question: {prompt}",
        context=context,
        model=model,
    )
    fact_ids = result.get("fact_ids") if isinstance(result, dict) else None
    selected: list[str] = []
    if isinstance(fact_ids, list):
        for fid in fact_ids:
            if isinstance(fid, str) and fid in fact_meta and fid not in selected:
                selected.append(fid)
            if len(selected) >= count:
                break
    if not selected:
        selected = _fallback_fact_ids(fact_meta, focus_tags=focus_tags, count=count)
    return selected


def _normalize_score(value: Any, *, default: int = 60) -> int:
    if isinstance(value, (int, float)):
        return int(max(0, min(100, value)))
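When the model returns no usable fact IDs, the selector falls back to _fallback_fact_ids. A minimal sketch of its behavior with hypothetical metadata:

    meta = {
        "F1": {"tags": ["capacity"]},
        "F2": {"tags": ["hardware"]},
        "F3": {"tags": []},
    }
    _fallback_fact_ids(meta, focus_tags={"hardware"}, count=2)  # -> ["F2"] (tag match wins)
    _fallback_fact_ids(meta, focus_tags={"latency"}, count=2)   # -> ["F1", "F2"] (no match: first N)
    _fallback_fact_ids({}, focus_tags=set(), count=2)           # -> []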
@@ -3043,6 +3175,9 @@ def _open_ended_candidate(
    focus: str,
    fact_pack: str,
    history_lines: list[str],
    subjective: bool,
    tone: str,
    allow_list: bool,
    state: ThoughtState | None,
    step: int,
    fact_hints: list[str] | None = None,
@@ -3053,10 +3188,23 @@ def _open_ended_candidate(
    hint_text = ""
    if fact_hints:
        hint_text = " Prioritize these fact IDs if relevant: " + ", ".join(fact_hints) + "."
    style_hint = (
        "Offer a brief opinion grounded in facts and explain why it stands out. "
        if subjective
        else "Answer directly and succinctly. "
    )
    list_hint = (
        "If a list is requested, embed it inline in a sentence (comma-separated). "
        if allow_list
        else "Avoid bullet lists. "
    )
    prompt_text = (
        "Using ONLY the fact pack, answer the question focusing on this angle: "
        f"{focus}. "
        "Write 2-4 sentences in plain prose (not a list)."
        f"Tone: {tone}. "
        + style_hint
        + list_hint
        + "Write 2-4 sentences in plain prose."
        + hint_text
        + " "
        "If you infer, label it as inference. "
@@ -3125,6 +3273,9 @@ def _open_ended_synthesize(
    fact_pack: str,
    history_lines: list[str],
    candidates: list[dict[str, Any]],
    subjective: bool,
    tone: str,
    allow_list: bool,
    state: ThoughtState | None,
    step: int,
    model: str | None,
@@ -3133,6 +3284,16 @@ def _open_ended_synthesize(
    if state:
        state.update("synthesizing", step=step, note="composing answer")
    critique_block = f"\nCritique guidance: {critique}\n" if critique else "\n"
    style_hint = (
        "If the question is subjective, share a light opinion grounded in facts and explain why it stands out. "
        if subjective
        else "Answer directly without extra caveats. "
    )
    list_hint = (
        "If a list is requested, embed it inline in a sentence (comma-separated). "
        if allow_list
        else "Avoid bullet lists. "
    )
    synth_prompt = (
        "Compose the final answer to the question using the candidate answers below. "
        "Select the best 1-2 candidates, blend them if helpful, and keep 2-4 sentences. "
@@ -3140,7 +3301,10 @@ def _open_ended_synthesize(
        "If you infer, label it as inference. "
        "Do not claim nodes are missing or not ready unless the fact pack explicitly lists "
        "nodes_not_ready or expected_workers_missing. "
        "Keep the tone conversational and answer the user's intent directly. "
        f"Tone: {tone}. "
        + style_hint
        + list_hint
        + "Keep the tone conversational and answer the user's intent directly. "
        "Avoid repeating the last response if possible. "
        "End with lines: Confidence, Relevance (0-100), Satisfaction (0-100), "
        "HallucinationRisk (low|medium|high).\n"
@@ -3202,85 +3366,90 @@ def _open_ended_multi(
) -> str:
    model = _model_for_mode(mode)
    if mode == "fast":
        angle_count = 1
        insight_count = 1
        total_steps = 2
        total_steps = 4
    else:
        angle_count = 4
        insight_count = 4
        total_steps = 2 + angle_count + 2 + 1
        total_steps = 7
    if state:
        state.total_steps = total_steps

    angles: list[dict[str, Any]] = []
    insights: list[dict[str, Any]] = []
    if mode != "fast":
        angles = _open_ended_plan(
            prompt,
            fact_pack=fact_pack,
            history_lines=history_lines,
            count=angle_count,
            state=state,
            model=model,
        )
        insights = _open_ended_insights(
    interpretation = _open_ended_interpret(
        prompt,
        fact_pack=fact_pack,
        history_lines=history_lines,
        state=state,
        model=model,
    )
    style = interpretation.get("style") or "objective"
    subjective = style == "subjective" or _is_subjective_query(prompt)
    tone = str(interpretation.get("tone") or "").strip().lower()
    if tone not in ("neutral", "curious", "enthusiastic"):
        tone = "curious" if subjective else "neutral"
    allow_list = bool(interpretation.get("allow_list"))
    focus_tags = set(interpretation.get("focus_tags") or []) or _preferred_tags_for_prompt(prompt)
    if not focus_tags and subjective:
        focus_tags = set(_ALLOWED_INSIGHT_TAGS)

    primary_ids = _open_ended_select_facts(
        prompt,
        fact_pack=fact_pack,
        fact_meta=fact_meta,
        history_lines=history_lines,
        focus_tags=focus_tags,
        avoid_fact_ids=[],
        count=4 if mode == "deep" else 3,
        subjective=subjective,
        state=state,
        step=2,
        model=model,
    )
    alternate_ids: list[str] = []
    if mode == "deep":
        alternate_ids = _open_ended_select_facts(
            prompt,
            fact_pack=fact_pack,
            fact_meta=fact_meta,
            history_lines=history_lines,
            count=insight_count,
            focus_tags=focus_tags,
            avoid_fact_ids=primary_ids,
            count=4,
            subjective=subjective,
            state=state,
            step=3,
            model=model,
        )
    seeds = _seed_insights(fact_lines, fact_meta, limit=max(4, insight_count))
    insight_candidates = insights + seeds
    subjective = _is_subjective_query(prompt)
    prefer_tags = _preferred_tags_for_prompt(prompt)
    history_tags = _history_tags(history_lines)
    avoid_tags = history_tags if subjective else set()
    preference = "novelty" if subjective else "relevance"
    selected_insights = _select_diverse_insights(
        insight_candidates,
        preference=preference,
        prefer_tags=prefer_tags,
        avoid_tags=avoid_tags,
        history_tags=history_tags,
        fact_meta=fact_meta,
        count=1 if mode == "fast" else 2,
    )
    if state and selected_insights:
        state.update("analyzing", note=_candidate_note(selected_insights[0]))

    angle_inputs: list[dict[str, Any]] = []
    for insight in selected_insights:
        angle_inputs.append(
            {
                "focus": str(insight.get("summary") or "Direct answer"),
                "fact_ids": insight.get("fact_ids") or [],
            }
        )
    for angle in angles:
        if len(angle_inputs) >= angle_count:
            break
        angle_inputs.append(
            {
                "focus": str(angle.get("focus") or "Direct answer"),
                "fact_ids": [],
            }
        )

    candidates: list[dict[str, Any]] = []
    step = 1 if mode == "fast" else 3
    for angle in angle_inputs[:angle_count]:
    focus_label = interpretation.get("focus_label") or "primary angle"
    step = 3 if mode == "fast" else 4
    candidates.append(
        _open_ended_candidate(
            prompt,
            focus=str(focus_label),
            fact_pack=fact_pack,
            history_lines=history_lines,
            subjective=subjective,
            tone=str(tone),
            allow_list=allow_list,
            state=state,
            step=step,
            fact_hints=primary_ids,
            model=model,
        )
    )
    step += 1
    if mode == "deep" and alternate_ids:
        candidates.append(
            _open_ended_candidate(
                prompt,
                focus=str(angle.get("focus") or "Direct answer"),
                focus="alternate angle",
                fact_pack=fact_pack,
                history_lines=history_lines,
                subjective=subjective,
                tone=str(tone),
                allow_list=allow_list,
                state=state,
                step=step,
                fact_hints=angle.get("fact_ids") if isinstance(angle.get("fact_ids"), list) else None,
                fact_hints=alternate_ids,
                model=model,
            )
        )
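Taken together, the hunks above swap the old plan/insight passes for an interpret, select, draft, synthesize flow. A simplified sketch of the new sequence (state updates, review passes, and fallbacks outside this diff omitted):

    # 1. interpretation = _open_ended_interpret(...)    -> style, tone, focus_tags, focus_label, allow_list
    # 2. primary_ids    = _open_ended_select_facts(..., avoid_fact_ids=[], count=3 or 4)
    # 3. alternate_ids  = _open_ended_select_facts(..., avoid_fact_ids=primary_ids)   # deep mode only
    # 4. candidates     = _open_ended_candidate(...) drafts, with fact_hints from the selected IDs
    # 5. final answer   = _open_ended_synthesize(..., candidates=..., tone=..., allow_list=...)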
@@ -3306,6 +3475,9 @@ def _open_ended_multi(
        fact_pack=fact_pack,
        history_lines=history_lines,
        candidates=selected or candidates,
        subjective=subjective,
        tone=str(tone),
        allow_list=allow_list,
        state=state,
        step=step,
        model=model,
@@ -3318,9 +3490,8 @@ def _open_ended_multi(

def _open_ended_total_steps(mode: str) -> int:
    if mode == "fast":
        return 2
    angle_count = 4
    return 2 + angle_count + 2 + 1
        return 4
    return 7


def _open_ended_fast(
@@ -3386,8 +3557,8 @@ def open_ended_answer(
    lines.extend(tool_lines)
    if not lines:
        return _ensure_scores("I don't have enough data to answer that.")
    fact_pack = _fact_pack_text(lines)
    fact_meta = _fact_pack_meta(lines)
    fact_pack = _fact_pack_text(lines, fact_meta)
    if mode == "fast":
        return _open_ended_fast(
            prompt,