atlasbot: add metric prefix selection and redundancy

Brad Stein 2026-02-02 18:03:20 -03:00
parent d48f679d47
commit b86c1097f7
2 changed files with 172 additions and 2 deletions
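
The redundancy added here is a sample-then-select pattern: the same LLM call is issued metric_retries / subanswer_retries times, and a selector pass (CANDIDATE_SELECT_SYSTEM) picks one candidate by 1-based index, falling back to the first on a malformed verdict. The sketch below is a minimal, self-contained illustration of that pattern only; ask_llm and its canned replies are placeholders, not code from this commit.

# Minimal sketch of the sample-then-select redundancy this commit wires in.
# `ask_llm` and its canned replies are placeholders; the real code goes through call_llm/Ollama.
import asyncio
import json


async def ask_llm(system: str, prompt: str) -> str:
    # Placeholder: a selector call gets a canned JSON verdict, everything else a canned draft.
    if "best (1-based index)" in system:
        return json.dumps({"best": 2})
    return f"draft answer for: {prompt[:40]}"


async def best_of_n(question: str, n: int) -> str:
    # Sample n candidates with the same prompt, then ask a selector pass to pick one by index.
    candidates = [await ask_llm("Answer the question.", question) for _ in range(n)]
    listing = "\n".join(f"{i + 1}) {c}" for i, c in enumerate(candidates))
    raw = await ask_llm(
        "Pick the best candidate. Return JSON with field: best (1-based index).",
        f"Question: {question}\nCandidates:\n{listing}",
    )
    data = json.loads(raw) if raw.strip().startswith("{") else {}
    best = data.get("best") if isinstance(data, dict) else None
    if not (isinstance(best, int) and 1 <= best <= n):
        best = 1  # fall back to the first candidate, as _select_best_candidate does
    return candidates[best - 1]


if __name__ == "__main__":
    print(asyncio.run(best_of_n("why is cpu_usage high on node-a?", n=3)))
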

View File

@@ -73,6 +73,8 @@ class ModePlan:
    use_gap: bool
    use_scores: bool
    drafts: int
    metric_retries: int
    subanswer_retries: int


class AnswerEngine:
@@ -294,6 +296,18 @@ class AnswerEngine:
        hottest_facts = _extract_hottest_facts(summary_lines, f"{question} {normalized}")
        hardware_facts = _extract_hardware_usage_facts(summary_lines, f"{question} {normalized}")
        hotspot_line = next((line for line in summary_lines if line.startswith("hottest:")), None)
        forced_metric_lines: list[str] = []
        if classify.get("question_type") in {"metric", "diagnostic"}:
            metric_prefixes = _metric_prefixes_from_lines(summary_lines)
            selected_prefixes = await _select_metric_prefixes(
                call_llm,
                normalized,
                metric_prefixes,
                plan,
                plan.metric_retries,
            )
            if selected_prefixes:
                forced_metric_lines = _lines_for_prefixes(summary_lines, selected_prefixes)
        if not hardware_facts:
            hardware_tokens = ("hardware", "class", "type", "rpi", "jetson", "amd64", "arm64")
            lowered_q = f"{question} {normalized}".lower()
@@ -353,6 +367,10 @@ class AnswerEngine:
                break
        if classify.get("question_type") in {"metric", "diagnostic"} and not hottest_facts and not hardware_facts:
            metric_candidates = _metric_candidate_lines(summary_lines, keyword_tokens)
            if forced_metric_lines:
                metric_candidates = forced_metric_lines + [
                    line for line in metric_candidates if line not in forced_metric_lines
                ]
            selected_facts = await _select_metric_facts(call_llm, normalized, metric_candidates, plan)
            if selected_facts:
                metric_facts = selected_facts
@@ -396,6 +414,9 @@ class AnswerEngine:
        if hotspot_request and hotspot_line:
            metric_facts = [hotspot_line]
            key_facts = _merge_fact_lines(metric_facts, key_facts)
        if forced_metric_lines:
            metric_facts = _merge_fact_lines(forced_metric_lines, metric_facts)
            key_facts = _merge_fact_lines(forced_metric_lines, key_facts)
        if self._settings.debug_pipeline:
            scored_preview = sorted(
                [{"id": c["id"], "score": scored.get(c["id"], 0.0), "summary": c["summary"]} for c in chunks],
@@ -430,8 +451,28 @@ class AnswerEngine:
        subanswers: list[str] = []
        for subq in sub_questions:
            sub_prompt = prompts.SUBANSWER_PROMPT + "\nQuestion: " + subq
            sub_answer = await call_llm(prompts.ANSWER_SYSTEM, sub_prompt, context=context, model=plan.model, tag="subanswer")
            subanswers.append(sub_answer)
            if plan.subanswer_retries > 1:
                candidates: list[str] = []
                for _ in range(plan.subanswer_retries):
                    candidate = await call_llm(
                        prompts.ANSWER_SYSTEM,
                        sub_prompt,
                        context=context,
                        model=plan.model,
                        tag="subanswer",
                    )
                    candidates.append(candidate)
                best_idx = await _select_best_candidate(call_llm, subq, candidates, plan, "subanswer_select")
                subanswers.append(candidates[best_idx])
            else:
                sub_answer = await call_llm(
                    prompts.ANSWER_SYSTEM,
                    sub_prompt,
                    context=context,
                    model=plan.model,
                    tag="subanswer",
                )
                subanswers.append(sub_answer)

        if observer:
            observer("synthesize", "synthesizing")
@@ -1180,6 +1221,8 @@ def _mode_plan(settings: Settings, mode: str) -> ModePlan:
            use_gap=True,
            use_scores=True,
            drafts=2,
            metric_retries=3,
            subanswer_retries=3,
        )
    if mode == "smart":
        return ModePlan(
@@ -1194,6 +1237,8 @@ def _mode_plan(settings: Settings, mode: str) -> ModePlan:
            use_gap=True,
            use_scores=True,
            drafts=1,
            metric_retries=2,
            subanswer_retries=2,
        )
    return ModePlan(
        model=settings.ollama_model_fast,
@@ -1207,6 +1252,8 @@ def _mode_plan(settings: Settings, mode: str) -> ModePlan:
        use_gap=False,
        use_scores=False,
        drafts=1,
        metric_retries=1,
        subanswer_retries=1,
    )
@@ -1630,6 +1677,107 @@ def _metric_candidate_lines(lines: list[str], keywords: list[str] | None, limit:
    return candidates[:limit]


def _metric_prefixes_from_lines(lines: list[str]) -> list[str]:
    if not lines:
        return []
    prefixes: list[str] = []
    for line in lines:
        segments = [seg.strip() for seg in line.split(" | ")] if " | " in line else [line]
        for seg in segments:
            match = re.match(r"^([a-z0-9_]+):", seg)
            if match:
                prefix = match.group(1)
                if prefix not in prefixes:
                    prefixes.append(prefix)
    return prefixes


def _lines_for_prefixes(lines: list[str], prefixes: list[str]) -> list[str]:
    if not lines or not prefixes:
        return []
    wanted = set(prefixes)
    selected: list[str] = []
    for line in lines:
        segments = [seg.strip() for seg in line.split(" | ")] if " | " in line else [line]
        for seg in segments:
            match = re.match(r"^([a-z0-9_]+):", seg)
            if match and match.group(1) in wanted:
                if seg not in selected:
                    selected.append(seg)
    return selected


async def _select_best_candidate(
    call_llm: Callable[..., Any],
    question: str,
    candidates: list[str],
    plan: ModePlan,
    tag: str,
) -> int:
    if len(candidates) <= 1:
        return 0
    prompt = (
        prompts.CANDIDATE_SELECT_PROMPT
        + "\nQuestion: "
        + question
        + "\nCandidates:\n"
        + "\n".join([f"{idx+1}) {cand}" for idx, cand in enumerate(candidates)])
    )
    raw = await call_llm(prompts.CANDIDATE_SELECT_SYSTEM, prompt, model=plan.model, tag=tag)
    data = _parse_json_block(raw, fallback={})
    best = data.get("best") if isinstance(data, dict) else None
    if isinstance(best, int) and 1 <= best <= len(candidates):
        return best - 1
    return 0


async def _select_metric_prefixes(
    call_llm: Callable[..., Any],
    question: str,
    prefixes: list[str],
    plan: ModePlan,
    attempts: int,
) -> list[str]:
    if not prefixes:
        return []
    prompt = (
        prompts.METRIC_PREFIX_PROMPT
        + "\nQuestion: "
        + question
        + "\nAvailablePrefixes:\n"
        + ", ".join(prefixes)
    )
    candidates: list[list[str]] = []
    for _ in range(max(attempts, 1)):
        raw = await call_llm(prompts.METRIC_PREFIX_SYSTEM, prompt, model=plan.fast_model, tag="metric_prefix")
        data = _parse_json_block(raw, fallback={})
        picked = data.get("prefixes") if isinstance(data, dict) else None
        if not isinstance(picked, list):
            continue
        cleaned: list[str] = []
        allowed = set(prefixes)
        for item in picked:
            if isinstance(item, str) and item in allowed and item not in cleaned:
                cleaned.append(item)
        if cleaned:
            candidates.append(cleaned)
    if not candidates:
        return []
    if len(candidates) == 1:
        return candidates[0]
    render = ["; ".join(items) for items in candidates]
    best_idx = await _select_best_candidate(call_llm, question, render, plan, "metric_prefix_select")
    chosen = candidates[best_idx] if 0 <= best_idx < len(candidates) else candidates[0]
    if not chosen:
        merged: list[str] = []
        for entry in candidates:
            for item in entry:
                if item not in merged:
                    merged.append(item)
        chosen = merged
    return chosen[:8]


async def _select_metric_facts(
    call_llm: Callable[..., Any],
    question: str,

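For reference, the two prefix helpers above operate on pipe-delimited summary lines such as "cpu_usage: 91% | mem_usage: 62%": each " | " segment is matched against ^([a-z0-9_]+):, and segments are kept or dropped by their prefix. The snippet below is a stand-alone illustration of that contract; the sample lines are invented, and only the splitting/regex behavior mirrors the helpers in this diff.

# Stand-alone illustration of the prefix-based filtering contract (invented sample data).
import re

SUMMARY_LINES = [
    "cpu_usage: 91% on node-a | mem_usage: 62%",
    "disk_io: 12 MB/s | net_rx: 3 MB/s",
    "hottest: node-a cpu_usage 91%",
]


def prefixes_of(lines: list[str]) -> list[str]:
    # Same idea as _metric_prefixes_from_lines: unique `name:` prefixes, in order of appearance.
    found: list[str] = []
    for line in lines:
        for seg in (s.strip() for s in line.split(" | ")):
            match = re.match(r"^([a-z0-9_]+):", seg)
            if match and match.group(1) not in found:
                found.append(match.group(1))
    return found


def segments_for(lines: list[str], wanted: set[str]) -> list[str]:
    # Same idea as _lines_for_prefixes: keep only segments whose prefix was selected.
    kept: list[str] = []
    for line in lines:
        for seg in (s.strip() for s in line.split(" | ")):
            match = re.match(r"^([a-z0-9_]+):", seg)
            if match and match.group(1) in wanted and seg not in kept:
                kept.append(seg)
    return kept


print(prefixes_of(SUMMARY_LINES))
# ['cpu_usage', 'mem_usage', 'disk_io', 'net_rx', 'hottest']
print(segments_for(SUMMARY_LINES, {"cpu_usage", "mem_usage"}))
# ['cpu_usage: 91% on node-a', 'mem_usage: 62%']
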
View File

@@ -58,6 +58,17 @@ CHUNK_SCORE_PROMPT = (
    "Return JSON list of objects with: id, score, reason (<=12 words)."
)

METRIC_PREFIX_SYSTEM = (
    CLUSTER_SYSTEM
    + " Select relevant metric prefixes from the available list. "
    + "Return JSON only."
)

METRIC_PREFIX_PROMPT = (
    "Return JSON with field: prefixes (list). "
    "Only use values from AvailablePrefixes."
)

TOOL_SYSTEM = (
    CLUSTER_SYSTEM
    + " Suggest a safe, read-only command that could refine the answer. "
@@ -143,6 +154,17 @@ DRAFT_SELECT_PROMPT = (
    "Return JSON with field: best (1-based index)."
)

CANDIDATE_SELECT_SYSTEM = (
    CLUSTER_SYSTEM
    + " Pick the best candidate for accuracy and evidence use. "
    + "Return JSON only."
)

CANDIDATE_SELECT_PROMPT = (
    "Pick the best candidate for accuracy and grounding. "
    "Return JSON with field: best (1-based index)."
)

CRITIC_SYSTEM = (
    CLUSTER_SYSTEM
    + " Critique answers for unsupported claims or missing context. "