From b86c1097f7d05879db95236cde59fb49a990547f Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Mon, 2 Feb 2026 18:03:20 -0300
Subject: [PATCH] atlasbot: add metric prefix selection and redundancy

---
 atlasbot/engine/answerer.py | 152 +++++++++++++++++++++++++++++++++++-
 atlasbot/llm/prompts.py     |  22 ++++++
 2 files changed, 172 insertions(+), 2 deletions(-)

diff --git a/atlasbot/engine/answerer.py b/atlasbot/engine/answerer.py
index 381c8f5..8f0d57f 100644
--- a/atlasbot/engine/answerer.py
+++ b/atlasbot/engine/answerer.py
@@ -73,6 +73,8 @@ class ModePlan:
     use_gap: bool
     use_scores: bool
     drafts: int
+    metric_retries: int
+    subanswer_retries: int
 
 
 class AnswerEngine:
@@ -294,6 +296,18 @@ class AnswerEngine:
         hottest_facts = _extract_hottest_facts(summary_lines, f"{question} {normalized}")
         hardware_facts = _extract_hardware_usage_facts(summary_lines, f"{question} {normalized}")
         hotspot_line = next((line for line in summary_lines if line.startswith("hottest:")), None)
+        forced_metric_lines: list[str] = []
+        if classify.get("question_type") in {"metric", "diagnostic"}:
+            metric_prefixes = _metric_prefixes_from_lines(summary_lines)
+            selected_prefixes = await _select_metric_prefixes(
+                call_llm,
+                normalized,
+                metric_prefixes,
+                plan,
+                plan.metric_retries,
+            )
+            if selected_prefixes:
+                forced_metric_lines = _lines_for_prefixes(summary_lines, selected_prefixes)
         if not hardware_facts:
             hardware_tokens = ("hardware", "class", "type", "rpi", "jetson", "amd64", "arm64")
             lowered_q = f"{question} {normalized}".lower()
@@ -353,6 +367,10 @@ class AnswerEngine:
                 break
         if classify.get("question_type") in {"metric", "diagnostic"} and not hottest_facts and not hardware_facts:
             metric_candidates = _metric_candidate_lines(summary_lines, keyword_tokens)
+            if forced_metric_lines:
+                metric_candidates = forced_metric_lines + [
+                    line for line in metric_candidates if line not in forced_metric_lines
+                ]
             selected_facts = await _select_metric_facts(call_llm, normalized, metric_candidates, plan)
             if selected_facts:
                 metric_facts = selected_facts
@@ -396,6 +414,9 @@ class AnswerEngine:
         if hotspot_request and hotspot_line:
             metric_facts = [hotspot_line]
             key_facts = _merge_fact_lines(metric_facts, key_facts)
+        if forced_metric_lines:
+            metric_facts = _merge_fact_lines(forced_metric_lines, metric_facts)
+            key_facts = _merge_fact_lines(forced_metric_lines, key_facts)
         if self._settings.debug_pipeline:
             scored_preview = sorted(
                 [{"id": c["id"], "score": scored.get(c["id"], 0.0), "summary": c["summary"]} for c in chunks],
@@ -430,8 +451,28 @@ class AnswerEngine:
         subanswers: list[str] = []
         for subq in sub_questions:
             sub_prompt = prompts.SUBANSWER_PROMPT + "\nQuestion: " + subq
-            sub_answer = await call_llm(prompts.ANSWER_SYSTEM, sub_prompt, context=context, model=plan.model, tag="subanswer")
-            subanswers.append(sub_answer)
+            if plan.subanswer_retries > 1:
+                candidates: list[str] = []
+                for _ in range(plan.subanswer_retries):
+                    candidate = await call_llm(
+                        prompts.ANSWER_SYSTEM,
+                        sub_prompt,
+                        context=context,
+                        model=plan.model,
+                        tag="subanswer",
+                    )
+                    candidates.append(candidate)
+                best_idx = await _select_best_candidate(call_llm, subq, candidates, plan, "subanswer_select")
+                subanswers.append(candidates[best_idx])
+            else:
+                sub_answer = await call_llm(
+                    prompts.ANSWER_SYSTEM,
+                    sub_prompt,
+                    context=context,
+                    model=plan.model,
+                    tag="subanswer",
+                )
+                subanswers.append(sub_answer)
 
         if observer:
             observer("synthesize", "synthesizing")
@@ -1180,6 +1221,8 @@ def _mode_plan(settings: Settings, mode: str) -> ModePlan:
             use_gap=True,
             use_scores=True,
             drafts=2,
+            metric_retries=3,
+            subanswer_retries=3,
         )
     if mode == "smart":
         return ModePlan(
@@ -1194,6 +1237,8 @@ def _mode_plan(settings: Settings, mode: str) -> ModePlan:
             use_gap=True,
             use_scores=True,
             drafts=1,
+            metric_retries=2,
+            subanswer_retries=2,
         )
     return ModePlan(
         model=settings.ollama_model_fast,
@@ -1207,6 +1252,8 @@ def _mode_plan(settings: Settings, mode: str) -> ModePlan:
         use_gap=False,
         use_scores=False,
         drafts=1,
+        metric_retries=1,
+        subanswer_retries=1,
     )
 
 
@@ -1630,6 +1677,107 @@ def _metric_candidate_lines(lines: list[str], keywords: list[str] | None, limit:
     return candidates[:limit]
 
 
+def _metric_prefixes_from_lines(lines: list[str]) -> list[str]:
+    if not lines:
+        return []
+    prefixes: list[str] = []
+    for line in lines:
+        segments = [seg.strip() for seg in line.split(" | ")] if " | " in line else [line]
+        for seg in segments:
+            match = re.match(r"^([a-z0-9_]+):", seg)
+            if match:
+                prefix = match.group(1)
+                if prefix not in prefixes:
+                    prefixes.append(prefix)
+    return prefixes
+
+
+def _lines_for_prefixes(lines: list[str], prefixes: list[str]) -> list[str]:
+    if not lines or not prefixes:
+        return []
+    wanted = set(prefixes)
+    selected: list[str] = []
+    for line in lines:
+        segments = [seg.strip() for seg in line.split(" | ")] if " | " in line else [line]
+        for seg in segments:
+            match = re.match(r"^([a-z0-9_]+):", seg)
+            if match and match.group(1) in wanted:
+                if seg not in selected:
+                    selected.append(seg)
+    return selected
+
+
+async def _select_best_candidate(
+    call_llm: Callable[..., Any],
+    question: str,
+    candidates: list[str],
+    plan: ModePlan,
+    tag: str,
+) -> int:
+    if len(candidates) <= 1:
+        return 0
+    prompt = (
+        prompts.CANDIDATE_SELECT_PROMPT
+        + "\nQuestion: "
+        + question
+        + "\nCandidates:\n"
+        + "\n".join([f"{idx+1}) {cand}" for idx, cand in enumerate(candidates)])
+    )
+    raw = await call_llm(prompts.CANDIDATE_SELECT_SYSTEM, prompt, model=plan.model, tag=tag)
+    data = _parse_json_block(raw, fallback={})
+    best = data.get("best") if isinstance(data, dict) else None
+    if isinstance(best, int) and 1 <= best <= len(candidates):
+        return best - 1
+    return 0
+
+
+async def _select_metric_prefixes(
+    call_llm: Callable[..., Any],
+    question: str,
+    prefixes: list[str],
+    plan: ModePlan,
+    attempts: int,
+) -> list[str]:
+    if not prefixes:
+        return []
+    prompt = (
+        prompts.METRIC_PREFIX_PROMPT
+        + "\nQuestion: "
+        + question
+        + "\nAvailablePrefixes:\n"
+        + ", ".join(prefixes)
+    )
+    candidates: list[list[str]] = []
+    for _ in range(max(attempts, 1)):
+        raw = await call_llm(prompts.METRIC_PREFIX_SYSTEM, prompt, model=plan.fast_model, tag="metric_prefix")
+        data = _parse_json_block(raw, fallback={})
+        picked = data.get("prefixes") if isinstance(data, dict) else None
+        if not isinstance(picked, list):
+            continue
+        cleaned: list[str] = []
+        allowed = set(prefixes)
+        for item in picked:
+            if isinstance(item, str) and item in allowed and item not in cleaned:
+                cleaned.append(item)
+        if cleaned:
+            candidates.append(cleaned)
+    if not candidates:
+        return []
+    if len(candidates) == 1:
+        return candidates[0]
+    render = ["; ".join(items) for items in candidates]
+    best_idx = await _select_best_candidate(call_llm, question, render, plan, "metric_prefix_select")
+    chosen = candidates[best_idx] if 0 <= best_idx < len(candidates) else candidates[0]
+    if not chosen:
+        merged: list[str] = []
+        for entry in candidates:
+            for item in entry:
+                if item not in merged:
+                    merged.append(item)
+        chosen = merged
+    return chosen[:8]
+
+
 async def _select_metric_facts(
     call_llm: Callable[..., Any],
     question: str,
diff --git a/atlasbot/llm/prompts.py b/atlasbot/llm/prompts.py
index a47f38e..211df68 100644
--- a/atlasbot/llm/prompts.py
+++ b/atlasbot/llm/prompts.py
@@ -58,6 +58,17 @@ CHUNK_SCORE_PROMPT = (
     "Return JSON list of objects with: id, score, reason (<=12 words)."
 )
 
+METRIC_PREFIX_SYSTEM = (
+    CLUSTER_SYSTEM
+    + " Select relevant metric prefixes from the available list. "
+    + "Return JSON only."
+)
+
+METRIC_PREFIX_PROMPT = (
+    "Return JSON with field: prefixes (list). "
+    "Only use values from AvailablePrefixes."
+)
+
 TOOL_SYSTEM = (
     CLUSTER_SYSTEM
     + " Suggest a safe, read-only command that could refine the answer. "
@@ -143,6 +154,17 @@ DRAFT_SELECT_PROMPT = (
     "Return JSON with field: best (1-based index)."
 )
 
+CANDIDATE_SELECT_SYSTEM = (
+    CLUSTER_SYSTEM
+    + " Pick the best candidate for accuracy and evidence use. "
+    + "Return JSON only."
+)
+
+CANDIDATE_SELECT_PROMPT = (
+    "Pick the best candidate for accuracy and grounding. "
+    "Return JSON with field: best (1-based index)."
+)
+
 CRITIC_SYSTEM = (
     CLUSTER_SYSTEM
     + " Critique answers for unsupported claims or missing context. "