# atlasbot/atlasbot/engine/answerer/factsheet.py

from __future__ import annotations

import json
import re
from typing import Any

from ._base import *

# Summary lines no longer than this many characters receive a +1 score bonus
# in _quick_fact_sheet_lines (longer lines are still eligible, just unboosted).
MAX_FACT_LINE_CHARS = 180
# Knowledge-base lines longer than this many characters are skipped entirely
# by _quick_fact_sheet_lines.
MAX_KB_LINE_CHARS = 220


def _factsheet_kb_chars(mode: str, default_chars: int) -> int:
    """Return the knowledge-base character budget for *mode*.

    "genius" clamps ``default_chars`` into 4000..6000 and "smart" into
    3000..4500. Any other mode only enforces a floor of 1200 with no ceiling.
    """
    clamp_ranges = {"genius": (4000, 6000), "smart": (3000, 4500)}
    bounds = clamp_ranges.get(mode)
    if bounds is None:
        return max(1200, default_chars)
    floor, ceiling = bounds
    return min(max(default_chars, floor), ceiling)


def _factsheet_line_limit(mode: str) -> int:
    """Return the maximum number of fact-sheet lines for *mode*.

    "genius" gets 30 lines, "smart" gets 22, and every other mode gets 14.
    """
    per_mode = {"genius": 30, "smart": 22}
    return per_mode.get(mode, 14)


def _factsheet_instruction(mode: str) -> str:
    """Return the answer-style instruction text for *mode*.

    "genius" and "smart" each have a dedicated instruction; every other mode
    receives the shortest (1-3 sentence) instruction.
    """
    if mode not in ("genius", "smart"):
        return "Keep it to 1-3 sentences. If key data is missing, say what is missing and suggest atlas-smart."
    if mode == "smart":
        smart_text = (
            "Start with a direct conclusion and support it with key facts. Keep it to 2-5 sentences. "
            "If data is missing, say exactly what is missing and suggest atlas-genius."
        )
        return smart_text
    genius_text = (
        "Start with a direct conclusion, then include the strongest supporting facts and one caveat. "
        "Keep it to 4-8 sentences. If data is missing, name the missing metric explicitly."
    )
    return genius_text


def _factsheet_model(mode: str, plan: ModePlan) -> str:
    """Pick the model name from *plan* for the given *mode*.

    The "quick" and "fast" modes use ``plan.fast_model``; all other modes use
    ``plan.model``.
    """
    wants_fast_model = mode in {"quick", "fast"}
    return plan.fast_model if wants_fast_model else plan.model


def _is_plain_math_question(question: str) -> bool:
    """Return True when *question* looks like bare arithmetic, not a cluster question.

    A question qualifies when it mentions none of the cluster marker
    substrings and either:

    * consists solely of digits, whitespace and the characters ``+-*/().=``
      and contains at least one digit, or
    * contains a phrase such as "what is 2 + 2" / "what's 7-3" (integer
      operands with a single ``+ - * /`` operator).

    Empty or whitespace-only input returns False.
    """
    lowered = question.lower().strip()
    if not lowered:
        return False
    # Substring (not whole-word) match on purpose, so plurals such as
    # "nodes" or "pods" also mark the question as cluster-related.
    cluster_markers = (
        "titan",
        "atlas",
        "cluster",
        "node",
        "pod",
        "namespace",
        "workload",
        "grafana",
        "alert",
        "k8s",
        "kubernetes",
        "rpi",
        "longhorn",
        "postgres",
        "victoria",
        "ollama",
    )
    if any(marker in lowered for marker in cluster_markers):
        return False
    if re.fullmatch(r"[0-9\s+\-*/().=]+", lowered):
        # Punctuation-only input such as "..." or "()" matches the character
        # class but is not arithmetic; require at least one digit.
        return re.search(r"[0-9]", lowered) is not None
    return bool(re.search(r"\bwhat(?:'s| is)\s+\d+\s*[-+*/]\s*\d+\b", lowered))


def _quick_fact_sheet_lines(question: str, summary_lines: list[str], kb_lines: list[str], *, limit: int) -> list[str]:  # noqa: C901
    """Select the fact-sheet lines most relevant to *question*.

    Summary lines are scored (+4 for containing a priority marker, +3 per
    question term found in the line, +1 when the line is at most
    ``MAX_FACT_LINE_CHARS`` long) and the top ``limit`` are kept, with ties
    staying in input order. If nothing scores, the first ``limit`` non-blank
    summary lines are used instead.

    Knowledge-base lines are then appended when they mention a question term
    or a KB marker, up to ``max(4, limit // 3)`` of them. Blank lines, lines
    longer than ``MAX_KB_LINE_CHARS`` and KB file-header lines are skipped.

    The merged result is de-duplicated, keeps summary lines ahead of KB lines
    and stops growing once it holds ``limit`` entries.
    """
    question_terms = {
        word
        for word in re.findall(r"[a-z0-9][a-z0-9_-]{2,}", question.lower())
        if word not in GENERIC_METRIC_TOKENS
    }
    summary_markers = (
        "snapshot:",
        "nodes_total",
        "nodes_ready",
        "nodes_not_ready",
        "workers_ready",
        "workers_not_ready",
        "control_plane",
        "worker_nodes",
        "hottest",
        "postgres",
        "pods",
        "longhorn",
        "titan-",
        "rpi5",
        "rpi4",
        "jetson",
        "amd64",
    )
    kb_markers = ("runbook", "titan-", "rpi5", "rpi4", "amd64", "jetson")

    def term_hits(text: str) -> int:
        # Number of distinct question terms occurring as substrings of text.
        return sum(1 for word in question_terms if word in text)

    # --- rank summary lines -------------------------------------------------
    ranked: list[tuple[int, str]] = []
    for entry in summary_lines:
        fact = entry.strip()
        if not fact:
            continue
        folded = fact.lower()
        points = 3 * term_hits(folded)
        if any(marker in folded for marker in summary_markers):
            points += 4
        if len(fact) <= MAX_FACT_LINE_CHARS:
            points += 1
        if points > 0:
            ranked.append((points, fact))

    # sorted() is stable, so equally-scored lines keep their input order.
    ranked = sorted(ranked, key=lambda pair: pair[0], reverse=True)
    top_facts = [fact for _, fact in ranked[:limit]]
    if not top_facts:
        top_facts = [fact for fact in (entry.strip() for entry in summary_lines) if fact][:limit]

    # --- pick supporting knowledge-base lines -------------------------------
    kb_quota = max(4, limit // 3)
    kb_facts: list[str] = []
    for entry in kb_lines:
        note = entry.strip()
        if not note or len(note) > MAX_KB_LINE_CHARS:
            continue
        folded = note.lower()
        if "kb file:" in folded or "kb: atlas.json" in folded:
            continue
        if term_hits(folded) > 0 or any(marker in folded for marker in kb_markers):
            kb_facts.append(note)
            if len(kb_facts) >= kb_quota:
                break

    # --- merge, dropping duplicates -----------------------------------------
    combined: list[str] = []
    emitted: set[str] = set()
    for fact in (*top_facts, *kb_facts):
        if fact in emitted:
            continue
        emitted.add(fact)
        combined.append(fact)
        if len(combined) >= limit:
            break
    return combined


def _quick_fact_sheet_text(lines: list[str]) -> str:
    """Render *lines* as a "Fact Sheet:" header followed by one bullet per line.

    When *lines* is empty, a single placeholder bullet is emitted instead.
    """
    bullets = [f"- {entry}" for entry in lines] if lines else ["- No snapshot facts available."]
    return "\n".join(["Fact Sheet:", *bullets])


def _quick_fact_sheet_heuristic_answer(question: str, fact_lines: list[str]) -> str:
    """Answer a few common questions directly from the fact sheet, without a model.

    Handles two question families:

    * workload placement / last-resort node questions, answered with a fixed
      policy sentence;
    * node readiness questions, answered from the first fact line of the form
      ``nodes_total: N, ready: N, not_ready: N`` (spaces optional, ``:`` or
      ``=`` as separator).

    Returns the answer text, or an empty string when no heuristic applies or
    no fact line carries the node counts.
    """
    lowered = question.lower()
    if (
        any(token in lowered for token in ("placement", "schedule", "last resort", "last-resort"))
        and any(token in lowered for token in ("node", "workload", "worker", "titan"))
    ):
        return (
            "General workload placement is: prefer rpi5 workers first, then rpi4 workers. "
            "titan-22 is the last-resort general compute node, and titan-24 is the absolute last resort "
            "reserved for heavy one-offs."
        )

    # The question's intent does not depend on the fact lines, so classify it
    # once up front rather than once per line.
    if "node" not in lowered:
        return ""
    asks_not_ready = any(token in lowered for token in ("not ready", "not-ready", "unready"))
    asks_ready_count = "how many" in lowered and "ready" in lowered
    if not (asks_not_ready or asks_ready_count):
        return ""

    for line in fact_lines:
        compact = line.replace(" ", "")
        match = re.search(r"nodes_total[:=](\d+),ready[:=](\d+),not_ready[:=](\d+)", compact)
        if not match:
            continue
        total, ready, not_ready = match.groups()
        # "ready" is a substring of "not ready" and "unready", so not-ready
        # intent must be checked first; otherwise "how many nodes are not
        # ready?" would get the ready-first phrasing.
        if asks_not_ready:
            return f"The latest snapshot shows {not_ready} not-ready nodes ({ready} ready out of {total} total)."
        return f"The latest snapshot shows {ready} ready nodes out of {total} total ({not_ready} not ready)."
    return ""


def _json_excerpt(summary: dict[str, Any], max_chars: int = 12000) -> str:
    """Serialize *summary* to JSON and return at most its first *max_chars* characters.

    Non-ASCII characters are kept as-is rather than escaped. The result is a
    plain prefix of the serialized text, so it may be cut mid-value and is not
    guaranteed to be valid JSON.
    """
    return json.dumps(summary, ensure_ascii=False)[:max_chars]


# Star-imports skip underscore-prefixed names unless they are listed in
# __all__, so export every single-underscore name present in this module's
# globals at this point (dunder names excluded). Names without a leading
# underscore, such as MAX_FACT_LINE_CHARS, are not included in this list.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]