# Listing metadata (from the file viewer): 190 lines, 5.9 KiB, Python
from __future__ import annotations
|
|
|
|
import json
|
|
import re
|
|
from typing import Any
|
|
|
|
from ._base import *
|
|
|
|
# Summary lines no longer than this many characters earn a +1 score bonus
# in _quick_fact_sheet_lines.
MAX_FACT_LINE_CHARS: int = 180

# KB lines longer than this many characters are skipped by
# _quick_fact_sheet_lines.
MAX_KB_LINE_CHARS: int = 220
|
|
|
|
|
|
def _factsheet_kb_chars(mode: str, default_chars: int) -> int:
|
|
if mode == "genius":
|
|
return min(max(default_chars, 4000), 6000)
|
|
if mode == "smart":
|
|
return min(max(default_chars, 3000), 4500)
|
|
return max(1200, default_chars)
|
|
|
|
|
|
def _factsheet_line_limit(mode: str) -> int:
|
|
if mode == "genius":
|
|
return 30
|
|
if mode == "smart":
|
|
return 22
|
|
return 14
|
|
|
|
|
|
def _factsheet_instruction(mode: str) -> str:
|
|
if mode == "genius":
|
|
return (
|
|
"Start with a direct conclusion, then include the strongest supporting facts and one caveat. "
|
|
"Keep it to 4-8 sentences. If data is missing, name the missing metric explicitly."
|
|
)
|
|
if mode == "smart":
|
|
return (
|
|
"Start with a direct conclusion and support it with key facts. Keep it to 2-5 sentences. "
|
|
"If data is missing, say exactly what is missing and suggest atlas-genius."
|
|
)
|
|
return "Keep it to 1-3 sentences. If key data is missing, say what is missing and suggest atlas-smart."
|
|
|
|
|
|
def _factsheet_model(mode: str, plan: ModePlan) -> str:
|
|
if mode in {"quick", "fast"}:
|
|
return plan.fast_model
|
|
return plan.model
|
|
|
|
|
|
def _is_plain_math_question(question: str) -> bool:
|
|
lowered = question.lower().strip()
|
|
if not lowered:
|
|
return False
|
|
cluster_markers = (
|
|
"titan",
|
|
"atlas",
|
|
"cluster",
|
|
"node",
|
|
"pod",
|
|
"namespace",
|
|
"workload",
|
|
"grafana",
|
|
"alert",
|
|
"k8s",
|
|
"kubernetes",
|
|
"rpi",
|
|
"longhorn",
|
|
"postgres",
|
|
"victoria",
|
|
"ollama",
|
|
)
|
|
if any(token in lowered for token in cluster_markers):
|
|
return False
|
|
return bool(
|
|
re.fullmatch(r"[0-9\s+\-*/().=]+", lowered)
|
|
or re.search(r"\bwhat(?:'s| is)\s+\d+\s*[-+*/]\s*\d+\b", lowered)
|
|
)
|
|
|
|
|
|
def _quick_fact_sheet_lines(question: str, summary_lines: list[str], kb_lines: list[str], *, limit: int) -> list[str]:  # noqa: C901
    """Pick the most relevant summary and KB lines for a quick fact sheet.

    Summary lines are scored (+4 for containing a priority marker, +3 per
    question token found in the line, +1 when the line is at most
    ``MAX_FACT_LINE_CHARS`` long) and the top *limit* scoring lines are kept
    in score order; ties keep their input order. If no line scores, the first
    *limit* non-blank summary lines are used instead.

    KB lines are appended after the summary lines, so they only fill whatever
    room is left under *limit*. A KB line is eligible when it is non-blank, at
    most ``MAX_KB_LINE_CHARS`` long, is not a "kb file:" / "kb: atlas.json"
    header, and either shares a token with the question or contains one of
    the KB markers. At most ``max(4, limit // 3)`` KB lines are collected.

    Args:
        question: The user's question; tokenised to find overlap with lines.
        summary_lines: Candidate snapshot summary lines.
        kb_lines: Candidate knowledge-base lines.
        limit: Maximum number of lines to return.

    Returns:
        De-duplicated lines (first occurrence wins), at most *limit* long.
        An empty list when *limit* is zero or negative.
    """
    if limit <= 0:
        # Without this guard the merge step would still emit one line, and a
        # negative limit would slice the scored list from the wrong end.
        return []

    tokens = {
        token
        for token in re.findall(r"[a-z0-9][a-z0-9_-]{2,}", question.lower())
        if token not in GENERIC_METRIC_TOKENS
    }
    priority_markers = (
        "snapshot:",
        "nodes_total",
        "nodes_ready",
        "nodes_not_ready",
        "workers_ready",
        "workers_not_ready",
        "control_plane",
        "worker_nodes",
        "hottest",
        "postgres",
        "pods",
        "longhorn",
        "titan-",
        "rpi5",
        "rpi4",
        "jetson",
        "amd64",
    )

    scored: list[tuple[int, str]] = []
    for raw in summary_lines:
        line = raw.strip()
        if not line:
            continue
        lowered = line.lower()
        score = 0
        if any(marker in lowered for marker in priority_markers):
            score += 4
        score += 3 * sum(1 for token in tokens if token in lowered)
        if len(line) <= MAX_FACT_LINE_CHARS:
            score += 1
        if score > 0:
            scored.append((score, line))

    # list.sort is stable, so equally scored lines keep their input order.
    scored.sort(key=lambda item: item[0], reverse=True)
    selected = [line for _, line in scored[:limit]]
    if not selected:
        selected = [line.strip() for line in summary_lines if line.strip()][:limit]

    kb_markers = ("runbook", "titan-", "rpi5", "rpi4", "amd64", "jetson")
    kb_cap = max(4, limit // 3)
    kb_selected: list[str] = []
    for raw in kb_lines:
        line = raw.strip()
        if not line or len(line) > MAX_KB_LINE_CHARS:
            continue
        lowered = line.lower()
        if "kb file:" in lowered or "kb: atlas.json" in lowered:
            continue
        if any(token in lowered for token in tokens) or any(marker in lowered for marker in kb_markers):
            kb_selected.append(line)
        if len(kb_selected) >= kb_cap:
            break

    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(selected + kb_selected))[:limit]
|
|
|
|
|
|
def _quick_fact_sheet_text(lines: list[str]) -> str:
|
|
if not lines:
|
|
return "Fact Sheet:\n- No snapshot facts available."
|
|
body = "\n".join([f"- {line}" for line in lines])
|
|
return "Fact Sheet:\n" + body
|
|
|
|
|
|
def _quick_fact_sheet_heuristic_answer(question: str, fact_lines: list[str]) -> str:
|
|
lowered = question.lower()
|
|
if (
|
|
any(token in lowered for token in ("placement", "schedule", "last resort", "last-resort"))
|
|
and any(token in lowered for token in ("node", "workload", "worker", "titan"))
|
|
):
|
|
return (
|
|
"General workload placement is: prefer rpi5 workers first, then rpi4 workers. "
|
|
"titan-22 is the last-resort general compute node, and titan-24 is the absolute last resort "
|
|
"reserved for heavy one-offs."
|
|
)
|
|
|
|
for line in fact_lines:
|
|
compact = line.replace(" ", "")
|
|
match = re.search(r"nodes_total[:=](\d+),ready[:=](\d+),not_ready[:=](\d+)", compact)
|
|
if not match:
|
|
continue
|
|
total = match.group(1)
|
|
ready = match.group(2)
|
|
not_ready = match.group(3)
|
|
if "how many" in lowered and "ready" in lowered and "node" in lowered:
|
|
return f"The latest snapshot shows {ready} ready nodes out of {total} total ({not_ready} not ready)."
|
|
if ("not ready" in lowered or "unready" in lowered) and "node" in lowered:
|
|
return f"The latest snapshot shows {not_ready} not-ready nodes ({ready} ready out of {total} total)."
|
|
return ""
|
|
|
|
|
|
def _json_excerpt(summary: dict[str, Any], max_chars: int = 12000) -> str:
|
|
raw = json.dumps(summary, ensure_ascii=False)
|
|
return raw[:max_chars]
|
|
|
|
|
|
# Export every single-underscore name present in the module namespace at this
# point (dunder names excluded), so `from <this module> import *` exposes the
# private helpers defined above. Names defined after this line are not included.
# NOTE(review): this presumably also re-exports underscore names pulled in by
# `from ._base import *` - confirm against _base's own __all__.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|