atlasbot: refine cluster intent handling

This commit is contained in:
Brad Stein 2026-01-27 22:44:49 -03:00
parent fc10eed704
commit 23533e08ee

View File

@ -152,6 +152,16 @@ CLUSTER_HINT_WORDS = {
"deployment",
"daemonset",
"statefulset",
"snapshot",
"anomaly",
"anomalies",
"monitor",
"monitoring",
"runbook",
"runbooks",
"documentation",
"docs",
"playbook",
"grafana",
"victoria",
"prometheus",
@ -203,6 +213,12 @@ _INSIGHT_HINT_WORDS = {
"favorite",
"favourite",
"trivia",
"anomaly",
"anomalies",
"monitor",
"monitoring",
"alert",
"alerts",
"stand out",
"stands out",
}
@ -532,7 +548,14 @@ def _humanize_rate(value: str, *, unit: str) -> str:
return f"{val:.2f} B/s"
def _has_any(text: str, phrases: tuple[str, ...]) -> bool:
return any(p in text for p in phrases)
for phrase in phrases:
if " " in phrase:
if phrase in text:
return True
else:
if re.search(rf"\\b{re.escape(phrase)}\\b", text):
return True
return False
def _detect_operation(q: str) -> str | None:
if _has_any(q, OPERATION_HINTS["top"]):
@ -552,6 +575,8 @@ def _detect_metric(q: str) -> str | None:
part = part.strip()
if len(part) >= 2:
expanded.add(part)
if part.endswith("s") and len(part) >= 4:
expanded.add(part[:-1])
tokens = expanded
for metric, phrases in METRIC_HINTS.items():
for phrase in phrases:
@ -565,6 +590,8 @@ def _detect_metric(q: str) -> str | None:
def _detect_hardware_filters(q: str) -> tuple[set[str], set[str]]:
include: set[str] = set()
exclude: set[str] = set()
if any(term in q for term in ("gpu", "gpus", "accelerator", "accelerators", "cuda", "nvidia")):
include.add("jetson")
rpi_specific = any(
phrase in q
for phrase in (
@ -1287,6 +1314,10 @@ def snapshot_metric_answer(
failed = metrics.get("pods_failed")
succeeded = metrics.get("pods_succeeded")
status_terms = ("running", "pending", "failed", "succeeded", "completed")
if "not running" in q or "not in running" in q or "non running" in q:
parts = [v for v in (pending, failed, succeeded) if isinstance(v, (int, float))]
if parts:
return _format_confidence(f"Pods not running: {sum(parts):.0f}.", "high")
if sum(1 for term in status_terms if term in q) > 1:
parts = []
if running is not None:
@ -1350,6 +1381,8 @@ def structured_answer(
op = "top"
entity = _detect_entity(q)
include_hw, exclude_hw = _detect_hardware_filters(q)
if entity is None and (include_hw or exclude_hw):
entity = "node"
nodes_in_query = _extract_titan_nodes(q)
only_workers = "worker" in q or "workers" in q
role_filters = _detect_role_filters(q)
@ -1385,6 +1418,20 @@ def structured_answer(
if hw_line:
return _format_confidence(hw_line, "medium")
if (
entity == "node"
and any(term in q for term in ("arm64", "amd64"))
and any(term in q for term in ("mostly", "majority", "more"))
):
arm64_count = len([n for n in inventory if n.get("arch") == "arm64"])
amd64_count = len([n for n in inventory if n.get("arch") == "amd64"])
if arm64_count or amd64_count:
majority = "arm64" if arm64_count >= amd64_count else "amd64"
return _format_confidence(
f"arm64 nodes: {arm64_count}, amd64 nodes: {amd64_count}. Mostly {majority}.",
"high",
)
if op == "top" and metric is None and not any(word in q for word in ("hardware", "architecture", "class")):
metric = "cpu"
@ -1491,6 +1538,27 @@ def structured_answer(
)
if op == "count":
if only_workers and "ready" in q and ("total" in q or "vs" in q or "versus" in q):
total_workers = _inventory_filter(
inventory,
include_hw=include_hw,
exclude_hw=exclude_hw,
only_workers=True,
only_ready=None,
nodes_in_query=nodes_in_query,
)
ready_workers = _inventory_filter(
inventory,
include_hw=include_hw,
exclude_hw=exclude_hw,
only_workers=True,
only_ready=True,
nodes_in_query=nodes_in_query,
)
return _format_confidence(
f"Worker nodes ready: {len(ready_workers)} / {len(total_workers)} total.",
"high",
)
if expected_workers and ("expected" in q or "should" in q):
missing = sorted(set(expected_workers) - {n["name"] for n in inventory})
msg = f"Grafana inventory expects {len(expected_workers)} worker nodes."
@ -1711,6 +1779,15 @@ def _doc_intent(query: str) -> bool:
"how to",
"instructions",
"playbook",
"next step",
"next steps",
"what should",
"what do i",
"what to do",
"troubleshoot",
"triage",
"recover",
"remediate",
)
)
@ -2615,10 +2692,13 @@ def _candidate_note(candidate: dict[str, Any]) -> str:
def _ensure_scores(answer: str) -> str:
text = answer.strip()
lines = [line.strip() for line in text.splitlines() if line.strip()]
has_relevance = any(line.lower().startswith("relevance") for line in lines)
has_satisfaction = any(line.lower().startswith("satisfaction") for line in lines)
has_confidence = any(line.lower().startswith("confidence") for line in lines)
has_risk = any(line.lower().startswith("hallucinationrisk") for line in lines)
def _score_key(line: str) -> str:
cleaned = line.strip().lstrip("-•* ").strip()
return cleaned.lower()
has_relevance = any(_score_key(line).startswith("relevance") for line in lines)
has_satisfaction = any(_score_key(line).startswith("satisfaction") for line in lines)
has_confidence = any(_score_key(line).startswith("confidence") for line in lines)
has_risk = any(_score_key(line).startswith("hallucinationrisk") for line in lines)
if not has_confidence:
lines.append("Confidence: medium")
if not has_relevance:
@ -3004,6 +3084,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
_is_subjective_query(cleaned)
or _knowledge_intent(cleaned)
or _is_overview_query(cleaned)
or _doc_intent(cleaned)
)
if open_ended:
answer = open_ended_answer(
@ -3558,6 +3639,7 @@ def sync_loop(token: str, room_id: str):
_is_subjective_query(cleaned_body)
or _knowledge_intent(cleaned_body)
or _is_overview_query(cleaned_body)
or _doc_intent(cleaned_body)
)
if open_ended:
reply = open_ended_with_thinking(