diff --git a/services/comms/scripts/atlasbot/bot.py b/services/comms/scripts/atlasbot/bot.py index 9ecd06d..f85b81a 100644 --- a/services/comms/scripts/atlasbot/bot.py +++ b/services/comms/scripts/atlasbot/bot.py @@ -152,6 +152,16 @@ CLUSTER_HINT_WORDS = { "deployment", "daemonset", "statefulset", + "snapshot", + "anomaly", + "anomalies", + "monitor", + "monitoring", + "runbook", + "runbooks", + "documentation", + "docs", + "playbook", "grafana", "victoria", "prometheus", @@ -203,6 +213,12 @@ _INSIGHT_HINT_WORDS = { "favorite", "favourite", "trivia", + "anomaly", + "anomalies", + "monitor", + "monitoring", + "alert", + "alerts", "stand out", "stands out", } @@ -532,7 +548,14 @@ def _humanize_rate(value: str, *, unit: str) -> str: return f"{val:.2f} B/s" def _has_any(text: str, phrases: tuple[str, ...]) -> bool: - return any(p in text for p in phrases) + for phrase in phrases: + if " " in phrase: + if phrase in text: + return True + else: + if re.search(rf"\b{re.escape(phrase)}\b", text): + return True + return False def _detect_operation(q: str) -> str | None: if _has_any(q, OPERATION_HINTS["top"]): @@ -552,6 +575,8 @@ def _detect_metric(q: str) -> str | None: part = part.strip() if len(part) >= 2: expanded.add(part) + if part.endswith("s") and len(part) >= 4: + expanded.add(part[:-1]) tokens = expanded for metric, phrases in METRIC_HINTS.items(): for phrase in phrases: @@ -565,6 +590,8 @@ def _detect_metric(q: str) -> str | None: def _detect_hardware_filters(q: str) -> tuple[set[str], set[str]]: include: set[str] = set() exclude: set[str] = set() + if any(term in q for term in ("gpu", "gpus", "accelerator", "accelerators", "cuda", "nvidia")): + include.add("jetson") rpi_specific = any( phrase in q for phrase in ( @@ -1287,6 +1314,10 @@ def snapshot_metric_answer( failed = metrics.get("pods_failed") succeeded = metrics.get("pods_succeeded") status_terms = ("running", "pending", "failed", "succeeded", "completed") + if "not running" in q or "not in running" in q 
or "non running" in q: + parts = [v for v in (pending, failed, succeeded) if isinstance(v, (int, float))] + if parts: + return _format_confidence(f"Pods not running: {sum(parts):.0f}.", "high") if sum(1 for term in status_terms if term in q) > 1: parts = [] if running is not None: @@ -1350,6 +1381,8 @@ def structured_answer( op = "top" entity = _detect_entity(q) include_hw, exclude_hw = _detect_hardware_filters(q) + if entity is None and (include_hw or exclude_hw): + entity = "node" nodes_in_query = _extract_titan_nodes(q) only_workers = "worker" in q or "workers" in q role_filters = _detect_role_filters(q) @@ -1385,6 +1418,20 @@ def structured_answer( if hw_line: return _format_confidence(hw_line, "medium") + if ( + entity == "node" + and any(term in q for term in ("arm64", "amd64")) + and any(term in q for term in ("mostly", "majority", "more")) + ): + arm64_count = len([n for n in inventory if n.get("arch") == "arm64"]) + amd64_count = len([n for n in inventory if n.get("arch") == "amd64"]) + if arm64_count or amd64_count: + majority = "arm64" if arm64_count >= amd64_count else "amd64" + return _format_confidence( + f"arm64 nodes: {arm64_count}, amd64 nodes: {amd64_count}. 
Mostly {majority}.", + "high", + ) + if op == "top" and metric is None and not any(word in q for word in ("hardware", "architecture", "class")): metric = "cpu" @@ -1491,6 +1538,27 @@ def structured_answer( ) if op == "count": + if only_workers and "ready" in q and ("total" in q or "vs" in q or "versus" in q): + total_workers = _inventory_filter( + inventory, + include_hw=include_hw, + exclude_hw=exclude_hw, + only_workers=True, + only_ready=None, + nodes_in_query=nodes_in_query, + ) + ready_workers = _inventory_filter( + inventory, + include_hw=include_hw, + exclude_hw=exclude_hw, + only_workers=True, + only_ready=True, + nodes_in_query=nodes_in_query, + ) + return _format_confidence( + f"Worker nodes ready: {len(ready_workers)} / {len(total_workers)} total.", + "high", + ) if expected_workers and ("expected" in q or "should" in q): missing = sorted(set(expected_workers) - {n["name"] for n in inventory}) msg = f"Grafana inventory expects {len(expected_workers)} worker nodes." @@ -1711,6 +1779,15 @@ def _doc_intent(query: str) -> bool: "how to", "instructions", "playbook", + "next step", + "next steps", + "what should", + "what do i", + "what to do", + "troubleshoot", + "triage", + "recover", + "remediate", ) ) @@ -2615,10 +2692,13 @@ def _candidate_note(candidate: dict[str, Any]) -> str: def _ensure_scores(answer: str) -> str: text = answer.strip() lines = [line.strip() for line in text.splitlines() if line.strip()] - has_relevance = any(line.lower().startswith("relevance") for line in lines) - has_satisfaction = any(line.lower().startswith("satisfaction") for line in lines) - has_confidence = any(line.lower().startswith("confidence") for line in lines) - has_risk = any(line.lower().startswith("hallucinationrisk") for line in lines) + def _score_key(line: str) -> str: + cleaned = line.strip().lstrip("-•* ").strip() + return cleaned.lower() + has_relevance = any(_score_key(line).startswith("relevance") for line in lines) + has_satisfaction = 
any(_score_key(line).startswith("satisfaction") for line in lines) + has_confidence = any(_score_key(line).startswith("confidence") for line in lines) + has_risk = any(_score_key(line).startswith("hallucinationrisk") for line in lines) if not has_confidence: lines.append("Confidence: medium") if not has_relevance: @@ -3004,6 +3084,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler): _is_subjective_query(cleaned) or _knowledge_intent(cleaned) or _is_overview_query(cleaned) + or _doc_intent(cleaned) ) if open_ended: answer = open_ended_answer( @@ -3558,6 +3639,7 @@ def sync_loop(token: str, room_id: str): _is_subjective_query(cleaned_body) or _knowledge_intent(cleaned_body) or _is_overview_query(cleaned_body) + or _doc_intent(cleaned_body) ) if open_ended: reply = open_ended_with_thinking(