From 7b1c891e70649529ae7de93c3a716c20b5991d0d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 27 Jan 2026 23:16:53 -0300 Subject: [PATCH] atlasbot: improve metric detection and counts --- services/comms/scripts/atlasbot/bot.py | 81 +++++++++++++++++++++----- 1 file changed, 68 insertions(+), 13 deletions(-) diff --git a/services/comms/scripts/atlasbot/bot.py b/services/comms/scripts/atlasbot/bot.py index 77868f1..eca5fef 100644 --- a/services/comms/scripts/atlasbot/bot.py +++ b/services/comms/scripts/atlasbot/bot.py @@ -120,6 +120,7 @@ OPERATION_HINTS = { "count": ("how many", "count", "number", "total"), "list": ("list", "which", "what are", "show", "names"), "top": ("top", "hottest", "highest", "most", "largest", "max", "maximum", "busiest", "busy"), + "bottom": ("lowest", "least", "minimum", "min", "smallest"), "status": ("ready", "not ready", "unready", "down", "missing", "status"), } @@ -568,6 +569,14 @@ def _detect_operation(q: str) -> str | None: return None def _detect_metric(q: str) -> str | None: + q = normalize_query(q) + if _has_any(q, ("disk", "storage")): + return "io" + if _has_any(q, ("io",)) and not _has_any(q, METRIC_HINTS["net"]): + return "io" + for metric, phrases in METRIC_HINTS.items(): + if _has_any(q, phrases): + return metric tokens = set(_tokens(q)) expanded: set[str] = set(tokens) for token in list(tokens): @@ -1237,6 +1246,34 @@ def _node_usage_top( return None +def _node_usage_bottom( + usage: list[dict[str, Any]], + *, + allowed_nodes: set[str] | None, +) -> tuple[str, float] | None: + best_node: str | None = None + best_val: float | None = None + for item in usage: + if not isinstance(item, dict): + continue + node = item.get("node") + if not node or not isinstance(node, str): + continue + if allowed_nodes and node not in allowed_nodes: + continue + value = item.get("value") + try: + numeric = float(value) + except (TypeError, ValueError): + continue + if best_val is None or numeric < best_val: + best_val = numeric + best_node = node + if best_node and best_val is not None: + return best_node, best_val + return None + + def snapshot_metric_answer( prompt: str, *, @@ -1267,18 +1304,20 @@ def snapshot_metric_answer( ) allowed_nodes = {node["name"] for node in filtered} if filtered else None - if metric in {"cpu", "ram", "net", "io"} and op in {"top", "status", None}: + if metric in {"cpu", "ram", "net", "io"} and op in {"top", "bottom", "status", None}: usage = metrics.get("node_usage", {}).get(metric, []) - top = _node_usage_top(usage, allowed_nodes=allowed_nodes) - if top: - node, val = top + pick = _node_usage_bottom if op == "bottom" else _node_usage_top + chosen = pick(usage, allowed_nodes=allowed_nodes) + if chosen: + node, val = chosen percent = metric in {"cpu", "ram"} value = _format_metric_value(str(val), percent=percent, rate=metric in {"net", "io"}) scope = "" if include_hw: scope = f" among {' and '.join(sorted(include_hw))}" - answer = f"Hottest node{scope}: {node} ({value})." - if allowed_nodes and len(allowed_nodes) != len(inventory): + label = "Lowest" if op == "bottom" else "Hottest" + answer = f"{label} node{scope}: {node} ({value})." + if allowed_nodes and len(allowed_nodes) != len(inventory) and op != "bottom": overall = _node_usage_top(usage, allowed_nodes=None) if overall and overall[0] != node: overall_val = _format_metric_value( @@ -1314,6 +1353,10 @@ def snapshot_metric_answer( failed = metrics.get("pods_failed") succeeded = metrics.get("pods_succeeded") status_terms = ("running", "pending", "failed", "succeeded", "completed") + if "total" in q or "sum" in q: + values = [v for v in (running, pending, failed, succeeded) if isinstance(v, (int, float))] + if values: + return _format_confidence(f"Total pods: {sum(values):.0f}.", "high") if "not running" in q or "not in running" in q or "non running" in q: parts = [v for v in (pending, failed, succeeded) if isinstance(v, (int, float))] if parts: @@ -1468,7 +1511,8 @@ def structured_answer( node, val = _primary_series_metric(res) if node and val is not None: percent = _metric_expr_uses_percent(entry) - value_fmt = _format_metric_value(val or "", percent=percent) + rate = metric in {"net", "io"} + value_fmt = _format_metric_value(val or "", percent=percent, rate=rate) metric_label = (metric or "").upper() label = f"{metric_label} node" if metric_label else "node" answer = f"Hottest {label}: {node} ({value_fmt})." @@ -1495,7 +1539,8 @@ def structured_answer( scoped_node, scoped_val = _primary_series_metric(res) if base_node and scoped_node and base_node != scoped_node: percent = _metric_expr_uses_percent(entry) - base_val_fmt = _format_metric_value(base_val or "", percent=percent) + rate = metric in {"net", "io"} + base_val_fmt = _format_metric_value(base_val or "", percent=percent, rate=rate) overall_note = f" Overall hottest node: {base_node} ({base_val_fmt})." return _format_confidence(f"Among {scope} nodes, {answer}{overall_note}", "high") return _format_confidence(answer, "high") @@ -1525,9 +1570,14 @@ def structured_answer( names = [node["name"] for node in filtered] if op == "status": + scope_label = "nodes" + if include_hw: + scope_label = f"{' and '.join(sorted(include_hw))} nodes" + elif only_workers: + scope_label = "worker nodes" if "missing" in q and ("ready" in q or "readiness" in q): return _format_confidence( - "Not ready nodes: " + (", ".join(names) if names else "none") + ".", + f"Not ready {scope_label}: " + (", ".join(names) if names else "none") + ".", "high", ) if "missing" in q and expected_workers: @@ -1538,16 +1588,21 @@ def structured_answer( ) if only_ready is False: return _format_confidence( - "Not ready nodes: " + (", ".join(names) if names else "none") + ".", + f"Not ready {scope_label}: " + (", ".join(names) if names else "none") + ".", "high", ) if only_ready is True: return _format_confidence( - f"Ready nodes ({len(names)}): " + (", ".join(names) if names else "none") + ".", + f"Ready {scope_label} ({len(names)}): " + (", ".join(names) if names else "none") + ".", "high", ) if op == "count": + scope_label = "nodes" + if include_hw: + scope_label = f"{' and '.join(sorted(include_hw))} nodes" + elif only_workers: + scope_label = "worker nodes" if only_workers and "ready" in q and ("total" in q or "vs" in q or "versus" in q): total_workers = _inventory_filter( inventory, @@ -1576,9 +1631,9 @@ def structured_answer( msg += f" Missing: {', '.join(missing)}." return _format_confidence(msg, "high") if only_ready is True: - return _format_confidence(f"Ready nodes: {len(names)}.", "high") + return _format_confidence(f"Ready {scope_label}: {len(names)}.", "high") if only_ready is False: - return _format_confidence(f"Not ready nodes: {len(names)}.", "high") + return _format_confidence(f"Not ready {scope_label}: {len(names)}.", "high") if not (include_hw or exclude_hw or nodes_in_query or only_workers or role_filters): return _format_confidence(f"Atlas has {len(names)} nodes.", "high") return _format_confidence(f"Matching nodes: {len(names)}.", "high")