# Python source - 165 lines, 5.7 KiB
import json
import logging
from pathlib import Path
from typing import Any

# Module-level logger shared by everything in this file.
log = logging.getLogger(__name__)


class KnowledgeBase:
|
|
"""Load Atlas knowledge-base files and expose summary snippets."""
|
|
|
|
def __init__(self, base_dir: str) -> None:
|
|
self._base = Path(base_dir) if base_dir else None
|
|
self._atlas: dict[str, Any] = {}
|
|
self._runbooks: list[dict[str, Any]] = []
|
|
self._loaded = False
|
|
|
|
def load(self) -> None:
|
|
"""Load catalog files once so subsequent reads stay cheap."""
|
|
|
|
if self._loaded or not self._base:
|
|
return
|
|
self._atlas = self._read_json(self._base / "catalog" / "atlas.json")
|
|
self._runbooks = self._read_json(self._base / "catalog" / "runbooks.json") or []
|
|
self._loaded = True
|
|
|
|
def _read_json(self, path: Path) -> dict[str, Any] | list[dict[str, Any]]:
|
|
if not path.exists():
|
|
return {}
|
|
try:
|
|
return json.loads(path.read_text())
|
|
except Exception as exc:
|
|
log.warning("kb load failed", extra={"extra": {"path": str(path), "error": str(exc)}})
|
|
return {}
|
|
|
|
def summary(self) -> str:
|
|
"""Return a short human-readable KB summary for prompt context."""
|
|
|
|
self.load()
|
|
if not self._atlas:
|
|
return ""
|
|
cluster = self._atlas.get("cluster")
|
|
sources = self._atlas.get("sources") if isinstance(self._atlas.get("sources"), list) else []
|
|
services = [src.get("name") for src in sources if isinstance(src, dict)]
|
|
parts: list[str] = []
|
|
if cluster:
|
|
parts.append(f"Cluster: {cluster}.")
|
|
if services:
|
|
parts.append(f"Services indexed: {len(services)}.")
|
|
if isinstance(self._atlas, dict):
|
|
keys = [key for key in self._atlas if key not in {"sources"}]
|
|
if keys:
|
|
parts.append(f"Atlas keys: {', '.join(sorted(keys)[:8])}.")
|
|
return " ".join(parts)
|
|
|
|
def runbook_titles(self, *, limit: int = 5) -> str:
|
|
"""Render the top runbook titles for prompt context."""
|
|
|
|
self.load()
|
|
if not self._runbooks:
|
|
return ""
|
|
titles = []
|
|
for entry in self._runbooks:
|
|
if not isinstance(entry, dict):
|
|
continue
|
|
title = entry.get("title")
|
|
path = entry.get("path")
|
|
if title and path:
|
|
titles.append(f"- {title} ({path})")
|
|
if not titles:
|
|
return ""
|
|
return "Relevant runbooks:\n" + "\n".join(titles[:limit])
|
|
|
|
def runbook_paths(self, *, limit: int = 10) -> list[str]:
|
|
"""Return the runbook paths used for exact-path enforcement."""
|
|
|
|
self.load()
|
|
if not self._runbooks:
|
|
return []
|
|
paths: list[str] = []
|
|
for entry in self._runbooks:
|
|
if not isinstance(entry, dict):
|
|
continue
|
|
path = entry.get("path")
|
|
if path:
|
|
paths.append(str(path))
|
|
return paths[:limit]
|
|
|
|
def chunk_lines(self, *, max_files: int = 20, max_chars: int = 6000) -> list[str]:
|
|
"""Collect KB excerpts into prompt-sized chunks."""
|
|
|
|
self.load()
|
|
if not self._base:
|
|
return []
|
|
lines: list[str] = []
|
|
self._append_summary(lines)
|
|
self._append_catalog(lines, max_chars)
|
|
if not self._within_limit(lines, max_chars):
|
|
return lines
|
|
self._append_runbooks(lines)
|
|
if not self._within_limit(lines, max_chars):
|
|
return lines
|
|
self._append_files(lines, max_files=max_files, max_chars=max_chars)
|
|
return lines
|
|
|
|
def _append_summary(self, lines: list[str]) -> None:
|
|
summary = self.summary()
|
|
if summary:
|
|
lines.append(f"KB Summary: {summary}")
|
|
|
|
def _append_catalog(self, lines: list[str], max_chars: int) -> None:
|
|
if not self._atlas:
|
|
return
|
|
if not self._within_limit(lines, max_chars):
|
|
return
|
|
try:
|
|
atlas_json = json.dumps(self._atlas, indent=2)
|
|
except Exception:
|
|
return
|
|
lines.append("KB: atlas.json")
|
|
self._extend_with_limit(lines, atlas_json.splitlines(), max_chars)
|
|
|
|
def _append_runbooks(self, lines: list[str]) -> None:
|
|
if not self._runbooks:
|
|
return
|
|
lines.append("KB: runbooks.json")
|
|
for entry in self._runbooks:
|
|
if not isinstance(entry, dict):
|
|
continue
|
|
title = entry.get("title")
|
|
path = entry.get("path")
|
|
if title and path:
|
|
lines.append(f"- {title} ({path})")
|
|
|
|
def _append_files(self, lines: list[str], *, max_files: int, max_chars: int) -> None:
|
|
files = sorted(self._base.rglob("*.md")) + sorted(self._base.rglob("*.txt"))
|
|
for path in files:
|
|
if not self._within_limit(lines, max_chars):
|
|
break
|
|
if len(lines) > max_files * 50:
|
|
break
|
|
try:
|
|
text = path.read_text(encoding="utf-8", errors="ignore")
|
|
except Exception:
|
|
continue
|
|
if not text:
|
|
continue
|
|
lines.append(f"KB File: {path.relative_to(self._base)}")
|
|
if not self._extend_with_limit(lines, text.splitlines(), max_chars):
|
|
break
|
|
|
|
@staticmethod
|
|
def _within_limit(lines: list[str], max_chars: int) -> bool:
|
|
return sum(len(line) for line in lines) < max_chars
|
|
|
|
@staticmethod
|
|
def _extend_with_limit(lines: list[str], new_lines: list[str], max_chars: int) -> bool:
|
|
total = sum(len(line) for line in lines)
|
|
for line in new_lines:
|
|
if total + len(line) >= max_chars:
|
|
return False
|
|
lines.append(line)
|
|
total += len(line)
|
|
return True
|