124 lines
4.3 KiB
Python
124 lines
4.3 KiB
Python
import json
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)
|
|
|
|
|
|
class KnowledgeBase:
|
|
def __init__(self, base_dir: str) -> None:
|
|
self._base = Path(base_dir) if base_dir else None
|
|
self._atlas: dict[str, Any] = {}
|
|
self._runbooks: list[dict[str, Any]] = []
|
|
self._loaded = False
|
|
|
|
def load(self) -> None:
|
|
if self._loaded or not self._base:
|
|
return
|
|
self._atlas = self._read_json(self._base / "catalog" / "atlas.json")
|
|
self._runbooks = self._read_json(self._base / "catalog" / "runbooks.json") or []
|
|
self._loaded = True
|
|
|
|
def _read_json(self, path: Path) -> dict[str, Any] | list[dict[str, Any]]:
|
|
if not path.exists():
|
|
return {}
|
|
try:
|
|
return json.loads(path.read_text())
|
|
except Exception as exc:
|
|
log.warning("kb load failed", extra={"extra": {"path": str(path), "error": str(exc)}})
|
|
return {}
|
|
|
|
def summary(self) -> str:
|
|
self.load()
|
|
if not self._atlas:
|
|
return ""
|
|
cluster = self._atlas.get("cluster")
|
|
sources = self._atlas.get("sources") if isinstance(self._atlas.get("sources"), list) else []
|
|
services = [src.get("name") for src in sources if isinstance(src, dict)]
|
|
parts: list[str] = []
|
|
if cluster:
|
|
parts.append(f"Cluster: {cluster}.")
|
|
if services:
|
|
parts.append(f"Services indexed: {len(services)}.")
|
|
if isinstance(self._atlas, dict):
|
|
keys = [key for key in self._atlas.keys() if key not in {"sources"}]
|
|
if keys:
|
|
parts.append(f"Atlas keys: {', '.join(sorted(keys)[:8])}.")
|
|
return " ".join(parts)
|
|
|
|
def runbook_titles(self, *, limit: int = 5) -> str:
|
|
self.load()
|
|
if not self._runbooks:
|
|
return ""
|
|
titles = []
|
|
for entry in self._runbooks:
|
|
if not isinstance(entry, dict):
|
|
continue
|
|
title = entry.get("title")
|
|
path = entry.get("path")
|
|
if title and path:
|
|
titles.append(f"- {title} ({path})")
|
|
if not titles:
|
|
return ""
|
|
return "Relevant runbooks:\n" + "\n".join(titles[:limit])
|
|
|
|
def runbook_paths(self, *, limit: int = 10) -> list[str]:
|
|
self.load()
|
|
if not self._runbooks:
|
|
return []
|
|
paths: list[str] = []
|
|
for entry in self._runbooks:
|
|
if not isinstance(entry, dict):
|
|
continue
|
|
path = entry.get("path")
|
|
if path:
|
|
paths.append(str(path))
|
|
return paths[:limit]
|
|
|
|
def chunk_lines(self, *, max_files: int = 20, max_chars: int = 6000) -> list[str]:
|
|
self.load()
|
|
lines: list[str] = []
|
|
if not self._base:
|
|
return lines
|
|
summary = self.summary()
|
|
if summary:
|
|
lines.append(f"KB Summary: {summary}")
|
|
# Prefer curated catalog JSON if present.
|
|
if self._atlas:
|
|
try:
|
|
atlas_json = json.dumps(self._atlas, indent=2)
|
|
lines.append("KB: atlas.json")
|
|
lines.extend(atlas_json.splitlines())
|
|
except Exception:
|
|
pass
|
|
if self._runbooks:
|
|
lines.append("KB: runbooks.json")
|
|
for entry in self._runbooks:
|
|
if not isinstance(entry, dict):
|
|
continue
|
|
title = entry.get("title")
|
|
path = entry.get("path")
|
|
if title and path:
|
|
lines.append(f"- {title} ({path})")
|
|
# Include markdown/text sources as additional chunks.
|
|
if len(lines) >= max_chars:
|
|
return lines
|
|
files = sorted(self._base.rglob("*.md")) + sorted(self._base.rglob("*.txt"))
|
|
for path in files:
|
|
if len(lines) >= max_chars:
|
|
break
|
|
if len(lines) > max_files * 50:
|
|
break
|
|
try:
|
|
text = path.read_text(encoding="utf-8", errors="ignore")
|
|
except Exception:
|
|
continue
|
|
if not text:
|
|
continue
|
|
lines.append(f"KB File: {path.relative_to(self._base)}")
|
|
lines.extend(text.splitlines())
|
|
if sum(len(line) for line in lines) >= max_chars:
|
|
break
|
|
return lines
|