124 lines
4.3 KiB
Python

import json
import logging
from pathlib import Path
from typing import Any
# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)
class KnowledgeBase:
def __init__(self, base_dir: str) -> None:
self._base = Path(base_dir) if base_dir else None
self._atlas: dict[str, Any] = {}
self._runbooks: list[dict[str, Any]] = []
self._loaded = False
def load(self) -> None:
if self._loaded or not self._base:
return
self._atlas = self._read_json(self._base / "catalog" / "atlas.json")
self._runbooks = self._read_json(self._base / "catalog" / "runbooks.json") or []
self._loaded = True
def _read_json(self, path: Path) -> dict[str, Any] | list[dict[str, Any]]:
if not path.exists():
return {}
try:
return json.loads(path.read_text())
except Exception as exc:
log.warning("kb load failed", extra={"extra": {"path": str(path), "error": str(exc)}})
return {}
def summary(self) -> str:
self.load()
if not self._atlas:
return ""
cluster = self._atlas.get("cluster")
sources = self._atlas.get("sources") if isinstance(self._atlas.get("sources"), list) else []
services = [src.get("name") for src in sources if isinstance(src, dict)]
parts: list[str] = []
if cluster:
parts.append(f"Cluster: {cluster}.")
if services:
parts.append(f"Services indexed: {len(services)}.")
if isinstance(self._atlas, dict):
keys = [key for key in self._atlas.keys() if key not in {"sources"}]
if keys:
parts.append(f"Atlas keys: {', '.join(sorted(keys)[:8])}.")
return " ".join(parts)
def runbook_titles(self, *, limit: int = 5) -> str:
self.load()
if not self._runbooks:
return ""
titles = []
for entry in self._runbooks:
if not isinstance(entry, dict):
continue
title = entry.get("title")
path = entry.get("path")
if title and path:
titles.append(f"- {title} ({path})")
if not titles:
return ""
return "Relevant runbooks:\n" + "\n".join(titles[:limit])
def runbook_paths(self, *, limit: int = 10) -> list[str]:
self.load()
if not self._runbooks:
return []
paths: list[str] = []
for entry in self._runbooks:
if not isinstance(entry, dict):
continue
path = entry.get("path")
if path:
paths.append(str(path))
return paths[:limit]
def chunk_lines(self, *, max_files: int = 20, max_chars: int = 6000) -> list[str]:
self.load()
lines: list[str] = []
if not self._base:
return lines
summary = self.summary()
if summary:
lines.append(f"KB Summary: {summary}")
# Prefer curated catalog JSON if present.
if self._atlas:
try:
atlas_json = json.dumps(self._atlas, indent=2)
lines.append("KB: atlas.json")
lines.extend(atlas_json.splitlines())
except Exception:
pass
if self._runbooks:
lines.append("KB: runbooks.json")
for entry in self._runbooks:
if not isinstance(entry, dict):
continue
title = entry.get("title")
path = entry.get("path")
if title and path:
lines.append(f"- {title} ({path})")
# Include markdown/text sources as additional chunks.
if len(lines) >= max_chars:
return lines
files = sorted(self._base.rglob("*.md")) + sorted(self._base.rglob("*.txt"))
for path in files:
if len(lines) >= max_chars:
break
if len(lines) > max_files * 50:
break
try:
text = path.read_text(encoding="utf-8", errors="ignore")
except Exception:
continue
if not text:
continue
lines.append(f"KB File: {path.relative_to(self._base)}")
lines.extend(text.splitlines())
if sum(len(line) for line in lines) >= max_chars:
break
return lines