Compare commits

...

2 Commits

Author SHA1 Message Date
2eecc3d88d metis: switch watcher to http 2026-03-31 14:18:31 -03:00
cf0271a8ea metis: add sentinel watch task 2026-03-31 14:07:02 -03:00
7 changed files with 282 additions and 3 deletions

View File

@ -25,6 +25,7 @@ from .services.mailu import mailu
from .services.mailu_events import mailu_events
from .services.nextcloud import nextcloud
from .services.image_sweeper import image_sweeper
from .services.metis import metis
from .services.opensearch_prune import prune_indices
from .services.pod_cleaner import clean_finished_pods
from .services.vaultwarden_sync import run_vaultwarden_sync
@ -309,6 +310,11 @@ def _startup() -> None:
settings.image_sweeper_cron,
lambda: image_sweeper.run(wait=True),
)
scheduler.add_task(
"schedule.metis_sentinel_watch",
settings.metis_sentinel_watch_cron,
lambda: metis.watch_sentinel(),
)
scheduler.add_task(
"schedule.vault_k8s_auth",
settings.vault_k8s_auth_cron,
@ -361,6 +367,7 @@ def _startup() -> None:
"pod_cleaner_cron": settings.pod_cleaner_cron,
"opensearch_prune_cron": settings.opensearch_prune_cron,
"image_sweeper_cron": settings.image_sweeper_cron,
"metis_sentinel_watch_cron": settings.metis_sentinel_watch_cron,
"vault_k8s_auth_cron": settings.vault_k8s_auth_cron,
"vault_oidc_cron": settings.vault_oidc_cron,
"comms_guest_name_cron": settings.comms_guest_name_cron,

106
ariadne/services/metis.py Normal file
View File

@ -0,0 +1,106 @@
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
import httpx
from ..settings import settings
from ..utils.logging import get_logger
# Module-level logger used for the "metis sentinel watch finished" records.
logger = get_logger(__name__)
# Path appended to ``settings.metis_base_url`` when no explicit
# ``settings.metis_watch_url`` is configured.
_WATCH_PATH = "/internal/sentinel/watch"
@dataclass(frozen=True)
class MetisSentinelWatchSummary:
    """Immutable record describing the outcome of one sentinel watch attempt."""

    # Outcome label; this module produces "ok", "skipped" or "error".
    status: str
    # Endpoint that was (or would have been) called; "" when none is configured.
    watch_url: str
    # Optional human-readable explanation of the outcome.
    detail: str = field(default="")
    # Normalized response payload; a fresh empty dict per instance by default.
    result: dict[str, Any] = field(default_factory=dict)
def _watch_url() -> str:
    """Resolve the sentinel watch endpoint from settings.

    An explicit ``settings.metis_watch_url`` wins. Otherwise the endpoint is
    ``settings.metis_base_url`` with the watch path appended. Returns ``""``
    when neither setting is populated.
    """
    explicit = settings.metis_watch_url
    if explicit:
        return explicit
    base = settings.metis_base_url
    return f"{base}{_WATCH_PATH}" if base else ""
def _normalize_payload(payload: Any) -> dict[str, Any]:
if isinstance(payload, dict):
return payload
if payload is None:
return {}
return {"result": payload}
class MetisService:
    """HTTP client that asks Metis to run its sentinel watch.

    Failures of the HTTP call are reported as an ``"error"``
    :class:`MetisSentinelWatchSummary` instead of being raised.
    """

    # Status labels passed through unchanged from a successful response.
    _KNOWN_STATUSES = frozenset({"ok", "skipped", "error"})

    def ready(self) -> bool:
        """Return True when a watch URL can be resolved from settings."""
        return bool(_watch_url())

    @staticmethod
    def _decode_json(response: Any) -> Any:
        """Return the decoded JSON body of *response*, or ``{}`` if decoding fails."""
        try:
            return response.json()
        except Exception:  # noqa: BLE001 - the body is best-effort context only
            return {}

    @staticmethod
    def _first_text(payload: dict[str, Any], *keys: str) -> str:
        """Return the first non-blank string found under *keys*, stripped; else ``""``."""
        for key in keys:
            value = payload.get(key)
            if isinstance(value, str) and value.strip():
                return value.strip()
        return ""

    def _finish(self, status: str, watch_url: str, detail: str = "", result: dict[str, Any] | None = None) -> MetisSentinelWatchSummary:
        """Build the summary for one attempt, log it, and return it.

        Args:
            status: Outcome label for the attempt.
            watch_url: Endpoint that was called ("" when none was configured).
            detail: Optional human-readable explanation.
            result: Normalized response payload; ``None`` is stored as ``{}``.
        """
        summary = MetisSentinelWatchSummary(
            status=status,
            watch_url=watch_url,
            detail=detail,
            result=result or {},
        )
        # Failed attempts are logged at warning level so they stand out from
        # routine scheduled runs.
        emit = logger.warning if summary.status == "error" else logger.info
        emit(
            "metis sentinel watch finished",
            extra={
                "event": "metis_sentinel_watch",
                "status": summary.status,
                "watch_url": summary.watch_url,
                "detail": summary.detail,
            },
        )
        return summary

    def watch_sentinel(self) -> MetisSentinelWatchSummary:
        """POST to the sentinel watch endpoint and summarize the response.

        Returns:
            A summary whose status is ``"skipped"`` when no URL is configured,
            ``"error"`` when the request fails or returns an HTTP error status,
            and otherwise the status reported by Metis (``"ok"`` when the
            response carries no recognised status).
        """
        watch_url = _watch_url()
        if not watch_url:
            return self._finish("skipped", "", "metis watch url not configured")

        try:
            with httpx.Client(timeout=settings.metis_timeout_sec, follow_redirects=True) as client:
                response = client.post(watch_url)
                response.raise_for_status()
                payload = self._decode_json(response)
        except httpx.HTTPStatusError as exc:
            failed = exc.response
            payload = _normalize_payload(self._decode_json(failed))
            # Prefer the server's own explanation; fall back to the status code.
            detail = (
                self._first_text(payload, "detail", "message")
                or f"metis watch failed with HTTP {failed.status_code}"
            )
            return self._finish("error", watch_url, detail, payload)
        except Exception as exc:  # noqa: BLE001 - any transport failure becomes an error summary
            return self._finish("error", watch_url, str(exc).strip() or "metis watch failed")

        payload = _normalize_payload(payload)
        reported = payload.get("status")
        # A missing, non-string or blank status is treated as success.
        status = reported.strip() if isinstance(reported, str) and reported.strip() else "ok"

        # Blank "detail"/"message" values are skipped so they cannot suppress
        # the fallback below (the HTTP-error path applies the same rule).
        detail = self._first_text(payload, "detail", "message")
        if not detail and status != "ok":
            detail = f"metis watch returned {status}"

        # NOTE(review): an unrecognised status is reported as "ok"; the raw
        # value only survives in the fallback detail and in ``result``.
        if status not in self._KNOWN_STATUSES:
            status = "ok"
        return self._finish(status, watch_url, detail, payload)
# Shared module-level instance; the app imports it to schedule ``watch_sentinel``.
metis = MetisService()

View File

@ -2,6 +2,7 @@ from __future__ import annotations
from dataclasses import dataclass
import os
from typing import Any
def _env(name: str, default: str = "") -> str:
@ -212,6 +213,10 @@ class Settings:
keycloak_profile_cron: str
cluster_state_cron: str
cluster_state_keep: int
metis_base_url: str
metis_watch_url: str
metis_timeout_sec: float
metis_sentinel_watch_cron: str
opensearch_url: str
opensearch_limit_bytes: int
@ -475,6 +480,15 @@ class Settings:
"cluster_state_keep": _env_int("ARIADNE_CLUSTER_STATE_KEEP", 168),
}
@classmethod
def _metis_config(cls) -> dict[str, Any]:
    """Read the Metis-related settings from the environment.

    Both URLs have trailing slashes removed. The base URL defaults to the
    in-cluster Metis service, the explicit watch URL to ``""``, the timeout
    to 10 seconds and the watch schedule to every 15 minutes.
    """
    base_url = _env("METIS_BASE_URL", "http://metis.maintenance.svc.cluster.local")
    watch_url = _env("METIS_WATCH_URL", "")
    timeout_sec = _env_float("METIS_TIMEOUT_SEC", 10.0)
    watch_cron = _env("ARIADNE_SCHEDULE_METIS_SENTINEL_WATCH", "*/15 * * * *")
    return dict(
        metis_base_url=base_url.rstrip("/"),
        metis_watch_url=watch_url.rstrip("/"),
        metis_timeout_sec=timeout_sec,
        metis_sentinel_watch_cron=watch_cron,
    )
@classmethod
def _opensearch_config(cls) -> dict[str, Any]:
return {
@ -502,6 +516,7 @@ class Settings:
vaultwarden_cfg = cls._vaultwarden_config()
schedule_cfg = cls._schedule_config()
cluster_cfg = cls._cluster_state_config()
metis_cfg = cls._metis_config()
opensearch_cfg = cls._opensearch_config()
portal_db = _env("PORTAL_DATABASE_URL", "")
@ -540,6 +555,7 @@ class Settings:
**vaultwarden_cfg,
**schedule_cfg,
**cluster_cfg,
**metis_cfg,
**opensearch_cfg,
)

6
tests/conftest.py Normal file
View File

@ -0,0 +1,6 @@
from __future__ import annotations
import os
# Force a placeholder portal database URL for the whole test session.
# NOTE(review): presumably needed before any ariadne module is imported,
# since settings read PORTAL_DATABASE_URL from the environment — confirm.
os.environ["PORTAL_DATABASE_URL"] = "postgresql://user:pass@localhost/db"

View File

@ -2,13 +2,10 @@ from __future__ import annotations
import dataclasses
from datetime import datetime, timezone
import os
from fastapi import HTTPException
from fastapi.testclient import TestClient
os.environ.setdefault("PORTAL_DATABASE_URL", "postgresql://user:pass@localhost/db")
from ariadne.auth.keycloak import AuthContext
import ariadne.app as app_module
@ -47,6 +44,26 @@ def test_startup_and_shutdown(monkeypatch) -> None:
app_module._shutdown()
def test_startup_registers_metis_watch(monkeypatch) -> None:
    """Startup must register the sentinel watch task with the scheduler."""
    registered = []

    def noop():
        return None

    def record_task(name, cron_expr, runner):
        registered.append((name, cron_expr))

    # Neutralize everything startup/shutdown would otherwise really start or close.
    for target, attr in (
        (app_module.provisioning, "start"),
        (app_module.scheduler, "start"),
        (app_module.scheduler, "stop"),
        (app_module.provisioning, "stop"),
        (app_module.portal_db, "close"),
        (app_module.ariadne_db, "close"),
    ):
        monkeypatch.setattr(target, attr, noop)
    monkeypatch.setattr(app_module.scheduler, "add_task", record_task)

    app_module._startup()

    registered_names = {name for name, _cron in registered}
    assert "schedule.metis_sentinel_watch" in registered_names
def test_record_event_handles_exception(monkeypatch) -> None:
monkeypatch.setattr(app_module.storage, "record_event", lambda *args, **kwargs: (_ for _ in ()).throw(RuntimeError("fail")))
app_module._record_event("event", {"ok": True})

113
tests/test_metis.py Normal file
View File

@ -0,0 +1,113 @@
from __future__ import annotations
from types import SimpleNamespace
import httpx
from ariadne.services import metis as metis_module
class DummyResponse:
    """Minimal stand-in for an HTTP response.

    ``payload`` is what ``json()`` returns; pass an exception instance to make
    ``json()`` raise it instead.
    """

    def __init__(self, status_code: int = 200, payload: object | None = None) -> None:
        self._payload = payload
        self.status_code = status_code

    def raise_for_status(self) -> None:
        """Raise ``httpx.HTTPStatusError`` when ``status_code`` is 400 or above."""
        if self.status_code < 400:
            return
        raise httpx.HTTPStatusError(
            "boom",
            request=httpx.Request("POST", "http://example.test"),
            response=self,
        )

    def json(self):
        """Return the canned payload, or raise it if it is an exception."""
        body = self._payload
        if isinstance(body, Exception):
            raise body
        return body
class DummyClient:
    """Context-manager stand-in for an HTTP client that records posted URLs."""

    def __init__(self, response: DummyResponse) -> None:
        self.kwargs = None
        self.calls: list[str] = []
        self.response = response

    def __enter__(self):
        """Enter the ``with`` block, yielding this same client."""
        return self

    def __exit__(self, exc_type, exc, tb):
        """Leave the ``with`` block without suppressing exceptions."""
        return False

    def post(self, url: str):
        """Record *url* and return the canned response."""
        self.calls.append(url)
        return self.response
def test_watch_sentinel_posts_to_derived_url(monkeypatch) -> None:
    """Without an explicit watch URL, the POST goes to base URL + watch path."""
    fake_settings = SimpleNamespace(
        metis_base_url="http://metis.maintenance.svc.cluster.local",
        metis_watch_url="",
        metis_timeout_sec=12.5,
    )
    monkeypatch.setattr("ariadne.services.metis.settings", fake_settings)

    stub = DummyClient(DummyResponse(payload={"status": "ok", "detail": "watched", "nodes": 21}))
    client_kwargs: dict[str, object] = {}

    def make_client(**kwargs):
        # Remember how the client was constructed so the timeout can be checked.
        client_kwargs.update(kwargs)
        return stub

    monkeypatch.setattr(metis_module.httpx, "Client", make_client)

    outcome = metis_module.MetisService().watch_sentinel()

    expected_url = "http://metis.maintenance.svc.cluster.local/internal/sentinel/watch"
    assert outcome.status == "ok"
    assert outcome.watch_url == expected_url
    assert outcome.detail == "watched"
    assert outcome.result["nodes"] == 21
    assert stub.calls == [outcome.watch_url]
    assert client_kwargs["timeout"] == 12.5
def test_watch_sentinel_uses_explicit_url(monkeypatch) -> None:
    """An explicit ``metis_watch_url`` takes precedence over the base URL."""
    monkeypatch.setattr(
        "ariadne.services.metis.settings",
        SimpleNamespace(
            metis_base_url="http://metis.maintenance.svc.cluster.local",
            metis_watch_url="http://metis.example/internal/sentinel/watch",
            metis_timeout_sec=10.0,
        ),
    )
    stub = DummyClient(DummyResponse(payload={"status": "ok"}))

    def make_client(**kwargs):
        return stub

    monkeypatch.setattr(metis_module.httpx, "Client", make_client)

    outcome = metis_module.MetisService().watch_sentinel()

    assert outcome.status == "ok"
    assert outcome.watch_url == "http://metis.example/internal/sentinel/watch"
    assert stub.calls == [outcome.watch_url]
def test_watch_sentinel_skips_when_unconfigured(monkeypatch) -> None:
    """With neither URL set, the watch is skipped and no endpoint is reported."""
    unconfigured = SimpleNamespace(metis_base_url="", metis_watch_url="", metis_timeout_sec=10.0)
    monkeypatch.setattr("ariadne.services.metis.settings", unconfigured)

    outcome = metis_module.MetisService().watch_sentinel()

    observed = (outcome.status, outcome.watch_url, outcome.detail)
    assert observed == ("skipped", "", "metis watch url not configured")
def test_watch_sentinel_handles_http_error(monkeypatch) -> None:
    """An HTTP error status yields an "error" summary carrying the body's detail."""
    monkeypatch.setattr(
        "ariadne.services.metis.settings",
        SimpleNamespace(
            metis_base_url="http://metis.maintenance.svc.cluster.local",
            metis_watch_url="",
            metis_timeout_sec=10.0,
        ),
    )
    failing = DummyClient(DummyResponse(status_code=502, payload={"detail": "upstream fail"}))

    def make_client(**kwargs):
        return failing

    monkeypatch.setattr(metis_module.httpx, "Client", make_client)

    outcome = metis_module.MetisService().watch_sentinel()

    assert outcome.status == "error"
    assert outcome.detail == "upstream fail"
    assert outcome.result["detail"] == "upstream fail"

View File

@ -1,6 +1,7 @@
from __future__ import annotations
from ariadne import settings as settings_module
from ariadne.settings import Settings
def test_env_int_invalid(monkeypatch) -> None:
@ -11,3 +12,16 @@ def test_env_int_invalid(monkeypatch) -> None:
def test_env_float_invalid(monkeypatch) -> None:
    """An unparseable float value falls back to the supplied default."""
    env_name = "ARIADNE_FLOAT_TEST"
    monkeypatch.setenv(env_name, "bad")
    parsed = settings_module._env_float(env_name, 1.5)
    assert parsed == 1.5
def test_from_env_includes_metis_settings(monkeypatch) -> None:
    """``Settings.from_env`` exposes the Metis values, trimming the base URL's trailing slash."""
    overrides = {
        "METIS_BASE_URL": "http://metis.maintenance.svc.cluster.local/",
        "METIS_WATCH_URL": "http://metis.example/internal/sentinel/watch",
        "METIS_TIMEOUT_SEC": "9.5",
        "ARIADNE_SCHEDULE_METIS_SENTINEL_WATCH": "*/7 * * * *",
    }
    for env_name, env_value in overrides.items():
        monkeypatch.setenv(env_name, env_value)

    loaded = Settings.from_env()

    assert loaded.metis_base_url == "http://metis.maintenance.svc.cluster.local"
    assert loaded.metis_watch_url == "http://metis.example/internal/sentinel/watch"
    assert loaded.metis_timeout_sec == 9.5
    assert loaded.metis_sentinel_watch_cron == "*/7 * * * *"