Compare commits

..

2 Commits

Author SHA1 Message Date
2eecc3d88d metis: switch watcher to http 2026-03-31 14:18:31 -03:00
cf0271a8ea metis: add sentinel watch task 2026-03-31 14:07:02 -03:00
7 changed files with 282 additions and 3 deletions

View File

@ -25,6 +25,7 @@ from .services.mailu import mailu
from .services.mailu_events import mailu_events from .services.mailu_events import mailu_events
from .services.nextcloud import nextcloud from .services.nextcloud import nextcloud
from .services.image_sweeper import image_sweeper from .services.image_sweeper import image_sweeper
from .services.metis import metis
from .services.opensearch_prune import prune_indices from .services.opensearch_prune import prune_indices
from .services.pod_cleaner import clean_finished_pods from .services.pod_cleaner import clean_finished_pods
from .services.vaultwarden_sync import run_vaultwarden_sync from .services.vaultwarden_sync import run_vaultwarden_sync
@ -309,6 +310,11 @@ def _startup() -> None:
settings.image_sweeper_cron, settings.image_sweeper_cron,
lambda: image_sweeper.run(wait=True), lambda: image_sweeper.run(wait=True),
) )
scheduler.add_task(
"schedule.metis_sentinel_watch",
settings.metis_sentinel_watch_cron,
lambda: metis.watch_sentinel(),
)
scheduler.add_task( scheduler.add_task(
"schedule.vault_k8s_auth", "schedule.vault_k8s_auth",
settings.vault_k8s_auth_cron, settings.vault_k8s_auth_cron,
@ -361,6 +367,7 @@ def _startup() -> None:
"pod_cleaner_cron": settings.pod_cleaner_cron, "pod_cleaner_cron": settings.pod_cleaner_cron,
"opensearch_prune_cron": settings.opensearch_prune_cron, "opensearch_prune_cron": settings.opensearch_prune_cron,
"image_sweeper_cron": settings.image_sweeper_cron, "image_sweeper_cron": settings.image_sweeper_cron,
"metis_sentinel_watch_cron": settings.metis_sentinel_watch_cron,
"vault_k8s_auth_cron": settings.vault_k8s_auth_cron, "vault_k8s_auth_cron": settings.vault_k8s_auth_cron,
"vault_oidc_cron": settings.vault_oidc_cron, "vault_oidc_cron": settings.vault_oidc_cron,
"comms_guest_name_cron": settings.comms_guest_name_cron, "comms_guest_name_cron": settings.comms_guest_name_cron,

106
ariadne/services/metis.py Normal file
View File

@ -0,0 +1,106 @@
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
import httpx
from ..settings import settings
from ..utils.logging import get_logger
# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)
# Path appended to the configured Metis base URL when no explicit watch URL is set.
_WATCH_PATH = "/internal/sentinel/watch"
@dataclass(frozen=True)
class MetisSentinelWatchSummary:
    """Immutable record describing the outcome of one sentinel watch attempt."""

    # Outcome label for the attempt (for example "ok", "skipped" or "error").
    status: str
    # URL the watch request targeted; empty when no URL could be resolved.
    watch_url: str
    # Optional human-readable explanation of the outcome.
    detail: str = ""
    # Response payload as a dict; every instance gets its own empty dict by default.
    result: dict[str, Any] = field(default_factory=dict)
def _watch_url() -> str:
    """Resolve the sentinel watch endpoint from settings.

    An explicit ``settings.metis_watch_url`` wins. Otherwise the URL is built
    by appending ``_WATCH_PATH`` to ``settings.metis_base_url``.

    Returns:
        The resolved URL, or an empty string when neither setting is populated.
    """
    explicit = settings.metis_watch_url
    if explicit:
        return explicit
    base = settings.metis_base_url
    return f"{base}{_WATCH_PATH}" if base else ""
def _normalize_payload(payload: Any) -> dict[str, Any]:
if isinstance(payload, dict):
return payload
if payload is None:
return {}
return {"result": payload}
class MetisService:
    """HTTP client wrapper that triggers the Metis sentinel watch endpoint.

    Every call to :meth:`watch_sentinel` returns a
    :class:`MetisSentinelWatchSummary` and logs one structured record.
    Failures are reported through the summary rather than raised.
    """

    # Status values passed through unchanged from a successful response.
    _KNOWN_STATUSES = frozenset({"ok", "skipped", "error"})

    def ready(self) -> bool:
        """Return True when a watch URL can be resolved from settings."""
        return bool(_watch_url())

    @staticmethod
    def _first_text(payload: dict[str, Any], *keys: str) -> str:
        """Return the first non-blank string stored under ``keys``, stripped.

        Non-string and whitespace-only values are skipped, so a blank
        ``detail`` cannot hide a usable ``message``.

        Returns:
            The stripped text, or ``""`` when no key holds usable text.
        """
        for key in keys:
            value = payload.get(key)
            if isinstance(value, str) and value.strip():
                return value.strip()
        return ""

    @staticmethod
    def _json_or_empty(response: Any) -> Any:
        """Decode the response body as JSON, falling back to ``{}`` on failure."""
        try:
            return response.json()
        except Exception:  # noqa: BLE001 - best effort; a bad body must not fail the watch
            return {}

    def _finish(
        self,
        status: str,
        watch_url: str,
        detail: str = "",
        result: dict[str, Any] | None = None,
    ) -> MetisSentinelWatchSummary:
        """Build the summary, log it, and return it.

        Args:
            status: Outcome label for the attempt.
            watch_url: URL that was targeted (empty when unconfigured).
            detail: Optional human-readable explanation.
            result: Optional payload; ``None`` or an empty dict is stored as ``{}``.

        Returns:
            The constructed summary.
        """
        summary = MetisSentinelWatchSummary(
            status=status,
            watch_url=watch_url,
            detail=detail,
            result=result or {},
        )
        logger.info(
            "metis sentinel watch finished",
            extra={
                "event": "metis_sentinel_watch",
                "status": summary.status,
                "watch_url": summary.watch_url,
                "detail": summary.detail,
            },
        )
        return summary

    def watch_sentinel(self) -> MetisSentinelWatchSummary:
        """POST to the sentinel watch endpoint and summarize the outcome.

        Returns:
            A summary whose status is ``"skipped"`` when no URL is configured,
            ``"error"`` when the request fails or the server answers with an
            HTTP error status, and otherwise the status reported by the
            response payload. A missing, non-string or unrecognized payload
            status is reported as ``"ok"``.
        """
        watch_url = _watch_url()
        if not watch_url:
            return self._finish("skipped", "", "metis watch url not configured")

        try:
            with httpx.Client(timeout=settings.metis_timeout_sec, follow_redirects=True) as client:
                response = client.post(watch_url)
                response.raise_for_status()
                payload = self._json_or_empty(response)
        except httpx.HTTPStatusError as exc:
            response = exc.response
            payload = _normalize_payload(self._json_or_empty(response))
            # Prefer the server-provided detail over the generic HTTP status text.
            detail = (
                self._first_text(payload, "detail")
                or f"metis watch failed with HTTP {response.status_code}"
            )
            return self._finish("error", watch_url, detail, payload)
        except Exception as exc:  # noqa: BLE001
            return self._finish("error", watch_url, str(exc).strip() or "metis watch failed")

        payload = _normalize_payload(payload)
        reported = payload.get("status")
        status = reported if isinstance(reported, str) else "ok"

        # A blank "detail" falls through to "message", and then to the generic
        # text for non-ok statuses, matching how the HTTP-error path treats
        # blank details.
        detail = self._first_text(payload, "detail", "message")
        if not detail and status != "ok":
            detail = f"metis watch returned {status}"

        # Unrecognized statuses are reported as "ok"; the detail text built
        # above still carries the original value.
        if status not in self._KNOWN_STATUSES:
            status = "ok"
        return self._finish(status, watch_url, detail, payload)
# Shared module-level instance; the app imports it via `from .services.metis import metis`.
metis = MetisService()

View File

@ -2,6 +2,7 @@ from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
import os import os
from typing import Any
def _env(name: str, default: str = "") -> str: def _env(name: str, default: str = "") -> str:
@ -212,6 +213,10 @@ class Settings:
keycloak_profile_cron: str keycloak_profile_cron: str
cluster_state_cron: str cluster_state_cron: str
cluster_state_keep: int cluster_state_keep: int
metis_base_url: str
metis_watch_url: str
metis_timeout_sec: float
metis_sentinel_watch_cron: str
opensearch_url: str opensearch_url: str
opensearch_limit_bytes: int opensearch_limit_bytes: int
@ -475,6 +480,15 @@ class Settings:
"cluster_state_keep": _env_int("ARIADNE_CLUSTER_STATE_KEEP", 168), "cluster_state_keep": _env_int("ARIADNE_CLUSTER_STATE_KEEP", 168),
} }
@classmethod
def _metis_config(cls) -> dict[str, Any]:
    """Read the Metis-related settings from the environment.

    Trailing slashes are removed from both URLs.

    Returns:
        A dict with the keys ``metis_base_url``, ``metis_watch_url``,
        ``metis_timeout_sec`` and ``metis_sentinel_watch_cron``.
    """
    base_url = _env("METIS_BASE_URL", "http://metis.maintenance.svc.cluster.local")
    watch_url = _env("METIS_WATCH_URL", "")
    timeout_sec = _env_float("METIS_TIMEOUT_SEC", 10.0)
    watch_cron = _env("ARIADNE_SCHEDULE_METIS_SENTINEL_WATCH", "*/15 * * * *")
    return {
        "metis_base_url": base_url.rstrip("/"),
        "metis_watch_url": watch_url.rstrip("/"),
        "metis_timeout_sec": timeout_sec,
        "metis_sentinel_watch_cron": watch_cron,
    }
@classmethod @classmethod
def _opensearch_config(cls) -> dict[str, Any]: def _opensearch_config(cls) -> dict[str, Any]:
return { return {
@ -502,6 +516,7 @@ class Settings:
vaultwarden_cfg = cls._vaultwarden_config() vaultwarden_cfg = cls._vaultwarden_config()
schedule_cfg = cls._schedule_config() schedule_cfg = cls._schedule_config()
cluster_cfg = cls._cluster_state_config() cluster_cfg = cls._cluster_state_config()
metis_cfg = cls._metis_config()
opensearch_cfg = cls._opensearch_config() opensearch_cfg = cls._opensearch_config()
portal_db = _env("PORTAL_DATABASE_URL", "") portal_db = _env("PORTAL_DATABASE_URL", "")
@ -540,6 +555,7 @@ class Settings:
**vaultwarden_cfg, **vaultwarden_cfg,
**schedule_cfg, **schedule_cfg,
**cluster_cfg, **cluster_cfg,
**metis_cfg,
**opensearch_cfg, **opensearch_cfg,
) )

6
tests/conftest.py Normal file
View File

@ -0,0 +1,6 @@
from __future__ import annotations
import os
# Placeholder database URL for the whole test session.
# NOTE(review): presumably this must be set before any ariadne module is imported — confirm.
os.environ["PORTAL_DATABASE_URL"] = "postgresql://user:pass@localhost/db"

View File

@ -2,13 +2,10 @@ from __future__ import annotations
import dataclasses import dataclasses
from datetime import datetime, timezone from datetime import datetime, timezone
import os
from fastapi import HTTPException from fastapi import HTTPException
from fastapi.testclient import TestClient from fastapi.testclient import TestClient
os.environ.setdefault("PORTAL_DATABASE_URL", "postgresql://user:pass@localhost/db")
from ariadne.auth.keycloak import AuthContext from ariadne.auth.keycloak import AuthContext
import ariadne.app as app_module import ariadne.app as app_module
@ -47,6 +44,26 @@ def test_startup_and_shutdown(monkeypatch) -> None:
app_module._shutdown() app_module._shutdown()
def test_startup_registers_metis_watch(monkeypatch) -> None:
    """Startup must register the metis sentinel watch task with the scheduler."""
    registered: list[tuple[str, str]] = []

    def record_task(name, cron_expr, runner):
        # Only the name and cron expression matter for this test.
        registered.append((name, cron_expr))

    # Neutralize every start/stop/close hook so startup has no side effects.
    for owner, hook in (
        (app_module.provisioning, "start"),
        (app_module.scheduler, "start"),
        (app_module.scheduler, "stop"),
        (app_module.provisioning, "stop"),
        (app_module.portal_db, "close"),
        (app_module.ariadne_db, "close"),
    ):
        monkeypatch.setattr(owner, hook, lambda: None)
    monkeypatch.setattr(app_module.scheduler, "add_task", record_task)

    app_module._startup()

    task_names = [name for name, _cron in registered]
    assert "schedule.metis_sentinel_watch" in task_names
def test_record_event_handles_exception(monkeypatch) -> None: def test_record_event_handles_exception(monkeypatch) -> None:
monkeypatch.setattr(app_module.storage, "record_event", lambda *args, **kwargs: (_ for _ in ()).throw(RuntimeError("fail"))) monkeypatch.setattr(app_module.storage, "record_event", lambda *args, **kwargs: (_ for _ in ()).throw(RuntimeError("fail")))
app_module._record_event("event", {"ok": True}) app_module._record_event("event", {"ok": True})

113
tests/test_metis.py Normal file
View File

@ -0,0 +1,113 @@
from __future__ import annotations
from types import SimpleNamespace
import httpx
from ariadne.services import metis as metis_module
class DummyResponse:
    """Minimal stand-in for an HTTP response object used by the metis tests."""

    def __init__(self, status_code: int = 200, payload: object | None = None) -> None:
        self.status_code = status_code
        # An Exception instance here makes json() raise it.
        self._payload = payload

    def raise_for_status(self) -> None:
        """Raise ``httpx.HTTPStatusError`` for status codes of 400 and above."""
        if self.status_code < 400:
            return
        request = httpx.Request("POST", "http://example.test")
        raise httpx.HTTPStatusError("boom", request=request, response=self)

    def json(self):
        """Return the stored payload, or raise it when it is an exception."""
        body = self._payload
        if isinstance(body, Exception):
            raise body
        return body
class DummyClient:
    """Context-manager stub that records POSTed URLs and returns a canned response."""

    def __init__(self, response: DummyResponse) -> None:
        self.response = response
        self.calls: list[str] = []
        self.kwargs = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        # Returning False lets exceptions raised inside the ``with`` body propagate.
        return False

    def post(self, url: str):
        """Record ``url`` and hand back the canned response."""
        self.calls.append(url)
        return self.response
def test_watch_sentinel_posts_to_derived_url(monkeypatch) -> None:
    """Without an explicit watch URL, the base URL plus watch path is POSTed."""
    fake_settings = SimpleNamespace(
        metis_base_url="http://metis.maintenance.svc.cluster.local",
        metis_watch_url="",
        metis_timeout_sec=12.5,
    )
    monkeypatch.setattr("ariadne.services.metis.settings", fake_settings)

    stub_client = DummyClient(
        DummyResponse(payload={"status": "ok", "detail": "watched", "nodes": 21})
    )
    client_kwargs: dict[str, object] = {}

    def build_client(**kwargs):
        # Capture the constructor arguments so the timeout can be asserted.
        client_kwargs.update(kwargs)
        return stub_client

    monkeypatch.setattr(metis_module.httpx, "Client", build_client)

    outcome = metis_module.MetisService().watch_sentinel()

    expected_url = "http://metis.maintenance.svc.cluster.local/internal/sentinel/watch"
    assert outcome.status == "ok"
    assert outcome.watch_url == expected_url
    assert outcome.detail == "watched"
    assert outcome.result["nodes"] == 21
    assert stub_client.calls == [outcome.watch_url]
    assert client_kwargs["timeout"] == 12.5
def test_watch_sentinel_uses_explicit_url(monkeypatch) -> None:
    """An explicit watch URL takes precedence over the derived one."""
    explicit_url = "http://metis.example/internal/sentinel/watch"
    fake_settings = SimpleNamespace(
        metis_base_url="http://metis.maintenance.svc.cluster.local",
        metis_watch_url=explicit_url,
        metis_timeout_sec=10.0,
    )
    monkeypatch.setattr("ariadne.services.metis.settings", fake_settings)

    stub_client = DummyClient(DummyResponse(payload={"status": "ok"}))
    monkeypatch.setattr(metis_module.httpx, "Client", lambda **kwargs: stub_client)

    outcome = metis_module.MetisService().watch_sentinel()

    assert outcome.status == "ok"
    assert outcome.watch_url == "http://metis.example/internal/sentinel/watch"
    assert stub_client.calls == [outcome.watch_url]
def test_watch_sentinel_skips_when_unconfigured(monkeypatch) -> None:
    """With neither URL configured, the watch is skipped."""
    unconfigured = SimpleNamespace(
        metis_base_url="",
        metis_watch_url="",
        metis_timeout_sec=10.0,
    )
    monkeypatch.setattr("ariadne.services.metis.settings", unconfigured)

    outcome = metis_module.MetisService().watch_sentinel()

    assert outcome.status == "skipped"
    assert outcome.watch_url == ""
    assert outcome.detail == "metis watch url not configured"
def test_watch_sentinel_handles_http_error(monkeypatch) -> None:
    """An HTTP error status yields an error summary carrying the server's detail."""
    fake_settings = SimpleNamespace(
        metis_base_url="http://metis.maintenance.svc.cluster.local",
        metis_watch_url="",
        metis_timeout_sec=10.0,
    )
    monkeypatch.setattr("ariadne.services.metis.settings", fake_settings)

    failing_response = DummyResponse(status_code=502, payload={"detail": "upstream fail"})
    stub_client = DummyClient(failing_response)
    monkeypatch.setattr(metis_module.httpx, "Client", lambda **kwargs: stub_client)

    outcome = metis_module.MetisService().watch_sentinel()

    assert outcome.status == "error"
    assert outcome.detail == "upstream fail"
    assert outcome.result["detail"] == "upstream fail"

View File

@ -1,6 +1,7 @@
from __future__ import annotations from __future__ import annotations
from ariadne import settings as settings_module from ariadne import settings as settings_module
from ariadne.settings import Settings
def test_env_int_invalid(monkeypatch) -> None: def test_env_int_invalid(monkeypatch) -> None:
@ -11,3 +12,16 @@ def test_env_int_invalid(monkeypatch) -> None:
def test_env_float_invalid(monkeypatch) -> None: def test_env_float_invalid(monkeypatch) -> None:
monkeypatch.setenv("ARIADNE_FLOAT_TEST", "bad") monkeypatch.setenv("ARIADNE_FLOAT_TEST", "bad")
assert settings_module._env_float("ARIADNE_FLOAT_TEST", 1.5) == 1.5 assert settings_module._env_float("ARIADNE_FLOAT_TEST", 1.5) == 1.5
def test_from_env_includes_metis_settings(monkeypatch) -> None:
    """Settings.from_env picks up the Metis variables and strips the base URL's trailing slash."""
    overrides = {
        "METIS_BASE_URL": "http://metis.maintenance.svc.cluster.local/",
        "METIS_WATCH_URL": "http://metis.example/internal/sentinel/watch",
        "METIS_TIMEOUT_SEC": "9.5",
        "ARIADNE_SCHEDULE_METIS_SENTINEL_WATCH": "*/7 * * * *",
    }
    for name, value in overrides.items():
        monkeypatch.setenv(name, value)

    loaded = Settings.from_env()

    assert loaded.metis_base_url == "http://metis.maintenance.svc.cluster.local"
    assert loaded.metis_watch_url == "http://metis.example/internal/sentinel/watch"
    assert loaded.metis_timeout_sec == 9.5
    assert loaded.metis_sentinel_watch_cron == "*/7 * * * *"