Compare commits
No commits in common. "963c42246b3ddd7d22736561af66825544e85f45" and "7f284007ebe85acc2ed98796d600a23ecf1f5445" have entirely different histories.
963c42246b
...
7f284007eb
41
README.md
41
README.md
@ -1,41 +0,0 @@
|
|||||||
# ariadne
|
|
||||||
|
|
||||||
Ariadne is the Atlas admin and account automation service.
|
|
||||||
|
|
||||||
It sits behind the portal and handles the jobs that are annoying or risky to do
|
|
||||||
by hand: approving access, syncing account state, rotating service passwords,
|
|
||||||
cleaning stale Kubernetes work, checking platform health, and keeping a few
|
|
||||||
service integrations lined up.
|
|
||||||
|
|
||||||
## How it works
|
|
||||||
|
|
||||||
Ariadne is a FastAPI service with a small scheduler. It talks to Keycloak,
|
|
||||||
Vault, Mailu, Nextcloud, Wger, Firefly, Jenkins, Metis, Kubernetes, and a few
|
|
||||||
Atlas-specific services through focused adapters under `ariadne/services/`.
|
|
||||||
|
|
||||||
The API is split between admin routes, account self-service routes, internal
|
|
||||||
event hooks, and Prometheus metrics. Background jobs store run history in the
|
|
||||||
Ariadne database so failures can be inspected later instead of vanishing into
|
|
||||||
logs.
|
|
||||||
|
|
||||||
Useful routes:
|
|
||||||
|
|
||||||
- `GET /health`
|
|
||||||
- `GET /metrics`
|
|
||||||
- `GET /api/admin/cluster/state`
|
|
||||||
- `POST /api/admin/access/requests/{username}/approve`
|
|
||||||
- `POST /api/account/mailu/rotate`
|
|
||||||
- `POST /api/account/wger/reset`
|
|
||||||
- `POST /api/account/firefly/reset`
|
|
||||||
- `POST /events`
|
|
||||||
|
|
||||||
## Development
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m pytest
|
|
||||||
ruff check .
|
|
||||||
```
|
|
||||||
|
|
||||||
Most runtime behavior is configured through environment variables in
|
|
||||||
`ariadne/settings.py`. Keep service-specific logic in the small adapter modules;
|
|
||||||
`ariadne/app.py` should stay focused on request flow and task orchestration.
|
|
||||||
@ -264,36 +264,6 @@ class Storage:
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
def list_schedule_states(self) -> list[ScheduleState]:
|
|
||||||
"""Return persisted scheduler state so metrics survive process restarts."""
|
|
||||||
|
|
||||||
rows = self._db.fetchall(
|
|
||||||
"""
|
|
||||||
SELECT task_name, cron_expr, last_started_at, last_finished_at, last_status,
|
|
||||||
last_error, last_duration_ms, next_run_at
|
|
||||||
FROM ariadne_schedule_state
|
|
||||||
"""
|
|
||||||
)
|
|
||||||
states: list[ScheduleState] = []
|
|
||||||
for row in rows:
|
|
||||||
task_name = row.get("task_name")
|
|
||||||
cron_expr = row.get("cron_expr")
|
|
||||||
if not isinstance(task_name, str) or not isinstance(cron_expr, str):
|
|
||||||
continue
|
|
||||||
states.append(
|
|
||||||
ScheduleState(
|
|
||||||
task_name=task_name,
|
|
||||||
cron_expr=cron_expr,
|
|
||||||
last_started_at=row.get("last_started_at"),
|
|
||||||
last_finished_at=row.get("last_finished_at"),
|
|
||||||
last_status=row.get("last_status"),
|
|
||||||
last_error=row.get("last_error"),
|
|
||||||
last_duration_ms=row.get("last_duration_ms"),
|
|
||||||
next_run_at=row.get("next_run_at"),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
return states
|
|
||||||
|
|
||||||
def record_cluster_state(self, snapshot: dict[str, Any]) -> None:
|
def record_cluster_state(self, snapshot: dict[str, Any]) -> None:
|
||||||
payload = json.dumps(snapshot, ensure_ascii=True)
|
payload = json.dumps(snapshot, ensure_ascii=True)
|
||||||
self._db.execute(
|
self._db.execute(
|
||||||
|
|||||||
@ -43,7 +43,6 @@ class CronScheduler:
|
|||||||
def start(self) -> None:
|
def start(self) -> None:
|
||||||
if self._thread and self._thread.is_alive():
|
if self._thread and self._thread.is_alive():
|
||||||
return
|
return
|
||||||
self._hydrate_schedule_metrics()
|
|
||||||
self._stop_event.clear()
|
self._stop_event.clear()
|
||||||
self._thread = threading.Thread(target=self._run_loop, name="ariadne-scheduler", daemon=True)
|
self._thread = threading.Thread(target=self._run_loop, name="ariadne-scheduler", daemon=True)
|
||||||
self._thread.start()
|
self._thread.start()
|
||||||
@ -86,40 +85,6 @@ class CronScheduler:
|
|||||||
)
|
)
|
||||||
time.sleep(self._tick_sec)
|
time.sleep(self._tick_sec)
|
||||||
|
|
||||||
def _hydrate_schedule_metrics(self) -> None:
|
|
||||||
try:
|
|
||||||
states = self._storage.list_schedule_states()
|
|
||||||
except AttributeError:
|
|
||||||
return
|
|
||||||
except Exception as exc:
|
|
||||||
self._logger.warning(
|
|
||||||
"schedule metric hydration failed",
|
|
||||||
extra={"event": "schedule_hydration_error", "detail": str(exc)},
|
|
||||||
)
|
|
||||||
return
|
|
||||||
|
|
||||||
known_tasks = set(self._tasks)
|
|
||||||
for state in states:
|
|
||||||
if state.task_name not in known_tasks:
|
|
||||||
continue
|
|
||||||
last_finished = state.last_finished_at or state.last_started_at
|
|
||||||
last_success = last_finished if state.last_status == "ok" else None
|
|
||||||
if state.last_status == "ok":
|
|
||||||
ok: bool | None = True
|
|
||||||
elif state.last_status == "error":
|
|
||||||
ok = False
|
|
||||||
else:
|
|
||||||
ok = None
|
|
||||||
record_schedule_state(
|
|
||||||
state.task_name,
|
|
||||||
state.last_started_at.timestamp() if state.last_started_at else None,
|
|
||||||
last_success.timestamp() if last_success else None,
|
|
||||||
self._next_run.get(state.task_name).timestamp()
|
|
||||||
if self._next_run.get(state.task_name)
|
|
||||||
else None,
|
|
||||||
ok,
|
|
||||||
)
|
|
||||||
|
|
||||||
def _execute_task(self, task: CronTask) -> None:
|
def _execute_task(self, task: CronTask) -> None:
|
||||||
started = datetime.now(timezone.utc)
|
started = datetime.now(timezone.utc)
|
||||||
status = "ok"
|
status = "ok"
|
||||||
|
|||||||
@ -4,7 +4,6 @@ from dataclasses import dataclass
|
|||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from ariadne.db.storage import ScheduleState
|
|
||||||
from ariadne.scheduler.cron import CronScheduler, CronTask
|
from ariadne.scheduler.cron import CronScheduler, CronTask
|
||||||
|
|
||||||
|
|
||||||
@ -23,9 +22,6 @@ class DummyStorage:
|
|||||||
def record_event(self, *args, **kwargs):
|
def record_event(self, *args, **kwargs):
|
||||||
self.events.append((args, kwargs))
|
self.events.append((args, kwargs))
|
||||||
|
|
||||||
def list_schedule_states(self):
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
def test_execute_task_records_failure() -> None:
|
def test_execute_task_records_failure() -> None:
|
||||||
storage = DummyStorage()
|
storage = DummyStorage()
|
||||||
@ -91,45 +87,6 @@ def test_scheduler_start_skips_when_running() -> None:
|
|||||||
assert scheduler._thread.started is False
|
assert scheduler._thread.started is False
|
||||||
|
|
||||||
|
|
||||||
def test_scheduler_start_hydrates_persisted_schedule_metrics(monkeypatch) -> None:
|
|
||||||
class HydratingStorage(DummyStorage):
|
|
||||||
def list_schedule_states(self):
|
|
||||||
finished = datetime(2026, 1, 1, 12, 0, tzinfo=timezone.utc)
|
|
||||||
return [
|
|
||||||
ScheduleState(
|
|
||||||
task_name="nightly",
|
|
||||||
cron_expr="30 4 * * *",
|
|
||||||
last_started_at=finished,
|
|
||||||
last_finished_at=finished,
|
|
||||||
last_status="ok",
|
|
||||||
last_error=None,
|
|
||||||
last_duration_ms=100,
|
|
||||||
next_run_at=None,
|
|
||||||
),
|
|
||||||
ScheduleState(
|
|
||||||
task_name="unknown",
|
|
||||||
cron_expr="* * * * *",
|
|
||||||
last_started_at=finished,
|
|
||||||
last_finished_at=finished,
|
|
||||||
last_status="ok",
|
|
||||||
last_error=None,
|
|
||||||
last_duration_ms=100,
|
|
||||||
next_run_at=None,
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
recorded = []
|
|
||||||
monkeypatch.setattr("ariadne.scheduler.cron.record_schedule_state", lambda *args: recorded.append(args))
|
|
||||||
|
|
||||||
scheduler = CronScheduler(HydratingStorage(), tick_sec=0.01)
|
|
||||||
scheduler.add_task("nightly", "30 4 * * *", lambda: None)
|
|
||||||
scheduler.start()
|
|
||||||
scheduler.stop()
|
|
||||||
|
|
||||||
assert any(item[0] == "nightly" and item[4] is True for item in recorded)
|
|
||||||
assert not any(item[0] == "unknown" for item in recorded)
|
|
||||||
|
|
||||||
|
|
||||||
def test_compute_next_handles_naive_timestamp() -> None:
|
def test_compute_next_handles_naive_timestamp() -> None:
|
||||||
scheduler = CronScheduler(DummyStorage(), tick_sec=0.1)
|
scheduler = CronScheduler(DummyStorage(), tick_sec=0.1)
|
||||||
base = datetime(2024, 1, 1, 12, 0, 0)
|
base = datetime(2024, 1, 1, 12, 0, 0)
|
||||||
|
|||||||
@ -345,31 +345,6 @@ def test_update_schedule_state_executes() -> None:
|
|||||||
assert db.executed
|
assert db.executed
|
||||||
|
|
||||||
|
|
||||||
def test_list_schedule_states_returns_valid_rows() -> None:
|
|
||||||
db = DummyDB()
|
|
||||||
now = datetime.now()
|
|
||||||
db.rows = [
|
|
||||||
{
|
|
||||||
"task_name": "schedule.nightly",
|
|
||||||
"cron_expr": "30 4 * * *",
|
|
||||||
"last_started_at": now,
|
|
||||||
"last_finished_at": now,
|
|
||||||
"last_status": "ok",
|
|
||||||
"last_error": None,
|
|
||||||
"last_duration_ms": 10,
|
|
||||||
"next_run_at": None,
|
|
||||||
},
|
|
||||||
{"task_name": None, "cron_expr": "bad"},
|
|
||||||
]
|
|
||||||
storage = Storage(db)
|
|
||||||
|
|
||||||
states = storage.list_schedule_states()
|
|
||||||
|
|
||||||
assert len(states) == 1
|
|
||||||
assert states[0].task_name == "schedule.nightly"
|
|
||||||
assert states[0].last_status == "ok"
|
|
||||||
|
|
||||||
|
|
||||||
def test_record_cluster_state_executes() -> None:
|
def test_record_cluster_state_executes() -> None:
|
||||||
db = DummyDB()
|
db = DummyDB()
|
||||||
storage = Storage(db)
|
storage = Storage(db)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user