ariadne: add scheduled jenkins workspace pvc cleanup

This commit is contained in:
Brad Stein 2026-04-12 04:49:25 -03:00
parent aa7098efad
commit 1dcc37e8a7
5 changed files with 408 additions and 0 deletions

View File

@ -25,6 +25,7 @@ from .services.mailu import mailu
from .services.mailu_events import mailu_events
from .services.nextcloud import nextcloud
from .services.image_sweeper import image_sweeper
from .services.jenkins_workspace_cleanup import cleanup_jenkins_workspace_storage
from .services.metis import metis
from .services.metis_token_sync import metis_token_sync
from .services.opensearch_prune import prune_indices
@ -327,6 +328,11 @@ def _startup() -> None:
settings.platform_quality_suite_probe_cron,
lambda: platform_quality_probe.run(wait=True),
)
scheduler.add_task(
"schedule.jenkins_workspace_cleanup",
settings.jenkins_workspace_cleanup_cron,
cleanup_jenkins_workspace_storage,
)
scheduler.add_task(
"schedule.vault_k8s_auth",
settings.vault_k8s_auth_cron,
@ -382,6 +388,7 @@ def _startup() -> None:
"metis_sentinel_watch_cron": settings.metis_sentinel_watch_cron,
"metis_k3s_token_sync_cron": settings.metis_k3s_token_sync_cron,
"platform_quality_suite_probe_cron": settings.platform_quality_suite_probe_cron,
"jenkins_workspace_cleanup_cron": settings.jenkins_workspace_cleanup_cron,
"vault_k8s_auth_cron": settings.vault_k8s_auth_cron,
"vault_oidc_cron": settings.vault_oidc_cron,
"comms_guest_name_cron": settings.comms_guest_name_cron,

View File

@ -0,0 +1,232 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from typing import Any
from ..k8s.client import delete_json, get_json
from ..settings import settings
from ..utils.logging import get_logger
logger = get_logger(__name__)
@dataclass(frozen=True)
class JenkinsWorkspaceCleanupSummary:
    """Counters describing one Jenkins workspace-storage cleanup pass.

    Instances are immutable (``frozen=True``). They are produced by
    ``cleanup_jenkins_workspace_storage`` after it has walked the stale
    PVCs, the releasable PVs and the Longhorn volume list.

    Inputs: Kubernetes PV/PVC/Longhorn objects fetched from the API server.
    Outputs: deterministic counters for operator logs and metrics.
    """

    # PersistentVolume objects whose delete call returned without raising.
    pvs_deleted: int
    # PersistentVolumeClaim objects whose delete call returned without raising.
    pvcs_deleted: int
    # Longhorn volume objects whose delete call returned without raising.
    volumes_deleted: int
    # Objects passed over because they carried no usable ``metadata.name``.
    skipped: int
    # Delete calls (of any of the three kinds) that raised an exception.
    failures: int
def _parse_timestamp(raw: str) -> datetime | None:
"""Parse Kubernetes RFC3339 timestamps into timezone-aware datetimes."""
normalized = raw.replace("Z", "+00:00")
try:
return datetime.fromisoformat(normalized)
except ValueError:
return None
def _is_old_enough(metadata: dict[str, Any]) -> bool:
    """Tell whether an object has outlived the configured minimum age.

    The age is measured from ``metadata["creationTimestamp"]`` to the current
    UTC time and compared against
    ``settings.jenkins_workspace_cleanup_min_age_hours``.

    NOTE(review): an object whose timestamp is missing, empty, not a string,
    or unparseable is reported as old enough, i.e. eligible for cleanup --
    confirm that failing open is intended for a destructive job.
    """
    stamp = metadata.get("creationTimestamp")
    born = _parse_timestamp(stamp) if isinstance(stamp, str) and stamp else None
    if born is None:
        return True
    threshold = timedelta(hours=settings.jenkins_workspace_cleanup_min_age_hours)
    age = datetime.now(timezone.utc) - born
    return age >= threshold
def _active_workspace_claims() -> set[str]:
    """Return the workspace PVC names that pods currently mount.

    Lists every pod in ``settings.jenkins_workspace_namespace`` and gathers
    the ``claimName`` of each ``persistentVolumeClaim`` volume whose name
    starts with ``settings.jenkins_workspace_pvc_prefix``. Malformed entries
    (non-dict pods, specs, volumes or claims) are ignored.
    """
    ns = settings.jenkins_workspace_namespace
    claim_prefix = settings.jenkins_workspace_pvc_prefix
    response = get_json(f"/api/v1/namespaces/{ns}/pods")
    pods = response.get("items")
    if not isinstance(pods, list):
        pods = []

    in_use: set[str] = set()
    for pod in pods:
        if not isinstance(pod, dict):
            continue
        pod_spec = pod.get("spec")
        pod_volumes = pod_spec.get("volumes") if isinstance(pod_spec, dict) else None
        if not isinstance(pod_volumes, list):
            continue
        for entry in pod_volumes:
            source = entry.get("persistentVolumeClaim") if isinstance(entry, dict) else None
            referenced = source.get("claimName") if isinstance(source, dict) else None
            if isinstance(referenced, str) and referenced.startswith(claim_prefix):
                in_use.add(referenced)
    return in_use
def _workspace_pv_candidates(active_claims: set[str]) -> tuple[list[dict[str, Any]], set[str]]:
    """Select deletable Jenkins workspace PVs and record every PV name seen.

    A PV is selected when its ``claimRef`` points at a prefixed claim in the
    Jenkins workspace namespace, that claim is not in ``active_claims``, the
    PV phase is ``Released`` or ``Failed``, and the PV passes the age check.

    Returns:
        ``(candidates, all_pv_names)`` where ``all_pv_names`` holds the name
        of every listed PV, selected or not.
    """
    ns = settings.jenkins_workspace_namespace
    claim_prefix = settings.jenkins_workspace_pvc_prefix
    response = get_json("/api/v1/persistentvolumes")
    listed = response.get("items")
    if not isinstance(listed, list):
        listed = []

    releasable: list[dict[str, Any]] = []
    known_names: set[str] = set()
    for volume in listed:
        if not isinstance(volume, dict):
            continue
        meta, state, pv_spec = (
            section if isinstance(section, dict) else {}
            for section in (volume.get("metadata"), volume.get("status"), volume.get("spec"))
        )

        # Every named PV is recorded, even if it is filtered out below.
        pv_name = meta.get("name")
        if isinstance(pv_name, str) and pv_name:
            known_names.add(pv_name)

        bound_to = pv_spec.get("claimRef")
        if not isinstance(bound_to, dict):
            bound_to = {}
        owner = bound_to.get("name")

        # The age check goes last so it only runs for otherwise-matching PVs.
        is_stale = (
            bound_to.get("namespace") == ns
            and isinstance(owner, str)
            and owner.startswith(claim_prefix)
            and owner not in active_claims
            and state.get("phase") in {"Released", "Failed"}
            and _is_old_enough(meta)
        )
        if is_stale:
            releasable.append(volume)
    return releasable, known_names
def _workspace_pvc_candidates(active_claims: set[str]) -> list[dict[str, Any]]:
    """Select Jenkins workspace PVCs that are safe to remove.

    A PVC qualifies when its name carries the workspace prefix, no pod
    references it (it is absent from ``active_claims``), its phase is anything
    other than ``Bound``, and it passes the age check.
    """
    ns = settings.jenkins_workspace_namespace
    claim_prefix = settings.jenkins_workspace_pvc_prefix
    response = get_json(f"/api/v1/namespaces/{ns}/persistentvolumeclaims")
    listed = response.get("items")
    if not isinstance(listed, list):
        listed = []

    def _is_stale(entry: Any) -> bool:
        if not isinstance(entry, dict):
            return False
        meta = entry.get("metadata")
        if not isinstance(meta, dict):
            meta = {}
        state = entry.get("status")
        if not isinstance(state, dict):
            state = {}
        name = meta.get("name")
        if not (isinstance(name, str) and name.startswith(claim_prefix)):
            return False
        if name in active_claims or state.get("phase") == "Bound":
            return False
        return _is_old_enough(meta)

    return [entry for entry in listed if _is_stale(entry)]
def _metadata_of(obj: dict[str, Any]) -> dict[str, Any]:
    """Return ``obj["metadata"]`` when it is a dict, otherwise an empty dict."""
    metadata = obj.get("metadata")
    return metadata if isinstance(metadata, dict) else {}


def _try_delete(path: str, message: str, **context: str) -> bool:
    """Issue one DELETE against the API server and report whether it succeeded.

    A failure is logged at WARNING level (with ``context`` merged into the
    structured ``extra`` payload) instead of being raised, so one bad object
    does not abort the rest of the cleanup pass.
    """
    try:
        delete_json(path)
    except Exception as exc:  # boundary around the Kubernetes API client
        logger.warning(
            message,
            extra={"event": "jenkins_workspace_cleanup", **context, "detail": str(exc)},
        )
        return False
    return True


def cleanup_jenkins_workspace_storage() -> JenkinsWorkspaceCleanupSummary:
    """Delete stale Jenkins workspace PVC/PV artifacts and orphan Longhorn volumes.

    The pass runs in three stages:

    1. Delete workspace PVCs that no pod references and that are not ``Bound``.
    2. Delete workspace PVs in phase ``Released``/``Failed`` whose claim is
       not in use.
    3. Delete Longhorn volumes that either share a name with a PV removed in
       stage 2, or are labelled as created for a workspace PVC while no PV of
       that name was listed.

    Every candidate must also pass the minimum-age check. Individual delete
    failures are logged and counted rather than raised; a failure while
    *listing* objects still propagates to the caller.

    Returns:
        A ``JenkinsWorkspaceCleanupSummary`` with the per-kind delete counts,
        the number of unnamed objects skipped, and the number of failed
        delete calls.
    """
    namespace = settings.jenkins_workspace_namespace
    prefix = settings.jenkins_workspace_pvc_prefix
    pvs_deleted = 0
    pvcs_deleted = 0
    volumes_deleted = 0
    skipped = 0
    failures = 0

    active_claims = _active_workspace_claims()
    stale_pvs, all_pv_names = _workspace_pv_candidates(active_claims)
    stale_pvcs = _workspace_pvc_candidates(active_claims)
    removed_pv_names: set[str] = set()

    for pvc in stale_pvcs:
        claim_name = _metadata_of(pvc).get("name")
        if not isinstance(claim_name, str) or not claim_name:
            skipped += 1
            continue
        if _try_delete(
            f"/api/v1/namespaces/{namespace}/persistentvolumeclaims/{claim_name}",
            "jenkins workspace pvc delete failed",
            claim=claim_name,
        ):
            pvcs_deleted += 1
        else:
            failures += 1

    for pv in stale_pvs:
        pv_name = _metadata_of(pv).get("name")
        if not isinstance(pv_name, str) or not pv_name:
            skipped += 1
            continue
        if _try_delete(
            f"/api/v1/persistentvolumes/{pv_name}",
            "jenkins workspace pv delete failed",
            pv=pv_name,
        ):
            # Remember the name so the backing Longhorn volume is removed too.
            removed_pv_names.add(pv_name)
            pvs_deleted += 1
        else:
            failures += 1

    payload = get_json("/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes")
    items = payload.get("items") if isinstance(payload.get("items"), list) else []
    for volume in items:
        if not isinstance(volume, dict):
            continue
        metadata = _metadata_of(volume)
        name = metadata.get("name")
        if not isinstance(name, str) or not name:
            skipped += 1
            continue

        if name not in removed_pv_names:
            # Not tied to a PV deleted above: only treat it as an orphan when
            # it is labelled for a workspace claim and no PV carries its name.
            labels = metadata.get("labels") if isinstance(metadata.get("labels"), dict) else {}
            pvc_name = labels.get("kubernetes.io/created-for/pvc/name")
            # When the companion namespace label is present it must match the
            # Jenkins namespace; otherwise a same-prefix claim belonging to
            # another namespace could be swept up. Absent label keeps the
            # previous behaviour.
            pvc_namespace = labels.get("kubernetes.io/created-for/pvc/namespace")
            is_orphan = (
                isinstance(pvc_name, str)
                and pvc_name.startswith(prefix)
                and name not in all_pv_names
                and pvc_namespace in (None, namespace)
            )
            if not is_orphan:
                continue

        if not _is_old_enough(metadata):
            continue
        if _try_delete(
            f"/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes/{name}",
            "jenkins workspace longhorn volume delete failed",
            volume=name,
        ):
            volumes_deleted += 1
        else:
            failures += 1

    return JenkinsWorkspaceCleanupSummary(
        pvs_deleted=pvs_deleted,
        pvcs_deleted=pvcs_deleted,
        volumes_deleted=volumes_deleted,
        skipped=skipped,
        failures=failures,
    )

View File

@ -168,6 +168,9 @@ class Settings:
platform_quality_probe_wait_timeout_sec: float
platform_quality_probe_pushgateway_url: str
platform_quality_probe_http_timeout_sec: int
jenkins_workspace_namespace: str
jenkins_workspace_pvc_prefix: str
jenkins_workspace_cleanup_min_age_hours: float
vaultwarden_namespace: str
vaultwarden_pod_label: str
@ -234,6 +237,7 @@ class Settings:
metis_token_sync_vault_k8s_role: str
metis_k3s_token_sync_cron: str
platform_quality_suite_probe_cron: str
jenkins_workspace_cleanup_cron: str
opensearch_url: str
opensearch_limit_bytes: int
@ -459,6 +463,14 @@ class Settings:
"platform_quality_probe_http_timeout_sec": _env_int("PLATFORM_QUALITY_PROBE_HTTP_TIMEOUT_SECONDS", 12),
}
@classmethod
def _jenkins_workspace_cleanup_config(cls) -> dict[str, Any]:
    """Read the Jenkins workspace cleanup settings from the environment.

    Each value falls back to the default passed to ``_env`` / ``_env_float``
    when the corresponding variable is not provided.
    """
    namespace = _env("JENKINS_WORKSPACE_NAMESPACE", "jenkins")
    pvc_prefix = _env("JENKINS_WORKSPACE_PVC_PREFIX", "pvc-workspace-")
    min_age_hours = _env_float("JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS", 12.0)
    return {
        "jenkins_workspace_namespace": namespace,
        "jenkins_workspace_pvc_prefix": pvc_prefix,
        "jenkins_workspace_cleanup_min_age_hours": min_age_hours,
    }
@classmethod
def _vaultwarden_config(cls) -> dict[str, Any]:
return {
@ -505,6 +517,10 @@ class Settings:
"ARIADNE_SCHEDULE_PLATFORM_QUALITY_SUITE_PROBE",
"*/15 * * * *",
),
"jenkins_workspace_cleanup_cron": _env(
"ARIADNE_SCHEDULE_JENKINS_WORKSPACE_CLEANUP",
"45 */6 * * *",
),
}
@classmethod
@ -565,6 +581,7 @@ class Settings:
comms_cfg = cls._comms_config()
image_cfg = cls._image_sweeper_config()
platform_quality_probe_cfg = cls._platform_quality_probe_config()
jenkins_workspace_cleanup_cfg = cls._jenkins_workspace_cleanup_config()
vaultwarden_cfg = cls._vaultwarden_config()
schedule_cfg = cls._schedule_config()
cluster_cfg = cls._cluster_state_config()
@ -605,6 +622,7 @@ class Settings:
**comms_cfg,
**image_cfg,
**platform_quality_probe_cfg,
**jenkins_workspace_cleanup_cfg,
**vaultwarden_cfg,
**schedule_cfg,
**cluster_cfg,

View File

@ -64,6 +64,7 @@ def test_startup_registers_metis_watch(monkeypatch) -> None:
assert any(name == "schedule.metis_sentinel_watch" for name, _cron in tasks)
assert any(name == "schedule.metis_k3s_token_sync" for name, _cron in tasks)
assert any(name == "schedule.platform_quality_suite_probe" for name, _cron in tasks)
assert any(name == "schedule.jenkins_workspace_cleanup" for name, _cron in tasks)
def test_record_event_handles_exception(monkeypatch) -> None:

View File

@ -0,0 +1,150 @@
from __future__ import annotations
from datetime import datetime, timezone
import types
from ariadne.services import jenkins_workspace_cleanup as cleanup_module
def test_cleanup_jenkins_workspace_storage(monkeypatch) -> None:
    """Happy path: only old, unreferenced workspace storage is deleted.

    The fixtures contain one stale, one pod-referenced and one freshly created
    object of each kind, plus an orphan Longhorn volume with no matching PV.
    """
    monkeypatch.setattr(
        cleanup_module,
        "settings",
        types.SimpleNamespace(
            jenkins_workspace_namespace="jenkins",
            jenkins_workspace_pvc_prefix="pvc-workspace-",
            jenkins_workspace_cleanup_min_age_hours=1.0,
        ),
    )

    fresh = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    aged = "2020-01-01T00:00:00Z"

    def claim(name: str, created: str, phase: str) -> dict:
        return {
            "metadata": {"name": name, "creationTimestamp": created},
            "status": {"phase": phase},
        }

    def released_pv(name: str, created: str, claim_name: str) -> dict:
        return {
            "metadata": {"name": name, "creationTimestamp": created},
            "status": {"phase": "Released"},
            "spec": {"claimRef": {"namespace": "jenkins", "name": claim_name}},
        }

    def labelled_volume(name: str, created: str, claim_name: str) -> dict:
        return {
            "metadata": {
                "name": name,
                "creationTimestamp": created,
                "labels": {"kubernetes.io/created-for/pvc/name": claim_name},
            }
        }

    responses = {
        "/api/v1/namespaces/jenkins/pods": {
            "items": [
                {
                    "spec": {
                        "volumes": [
                            {"persistentVolumeClaim": {"claimName": "pvc-workspace-active"}},
                        ]
                    }
                }
            ]
        },
        "/api/v1/namespaces/jenkins/persistentvolumeclaims": {
            "items": [
                claim("pvc-workspace-stale", aged, "Lost"),
                claim("pvc-workspace-active", aged, "Bound"),
                claim("pvc-workspace-fresh", fresh, "Lost"),
            ]
        },
        "/api/v1/persistentvolumes": {
            "items": [
                released_pv("pvc-old", aged, "pvc-workspace-stale"),
                released_pv("pvc-active", aged, "pvc-workspace-active"),
                released_pv("pvc-fresh", fresh, "pvc-workspace-fresh"),
            ]
        },
        "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes": {
            "items": [
                {"metadata": {"name": "pvc-old", "creationTimestamp": aged}},
                labelled_volume("pvc-orphan", aged, "pvc-workspace-orphan"),
                labelled_volume("pvc-orphan-fresh", fresh, "pvc-workspace-fresh"),
            ]
        },
    }
    deleted_paths: list[str] = []

    def fake_get_json(path: str):
        if path not in responses:
            raise AssertionError(f"unexpected path: {path}")
        return responses[path]

    def fake_delete_json(path: str):
        deleted_paths.append(path)
        return {"status": "Success"}

    monkeypatch.setattr(cleanup_module, "get_json", fake_get_json)
    monkeypatch.setattr(cleanup_module, "delete_json", fake_delete_json)

    summary = cleanup_module.cleanup_jenkins_workspace_storage()

    assert summary.pvcs_deleted == 1
    assert summary.pvs_deleted == 1
    assert summary.volumes_deleted == 2
    assert summary.failures == 0
    for expected in (
        "/api/v1/namespaces/jenkins/persistentvolumeclaims/pvc-workspace-stale",
        "/api/v1/persistentvolumes/pvc-old",
        "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes/pvc-old",
        "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes/pvc-orphan",
    ):
        assert expected in deleted_paths
def test_cleanup_jenkins_workspace_storage_failure(monkeypatch) -> None:
    """A delete call that raises is counted as a failure, not a deletion."""
    monkeypatch.setattr(
        cleanup_module,
        "settings",
        types.SimpleNamespace(
            jenkins_workspace_namespace="jenkins",
            jenkins_workspace_pvc_prefix="pvc-workspace-",
            jenkins_workspace_cleanup_min_age_hours=1.0,
        ),
    )

    stale_claim = {
        "metadata": {"name": "pvc-workspace-stale", "creationTimestamp": "2020-01-01T00:00:00Z"},
        "status": {"phase": "Lost"},
    }
    responses = {
        "/api/v1/namespaces/jenkins/pods": {"items": []},
        "/api/v1/namespaces/jenkins/persistentvolumeclaims": {"items": [stale_claim]},
        "/api/v1/persistentvolumes": {"items": []},
        "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes": {"items": []},
    }

    def fake_get_json(path: str):
        if path not in responses:
            raise AssertionError(f"unexpected path: {path}")
        return responses[path]

    def fake_delete_json(_path: str):
        raise RuntimeError("boom")

    monkeypatch.setattr(cleanup_module, "get_json", fake_get_json)
    monkeypatch.setattr(cleanup_module, "delete_json", fake_delete_json)

    summary = cleanup_module.cleanup_jenkins_workspace_storage()

    assert summary.failures == 1
    assert summary.pvcs_deleted == 0