From 1dcc37e8a7dfc795fb277844ac2c5bb5bb00355e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 12 Apr 2026 04:49:25 -0300 Subject: [PATCH] ariadne: add scheduled jenkins workspace pvc cleanup --- ariadne/app.py | 7 + ariadne/services/jenkins_workspace_cleanup.py | 232 ++++++++++++++++++ ariadne/settings.py | 18 ++ tests/test_app.py | 1 + tests/test_jenkins_workspace_cleanup.py | 150 +++++++++++ 5 files changed, 408 insertions(+) create mode 100644 ariadne/services/jenkins_workspace_cleanup.py create mode 100644 tests/test_jenkins_workspace_cleanup.py diff --git a/ariadne/app.py b/ariadne/app.py index 0805c60..5e9ecb1 100644 --- a/ariadne/app.py +++ b/ariadne/app.py @@ -25,6 +25,7 @@ from .services.mailu import mailu from .services.mailu_events import mailu_events from .services.nextcloud import nextcloud from .services.image_sweeper import image_sweeper +from .services.jenkins_workspace_cleanup import cleanup_jenkins_workspace_storage from .services.metis import metis from .services.metis_token_sync import metis_token_sync from .services.opensearch_prune import prune_indices @@ -327,6 +328,11 @@ def _startup() -> None: settings.platform_quality_suite_probe_cron, lambda: platform_quality_probe.run(wait=True), ) + scheduler.add_task( + "schedule.jenkins_workspace_cleanup", + settings.jenkins_workspace_cleanup_cron, + cleanup_jenkins_workspace_storage, + ) scheduler.add_task( "schedule.vault_k8s_auth", settings.vault_k8s_auth_cron, @@ -382,6 +388,7 @@ def _startup() -> None: "metis_sentinel_watch_cron": settings.metis_sentinel_watch_cron, "metis_k3s_token_sync_cron": settings.metis_k3s_token_sync_cron, "platform_quality_suite_probe_cron": settings.platform_quality_suite_probe_cron, + "jenkins_workspace_cleanup_cron": settings.jenkins_workspace_cleanup_cron, "vault_k8s_auth_cron": settings.vault_k8s_auth_cron, "vault_oidc_cron": settings.vault_oidc_cron, "comms_guest_name_cron": settings.comms_guest_name_cron, diff --git 
a/ariadne/services/jenkins_workspace_cleanup.py b/ariadne/services/jenkins_workspace_cleanup.py new file mode 100644 index 0000000..bf8d1b3 --- /dev/null +++ b/ariadne/services/jenkins_workspace_cleanup.py @@ -0,0 +1,232 @@ +from __future__ import annotations + +from dataclasses import dataclass +from datetime import datetime, timedelta, timezone +from typing import Any + +from ..k8s.client import delete_json, get_json +from ..settings import settings +from ..utils.logging import get_logger + + +logger = get_logger(__name__) + + +@dataclass(frozen=True) +class JenkinsWorkspaceCleanupSummary: + """Summarize one Jenkins workspace-storage cleanup pass. + + Inputs: Kubernetes PV/PVC/Longhorn objects fetched from the API server. + Outputs: deterministic counters for operator logs and metrics. + """ + + pvs_deleted: int + pvcs_deleted: int + volumes_deleted: int + skipped: int + failures: int + + +def _parse_timestamp(raw: str) -> datetime | None: + """Parse Kubernetes RFC3339 timestamps into timezone-aware datetimes.""" + + normalized = raw.replace("Z", "+00:00") + try: + return datetime.fromisoformat(normalized) + except ValueError: + return None + + +def _is_old_enough(metadata: dict[str, Any]) -> bool: + """Return true when an object age exceeds the configured cleanup threshold.""" + + raw = metadata.get("creationTimestamp") + if not isinstance(raw, str) or not raw: + return True + created_at = _parse_timestamp(raw) + if created_at is None: + return True + min_age = timedelta(hours=settings.jenkins_workspace_cleanup_min_age_hours) + return datetime.now(timezone.utc) - created_at >= min_age + + +def _active_workspace_claims() -> set[str]: + """Collect currently referenced Jenkins workspace PVC names from pods.""" + + namespace = settings.jenkins_workspace_namespace + prefix = settings.jenkins_workspace_pvc_prefix + payload = get_json(f"/api/v1/namespaces/{namespace}/pods") + items = payload.get("items") if isinstance(payload.get("items"), list) else [] + active: 
set[str] = set() + for pod in items: + if not isinstance(pod, dict): + continue + spec = pod.get("spec") if isinstance(pod.get("spec"), dict) else {} + volumes = spec.get("volumes") if isinstance(spec.get("volumes"), list) else [] + for volume in volumes: + if not isinstance(volume, dict): + continue + claim = volume.get("persistentVolumeClaim") + if not isinstance(claim, dict): + continue + claim_name = claim.get("claimName") + if isinstance(claim_name, str) and claim_name.startswith(prefix): + active.add(claim_name) + return active + + +def _workspace_pv_candidates(active_claims: set[str]) -> tuple[list[dict[str, Any]], set[str]]: + """Find releasable Jenkins workspace PVs and keep a set of all PV names.""" + + namespace = settings.jenkins_workspace_namespace + prefix = settings.jenkins_workspace_pvc_prefix + payload = get_json("/api/v1/persistentvolumes") + items = payload.get("items") if isinstance(payload.get("items"), list) else [] + candidates: list[dict[str, Any]] = [] + all_pv_names: set[str] = set() + + for pv in items: + if not isinstance(pv, dict): + continue + metadata = pv.get("metadata") if isinstance(pv.get("metadata"), dict) else {} + status = pv.get("status") if isinstance(pv.get("status"), dict) else {} + spec = pv.get("spec") if isinstance(pv.get("spec"), dict) else {} + name = metadata.get("name") + if isinstance(name, str) and name: + all_pv_names.add(name) + + claim_ref = spec.get("claimRef") if isinstance(spec.get("claimRef"), dict) else {} + claim_namespace = claim_ref.get("namespace") + claim_name = claim_ref.get("name") + phase = status.get("phase") + if claim_namespace != namespace: + continue + if not isinstance(claim_name, str) or not claim_name.startswith(prefix): + continue + if claim_name in active_claims: + continue + if phase not in {"Released", "Failed"}: + continue + if not _is_old_enough(metadata): + continue + candidates.append(pv) + return candidates, all_pv_names + + +def _workspace_pvc_candidates(active_claims: set[str]) -> 
list[dict[str, Any]]: + """Find stale Jenkins workspace PVCs that are not actively referenced.""" + + namespace = settings.jenkins_workspace_namespace + prefix = settings.jenkins_workspace_pvc_prefix + payload = get_json(f"/api/v1/namespaces/{namespace}/persistentvolumeclaims") + items = payload.get("items") if isinstance(payload.get("items"), list) else [] + candidates: list[dict[str, Any]] = [] + + for pvc in items: + if not isinstance(pvc, dict): + continue + metadata = pvc.get("metadata") if isinstance(pvc.get("metadata"), dict) else {} + status = pvc.get("status") if isinstance(pvc.get("status"), dict) else {} + claim_name = metadata.get("name") + phase = status.get("phase") + if not isinstance(claim_name, str) or not claim_name.startswith(prefix): + continue + if claim_name in active_claims: + continue + if phase == "Bound": + continue + if not _is_old_enough(metadata): + continue + candidates.append(pvc) + return candidates + + +def cleanup_jenkins_workspace_storage() -> JenkinsWorkspaceCleanupSummary: + """Delete stale Jenkins workspace PVC/PV artifacts and orphan Longhorn volumes.""" + + namespace = settings.jenkins_workspace_namespace + prefix = settings.jenkins_workspace_pvc_prefix + pvs_deleted = 0 + pvcs_deleted = 0 + volumes_deleted = 0 + skipped = 0 + failures = 0 + + active_claims = _active_workspace_claims() + stale_pvs, all_pv_names = _workspace_pv_candidates(active_claims) + stale_pvcs = _workspace_pvc_candidates(active_claims) + removed_pv_names: set[str] = set() + + for pvc in stale_pvcs: + metadata = pvc.get("metadata") if isinstance(pvc.get("metadata"), dict) else {} + claim_name = metadata.get("name") + if not isinstance(claim_name, str) or not claim_name: + skipped += 1 + continue + try: + delete_json(f"/api/v1/namespaces/{namespace}/persistentvolumeclaims/{claim_name}") + pvcs_deleted += 1 + except Exception as exc: + failures += 1 + logger.info( + "jenkins workspace pvc delete failed", + extra={"event": "jenkins_workspace_cleanup", 
"claim": claim_name, "detail": str(exc)}, + ) + + for pv in stale_pvs: + metadata = pv.get("metadata") if isinstance(pv.get("metadata"), dict) else {} + pv_name = metadata.get("name") + if not isinstance(pv_name, str) or not pv_name: + skipped += 1 + continue + try: + delete_json(f"/api/v1/persistentvolumes/{pv_name}") + removed_pv_names.add(pv_name) + pvs_deleted += 1 + except Exception as exc: + failures += 1 + logger.info( + "jenkins workspace pv delete failed", + extra={"event": "jenkins_workspace_cleanup", "pv": pv_name, "detail": str(exc)}, + ) + + payload = get_json("/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes") + items = payload.get("items") if isinstance(payload.get("items"), list) else [] + for volume in items: + if not isinstance(volume, dict): + continue + metadata = volume.get("metadata") if isinstance(volume.get("metadata"), dict) else {} + name = metadata.get("name") + if not isinstance(name, str) or not name: + skipped += 1 + continue + should_delete = name in removed_pv_names + if not should_delete: + labels = metadata.get("labels") if isinstance(metadata.get("labels"), dict) else {} + pvc_name = labels.get("kubernetes.io/created-for/pvc/name") + should_delete = ( + isinstance(pvc_name, str) + and pvc_name.startswith(prefix) + and name not in all_pv_names + ) + if not should_delete: + continue + if not _is_old_enough(metadata): + continue + try: + delete_json(f"/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes/{name}") + volumes_deleted += 1 + except Exception as exc: + failures += 1 + logger.info( + "jenkins workspace longhorn volume delete failed", + extra={"event": "jenkins_workspace_cleanup", "volume": name, "detail": str(exc)}, + ) + + return JenkinsWorkspaceCleanupSummary( + pvs_deleted=pvs_deleted, + pvcs_deleted=pvcs_deleted, + volumes_deleted=volumes_deleted, + skipped=skipped, + failures=failures, + ) diff --git a/ariadne/settings.py b/ariadne/settings.py index c17a1dd..239837b 100644 --- a/ariadne/settings.py 
+++ b/ariadne/settings.py @@ -168,6 +168,9 @@ class Settings: platform_quality_probe_wait_timeout_sec: float platform_quality_probe_pushgateway_url: str platform_quality_probe_http_timeout_sec: int + jenkins_workspace_namespace: str + jenkins_workspace_pvc_prefix: str + jenkins_workspace_cleanup_min_age_hours: float vaultwarden_namespace: str vaultwarden_pod_label: str @@ -234,6 +237,7 @@ class Settings: metis_token_sync_vault_k8s_role: str metis_k3s_token_sync_cron: str platform_quality_suite_probe_cron: str + jenkins_workspace_cleanup_cron: str opensearch_url: str opensearch_limit_bytes: int @@ -459,6 +463,14 @@ class Settings: "platform_quality_probe_http_timeout_sec": _env_int("PLATFORM_QUALITY_PROBE_HTTP_TIMEOUT_SECONDS", 12), } + @classmethod + def _jenkins_workspace_cleanup_config(cls) -> dict[str, Any]: + return { + "jenkins_workspace_namespace": _env("JENKINS_WORKSPACE_NAMESPACE", "jenkins"), + "jenkins_workspace_pvc_prefix": _env("JENKINS_WORKSPACE_PVC_PREFIX", "pvc-workspace-"), + "jenkins_workspace_cleanup_min_age_hours": _env_float("JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS", 12.0), + } + @classmethod def _vaultwarden_config(cls) -> dict[str, Any]: return { @@ -505,6 +517,10 @@ class Settings: "ARIADNE_SCHEDULE_PLATFORM_QUALITY_SUITE_PROBE", "*/15 * * * *", ), + "jenkins_workspace_cleanup_cron": _env( + "ARIADNE_SCHEDULE_JENKINS_WORKSPACE_CLEANUP", + "45 */6 * * *", + ), } @classmethod @@ -565,6 +581,7 @@ class Settings: comms_cfg = cls._comms_config() image_cfg = cls._image_sweeper_config() platform_quality_probe_cfg = cls._platform_quality_probe_config() + jenkins_workspace_cleanup_cfg = cls._jenkins_workspace_cleanup_config() vaultwarden_cfg = cls._vaultwarden_config() schedule_cfg = cls._schedule_config() cluster_cfg = cls._cluster_state_config() @@ -605,6 +622,7 @@ class Settings: **comms_cfg, **image_cfg, **platform_quality_probe_cfg, + **jenkins_workspace_cleanup_cfg, **vaultwarden_cfg, **schedule_cfg, **cluster_cfg, diff --git a/tests/test_app.py 
"""Tests for the scheduled Jenkins workspace PVC/PV/Longhorn cleanup.

The cleanup module's Kubernetes client (``get_json``/``delete_json``) and its
``settings`` object are monkeypatched, so no API server is needed.
"""

from __future__ import annotations

from datetime import datetime, timezone
import types

from ariadne.services import jenkins_workspace_cleanup as cleanup_module


def test_cleanup_jenkins_workspace_storage(monkeypatch) -> None:
    """Happy path: stale PVC/PV and orphan Longhorn volumes are deleted.

    Fixture layout: one PVC actively mounted by a pod, one stale, one too
    fresh; PVs mirroring those claims; Longhorn volumes covering the
    removed-PV case, an orphan, and a too-fresh orphan.
    """

    # Minimal stand-in for the real settings object: only the three fields
    # the cleanup module reads.
    dummy_settings = types.SimpleNamespace(
        jenkins_workspace_namespace="jenkins",
        jenkins_workspace_pvc_prefix="pvc-workspace-",
        jenkins_workspace_cleanup_min_age_hours=1.0,
    )
    monkeypatch.setattr(cleanup_module, "settings", dummy_settings)

    # "now" (younger than the 1h threshold) vs. a clearly ancient timestamp.
    now_iso = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    old_iso = "2020-01-01T00:00:00Z"
    deleted_paths: list[str] = []

    def fake_get_json(path: str):
        # One pod mounts pvc-workspace-active, marking that claim as in use.
        if path == "/api/v1/namespaces/jenkins/pods":
            return {
                "items": [
                    {
                        "spec": {
                            "volumes": [
                                {"persistentVolumeClaim": {"claimName": "pvc-workspace-active"}},
                            ]
                        }
                    }
                ]
            }
        # Stale (Lost+old) is deletable; active is Bound; fresh is too young.
        if path == "/api/v1/namespaces/jenkins/persistentvolumeclaims":
            return {
                "items": [
                    {
                        "metadata": {"name": "pvc-workspace-stale", "creationTimestamp": old_iso},
                        "status": {"phase": "Lost"},
                    },
                    {
                        "metadata": {"name": "pvc-workspace-active", "creationTimestamp": old_iso},
                        "status": {"phase": "Bound"},
                    },
                    {
                        "metadata": {"name": "pvc-workspace-fresh", "creationTimestamp": now_iso},
                        "status": {"phase": "Lost"},
                    },
                ]
            }
        # Only pvc-old qualifies: Released, old, claim not actively mounted.
        if path == "/api/v1/persistentvolumes":
            return {
                "items": [
                    {
                        "metadata": {"name": "pvc-old", "creationTimestamp": old_iso},
                        "status": {"phase": "Released"},
                        "spec": {"claimRef": {"namespace": "jenkins", "name": "pvc-workspace-stale"}},
                    },
                    {
                        "metadata": {"name": "pvc-active", "creationTimestamp": old_iso},
                        "status": {"phase": "Released"},
                        "spec": {"claimRef": {"namespace": "jenkins", "name": "pvc-workspace-active"}},
                    },
                    {
                        "metadata": {"name": "pvc-fresh", "creationTimestamp": now_iso},
                        "status": {"phase": "Released"},
                        "spec": {"claimRef": {"namespace": "jenkins", "name": "pvc-workspace-fresh"}},
                    },
                ]
            }
        # Longhorn: pvc-old matches the PV removed above; pvc-orphan is
        # labelled for a workspace PVC with no PV left; pvc-orphan-fresh is
        # an orphan but too young to reap.
        if path == "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes":
            return {
                "items": [
                    {"metadata": {"name": "pvc-old", "creationTimestamp": old_iso}},
                    {
                        "metadata": {
                            "name": "pvc-orphan",
                            "creationTimestamp": old_iso,
                            "labels": {
                                "kubernetes.io/created-for/pvc/name": "pvc-workspace-orphan",
                            },
                        }
                    },
                    {
                        "metadata": {
                            "name": "pvc-orphan-fresh",
                            "creationTimestamp": now_iso,
                            "labels": {
                                "kubernetes.io/created-for/pvc/name": "pvc-workspace-fresh",
                            },
                        }
                    },
                ]
            }
        raise AssertionError(f"unexpected path: {path}")

    def fake_delete_json(path: str):
        # Record every delete so the assertions below can inspect exact URLs.
        deleted_paths.append(path)
        return {"status": "Success"}

    monkeypatch.setattr(cleanup_module, "get_json", fake_get_json)
    monkeypatch.setattr(cleanup_module, "delete_json", fake_delete_json)

    summary = cleanup_module.cleanup_jenkins_workspace_storage()

    # Exactly one stale PVC, one releasable PV, and two Longhorn volumes
    # (the removed PV's backing volume plus the old orphan) are deleted.
    assert summary.pvcs_deleted == 1
    assert summary.pvs_deleted == 1
    assert summary.volumes_deleted == 2
    assert summary.failures == 0
    assert "/api/v1/namespaces/jenkins/persistentvolumeclaims/pvc-workspace-stale" in deleted_paths
    assert "/api/v1/persistentvolumes/pvc-old" in deleted_paths
    assert "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes/pvc-old" in deleted_paths
    assert "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes/pvc-orphan" in deleted_paths


def test_cleanup_jenkins_workspace_storage_failure(monkeypatch) -> None:
    """Delete errors are swallowed, counted as failures, and not re-raised."""

    dummy_settings = types.SimpleNamespace(
        jenkins_workspace_namespace="jenkins",
        jenkins_workspace_pvc_prefix="pvc-workspace-",
        jenkins_workspace_cleanup_min_age_hours=1.0,
    )
    monkeypatch.setattr(cleanup_module, "settings", dummy_settings)

    def fake_get_json(path: str):
        # A single stale PVC; every delete attempt will blow up below.
        if path == "/api/v1/namespaces/jenkins/pods":
            return {"items": []}
        if path == "/api/v1/namespaces/jenkins/persistentvolumeclaims":
            return {
                "items": [
                    {
                        "metadata": {"name": "pvc-workspace-stale", "creationTimestamp": "2020-01-01T00:00:00Z"},
                        "status": {"phase": "Lost"},
                    }
                ]
            }
        if path == "/api/v1/persistentvolumes":
            return {"items": []}
        if path == "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes":
            return {"items": []}
        raise AssertionError(f"unexpected path: {path}")

    def fake_delete_json(_path: str):
        raise RuntimeError("boom")

    monkeypatch.setattr(cleanup_module, "get_json", fake_get_json)
    monkeypatch.setattr(cleanup_module, "delete_json", fake_delete_json)

    summary = cleanup_module.cleanup_jenkins_workspace_storage()
    # The exception is absorbed: counted once, with nothing deleted.
    assert summary.failures == 1
    assert summary.pvcs_deleted == 0