diff --git a/ariadne/app.py b/ariadne/app.py index 67492b0..26fde42 100644 --- a/ariadne/app.py +++ b/ariadne/app.py @@ -390,6 +390,7 @@ def _startup() -> None: "platform_quality_suite_probe_cron": settings.platform_quality_suite_probe_cron, "jenkins_workspace_cleanup_cron": settings.jenkins_workspace_cleanup_cron, "jenkins_workspace_cleanup_dry_run": settings.jenkins_workspace_cleanup_dry_run, + "jenkins_workspace_cleanup_max_deletions_per_run": settings.jenkins_workspace_cleanup_max_deletions_per_run, "vault_k8s_auth_cron": settings.vault_k8s_auth_cron, "vault_oidc_cron": settings.vault_oidc_cron, "comms_guest_name_cron": settings.comms_guest_name_cron, diff --git a/ariadne/services/jenkins_workspace_cleanup.py b/ariadne/services/jenkins_workspace_cleanup.py index c731ab9..aecca73 100644 --- a/ariadne/services/jenkins_workspace_cleanup.py +++ b/ariadne/services/jenkins_workspace_cleanup.py @@ -120,6 +120,11 @@ def _is_old_enough(metadata: dict[str, Any]) -> bool: return datetime.now(timezone.utc) - created_at >= min_age +def _is_deleting(metadata: dict[str, Any]) -> bool: + deletion_ts = metadata.get("deletionTimestamp") + return isinstance(deletion_ts, str) and bool(deletion_ts.strip()) + + def _is_workspace_name(name: Any) -> bool: return isinstance(name, str) and name.startswith(settings.jenkins_workspace_pvc_prefix) @@ -128,7 +133,6 @@ def _active_workspace_claims() -> set[str]: """Collect currently referenced Jenkins workspace PVC names from pods.""" namespace = settings.jenkins_workspace_namespace - prefix = settings.jenkins_workspace_pvc_prefix payload = get_json(f"/api/v1/namespaces/{namespace}/pods") items = payload.get("items") if isinstance(payload.get("items"), list) else [] active: set[str] = set() @@ -158,7 +162,6 @@ def _workspace_pv_candidates(active_claims: set[str]) -> tuple[list[_CleanupCand """Find releasable Jenkins workspace PVs and keep a set of all PV names.""" namespace = settings.jenkins_workspace_namespace - prefix = settings.jenkins_workspace_pvc_prefix payload = get_json("/api/v1/persistentvolumes") items = payload.get("items") if isinstance(payload.get("items"), list) else [] candidates: list[_CleanupCandidate] = [] @@ -182,6 +185,8 @@ def _workspace_pv_candidates(active_claims: set[str]) -> tuple[list[_CleanupCand continue if not _is_workspace_name(claim_name): continue + if _is_deleting(metadata): + continue if claim_name in active_claims: continue if phase not in {"Released", "Failed"}: @@ -219,6 +224,8 @@ def _workspace_pvc_candidates(active_claims: set[str]) -> list[_CleanupCandidate phase = status.get("phase") if not _is_workspace_name(claim_name): continue + if _is_deleting(metadata): + continue if claim_name in active_claims: continue if phase == "Bound": @@ -267,6 +274,8 @@ def _workspace_longhorn_candidates(all_pv_names: set[str], removed_pv_names: set should_delete = True if not should_delete: continue + if _is_deleting(metadata): + continue if not _is_old_enough(metadata): continue if state not in {None, "detached", "faulted", "unknown"}: @@ -334,6 +343,8 @@ def cleanup_jenkins_workspace_storage() -> JenkinsWorkspaceCleanupSummary: namespace = settings.jenkins_workspace_namespace dry_run = settings.jenkins_workspace_cleanup_dry_run + max_deletions = settings.jenkins_workspace_cleanup_max_deletions_per_run + prefix = settings.jenkins_workspace_pvc_prefix.strip() pvs_deleted = 0 pvcs_deleted = 0 volumes_deleted = 0 @@ -345,10 +356,21 @@ def cleanup_jenkins_workspace_storage() -> JenkinsWorkspaceCleanupSummary: summary: JenkinsWorkspaceCleanupSummary try: + if not namespace.strip(): + raise ValueError("jenkins workspace cleanup namespace is empty") + if not prefix: + raise ValueError("jenkins workspace cleanup pvc prefix is empty") + if settings.jenkins_workspace_cleanup_min_age_hours < 1.0: + raise ValueError("jenkins workspace cleanup min age must be >= 1 hour") + if max_deletions < 1: + raise ValueError("jenkins workspace cleanup max deletions must be >= 1") + active_claims = _active_workspace_claims() stale_pvs, all_pv_names = _workspace_pv_candidates(active_claims) stale_pvcs = _workspace_pvc_candidates(active_claims) removed_pv_names: set[str] = set() + stale_volumes = _workspace_longhorn_candidates(all_pv_names, removed_pv_names) + planned_total = len(stale_pvs) + len(stale_pvcs) + len(stale_volumes) if dry_run: logger.info( @@ -360,8 +382,39 @@ def cleanup_jenkins_workspace_storage() -> JenkinsWorkspaceCleanupSummary: "dry_run": True, "planned_pvs": len(stale_pvs), "planned_pvcs": len(stale_pvcs), + "planned_volumes": len(stale_volumes), + "max_deletions": max_deletions, }, ) + elif planned_total > max_deletions: + failures += 1 + logger.warning( + "jenkins workspace cleanup blocked by max deletions guard", + extra={ + "event": "jenkins_workspace_cleanup", + "status": "guard_blocked", + "namespace": namespace, + "dry_run": False, + "planned_total": planned_total, + "max_deletions": max_deletions, + "planned_pvs": len(stale_pvs), + "planned_pvcs": len(stale_pvcs), + "planned_volumes": len(stale_volumes), + }, + ) + summary = JenkinsWorkspaceCleanupSummary( + pvs_planned=len(stale_pvs), + pvcs_planned=len(stale_pvcs), + volumes_planned=len(stale_volumes), + pvs_deleted=0, + pvcs_deleted=0, + volumes_deleted=0, + skipped=skipped, + failures=failures, + dry_run=dry_run, + ) + _record_metrics(summary) + return summary for pvc in stale_pvcs: claim_name = pvc.name @@ -369,7 +422,6 @@ def cleanup_jenkins_workspace_storage() -> JenkinsWorkspaceCleanupSummary: skipped += 1 continue if dry_run: - pvcs_deleted += 1 continue try: delete_json(pvc.path) @@ -387,8 +439,6 @@ def cleanup_jenkins_workspace_storage() -> JenkinsWorkspaceCleanupSummary: skipped += 1 continue if dry_run: - pvs_deleted += 1 - removed_pv_names.add(pv_name) continue try: delete_json(pv.path) @@ -401,13 +451,13 @@ def cleanup_jenkins_workspace_storage() -> JenkinsWorkspaceCleanupSummary: extra={"event": "jenkins_workspace_cleanup", "pv": pv_name, "detail": str(exc)}, ) + # Recompute longhorn candidates using the updated removed PV list. stale_volumes = _workspace_longhorn_candidates(all_pv_names, removed_pv_names) for volume in stale_volumes: if not volume.name: skipped += 1 continue if dry_run: - volumes_deleted += 1 continue try: delete_json(volume.path) diff --git a/ariadne/settings.py b/ariadne/settings.py index 8d4712d..80c6e45 100644 --- a/ariadne/settings.py +++ b/ariadne/settings.py @@ -172,6 +172,7 @@ class Settings: jenkins_workspace_pvc_prefix: str jenkins_workspace_cleanup_min_age_hours: float jenkins_workspace_cleanup_dry_run: bool + jenkins_workspace_cleanup_max_deletions_per_run: int vaultwarden_namespace: str vaultwarden_pod_label: str @@ -471,6 +472,10 @@ class Settings: "jenkins_workspace_pvc_prefix": _env("JENKINS_WORKSPACE_PVC_PREFIX", "pvc-workspace-"), "jenkins_workspace_cleanup_min_age_hours": _env_float("JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS", 12.0), "jenkins_workspace_cleanup_dry_run": _env_bool("JENKINS_WORKSPACE_CLEANUP_DRY_RUN", "false"), + "jenkins_workspace_cleanup_max_deletions_per_run": _env_int( + "JENKINS_WORKSPACE_CLEANUP_MAX_DELETIONS_PER_RUN", + 20, + ), } @classmethod diff --git a/testing/test_jenkins_workspace_cleanup.py b/testing/test_jenkins_workspace_cleanup.py index da109fb..afab707 100644 --- a/testing/test_jenkins_workspace_cleanup.py +++ b/testing/test_jenkins_workspace_cleanup.py @@ -13,12 +13,13 @@ def _metric_value(name: str, labels: dict[str, str]) -> float: return float(value) if value is not None else 0.0 -def _dummy_settings(*, dry_run: bool) -> types.SimpleNamespace: +def _dummy_settings(*, dry_run: bool, max_deletions: int = 20) -> types.SimpleNamespace: return types.SimpleNamespace( jenkins_workspace_namespace="jenkins", jenkins_workspace_pvc_prefix="pvc-workspace-", jenkins_workspace_cleanup_min_age_hours=1.0, jenkins_workspace_cleanup_dry_run=dry_run, + jenkins_workspace_cleanup_max_deletions_per_run=max_deletions, ) @@ -58,6 +59,14 @@ def _fake_payloads(now_iso: str, old_iso: str) -> dict[str, dict[str, object]]: "metadata": {"name": "pvc-workspace-fresh", "creationTimestamp": now_iso}, "status": {"phase": "Lost"}, }, + { + "metadata": { + "name": "pvc-workspace-deleting", + "creationTimestamp": old_iso, + "deletionTimestamp": old_iso, + }, + "status": {"phase": "Lost"}, + }, ] }, "/api/v1/persistentvolumes": { @@ -82,6 +91,15 @@ def _fake_payloads(now_iso: str, old_iso: str) -> dict[str, dict[str, object]]: "status": {"phase": "Released"}, "spec": {"claimRef": {"namespace": "jenkins", "name": "pvc-workspace-fresh"}}, }, + { + "metadata": { + "name": "pvc-deleting", + "creationTimestamp": old_iso, + "deletionTimestamp": old_iso, + }, + "status": {"phase": "Released"}, + "spec": {"claimRef": {"namespace": "jenkins", "name": "pvc-workspace-deleting"}}, + }, ] }, "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes": { @@ -116,6 +134,16 @@ def _fake_payloads(now_iso: str, old_iso: str) -> dict[str, dict[str, object]]: }, } }, + { + "metadata": { + "name": "pvc-vol-deleting", + "creationTimestamp": old_iso, + "deletionTimestamp": old_iso, + "labels": { + "kubernetes.io/created-for/pvc/name": "pvc-workspace-orphan", + }, + } + }, ] }, } @@ -156,9 +184,9 @@ def test_cleanup_jenkins_workspace_storage_dry_run(monkeypatch) -> None: assert summary.pvcs_planned == 1 assert summary.pvs_planned == 1 assert summary.volumes_planned == 2 - assert summary.pvcs_deleted == 1 - assert summary.pvs_deleted == 1 - assert summary.volumes_deleted == 2 + assert summary.pvcs_deleted == 0 + assert summary.pvs_deleted == 0 + assert summary.volumes_deleted == 0 assert summary.failures == 0 assert deleted_paths == [] assert _metric_value( @@ -260,3 +288,35 @@ def test_cleanup_jenkins_workspace_storage_failure(monkeypatch) -> None: "ariadne_jenkins_workspace_cleanup_objects_total", {"kind": "cleanup", "action": "failed", "mode": "delete"}, ) == before_failures + 1 + + +def test_cleanup_jenkins_workspace_storage_guard_blocks_mass_delete(monkeypatch) -> None: + monkeypatch.setattr(cleanup_module, "settings", _dummy_settings(dry_run=False, max_deletions=1)) + + now_iso = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") + old_iso = "2020-01-01T00:00:00Z" + payloads = _fake_payloads(now_iso, old_iso) + deleted_paths: list[str] = [] + + def fake_get_json(path: str): + if path in payloads: + return payloads[path] + raise AssertionError(f"unexpected path: {path}") + + def fake_delete_json(path: str): + deleted_paths.append(path) + return {"status": "Success"} + + monkeypatch.setattr(cleanup_module, "get_json", fake_get_json) + monkeypatch.setattr(cleanup_module, "delete_json", fake_delete_json) + + summary = cleanup_module.cleanup_jenkins_workspace_storage() + + assert summary.failures == 1 + assert summary.pvcs_planned == 1 + assert summary.pvs_planned == 1 + assert summary.volumes_planned == 2 + assert summary.pvcs_deleted == 0 + assert summary.pvs_deleted == 0 + assert summary.volumes_deleted == 0 + assert deleted_paths == []