cleanup(jenkins): add guardrails and deletion cap
This commit is contained in:
parent
b87151c1c1
commit
c86948b4e5
@ -390,6 +390,7 @@ def _startup() -> None:
|
|||||||
"platform_quality_suite_probe_cron": settings.platform_quality_suite_probe_cron,
|
"platform_quality_suite_probe_cron": settings.platform_quality_suite_probe_cron,
|
||||||
"jenkins_workspace_cleanup_cron": settings.jenkins_workspace_cleanup_cron,
|
"jenkins_workspace_cleanup_cron": settings.jenkins_workspace_cleanup_cron,
|
||||||
"jenkins_workspace_cleanup_dry_run": settings.jenkins_workspace_cleanup_dry_run,
|
"jenkins_workspace_cleanup_dry_run": settings.jenkins_workspace_cleanup_dry_run,
|
||||||
|
"jenkins_workspace_cleanup_max_deletions_per_run": settings.jenkins_workspace_cleanup_max_deletions_per_run,
|
||||||
"vault_k8s_auth_cron": settings.vault_k8s_auth_cron,
|
"vault_k8s_auth_cron": settings.vault_k8s_auth_cron,
|
||||||
"vault_oidc_cron": settings.vault_oidc_cron,
|
"vault_oidc_cron": settings.vault_oidc_cron,
|
||||||
"comms_guest_name_cron": settings.comms_guest_name_cron,
|
"comms_guest_name_cron": settings.comms_guest_name_cron,
|
||||||
|
|||||||
@ -120,6 +120,11 @@ def _is_old_enough(metadata: dict[str, Any]) -> bool:
|
|||||||
return datetime.now(timezone.utc) - created_at >= min_age
|
return datetime.now(timezone.utc) - created_at >= min_age
|
||||||
|
|
||||||
|
|
||||||
|
def _is_deleting(metadata: dict[str, Any]) -> bool:
|
||||||
|
deletion_ts = metadata.get("deletionTimestamp")
|
||||||
|
return isinstance(deletion_ts, str) and bool(deletion_ts.strip())
|
||||||
|
|
||||||
|
|
||||||
def _is_workspace_name(name: Any) -> bool:
|
def _is_workspace_name(name: Any) -> bool:
|
||||||
return isinstance(name, str) and name.startswith(settings.jenkins_workspace_pvc_prefix)
|
return isinstance(name, str) and name.startswith(settings.jenkins_workspace_pvc_prefix)
|
||||||
|
|
||||||
@ -128,7 +133,6 @@ def _active_workspace_claims() -> set[str]:
|
|||||||
"""Collect currently referenced Jenkins workspace PVC names from pods."""
|
"""Collect currently referenced Jenkins workspace PVC names from pods."""
|
||||||
|
|
||||||
namespace = settings.jenkins_workspace_namespace
|
namespace = settings.jenkins_workspace_namespace
|
||||||
prefix = settings.jenkins_workspace_pvc_prefix
|
|
||||||
payload = get_json(f"/api/v1/namespaces/{namespace}/pods")
|
payload = get_json(f"/api/v1/namespaces/{namespace}/pods")
|
||||||
items = payload.get("items") if isinstance(payload.get("items"), list) else []
|
items = payload.get("items") if isinstance(payload.get("items"), list) else []
|
||||||
active: set[str] = set()
|
active: set[str] = set()
|
||||||
@ -158,7 +162,6 @@ def _workspace_pv_candidates(active_claims: set[str]) -> tuple[list[_CleanupCand
|
|||||||
"""Find releasable Jenkins workspace PVs and keep a set of all PV names."""
|
"""Find releasable Jenkins workspace PVs and keep a set of all PV names."""
|
||||||
|
|
||||||
namespace = settings.jenkins_workspace_namespace
|
namespace = settings.jenkins_workspace_namespace
|
||||||
prefix = settings.jenkins_workspace_pvc_prefix
|
|
||||||
payload = get_json("/api/v1/persistentvolumes")
|
payload = get_json("/api/v1/persistentvolumes")
|
||||||
items = payload.get("items") if isinstance(payload.get("items"), list) else []
|
items = payload.get("items") if isinstance(payload.get("items"), list) else []
|
||||||
candidates: list[_CleanupCandidate] = []
|
candidates: list[_CleanupCandidate] = []
|
||||||
@ -182,6 +185,8 @@ def _workspace_pv_candidates(active_claims: set[str]) -> tuple[list[_CleanupCand
|
|||||||
continue
|
continue
|
||||||
if not _is_workspace_name(claim_name):
|
if not _is_workspace_name(claim_name):
|
||||||
continue
|
continue
|
||||||
|
if _is_deleting(metadata):
|
||||||
|
continue
|
||||||
if claim_name in active_claims:
|
if claim_name in active_claims:
|
||||||
continue
|
continue
|
||||||
if phase not in {"Released", "Failed"}:
|
if phase not in {"Released", "Failed"}:
|
||||||
@ -219,6 +224,8 @@ def _workspace_pvc_candidates(active_claims: set[str]) -> list[_CleanupCandidate
|
|||||||
phase = status.get("phase")
|
phase = status.get("phase")
|
||||||
if not _is_workspace_name(claim_name):
|
if not _is_workspace_name(claim_name):
|
||||||
continue
|
continue
|
||||||
|
if _is_deleting(metadata):
|
||||||
|
continue
|
||||||
if claim_name in active_claims:
|
if claim_name in active_claims:
|
||||||
continue
|
continue
|
||||||
if phase == "Bound":
|
if phase == "Bound":
|
||||||
@ -267,6 +274,8 @@ def _workspace_longhorn_candidates(all_pv_names: set[str], removed_pv_names: set
|
|||||||
should_delete = True
|
should_delete = True
|
||||||
if not should_delete:
|
if not should_delete:
|
||||||
continue
|
continue
|
||||||
|
if _is_deleting(metadata):
|
||||||
|
continue
|
||||||
if not _is_old_enough(metadata):
|
if not _is_old_enough(metadata):
|
||||||
continue
|
continue
|
||||||
if state not in {None, "detached", "faulted", "unknown"}:
|
if state not in {None, "detached", "faulted", "unknown"}:
|
||||||
@ -334,6 +343,8 @@ def cleanup_jenkins_workspace_storage() -> JenkinsWorkspaceCleanupSummary:
|
|||||||
|
|
||||||
namespace = settings.jenkins_workspace_namespace
|
namespace = settings.jenkins_workspace_namespace
|
||||||
dry_run = settings.jenkins_workspace_cleanup_dry_run
|
dry_run = settings.jenkins_workspace_cleanup_dry_run
|
||||||
|
max_deletions = settings.jenkins_workspace_cleanup_max_deletions_per_run
|
||||||
|
prefix = settings.jenkins_workspace_pvc_prefix.strip()
|
||||||
pvs_deleted = 0
|
pvs_deleted = 0
|
||||||
pvcs_deleted = 0
|
pvcs_deleted = 0
|
||||||
volumes_deleted = 0
|
volumes_deleted = 0
|
||||||
@ -345,10 +356,21 @@ def cleanup_jenkins_workspace_storage() -> JenkinsWorkspaceCleanupSummary:
|
|||||||
|
|
||||||
summary: JenkinsWorkspaceCleanupSummary
|
summary: JenkinsWorkspaceCleanupSummary
|
||||||
try:
|
try:
|
||||||
|
if not namespace.strip():
|
||||||
|
raise ValueError("jenkins workspace cleanup namespace is empty")
|
||||||
|
if not prefix:
|
||||||
|
raise ValueError("jenkins workspace cleanup pvc prefix is empty")
|
||||||
|
if settings.jenkins_workspace_cleanup_min_age_hours < 1.0:
|
||||||
|
raise ValueError("jenkins workspace cleanup min age must be >= 1 hour")
|
||||||
|
if max_deletions < 1:
|
||||||
|
raise ValueError("jenkins workspace cleanup max deletions must be >= 1")
|
||||||
|
|
||||||
active_claims = _active_workspace_claims()
|
active_claims = _active_workspace_claims()
|
||||||
stale_pvs, all_pv_names = _workspace_pv_candidates(active_claims)
|
stale_pvs, all_pv_names = _workspace_pv_candidates(active_claims)
|
||||||
stale_pvcs = _workspace_pvc_candidates(active_claims)
|
stale_pvcs = _workspace_pvc_candidates(active_claims)
|
||||||
removed_pv_names: set[str] = set()
|
removed_pv_names: set[str] = set()
|
||||||
|
stale_volumes = _workspace_longhorn_candidates(all_pv_names, removed_pv_names)
|
||||||
|
planned_total = len(stale_pvs) + len(stale_pvcs) + len(stale_volumes)
|
||||||
|
|
||||||
if dry_run:
|
if dry_run:
|
||||||
logger.info(
|
logger.info(
|
||||||
@ -360,8 +382,39 @@ def cleanup_jenkins_workspace_storage() -> JenkinsWorkspaceCleanupSummary:
|
|||||||
"dry_run": True,
|
"dry_run": True,
|
||||||
"planned_pvs": len(stale_pvs),
|
"planned_pvs": len(stale_pvs),
|
||||||
"planned_pvcs": len(stale_pvcs),
|
"planned_pvcs": len(stale_pvcs),
|
||||||
|
"planned_volumes": len(stale_volumes),
|
||||||
|
"max_deletions": max_deletions,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
elif planned_total > max_deletions:
|
||||||
|
failures += 1
|
||||||
|
logger.warning(
|
||||||
|
"jenkins workspace cleanup blocked by max deletions guard",
|
||||||
|
extra={
|
||||||
|
"event": "jenkins_workspace_cleanup",
|
||||||
|
"status": "guard_blocked",
|
||||||
|
"namespace": namespace,
|
||||||
|
"dry_run": False,
|
||||||
|
"planned_total": planned_total,
|
||||||
|
"max_deletions": max_deletions,
|
||||||
|
"planned_pvs": len(stale_pvs),
|
||||||
|
"planned_pvcs": len(stale_pvcs),
|
||||||
|
"planned_volumes": len(stale_volumes),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
summary = JenkinsWorkspaceCleanupSummary(
|
||||||
|
pvs_planned=len(stale_pvs),
|
||||||
|
pvcs_planned=len(stale_pvcs),
|
||||||
|
volumes_planned=len(stale_volumes),
|
||||||
|
pvs_deleted=0,
|
||||||
|
pvcs_deleted=0,
|
||||||
|
volumes_deleted=0,
|
||||||
|
skipped=skipped,
|
||||||
|
failures=failures,
|
||||||
|
dry_run=dry_run,
|
||||||
|
)
|
||||||
|
_record_metrics(summary)
|
||||||
|
return summary
|
||||||
|
|
||||||
for pvc in stale_pvcs:
|
for pvc in stale_pvcs:
|
||||||
claim_name = pvc.name
|
claim_name = pvc.name
|
||||||
@ -369,7 +422,6 @@ def cleanup_jenkins_workspace_storage() -> JenkinsWorkspaceCleanupSummary:
|
|||||||
skipped += 1
|
skipped += 1
|
||||||
continue
|
continue
|
||||||
if dry_run:
|
if dry_run:
|
||||||
pvcs_deleted += 1
|
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
delete_json(pvc.path)
|
delete_json(pvc.path)
|
||||||
@ -387,8 +439,6 @@ def cleanup_jenkins_workspace_storage() -> JenkinsWorkspaceCleanupSummary:
|
|||||||
skipped += 1
|
skipped += 1
|
||||||
continue
|
continue
|
||||||
if dry_run:
|
if dry_run:
|
||||||
pvs_deleted += 1
|
|
||||||
removed_pv_names.add(pv_name)
|
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
delete_json(pv.path)
|
delete_json(pv.path)
|
||||||
@ -401,13 +451,13 @@ def cleanup_jenkins_workspace_storage() -> JenkinsWorkspaceCleanupSummary:
|
|||||||
extra={"event": "jenkins_workspace_cleanup", "pv": pv_name, "detail": str(exc)},
|
extra={"event": "jenkins_workspace_cleanup", "pv": pv_name, "detail": str(exc)},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Recompute longhorn candidates using the updated removed PV list.
|
||||||
stale_volumes = _workspace_longhorn_candidates(all_pv_names, removed_pv_names)
|
stale_volumes = _workspace_longhorn_candidates(all_pv_names, removed_pv_names)
|
||||||
for volume in stale_volumes:
|
for volume in stale_volumes:
|
||||||
if not volume.name:
|
if not volume.name:
|
||||||
skipped += 1
|
skipped += 1
|
||||||
continue
|
continue
|
||||||
if dry_run:
|
if dry_run:
|
||||||
volumes_deleted += 1
|
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
delete_json(volume.path)
|
delete_json(volume.path)
|
||||||
|
|||||||
@ -172,6 +172,7 @@ class Settings:
|
|||||||
jenkins_workspace_pvc_prefix: str
|
jenkins_workspace_pvc_prefix: str
|
||||||
jenkins_workspace_cleanup_min_age_hours: float
|
jenkins_workspace_cleanup_min_age_hours: float
|
||||||
jenkins_workspace_cleanup_dry_run: bool
|
jenkins_workspace_cleanup_dry_run: bool
|
||||||
|
jenkins_workspace_cleanup_max_deletions_per_run: int
|
||||||
|
|
||||||
vaultwarden_namespace: str
|
vaultwarden_namespace: str
|
||||||
vaultwarden_pod_label: str
|
vaultwarden_pod_label: str
|
||||||
@ -471,6 +472,10 @@ class Settings:
|
|||||||
"jenkins_workspace_pvc_prefix": _env("JENKINS_WORKSPACE_PVC_PREFIX", "pvc-workspace-"),
|
"jenkins_workspace_pvc_prefix": _env("JENKINS_WORKSPACE_PVC_PREFIX", "pvc-workspace-"),
|
||||||
"jenkins_workspace_cleanup_min_age_hours": _env_float("JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS", 12.0),
|
"jenkins_workspace_cleanup_min_age_hours": _env_float("JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS", 12.0),
|
||||||
"jenkins_workspace_cleanup_dry_run": _env_bool("JENKINS_WORKSPACE_CLEANUP_DRY_RUN", "false"),
|
"jenkins_workspace_cleanup_dry_run": _env_bool("JENKINS_WORKSPACE_CLEANUP_DRY_RUN", "false"),
|
||||||
|
"jenkins_workspace_cleanup_max_deletions_per_run": _env_int(
|
||||||
|
"JENKINS_WORKSPACE_CLEANUP_MAX_DELETIONS_PER_RUN",
|
||||||
|
20,
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@ -13,12 +13,13 @@ def _metric_value(name: str, labels: dict[str, str]) -> float:
|
|||||||
return float(value) if value is not None else 0.0
|
return float(value) if value is not None else 0.0
|
||||||
|
|
||||||
|
|
||||||
def _dummy_settings(*, dry_run: bool) -> types.SimpleNamespace:
|
def _dummy_settings(*, dry_run: bool, max_deletions: int = 20) -> types.SimpleNamespace:
|
||||||
return types.SimpleNamespace(
|
return types.SimpleNamespace(
|
||||||
jenkins_workspace_namespace="jenkins",
|
jenkins_workspace_namespace="jenkins",
|
||||||
jenkins_workspace_pvc_prefix="pvc-workspace-",
|
jenkins_workspace_pvc_prefix="pvc-workspace-",
|
||||||
jenkins_workspace_cleanup_min_age_hours=1.0,
|
jenkins_workspace_cleanup_min_age_hours=1.0,
|
||||||
jenkins_workspace_cleanup_dry_run=dry_run,
|
jenkins_workspace_cleanup_dry_run=dry_run,
|
||||||
|
jenkins_workspace_cleanup_max_deletions_per_run=max_deletions,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -58,6 +59,14 @@ def _fake_payloads(now_iso: str, old_iso: str) -> dict[str, dict[str, object]]:
|
|||||||
"metadata": {"name": "pvc-workspace-fresh", "creationTimestamp": now_iso},
|
"metadata": {"name": "pvc-workspace-fresh", "creationTimestamp": now_iso},
|
||||||
"status": {"phase": "Lost"},
|
"status": {"phase": "Lost"},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"metadata": {
|
||||||
|
"name": "pvc-workspace-deleting",
|
||||||
|
"creationTimestamp": old_iso,
|
||||||
|
"deletionTimestamp": old_iso,
|
||||||
|
},
|
||||||
|
"status": {"phase": "Lost"},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"/api/v1/persistentvolumes": {
|
"/api/v1/persistentvolumes": {
|
||||||
@ -82,6 +91,15 @@ def _fake_payloads(now_iso: str, old_iso: str) -> dict[str, dict[str, object]]:
|
|||||||
"status": {"phase": "Released"},
|
"status": {"phase": "Released"},
|
||||||
"spec": {"claimRef": {"namespace": "jenkins", "name": "pvc-workspace-fresh"}},
|
"spec": {"claimRef": {"namespace": "jenkins", "name": "pvc-workspace-fresh"}},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"metadata": {
|
||||||
|
"name": "pvc-deleting",
|
||||||
|
"creationTimestamp": old_iso,
|
||||||
|
"deletionTimestamp": old_iso,
|
||||||
|
},
|
||||||
|
"status": {"phase": "Released"},
|
||||||
|
"spec": {"claimRef": {"namespace": "jenkins", "name": "pvc-workspace-deleting"}},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes": {
|
"/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes": {
|
||||||
@ -116,6 +134,16 @@ def _fake_payloads(now_iso: str, old_iso: str) -> dict[str, dict[str, object]]:
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"metadata": {
|
||||||
|
"name": "pvc-vol-deleting",
|
||||||
|
"creationTimestamp": old_iso,
|
||||||
|
"deletionTimestamp": old_iso,
|
||||||
|
"labels": {
|
||||||
|
"kubernetes.io/created-for/pvc/name": "pvc-workspace-orphan",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@ -156,9 +184,9 @@ def test_cleanup_jenkins_workspace_storage_dry_run(monkeypatch) -> None:
|
|||||||
assert summary.pvcs_planned == 1
|
assert summary.pvcs_planned == 1
|
||||||
assert summary.pvs_planned == 1
|
assert summary.pvs_planned == 1
|
||||||
assert summary.volumes_planned == 2
|
assert summary.volumes_planned == 2
|
||||||
assert summary.pvcs_deleted == 1
|
assert summary.pvcs_deleted == 0
|
||||||
assert summary.pvs_deleted == 1
|
assert summary.pvs_deleted == 0
|
||||||
assert summary.volumes_deleted == 2
|
assert summary.volumes_deleted == 0
|
||||||
assert summary.failures == 0
|
assert summary.failures == 0
|
||||||
assert deleted_paths == []
|
assert deleted_paths == []
|
||||||
assert _metric_value(
|
assert _metric_value(
|
||||||
@ -260,3 +288,35 @@ def test_cleanup_jenkins_workspace_storage_failure(monkeypatch) -> None:
|
|||||||
"ariadne_jenkins_workspace_cleanup_objects_total",
|
"ariadne_jenkins_workspace_cleanup_objects_total",
|
||||||
{"kind": "cleanup", "action": "failed", "mode": "delete"},
|
{"kind": "cleanup", "action": "failed", "mode": "delete"},
|
||||||
) == before_failures + 1
|
) == before_failures + 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_cleanup_jenkins_workspace_storage_guard_blocks_mass_delete(monkeypatch) -> None:
|
||||||
|
monkeypatch.setattr(cleanup_module, "settings", _dummy_settings(dry_run=False, max_deletions=1))
|
||||||
|
|
||||||
|
now_iso = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
|
||||||
|
old_iso = "2020-01-01T00:00:00Z"
|
||||||
|
payloads = _fake_payloads(now_iso, old_iso)
|
||||||
|
deleted_paths: list[str] = []
|
||||||
|
|
||||||
|
def fake_get_json(path: str):
|
||||||
|
if path in payloads:
|
||||||
|
return payloads[path]
|
||||||
|
raise AssertionError(f"unexpected path: {path}")
|
||||||
|
|
||||||
|
def fake_delete_json(path: str):
|
||||||
|
deleted_paths.append(path)
|
||||||
|
return {"status": "Success"}
|
||||||
|
|
||||||
|
monkeypatch.setattr(cleanup_module, "get_json", fake_get_json)
|
||||||
|
monkeypatch.setattr(cleanup_module, "delete_json", fake_delete_json)
|
||||||
|
|
||||||
|
summary = cleanup_module.cleanup_jenkins_workspace_storage()
|
||||||
|
|
||||||
|
assert summary.failures == 1
|
||||||
|
assert summary.pvcs_planned == 1
|
||||||
|
assert summary.pvs_planned == 1
|
||||||
|
assert summary.volumes_planned == 2
|
||||||
|
assert summary.pvcs_deleted == 0
|
||||||
|
assert summary.pvs_deleted == 0
|
||||||
|
assert summary.volumes_deleted == 0
|
||||||
|
assert deleted_paths == []
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user