cleanup(jenkins): add safe pvc prune metrics

This commit is contained in:
Brad Stein 2026-04-12 11:00:50 -03:00
parent f47ee149ff
commit b87151c1c1
4 changed files with 522 additions and 169 deletions

View File

@ -389,6 +389,7 @@ def _startup() -> None:
"metis_k3s_token_sync_cron": settings.metis_k3s_token_sync_cron, "metis_k3s_token_sync_cron": settings.metis_k3s_token_sync_cron,
"platform_quality_suite_probe_cron": settings.platform_quality_suite_probe_cron, "platform_quality_suite_probe_cron": settings.platform_quality_suite_probe_cron,
"jenkins_workspace_cleanup_cron": settings.jenkins_workspace_cleanup_cron, "jenkins_workspace_cleanup_cron": settings.jenkins_workspace_cleanup_cron,
"jenkins_workspace_cleanup_dry_run": settings.jenkins_workspace_cleanup_dry_run,
"vault_k8s_auth_cron": settings.vault_k8s_auth_cron, "vault_k8s_auth_cron": settings.vault_k8s_auth_cron,
"vault_oidc_cron": settings.vault_oidc_cron, "vault_oidc_cron": settings.vault_oidc_cron,
"comms_guest_name_cron": settings.comms_guest_name_cron, "comms_guest_name_cron": settings.comms_guest_name_cron,

View File

@ -4,6 +4,8 @@ from dataclasses import dataclass
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from typing import Any from typing import Any
from prometheus_client import Counter, Gauge
from ..k8s.client import delete_json, get_json from ..k8s.client import delete_json, get_json
from ..settings import settings from ..settings import settings
from ..utils.logging import get_logger from ..utils.logging import get_logger
@ -12,6 +14,48 @@ from ..utils.logging import get_logger
logger = get_logger(__name__) logger = get_logger(__name__)
# Prometheus metrics published by _record_metrics() after each cleanup pass.
# Label values written by this module:
#   status: "ok" | "error"          mode: "dry_run" | "delete"
#   kind:   "pvc" | "pv" | "longhorn_volume" (plus "cleanup" for skipped/failed)
#   action: "planned" | "deleted" | "skipped" | "failed"

# Incremented once per cleanup pass.
JENKINS_WORKSPACE_CLEANUP_RUNS_TOTAL = Counter(
"ariadne_jenkins_workspace_cleanup_runs_total",
"Jenkins workspace cleanup runs by status and mode",
["status", "mode"],
)
# Running totals of objects handled across all passes.
JENKINS_WORKSPACE_CLEANUP_OBJECTS_TOTAL = Counter(
"ariadne_jenkins_workspace_cleanup_objects_total",
"Jenkins workspace cleanup objects by kind, action, and mode",
["kind", "action", "mode"],
)
# Unix timestamps (seconds) of the most recent pass, success, and failure.
JENKINS_WORKSPACE_CLEANUP_LAST_RUN_TS = Gauge(
"ariadne_jenkins_workspace_cleanup_last_run_timestamp_seconds",
"Last Jenkins workspace cleanup run timestamp",
)
JENKINS_WORKSPACE_CLEANUP_LAST_SUCCESS_TS = Gauge(
"ariadne_jenkins_workspace_cleanup_last_success_timestamp_seconds",
"Last successful Jenkins workspace cleanup timestamp",
)
JENKINS_WORKSPACE_CLEANUP_LAST_FAILURE_TS = Gauge(
"ariadne_jenkins_workspace_cleanup_last_failure_timestamp_seconds",
"Last failed Jenkins workspace cleanup timestamp",
)
# Snapshot gauges: overwritten with the counts of the most recent pass only.
# NOTE(review): these gauges carry a `_total` suffix, which Prometheus naming
# guidance associates with counters; renaming would break existing queries, so
# confirm with dashboard owners before changing.
JENKINS_WORKSPACE_CLEANUP_LAST_DELETED = Gauge(
"ariadne_jenkins_workspace_cleanup_last_deleted_total",
"Last Jenkins workspace cleanup deleted object count",
["kind"],
)
JENKINS_WORKSPACE_CLEANUP_LAST_PLANNED = Gauge(
"ariadne_jenkins_workspace_cleanup_last_planned_total",
"Last Jenkins workspace cleanup planned object count",
["kind"],
)
JENKINS_WORKSPACE_CLEANUP_LAST_SKIPPED = Gauge(
"ariadne_jenkins_workspace_cleanup_last_skipped_total",
"Last Jenkins workspace cleanup skipped object count",
)
JENKINS_WORKSPACE_CLEANUP_LAST_FAILURES = Gauge(
"ariadne_jenkins_workspace_cleanup_last_failures_total",
"Last Jenkins workspace cleanup failure count",
)
@dataclass(frozen=True) @dataclass(frozen=True)
class JenkinsWorkspaceCleanupSummary: class JenkinsWorkspaceCleanupSummary:
"""Summarize one Jenkins workspace-storage cleanup pass. """Summarize one Jenkins workspace-storage cleanup pass.
@ -20,11 +64,33 @@ class JenkinsWorkspaceCleanupSummary:
Outputs: deterministic counters for operator logs and metrics. Outputs: deterministic counters for operator logs and metrics.
""" """
pvs_planned: int
pvcs_planned: int
volumes_planned: int
pvs_deleted: int pvs_deleted: int
pvcs_deleted: int pvcs_deleted: int
volumes_deleted: int volumes_deleted: int
skipped: int skipped: int
failures: int failures: int
dry_run: bool
@property
def planned(self) -> int:
return self.pvs_planned + self.pvcs_planned + self.volumes_planned
@property
def deleted(self) -> int:
return self.pvs_deleted + self.pvcs_deleted + self.volumes_deleted
@dataclass(frozen=True)
class _CleanupCandidate:
name: str
kind: str
path: str
created_at: datetime | None
related_pvc: str | None = None
pv_name: str | None = None
def _parse_timestamp(raw: str) -> datetime | None: def _parse_timestamp(raw: str) -> datetime | None:
@ -37,19 +103,27 @@ def _parse_timestamp(raw: str) -> datetime | None:
return None return None
def _created_at(metadata: dict[str, Any]) -> datetime | None:
raw = metadata.get("creationTimestamp")
if not isinstance(raw, str) or not raw:
return None
return _parse_timestamp(raw)
def _is_old_enough(metadata: dict[str, Any]) -> bool:
    """Return true when an object's age is at least the configured cleanup threshold.

    The threshold is ``settings.jenkins_workspace_cleanup_min_age_hours``.
    Objects whose creation time is missing or unparseable are reported as not
    old enough, so an object of unknown age is never selected for deletion.
    """
    created_at = _created_at(metadata)
    if created_at is None:
        return False
    min_age = timedelta(hours=settings.jenkins_workspace_cleanup_min_age_hours)
    age = datetime.now(timezone.utc) - created_at
    return age >= min_age
def _is_workspace_name(name: Any) -> bool:
    """Return true when ``name`` is a string starting with the workspace PVC prefix.

    The prefix is ``settings.jenkins_workspace_pvc_prefix``. An empty prefix
    never matches: ``"anything".startswith("")`` is true, so without this guard
    a blank setting would turn every claim in the namespace into a cleanup
    candidate.
    """
    prefix = settings.jenkins_workspace_pvc_prefix
    if not prefix:
        return False
    return isinstance(name, str) and name.startswith(prefix)
def _active_workspace_claims() -> set[str]: def _active_workspace_claims() -> set[str]:
"""Collect currently referenced Jenkins workspace PVC names from pods.""" """Collect currently referenced Jenkins workspace PVC names from pods."""
@ -61,6 +135,8 @@ def _active_workspace_claims() -> set[str]:
for pod in items: for pod in items:
if not isinstance(pod, dict): if not isinstance(pod, dict):
continue continue
metadata = pod.get("metadata") if isinstance(pod.get("metadata"), dict) else {}
annotations = metadata.get("annotations") if isinstance(metadata.get("annotations"), dict) else {}
spec = pod.get("spec") if isinstance(pod.get("spec"), dict) else {} spec = pod.get("spec") if isinstance(pod.get("spec"), dict) else {}
volumes = spec.get("volumes") if isinstance(spec.get("volumes"), list) else [] volumes = spec.get("volumes") if isinstance(spec.get("volumes"), list) else []
for volume in volumes: for volume in volumes:
@ -70,19 +146,22 @@ def _active_workspace_claims() -> set[str]:
if not isinstance(claim, dict): if not isinstance(claim, dict):
continue continue
claim_name = claim.get("claimName") claim_name = claim.get("claimName")
if isinstance(claim_name, str) and claim_name.startswith(prefix): if _is_workspace_name(claim_name):
active.add(claim_name) active.add(claim_name)
claim_name = annotations.get("jenkins.io/workspace-pvc")
if _is_workspace_name(claim_name):
active.add(claim_name)
return active return active
def _workspace_pv_candidates(active_claims: set[str]) -> tuple[list[dict[str, Any]], set[str]]: def _workspace_pv_candidates(active_claims: set[str]) -> tuple[list[_CleanupCandidate], set[str]]:
"""Find releasable Jenkins workspace PVs and keep a set of all PV names.""" """Find releasable Jenkins workspace PVs and keep a set of all PV names."""
namespace = settings.jenkins_workspace_namespace namespace = settings.jenkins_workspace_namespace
prefix = settings.jenkins_workspace_pvc_prefix prefix = settings.jenkins_workspace_pvc_prefix
payload = get_json("/api/v1/persistentvolumes") payload = get_json("/api/v1/persistentvolumes")
items = payload.get("items") if isinstance(payload.get("items"), list) else [] items = payload.get("items") if isinstance(payload.get("items"), list) else []
candidates: list[dict[str, Any]] = [] candidates: list[_CleanupCandidate] = []
all_pv_names: set[str] = set() all_pv_names: set[str] = set()
for pv in items: for pv in items:
@ -101,7 +180,7 @@ def _workspace_pv_candidates(active_claims: set[str]) -> tuple[list[dict[str, An
phase = status.get("phase") phase = status.get("phase")
if claim_namespace != namespace: if claim_namespace != namespace:
continue continue
if not isinstance(claim_name, str) or not claim_name.startswith(prefix): if not _is_workspace_name(claim_name):
continue continue
if claim_name in active_claims: if claim_name in active_claims:
continue continue
@ -109,18 +188,27 @@ def _workspace_pv_candidates(active_claims: set[str]) -> tuple[list[dict[str, An
continue continue
if not _is_old_enough(metadata): if not _is_old_enough(metadata):
continue continue
candidates.append(pv) if not isinstance(name, str) or not name:
continue
candidates.append(
_CleanupCandidate(
name=name,
kind="pv",
path=f"/api/v1/persistentvolumes/{name}",
created_at=_created_at(metadata),
related_pvc=claim_name if isinstance(claim_name, str) else None,
)
)
return candidates, all_pv_names return candidates, all_pv_names
def _workspace_pvc_candidates(active_claims: set[str]) -> list[dict[str, Any]]: def _workspace_pvc_candidates(active_claims: set[str]) -> list[_CleanupCandidate]:
"""Find stale Jenkins workspace PVCs that are not actively referenced.""" """Find stale Jenkins workspace PVCs that are not actively referenced."""
namespace = settings.jenkins_workspace_namespace namespace = settings.jenkins_workspace_namespace
prefix = settings.jenkins_workspace_pvc_prefix
payload = get_json(f"/api/v1/namespaces/{namespace}/persistentvolumeclaims") payload = get_json(f"/api/v1/namespaces/{namespace}/persistentvolumeclaims")
items = payload.get("items") if isinstance(payload.get("items"), list) else [] items = payload.get("items") if isinstance(payload.get("items"), list) else []
candidates: list[dict[str, Any]] = [] candidates: list[_CleanupCandidate] = []
for pvc in items: for pvc in items:
if not isinstance(pvc, dict): if not isinstance(pvc, dict):
@ -129,7 +217,7 @@ def _workspace_pvc_candidates(active_claims: set[str]) -> list[dict[str, Any]]:
status = pvc.get("status") if isinstance(pvc.get("status"), dict) else {} status = pvc.get("status") if isinstance(pvc.get("status"), dict) else {}
claim_name = metadata.get("name") claim_name = metadata.get("name")
phase = status.get("phase") phase = status.get("phase")
if not isinstance(claim_name, str) or not claim_name.startswith(prefix): if not _is_workspace_name(claim_name):
continue continue
if claim_name in active_claims: if claim_name in active_claims:
continue continue
@ -137,96 +225,246 @@ def _workspace_pvc_candidates(active_claims: set[str]) -> list[dict[str, Any]]:
continue continue
if not _is_old_enough(metadata): if not _is_old_enough(metadata):
continue continue
candidates.append(pvc) if not isinstance(claim_name, str) or not claim_name:
continue
candidates.append(
_CleanupCandidate(
name=claim_name,
kind="pvc",
path=f"/api/v1/namespaces/{namespace}/persistentvolumeclaims/{claim_name}",
created_at=_created_at(metadata),
)
)
return candidates return candidates
def _workspace_longhorn_candidates(all_pv_names: set[str], removed_pv_names: set[str]) -> list[_CleanupCandidate]:
    """Find Longhorn volumes left behind by Jenkins workspace storage.

    A volume is considered when either:
      * its name is in ``removed_pv_names`` (its PV was removed in this pass), or
      * its ``kubernetes.io/created-for/pvc/name`` label is a workspace claim
        name and no PV with the volume's name exists in ``all_pv_names``.

    A considered volume is kept only if it is old enough and every status/spec
    field that is present looks idle; absent fields are treated as acceptable.

    Args:
        all_pv_names: names of every PV currently known.
        removed_pv_names: names of PVs removed (or planned for removal) in this pass.

    Returns:
        Candidates of kind ``"longhorn_volume"``, in listing order.
    """
    namespace = "longhorn-system"
    # Single base path so the list and per-volume delete URLs cannot drift apart.
    volumes_path = f"/apis/longhorn.io/v1beta2/namespaces/{namespace}/volumes"
    payload = get_json(volumes_path)
    items = payload.get("items") if isinstance(payload.get("items"), list) else []
    candidates: list[_CleanupCandidate] = []

    for volume in items:
        if not isinstance(volume, dict):
            continue
        metadata = volume.get("metadata") if isinstance(volume.get("metadata"), dict) else {}
        status = volume.get("status") if isinstance(volume.get("status"), dict) else {}
        spec = volume.get("spec") if isinstance(volume.get("spec"), dict) else {}
        name = metadata.get("name")
        if not isinstance(name, str) or not name:
            continue
        labels = metadata.get("labels") if isinstance(metadata.get("labels"), dict) else {}
        pvc_name = labels.get("kubernetes.io/created-for/pvc/name")

        backed_removed_pv = name in removed_pv_names
        orphaned_workspace_volume = _is_workspace_name(pvc_name) and name not in all_pv_names
        if not (backed_removed_pv or orphaned_workspace_volume):
            continue
        if not _is_old_enough(metadata):
            continue

        # Safety filters: skip anything that reports itself as in use or healthy.
        # NOTE(review): field names are taken as-is from the listing payload;
        # confirm `status.isAttached` against the Longhorn v1beta2 Volume schema.
        if status.get("state") not in {None, "detached", "faulted", "unknown"}:
            continue
        if status.get("isAttached") is True:
            continue
        if status.get("robustness") not in {None, "unknown", "faulted", "degraded"}:
            continue
        if spec.get("frontend") not in {None, "", "blockdev"}:
            continue

        candidates.append(
            _CleanupCandidate(
                name=name,
                kind="longhorn_volume",
                path=f"{volumes_path}/{name}",
                created_at=_created_at(metadata),
                pv_name=name,
            )
        )

    return candidates
def _record_metrics(summary: JenkinsWorkspaceCleanupSummary) -> None:
    """Publish the outcome of one cleanup pass to the Prometheus metrics.

    Increments the run counter, stamps the last-run and last-success/failure
    gauges, overwrites the "last" snapshot gauges, and adds the non-zero
    planned/deleted/skipped/failed counts to the running object counter.

    Args:
        summary: counts and mode of the pass that just finished.
    """
    mode = "dry_run" if summary.dry_run else "delete"
    failed = bool(summary.failures)
    status = "error" if failed else "ok"
    # Read the clock once so every timestamp gauge for this pass agrees exactly.
    now_ts = datetime.now(timezone.utc).timestamp()

    JENKINS_WORKSPACE_CLEANUP_RUNS_TOTAL.labels(status=status, mode=mode).inc()
    if failed:
        JENKINS_WORKSPACE_CLEANUP_LAST_FAILURE_TS.set(now_ts)
    else:
        JENKINS_WORKSPACE_CLEANUP_LAST_SUCCESS_TS.set(now_ts)
    JENKINS_WORKSPACE_CLEANUP_LAST_RUN_TS.set(now_ts)

    per_kind = (
        ("pvc", summary.pvcs_planned, summary.pvcs_deleted),
        ("pv", summary.pvs_planned, summary.pvs_deleted),
        ("longhorn_volume", summary.volumes_planned, summary.volumes_deleted),
    )
    for kind, planned, deleted in per_kind:
        # Snapshot gauges are always written, including zeros.
        JENKINS_WORKSPACE_CLEANUP_LAST_DELETED.labels(kind=kind).set(deleted)
        JENKINS_WORKSPACE_CLEANUP_LAST_PLANNED.labels(kind=kind).set(planned)
        # Counter series are only touched when there is something to add.
        if planned:
            JENKINS_WORKSPACE_CLEANUP_OBJECTS_TOTAL.labels(kind=kind, action="planned", mode=mode).inc(planned)
        if deleted:
            JENKINS_WORKSPACE_CLEANUP_OBJECTS_TOTAL.labels(kind=kind, action="deleted", mode=mode).inc(deleted)

    JENKINS_WORKSPACE_CLEANUP_LAST_SKIPPED.set(summary.skipped)
    JENKINS_WORKSPACE_CLEANUP_LAST_FAILURES.set(summary.failures)

    # Skips and failures are not tracked per kind; they share kind="cleanup".
    for action, count in (("skipped", summary.skipped), ("failed", summary.failures)):
        if count:
            JENKINS_WORKSPACE_CLEANUP_OBJECTS_TOTAL.labels(
                kind="cleanup",
                action=action,
                mode=mode,
            ).inc(count)
def cleanup_jenkins_workspace_storage() -> JenkinsWorkspaceCleanupSummary:
    """Delete stale Jenkins workspace PVC/PV artifacts and orphan Longhorn volumes.

    The pass collects the workspace claims still in use, selects stale PVCs and
    PVs, removes them, and then removes the Longhorn volumes that backed the
    removed PVs or were orphaned. Individual delete failures are logged and
    counted; they do not stop the pass.

    When ``settings.jenkins_workspace_cleanup_dry_run`` is true no delete
    request is issued, but each candidate is still counted in the ``*_deleted``
    fields of the summary, so a dry run reports what a real run would remove.

    Returns:
        The summary of the pass, which is also recorded in the metrics.

    Raises:
        Exception: any error outside a single delete call (for example a
            failed listing) is counted as a failure, recorded in the metrics,
            logged, and re-raised.
    """
    namespace = settings.jenkins_workspace_namespace
    dry_run = settings.jenkins_workspace_cleanup_dry_run
    pvs_deleted = 0
    pvcs_deleted = 0
    volumes_deleted = 0
    skipped = 0
    failures = 0
    stale_pvs: list[_CleanupCandidate] = []
    stale_pvcs: list[_CleanupCandidate] = []
    stale_volumes: list[_CleanupCandidate] = []

    def _snapshot() -> JenkinsWorkspaceCleanupSummary:
        # Reads the enclosing counters at call time, so the success path and
        # the error path both report whatever progress has been made so far.
        return JenkinsWorkspaceCleanupSummary(
            pvs_planned=len(stale_pvs),
            pvcs_planned=len(stale_pvcs),
            volumes_planned=len(stale_volumes),
            pvs_deleted=pvs_deleted,
            pvcs_deleted=pvcs_deleted,
            volumes_deleted=volumes_deleted,
            skipped=skipped,
            failures=failures,
            dry_run=dry_run,
        )

    try:
        active_claims = _active_workspace_claims()
        stale_pvs, all_pv_names = _workspace_pv_candidates(active_claims)
        stale_pvcs = _workspace_pvc_candidates(active_claims)
        removed_pv_names: set[str] = set()

        if dry_run:
            logger.info(
                "jenkins workspace cleanup dry-run enabled",
                extra={
                    "event": "jenkins_workspace_cleanup",
                    "status": "dry_run",
                    "namespace": namespace,
                    "dry_run": True,
                    "planned_pvs": len(stale_pvs),
                    "planned_pvcs": len(stale_pvcs),
                },
            )

        for pvc in stale_pvcs:
            claim_name = pvc.name
            if not claim_name:
                skipped += 1
                continue
            if dry_run:
                pvcs_deleted += 1
                continue
            try:
                delete_json(pvc.path)
                pvcs_deleted += 1
            except Exception as exc:
                failures += 1
                logger.info(
                    "jenkins workspace pvc delete failed",
                    extra={"event": "jenkins_workspace_cleanup", "claim": claim_name, "detail": str(exc)},
                )

        for pv in stale_pvs:
            pv_name = pv.name
            if not pv_name:
                skipped += 1
                continue
            if dry_run:
                pvs_deleted += 1
                # Recorded even in dry-run so the Longhorn plan below matches
                # what a real run would select.
                removed_pv_names.add(pv_name)
                continue
            try:
                delete_json(pv.path)
                removed_pv_names.add(pv_name)
                pvs_deleted += 1
            except Exception as exc:
                failures += 1
                logger.info(
                    "jenkins workspace pv delete failed",
                    extra={"event": "jenkins_workspace_cleanup", "pv": pv_name, "detail": str(exc)},
                )

        # Listed after the PV phase because selection depends on which PVs went away.
        stale_volumes = _workspace_longhorn_candidates(all_pv_names, removed_pv_names)
        for volume in stale_volumes:
            if not volume.name:
                skipped += 1
                continue
            if dry_run:
                volumes_deleted += 1
                continue
            try:
                delete_json(volume.path)
                volumes_deleted += 1
            except Exception as exc:
                failures += 1
                logger.info(
                    "jenkins workspace longhorn volume delete failed",
                    extra={"event": "jenkins_workspace_cleanup", "volume": volume.name, "detail": str(exc)},
                )

        summary = _snapshot()
    except Exception as exc:
        failures += 1
        logger.exception(
            "jenkins workspace cleanup failed",
            extra={"event": "jenkins_workspace_cleanup", "status": "error", "namespace": namespace, "detail": str(exc)},
        )
        # Record the partial pass before propagating the error to the caller.
        _record_metrics(_snapshot())
        raise

    _record_metrics(summary)
    logger.info(
        "jenkins workspace cleanup finished",
        extra={
            "event": "jenkins_workspace_cleanup",
            "status": "ok" if failures == 0 else "error",
            "dry_run": dry_run,
            "namespace": namespace,
            "planned_pvs": summary.pvs_planned,
            "planned_pvcs": summary.pvcs_planned,
            "planned_volumes": summary.volumes_planned,
            "deleted_pvs": pvs_deleted,
            "deleted_pvcs": pvcs_deleted,
            "deleted_volumes": volumes_deleted,
            "skipped": skipped,
            "failures": failures,
        },
    )
    return summary

View File

@ -171,6 +171,7 @@ class Settings:
jenkins_workspace_namespace: str jenkins_workspace_namespace: str
jenkins_workspace_pvc_prefix: str jenkins_workspace_pvc_prefix: str
jenkins_workspace_cleanup_min_age_hours: float jenkins_workspace_cleanup_min_age_hours: float
jenkins_workspace_cleanup_dry_run: bool
vaultwarden_namespace: str vaultwarden_namespace: str
vaultwarden_pod_label: str vaultwarden_pod_label: str
@ -469,6 +470,7 @@ class Settings:
"jenkins_workspace_namespace": _env("JENKINS_WORKSPACE_NAMESPACE", "jenkins"), "jenkins_workspace_namespace": _env("JENKINS_WORKSPACE_NAMESPACE", "jenkins"),
"jenkins_workspace_pvc_prefix": _env("JENKINS_WORKSPACE_PVC_PREFIX", "pvc-workspace-"), "jenkins_workspace_pvc_prefix": _env("JENKINS_WORKSPACE_PVC_PREFIX", "pvc-workspace-"),
"jenkins_workspace_cleanup_min_age_hours": _env_float("JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS", 12.0), "jenkins_workspace_cleanup_min_age_hours": _env_float("JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS", 12.0),
"jenkins_workspace_cleanup_dry_run": _env_bool("JENKINS_WORKSPACE_CLEANUP_DRY_RUN", "false"),
} }
@classmethod @classmethod

View File

@ -3,101 +3,200 @@ from __future__ import annotations
from datetime import datetime, timezone from datetime import datetime, timezone
import types import types
from prometheus_client import REGISTRY
from ariadne.services import jenkins_workspace_cleanup as cleanup_module from ariadne.services import jenkins_workspace_cleanup as cleanup_module
def _metric_value(name: str, labels: dict[str, str]) -> float:
    """Return the current value of a sample in the default registry.

    A sample that does not exist yet is reported as 0.0, so callers can take a
    "before" reading ahead of the first increment of a labelled series.
    """
    value = REGISTRY.get_sample_value(name, labels)
    if value is None:
        return 0.0
    return float(value)
def _dummy_settings(*, dry_run: bool) -> types.SimpleNamespace:
return types.SimpleNamespace(
jenkins_workspace_namespace="jenkins", jenkins_workspace_namespace="jenkins",
jenkins_workspace_pvc_prefix="pvc-workspace-", jenkins_workspace_pvc_prefix="pvc-workspace-",
jenkins_workspace_cleanup_min_age_hours=1.0, jenkins_workspace_cleanup_min_age_hours=1.0,
jenkins_workspace_cleanup_dry_run=dry_run,
) )
monkeypatch.setattr(cleanup_module, "settings", dummy_settings)
def _fake_payloads(now_iso: str, old_iso: str) -> dict[str, dict[str, object]]:
return {
"/api/v1/namespaces/jenkins/pods": {
"items": [
{
"metadata": {
"annotations": {
"jenkins.io/workspace-pvc": "pvc-workspace-annotated-active",
}
},
"spec": {
"volumes": [
{"persistentVolumeClaim": {"claimName": "pvc-workspace-active"}},
]
},
}
]
},
"/api/v1/namespaces/jenkins/persistentvolumeclaims": {
"items": [
{
"metadata": {"name": "pvc-workspace-stale", "creationTimestamp": old_iso},
"status": {"phase": "Lost"},
},
{
"metadata": {"name": "pvc-workspace-active", "creationTimestamp": old_iso},
"status": {"phase": "Bound"},
},
{
"metadata": {"name": "pvc-workspace-annotated-active", "creationTimestamp": old_iso},
"status": {"phase": "Lost"},
},
{
"metadata": {"name": "pvc-workspace-fresh", "creationTimestamp": now_iso},
"status": {"phase": "Lost"},
},
]
},
"/api/v1/persistentvolumes": {
"items": [
{
"metadata": {"name": "pvc-old", "creationTimestamp": old_iso},
"status": {"phase": "Released"},
"spec": {"claimRef": {"namespace": "jenkins", "name": "pvc-workspace-stale"}},
},
{
"metadata": {"name": "pvc-active", "creationTimestamp": old_iso},
"status": {"phase": "Released"},
"spec": {"claimRef": {"namespace": "jenkins", "name": "pvc-workspace-active"}},
},
{
"metadata": {"name": "pvc-annotated", "creationTimestamp": old_iso},
"status": {"phase": "Released"},
"spec": {"claimRef": {"namespace": "jenkins", "name": "pvc-workspace-annotated-active"}},
},
{
"metadata": {"name": "pvc-fresh", "creationTimestamp": now_iso},
"status": {"phase": "Released"},
"spec": {"claimRef": {"namespace": "jenkins", "name": "pvc-workspace-fresh"}},
},
]
},
"/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes": {
"items": [
{"metadata": {"name": "pvc-old", "creationTimestamp": old_iso}},
{
"metadata": {
"name": "pvc-orphan",
"creationTimestamp": old_iso,
"labels": {
"kubernetes.io/created-for/pvc/name": "pvc-workspace-orphan",
},
}
},
{
"metadata": {
"name": "pvc-attached",
"creationTimestamp": old_iso,
"labels": {
"kubernetes.io/created-for/pvc/name": "pvc-workspace-annotated-active",
},
},
"status": {"state": "attached", "isAttached": True, "robustness": "healthy"},
"spec": {"frontend": "blockdev"},
},
{
"metadata": {
"name": "pvc-orphan-fresh",
"creationTimestamp": now_iso,
"labels": {
"kubernetes.io/created-for/pvc/name": "pvc-workspace-fresh",
},
}
},
]
},
}
def test_cleanup_jenkins_workspace_storage_dry_run(monkeypatch) -> None:
    """Dry-run counts every candidate but never calls the delete endpoint."""
    monkeypatch.setattr(cleanup_module, "settings", _dummy_settings(dry_run=True))

    now_iso = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    old_iso = "2020-01-01T00:00:00Z"
    payloads = _fake_payloads(now_iso, old_iso)
    deleted_paths: list[str] = []

    def fake_get_json(path: str):
        if path in payloads:
            return payloads[path]
        raise AssertionError(f"unexpected path: {path}")

    def fake_delete_json(path: str):
        deleted_paths.append(path)
        return {"status": "Success"}

    # The registry is process-wide, so assert on deltas rather than absolutes.
    runs_labels = {"status": "ok", "mode": "dry_run"}
    planned_labels = {"kind": "pvc", "action": "planned", "mode": "dry_run"}
    before_runs = _metric_value(
        "ariadne_jenkins_workspace_cleanup_runs_total",
        runs_labels,
    )
    before_planned = _metric_value(
        "ariadne_jenkins_workspace_cleanup_objects_total",
        planned_labels,
    )

    monkeypatch.setattr(cleanup_module, "get_json", fake_get_json)
    monkeypatch.setattr(cleanup_module, "delete_json", fake_delete_json)

    summary = cleanup_module.cleanup_jenkins_workspace_storage()

    assert summary.dry_run is True
    assert summary.pvcs_planned == 1
    assert summary.pvs_planned == 1
    assert summary.volumes_planned == 2
    # In dry-run the "deleted" fields mirror the plan.
    assert summary.pvcs_deleted == 1
    assert summary.pvs_deleted == 1
    assert summary.volumes_deleted == 2
    assert summary.failures == 0
    assert deleted_paths == []
    assert _metric_value(
        "ariadne_jenkins_workspace_cleanup_runs_total",
        runs_labels,
    ) == before_runs + 1
    assert _metric_value(
        "ariadne_jenkins_workspace_cleanup_objects_total",
        planned_labels,
    ) == before_planned + 1
def test_cleanup_jenkins_workspace_storage(monkeypatch) -> None:
monkeypatch.setattr(cleanup_module, "settings", _dummy_settings(dry_run=False))
now_iso = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
old_iso = "2020-01-01T00:00:00Z"
deleted_paths: list[str] = []
payloads = _fake_payloads(now_iso, old_iso)
def fake_get_json(path: str):
if path in payloads:
return payloads[path]
raise AssertionError(f"unexpected path: {path}")
def fake_delete_json(path: str):
deleted_paths.append(path)
return {"status": "Success"}
before_runs = _metric_value(
"ariadne_jenkins_workspace_cleanup_runs_total",
{"status": "ok", "mode": "delete"},
)
before_deleted = _metric_value(
"ariadne_jenkins_workspace_cleanup_objects_total",
{"kind": "longhorn_volume", "action": "deleted", "mode": "delete"},
)
monkeypatch.setattr(cleanup_module, "get_json", fake_get_json) monkeypatch.setattr(cleanup_module, "get_json", fake_get_json)
monkeypatch.setattr(cleanup_module, "delete_json", fake_delete_json) monkeypatch.setattr(cleanup_module, "delete_json", fake_delete_json)
@ -111,15 +210,19 @@ def test_cleanup_jenkins_workspace_storage(monkeypatch) -> None:
assert "/api/v1/persistentvolumes/pvc-old" in deleted_paths assert "/api/v1/persistentvolumes/pvc-old" in deleted_paths
assert "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes/pvc-old" in deleted_paths assert "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes/pvc-old" in deleted_paths
assert "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes/pvc-orphan" in deleted_paths assert "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes/pvc-orphan" in deleted_paths
assert "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes/pvc-attached" not in deleted_paths
assert _metric_value(
"ariadne_jenkins_workspace_cleanup_runs_total",
{"status": "ok", "mode": "delete"},
) == before_runs + 1
assert _metric_value(
"ariadne_jenkins_workspace_cleanup_objects_total",
{"kind": "longhorn_volume", "action": "deleted", "mode": "delete"},
) == before_deleted + 2
def test_cleanup_jenkins_workspace_storage_failure(monkeypatch) -> None: def test_cleanup_jenkins_workspace_storage_failure(monkeypatch) -> None:
dummy_settings = types.SimpleNamespace( monkeypatch.setattr(cleanup_module, "settings", _dummy_settings(dry_run=False))
jenkins_workspace_namespace="jenkins",
jenkins_workspace_pvc_prefix="pvc-workspace-",
jenkins_workspace_cleanup_min_age_hours=1.0,
)
monkeypatch.setattr(cleanup_module, "settings", dummy_settings)
def fake_get_json(path: str): def fake_get_json(path: str):
if path == "/api/v1/namespaces/jenkins/pods": if path == "/api/v1/namespaces/jenkins/pods":
@ -142,9 +245,18 @@ def test_cleanup_jenkins_workspace_storage_failure(monkeypatch) -> None:
def fake_delete_json(_path: str): def fake_delete_json(_path: str):
raise RuntimeError("boom") raise RuntimeError("boom")
before_failures = _metric_value(
"ariadne_jenkins_workspace_cleanup_objects_total",
{"kind": "cleanup", "action": "failed", "mode": "delete"},
)
monkeypatch.setattr(cleanup_module, "get_json", fake_get_json) monkeypatch.setattr(cleanup_module, "get_json", fake_get_json)
monkeypatch.setattr(cleanup_module, "delete_json", fake_delete_json) monkeypatch.setattr(cleanup_module, "delete_json", fake_delete_json)
summary = cleanup_module.cleanup_jenkins_workspace_storage() summary = cleanup_module.cleanup_jenkins_workspace_storage()
assert summary.failures == 1 assert summary.failures == 1
assert summary.pvcs_deleted == 0 assert summary.pvcs_deleted == 0
assert _metric_value(
"ariadne_jenkins_workspace_cleanup_objects_total",
{"kind": "cleanup", "action": "failed", "mode": "delete"},
) == before_failures + 1