Compare commits

...

2 Commits

6 changed files with 106 additions and 26 deletions

View File

@ -46,6 +46,8 @@ PERCENT_THRESHOLDS = {
],
}
NAMESPACE_CPU_WINDOW = "1m"
# ---------------------------------------------------------------------------
# Cluster metadata
# ---------------------------------------------------------------------------
@ -172,7 +174,7 @@ def node_io_expr(scope=""):
def namespace_selector(scope_var):
    """Build the label matchers shared by the per-namespace container queries.

    The result is the text placed between ``{`` and ``}`` of a PromQL
    selector. It keeps only series that carry non-empty ``namespace``,
    ``pod`` and ``container`` labels and drops series labelled
    ``container="POD"``, matching the expressions in the generated
    dashboards.

    Args:
        scope_var: Extra matcher text appended verbatim as the last entry,
            e.g. a Grafana variable such as ``$namespace_scope_cpu``.

    Returns:
        A comma-separated matcher string without surrounding braces.
    """
    # NOTE(review): container="POD" is presumably the pod sandbox (pause)
    # series reported by cAdvisor -- confirm against the cluster's runtime.
    # A single return only: the earlier variant without the POD matcher made
    # this line unreachable and disagreed with the generated dashboards.
    return f'namespace!="",pod!="",container!="",container!="POD",{scope_var}'
def namespace_gpu_selector(scope_var):
@ -180,7 +182,10 @@ def namespace_gpu_selector(scope_var):
def namespace_cpu_raw(scope_var):
    """Return the PromQL expression for CPU usage rate summed per namespace.

    The expression takes ``rate()`` of ``container_cpu_usage_seconds_total``
    over the module-level ``NAMESPACE_CPU_WINDOW`` range and sums the result
    by ``namespace``.

    Args:
        scope_var: Extra matcher text forwarded to ``namespace_selector``.

    Returns:
        The PromQL expression as a string.
    """
    selector = namespace_selector(scope_var)
    # The range comes from NAMESPACE_CPU_WINDOW instead of a hard-coded
    # "[5m]"; the earlier fixed-window return shadowed this one and left the
    # constant unused.
    return (
        "sum(rate(container_cpu_usage_seconds_total"
        f"{{{selector}}}[{NAMESPACE_CPU_WINDOW}])) by (namespace)"
    )
def namespace_ram_raw(scope_var):
@ -942,7 +947,7 @@ def build_overview():
namespace_cpu_share_expr(cpu_scope),
{"h": 9, "w": 8, "x": 0, "y": 16},
links=namespace_scope_links("namespace_scope_cpu"),
description="Use panel links to switch namespace scope.",
description="Values are normalized within the selected scope; use panel links to switch scope.",
)
)
panels.append(
@ -952,7 +957,7 @@ def build_overview():
namespace_gpu_share_expr(gpu_scope),
{"h": 9, "w": 8, "x": 8, "y": 16},
links=namespace_scope_links("namespace_scope_gpu"),
description="Use panel links to switch namespace scope.",
description="Values are normalized within the selected scope; use panel links to switch scope.",
)
)
panels.append(
@ -962,7 +967,7 @@ def build_overview():
namespace_ram_share_expr(ram_scope),
{"h": 9, "w": 8, "x": 16, "y": 16},
links=namespace_scope_links("namespace_scope_ram"),
description="Use panel links to switch namespace scope.",
description="Values are normalized within the selected scope; use panel links to switch scope.",
)
)
@ -1783,7 +1788,7 @@ def build_gpu_dashboard():
namespace_gpu_share_expr(gpu_scope),
{"h": 8, "w": 12, "x": 0, "y": 0},
links=namespace_scope_links("namespace_scope_gpu"),
description="Use panel links to switch namespace scope.",
description="Values are normalized within the selected scope; use panel links to switch scope.",
)
)
panels.append(

View File

@ -12,6 +12,7 @@ Targets (best-effort):
Safety:
- Requires an explicit username prefix (e.g. "test-")
- Dry-run unless --apply is set
- --apply requires an explicit --confirm guard
- Validates prefixes to a conservative charset
"""
@ -259,8 +260,13 @@ def _vaultwarden_admin_cookie(admin_token: str, base_url: str) -> str:
data = urllib.parse.urlencode({"token": admin_token}).encode("utf-8")
req = urllib.request.Request(f"{base_url}/admin", data=data, method="POST")
req.add_header("Content-Type", "application/x-www-form-urlencoded")
with urllib.request.urlopen(req, timeout=10) as resp:
set_cookie = resp.headers.get("Set-Cookie") or ""
try:
with urllib.request.urlopen(req, timeout=10) as resp:
set_cookie = resp.headers.get("Set-Cookie") or ""
except urllib.error.HTTPError as exc:
if exc.code == 429:
raise RuntimeError("vaultwarden admin rate limited (HTTP 429)") from exc
raise
cookie = set_cookie.split(";", 1)[0].strip()
if not cookie:
raise RuntimeError("vaultwarden admin cookie missing")
@ -270,8 +276,13 @@ def _vaultwarden_admin_cookie(admin_token: str, base_url: str) -> str:
def _vaultwarden_list_users(base_url: str, cookie: str) -> list[VaultwardenUser]:
req = urllib.request.Request(f"{base_url}/admin/users", method="GET")
req.add_header("Cookie", cookie)
with urllib.request.urlopen(req, timeout=30) as resp:
payload = json.loads(resp.read().decode("utf-8"))
try:
with urllib.request.urlopen(req, timeout=30) as resp:
payload = json.loads(resp.read().decode("utf-8"))
except urllib.error.HTTPError as exc:
if exc.code == 429:
raise RuntimeError("vaultwarden admin rate limited (HTTP 429)") from exc
raise
if not isinstance(payload, list):
raise RuntimeError("unexpected vaultwarden /admin/users response")
users: list[VaultwardenUser] = []
@ -336,17 +347,49 @@ def main() -> int:
action="store_true",
help="Actually delete; otherwise dry-run only.",
)
parser.add_argument(
"--confirm",
default="",
help=(
"Required when using --apply. Must exactly equal the comma-separated "
"sorted prefix list (e.g. 'atlas-,bob-,e2e-,test-')."
),
)
parser.add_argument("--skip-keycloak", action="store_true", help="Skip Keycloak user deletion.")
parser.add_argument("--skip-portal-db", action="store_true", help="Skip portal DB cleanup.")
parser.add_argument("--skip-vaultwarden", action="store_true", help="Skip Vaultwarden cleanup.")
parser.add_argument(
"--protect-keycloak-username",
action="append",
default=[],
help="Keycloak usernames that must never be deleted (repeatable).",
)
parser.add_argument(
"--protect-vaultwarden-email",
action="append",
default=[],
help="Vaultwarden emails that must never be deleted (repeatable).",
)
args = parser.parse_args()
prefixes = _validate_prefixes(args.prefix)
prefixes = sorted(set(_validate_prefixes(args.prefix)))
apply = bool(args.apply)
expected_confirm = ",".join(prefixes)
protected_keycloak = {"bstein", "robotuser", *[u.strip() for u in args.protect_keycloak_username if u.strip()]}
protected_vaultwarden = {e.strip() for e in args.protect_vaultwarden_email if e.strip()}
if apply and args.confirm != expected_confirm:
raise SystemExit(
f"refusing to apply without --confirm '{expected_confirm}' (got '{args.confirm}')"
)
print("Atlas test-user cleanup")
print("prefixes:", ", ".join(prefixes))
print("prefixes:", expected_confirm)
print("mode:", "APPLY (destructive)" if apply else "DRY RUN (no changes)")
if protected_keycloak:
print("protected keycloak usernames:", ", ".join(sorted(protected_keycloak)))
if protected_vaultwarden:
print("protected vaultwarden emails:", ", ".join(sorted(protected_vaultwarden)))
print()
if not args.skip_portal_db:
@ -375,6 +418,8 @@ def main() -> int:
for user in _keycloak_list_users(kc_server, kc_realm, token, prefix):
if not _starts_with_any(user.username, prefixes):
continue
if user.username in protected_keycloak:
continue
found[user.user_id] = user
users = list(found.values())
users.sort(key=lambda u: u.username)
@ -403,12 +448,42 @@ def main() -> int:
admin_token = _kubectl_get_secret_value("vaultwarden", "vaultwarden-admin", "ADMIN_TOKEN")
base_url = "http://127.0.0.1:18081"
cookie = _vaultwarden_admin_cookie(admin_token, base_url)
users = _vaultwarden_list_users(base_url, cookie)
try:
cookie = ""
for attempt in range(7):
try:
cookie = _vaultwarden_admin_cookie(admin_token, base_url)
break
except RuntimeError as exc:
if "rate limited" in str(exc).lower():
time.sleep(min(60.0, 2.0**attempt))
continue
raise
if not cookie:
raise RuntimeError("vaultwarden admin login repeatedly rate limited")
users: list[VaultwardenUser] = []
for attempt in range(7):
try:
users = _vaultwarden_list_users(base_url, cookie)
break
except RuntimeError as exc:
if "rate limited" in str(exc).lower():
time.sleep(min(60.0, 2.0**attempt))
continue
raise
if not users:
raise RuntimeError("vaultwarden user list unavailable (possibly rate limited)")
except RuntimeError as exc:
print(f"Vaultwarden: ERROR: {exc}")
print()
return 1
matched: list[VaultwardenUser] = []
for user in users:
local = user.email.split("@", 1)[0]
if _starts_with_any(local, prefixes):
if user.email in protected_vaultwarden:
continue
matched.append(user)
matched.sort(key=lambda u: u.email)
print(f"Vaultwarden: {len(matched)} users matched")

View File

@ -71,7 +71,7 @@
"targetBlank": false
}
],
"description": "Use panel links to switch namespace scope."
"description": "Values are normalized within the selected scope; use panel links to switch scope."
},
{
"id": 2,

View File

@ -1086,7 +1086,7 @@
},
"targets": [
{
"expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ), 1)",
"expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_cpu}[1m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_cpu}[1m])) by (namespace) ), 1)",
"refId": "A",
"legendFormat": "{{namespace}}"
}
@ -1137,7 +1137,7 @@
"targetBlank": false
}
],
"description": "Use panel links to switch namespace scope."
"description": "Values are normalized within the selected scope; use panel links to switch scope."
},
{
"id": 12,
@ -1206,7 +1206,7 @@
"targetBlank": false
}
],
"description": "Use panel links to switch namespace scope."
"description": "Values are normalized within the selected scope; use panel links to switch scope."
},
{
"id": 13,
@ -1224,7 +1224,7 @@
},
"targets": [
{
"expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ), 1)",
"expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_ram}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_ram}) by (namespace) ), 1)",
"refId": "A",
"legendFormat": "{{namespace}}"
}
@ -1275,7 +1275,7 @@
"targetBlank": false
}
],
"description": "Use panel links to switch namespace scope."
"description": "Values are normalized within the selected scope; use panel links to switch scope."
},
{
"id": 14,

View File

@ -80,7 +80,7 @@ data:
"targetBlank": false
}
],
"description": "Use panel links to switch namespace scope."
"description": "Values are normalized within the selected scope; use panel links to switch scope."
},
{
"id": 2,

View File

@ -1095,7 +1095,7 @@ data:
},
"targets": [
{
"expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ), 1)",
"expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_cpu}[1m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_cpu}[1m])) by (namespace) ), 1)",
"refId": "A",
"legendFormat": "{{namespace}}"
}
@ -1146,7 +1146,7 @@ data:
"targetBlank": false
}
],
"description": "Use panel links to switch namespace scope."
"description": "Values are normalized within the selected scope; use panel links to switch scope."
},
{
"id": 12,
@ -1215,7 +1215,7 @@ data:
"targetBlank": false
}
],
"description": "Use panel links to switch namespace scope."
"description": "Values are normalized within the selected scope; use panel links to switch scope."
},
{
"id": 13,
@ -1233,7 +1233,7 @@ data:
},
"targets": [
{
"expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ), 1)",
"expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_ram}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_ram}) by (namespace) ), 1)",
"refId": "A",
"legendFormat": "{{namespace}}"
}
@ -1284,7 +1284,7 @@ data:
"targetBlank": false
}
],
"description": "Use panel links to switch namespace scope."
"description": "Values are normalized within the selected scope; use panel links to switch scope."
},
{
"id": 14,