Compare commits
No commits in common. "96f923ae4c3e91133d764835b28c5ac808474bf2" and "616c6308b1e3c3cbb3c7ab31449e699861e07bf8" have entirely different histories.
96f923ae4c
...
616c6308b1
@ -538,7 +538,7 @@ PLATFORM_TEST_SUCCESS_RATE_24H_BY_SUITE = (
|
||||
f'/ clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}"}}[24h]))), 1)) '
|
||||
f'and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}"}}[24h]))) > 0))'
|
||||
)
|
||||
PVC_BACKUP_AGE_HOURS_BY_PVC = "sort_desc(max by (namespace, pvc) (pvc_backup_age_hours or on(namespace, pvc) ((1 - pvc_backup_health) * 999)))"
|
||||
PVC_BACKUP_AGE_HOURS_BY_PVC = "sort_desc(max by (namespace, pvc) (pvc_backup_age_hours))"
|
||||
ANANKE_SELECTOR = 'job="ananke-power"'
|
||||
ANANKE_UPS_DB_NAME = "Pyrphoros"
|
||||
ANANKE_UPS_DB_NODE = "titan-db"
|
||||
@ -1627,7 +1627,7 @@ def build_overview():
|
||||
)
|
||||
panels[-1]["links"] = link_to("atlas-storage")
|
||||
panels[-1]["description"] = (
|
||||
"Oldest successful backup age in hours by PVC. PVCs with missing or unhealthy backup state are forced to 999h so the red bars stay visible in the overview."
|
||||
"Oldest backup age in hours by PVC. This panel is reserved for the upcoming PVC backup health feed and will show no data until those metrics are published."
|
||||
)
|
||||
|
||||
panels.append(
|
||||
@ -1913,17 +1913,13 @@ def build_overview():
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
timeseries_panel(
|
||||
bargauge_panel(
|
||||
22,
|
||||
"Nodes Closest to Full Astraios Disks",
|
||||
astraios_usage_expr(),
|
||||
f"topk(12, {astraios_usage_expr()})",
|
||||
{"h": 16, "w": 12, "x": 12, "y": 71},
|
||||
unit="percent",
|
||||
legend="{{node}}",
|
||||
legend_calcs=["last"],
|
||||
legend_display="table",
|
||||
legend_placement="right",
|
||||
time_from="1w",
|
||||
thresholds=PERCENT_THRESHOLDS,
|
||||
links=link_to("atlas-storage"),
|
||||
)
|
||||
)
|
||||
|
||||
@ -17,7 +17,6 @@ spec:
|
||||
spec:
|
||||
nodeSelector:
|
||||
hardware: rpi5
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
containers:
|
||||
- name: element-call
|
||||
image: ghcr.io/element-hq/element-call@sha256:e6897c7818331714eae19d83ef8ea94a8b41115f0d8d3f62c2fed2d02c65c9bc
|
||||
|
||||
@ -119,7 +119,6 @@ spec:
|
||||
> /synapse/config/conf.d/runtime-secrets.yaml
|
||||
nodeSelector:
|
||||
hardware: rpi5
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
@ -418,7 +417,6 @@ spec:
|
||||
|
||||
nodeSelector:
|
||||
hardware: rpi5
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
|
||||
@ -23,7 +23,6 @@ resources:
|
||||
- oneoffs/synapse-oidc-secret-ensure-job.yaml
|
||||
- oneoffs/logs-oidc-secret-ensure-job.yaml
|
||||
- oneoffs/metis-oidc-secret-ensure-job.yaml
|
||||
- oneoffs/soteria-oidc-secret-ensure-job.yaml
|
||||
- oneoffs/metis-ssh-keys-secret-ensure-job.yaml
|
||||
- oneoffs/harbor-oidc-secret-ensure-job.yaml
|
||||
- oneoffs/vault-oidc-secret-ensure-job.yaml
|
||||
|
||||
@ -1,198 +0,0 @@
|
||||
# services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml
|
||||
# One-off job for sso/soteria-oidc-secret-ensure-1.
|
||||
# Purpose: ensure the Soteria oauth2-proxy OIDC client and Vault secret exist.
|
||||
# Keep this completed Job around; bump the suffix if it ever needs to be rerun.
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: soteria-oidc-secret-ensure-1
|
||||
namespace: sso
|
||||
spec:
|
||||
backoffLimit: 0
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
vault.hashicorp.com/agent-inject: "true"
|
||||
vault.hashicorp.com/agent-pre-populate-only: "true"
|
||||
vault.hashicorp.com/role: "sso-secrets"
|
||||
vault.hashicorp.com/agent-inject-secret-keycloak-admin-env.sh: "kv/data/atlas/shared/keycloak-admin"
|
||||
vault.hashicorp.com/agent-inject-template-keycloak-admin-env.sh: |
|
||||
{{ with secret "kv/data/atlas/shared/keycloak-admin" }}
|
||||
export KEYCLOAK_ADMIN="{{ .Data.data.username }}"
|
||||
export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}"
|
||||
export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}"
|
||||
{{ end }}
|
||||
spec:
|
||||
serviceAccountName: mas-secrets-ensure
|
||||
restartPolicy: Never
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: node-role.kubernetes.io/worker
|
||||
operator: Exists
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: kubernetes.io/arch
|
||||
operator: In
|
||||
values: ["arm64"]
|
||||
containers:
|
||||
- name: apply
|
||||
image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
|
||||
command: ["/bin/sh", "-c"]
|
||||
args:
|
||||
- |
|
||||
set -euo pipefail
|
||||
. /vault/secrets/keycloak-admin-env.sh
|
||||
KC_URL="http://keycloak.sso.svc.cluster.local"
|
||||
ACCESS_TOKEN=""
|
||||
for attempt in 1 2 3 4 5; do
|
||||
TOKEN_JSON="$(curl -sS -X POST "$KC_URL/realms/master/protocol/openid-connect/token" \
|
||||
-H 'Content-Type: application/x-www-form-urlencoded' \
|
||||
-d "grant_type=password" \
|
||||
-d "client_id=admin-cli" \
|
||||
-d "username=${KEYCLOAK_ADMIN}" \
|
||||
-d "password=${KEYCLOAK_ADMIN_PASSWORD}" || true)"
|
||||
ACCESS_TOKEN="$(echo "$TOKEN_JSON" | jq -r '.access_token' 2>/dev/null || true)"
|
||||
if [ -n "$ACCESS_TOKEN" ] && [ "$ACCESS_TOKEN" != "null" ]; then
|
||||
break
|
||||
fi
|
||||
echo "Keycloak token request failed (attempt ${attempt})" >&2
|
||||
sleep $((attempt * 2))
|
||||
done
|
||||
if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" = "null" ]; then
|
||||
echo "Failed to fetch Keycloak admin token" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
|
||||
"$KC_URL/admin/realms/atlas/clients?clientId=soteria" || true)"
|
||||
CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)"
|
||||
|
||||
if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then
|
||||
create_payload='{"clientId":"soteria","enabled":true,"protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://backup.bstein.dev/oauth2/callback"],"webOrigins":["https://backup.bstein.dev"],"rootUrl":"https://backup.bstein.dev","baseUrl":"/"}'
|
||||
status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \
|
||||
-H "Authorization: Bearer ${ACCESS_TOKEN}" \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d "${create_payload}" \
|
||||
"$KC_URL/admin/realms/atlas/clients")"
|
||||
if [ "$status" != "201" ] && [ "$status" != "204" ] && [ "$status" != "409" ]; then
|
||||
echo "Keycloak client create failed (status ${status})" >&2
|
||||
exit 1
|
||||
fi
|
||||
CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
|
||||
"$KC_URL/admin/realms/atlas/clients?clientId=soteria" || true)"
|
||||
CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)"
|
||||
fi
|
||||
|
||||
if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then
|
||||
echo "Keycloak client soteria not found" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
SCOPE_ID="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
|
||||
"$KC_URL/admin/realms/atlas/client-scopes?search=groups" | jq -r '.[] | select(.name=="groups") | .id' 2>/dev/null | head -n1 || true)"
|
||||
if [ -z "$SCOPE_ID" ] || [ "$SCOPE_ID" = "null" ]; then
|
||||
echo "Keycloak client scope groups not found" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
DEFAULT_SCOPES="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
|
||||
"$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/default-client-scopes" || true)"
|
||||
OPTIONAL_SCOPES="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
|
||||
"$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes" || true)"
|
||||
|
||||
if ! echo "$DEFAULT_SCOPES" | jq -e '.[] | select(.name=="groups")' >/dev/null 2>&1 \
|
||||
&& ! echo "$OPTIONAL_SCOPES" | jq -e '.[] | select(.name=="groups")' >/dev/null 2>&1; then
|
||||
status="$(curl -sS -o /dev/null -w "%{http_code}" -X PUT \
|
||||
-H "Authorization: Bearer ${ACCESS_TOKEN}" \
|
||||
"$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes/${SCOPE_ID}")"
|
||||
if [ "$status" != "200" ] && [ "$status" != "201" ] && [ "$status" != "204" ]; then
|
||||
status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \
|
||||
-H "Authorization: Bearer ${ACCESS_TOKEN}" \
|
||||
"$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes/${SCOPE_ID}")"
|
||||
if [ "$status" != "200" ] && [ "$status" != "201" ] && [ "$status" != "204" ]; then
|
||||
echo "Failed to attach groups client scope to soteria (status ${status})" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
update_payload='{"enabled":true,"clientId":"soteria","protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://backup.bstein.dev/oauth2/callback"],"webOrigins":["https://backup.bstein.dev"],"rootUrl":"https://backup.bstein.dev","baseUrl":"/"}'
|
||||
status="$(curl -sS -o /dev/null -w "%{http_code}" -X PUT \
|
||||
-H "Authorization: Bearer ${ACCESS_TOKEN}" \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d "${update_payload}" \
|
||||
"$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}")"
|
||||
if [ "$status" != "204" ]; then
|
||||
echo "Keycloak client update failed (status ${status})" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
CLIENT_SECRET="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
|
||||
"$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/client-secret" | jq -r '.value' 2>/dev/null || true)"
|
||||
if [ -z "$CLIENT_SECRET" ] || [ "$CLIENT_SECRET" = "null" ]; then
|
||||
echo "Keycloak client secret not found" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}"
|
||||
vault_role="${VAULT_ROLE:-sso-secrets}"
|
||||
jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
|
||||
login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')"
|
||||
vault_token="$(curl -sS --request POST --data "${login_payload}" \
|
||||
"${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')"
|
||||
if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then
|
||||
echo "vault login failed" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
read_status="$(curl -sS -o /tmp/soteria-oidc-read.json -w "%{http_code}" \
|
||||
-H "X-Vault-Token: ${vault_token}" \
|
||||
"${vault_addr}/v1/kv/data/atlas/maintenance/soteria-oidc" || true)"
|
||||
COOKIE_SECRET=""
|
||||
if [ "${read_status}" = "200" ]; then
|
||||
COOKIE_SECRET="$(jq -r '.data.data.cookie_secret // empty' /tmp/soteria-oidc-read.json)"
|
||||
elif [ "${read_status}" != "404" ]; then
|
||||
echo "Vault read failed (status ${read_status})" >&2
|
||||
cat /tmp/soteria-oidc-read.json >&2 || true
|
||||
exit 1
|
||||
fi
|
||||
if [ -n "${COOKIE_SECRET}" ]; then
|
||||
length="$(printf '%s' "${COOKIE_SECRET}" | wc -c | tr -d ' ')"
|
||||
if [ "${length}" != "16" ] && [ "${length}" != "24" ] && [ "${length}" != "32" ]; then
|
||||
COOKIE_SECRET=""
|
||||
fi
|
||||
fi
|
||||
if [ -z "${COOKIE_SECRET}" ]; then
|
||||
COOKIE_SECRET="$(openssl rand -hex 16 | tr -d '\n')"
|
||||
fi
|
||||
|
||||
payload="$(jq -nc \
|
||||
--arg client_id "soteria" \
|
||||
--arg client_secret "${CLIENT_SECRET}" \
|
||||
--arg cookie_secret "${COOKIE_SECRET}" \
|
||||
'{data:{client_id:$client_id,client_secret:$client_secret,cookie_secret:$cookie_secret}}')"
|
||||
write_status="$(curl -sS -o /tmp/soteria-oidc-write.json -w "%{http_code}" -X POST \
|
||||
-H "X-Vault-Token: ${vault_token}" \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d "${payload}" "${vault_addr}/v1/kv/data/atlas/maintenance/soteria-oidc")"
|
||||
if [ "${write_status}" != "200" ] && [ "${write_status}" != "204" ]; then
|
||||
echo "Vault write failed (status ${write_status})" >&2
|
||||
cat /tmp/soteria-oidc-write.json >&2 || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
verify_status="$(curl -sS -o /tmp/soteria-oidc-verify.json -w "%{http_code}" \
|
||||
-H "X-Vault-Token: ${vault_token}" \
|
||||
"${vault_addr}/v1/kv/data/atlas/maintenance/soteria-oidc" || true)"
|
||||
if [ "${verify_status}" != "200" ]; then
|
||||
echo "Vault verify failed (status ${verify_status})" >&2
|
||||
cat /tmp/soteria-oidc-verify.json >&2 || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Soteria OIDC secret ready in Vault"
|
||||
@ -176,7 +176,6 @@ spec:
|
||||
logLevel: DEBUG
|
||||
nodeSelector:
|
||||
hardware: rpi5
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
resources:
|
||||
requests:
|
||||
cpu: 200m
|
||||
|
||||
@ -349,14 +349,6 @@ spec:
|
||||
value: "11 */6 * * *"
|
||||
- name: ARIADNE_SCHEDULE_PLATFORM_QUALITY_SUITE_PROBE
|
||||
value: "*/15 * * * *"
|
||||
- name: JENKINS_WORKSPACE_NAMESPACE
|
||||
value: jenkins
|
||||
- name: JENKINS_WORKSPACE_PVC_PREFIX
|
||||
value: pvc-workspace-
|
||||
- name: JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS
|
||||
value: "24"
|
||||
- name: JENKINS_WORKSPACE_CLEANUP_DRY_RUN
|
||||
value: "false"
|
||||
- name: METRICS_PATH
|
||||
value: "/metrics"
|
||||
resources:
|
||||
|
||||
@ -16,16 +16,6 @@ rules:
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- pods
|
||||
- persistentvolumeclaims
|
||||
- persistentvolumes
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- delete
|
||||
- apiGroups: ["longhorn.io"]
|
||||
resources:
|
||||
- volumes
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
|
||||
@ -37,9 +37,6 @@ resources:
|
||||
- node-image-sweeper-serviceaccount.yaml
|
||||
- node-image-sweeper-daemonset.yaml
|
||||
- metis-service.yaml
|
||||
- soteria-ingress.yaml
|
||||
- soteria-certificate.yaml
|
||||
- oauth2-proxy-soteria.yaml
|
||||
- oauth2-proxy-metis.yaml
|
||||
- metis-certificate.yaml
|
||||
- metis-ingress.yaml
|
||||
@ -49,7 +46,7 @@ images:
|
||||
- name: registry.bstein.dev/bstein/metis
|
||||
newTag: 0.1.0-9-amd64
|
||||
- name: registry.bstein.dev/bstein/soteria
|
||||
newTag: 0.1.0-21 # {"$imagepolicy": "maintenance:soteria:tag"}
|
||||
newTag: 0.1.0-11 # {"$imagepolicy": "maintenance:soteria:tag"}
|
||||
configMapGenerator:
|
||||
- name: disable-k3s-traefik-script
|
||||
namespace: maintenance
|
||||
|
||||
@ -1,121 +0,0 @@
|
||||
# services/maintenance/oauth2-proxy-soteria.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: oauth2-proxy-soteria
|
||||
namespace: maintenance
|
||||
labels:
|
||||
app: oauth2-proxy-soteria
|
||||
spec:
|
||||
ports:
|
||||
- name: http
|
||||
port: 80
|
||||
targetPort: 4180
|
||||
selector:
|
||||
app: oauth2-proxy-soteria
|
||||
|
||||
---
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: oauth2-proxy-soteria
|
||||
namespace: maintenance
|
||||
labels:
|
||||
app: oauth2-proxy-soteria
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: oauth2-proxy-soteria
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: oauth2-proxy-soteria
|
||||
annotations:
|
||||
vault.hashicorp.com/agent-inject: "true"
|
||||
vault.hashicorp.com/role: "maintenance"
|
||||
vault.hashicorp.com/agent-inject-secret-oidc-config: "kv/data/atlas/maintenance/soteria-oidc"
|
||||
vault.hashicorp.com/agent-inject-template-oidc-config: |
|
||||
{{- with secret "kv/data/atlas/maintenance/soteria-oidc" -}}
|
||||
client_id = "{{ .Data.data.client_id }}"
|
||||
client_secret = "{{ .Data.data.client_secret }}"
|
||||
cookie_secret = "{{ .Data.data.cookie_secret }}"
|
||||
{{- end -}}
|
||||
spec:
|
||||
serviceAccountName: maintenance-vault-sync
|
||||
nodeSelector:
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: kubernetes.io/arch
|
||||
operator: In
|
||||
values: ["amd64","arm64"]
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: hardware
|
||||
operator: In
|
||||
values: ["rpi5"]
|
||||
- weight: 100
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: kubernetes.io/hostname
|
||||
operator: NotIn
|
||||
values: ["titan-13","titan-15","titan-17","titan-19"]
|
||||
containers:
|
||||
- name: oauth2-proxy
|
||||
image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0
|
||||
imagePullPolicy: IfNotPresent
|
||||
args:
|
||||
- --provider=oidc
|
||||
- --config=/vault/secrets/oidc-config
|
||||
- --redirect-url=https://backup.bstein.dev/oauth2/callback
|
||||
- --oidc-issuer-url=https://sso.bstein.dev/realms/atlas
|
||||
- --scope=openid profile email groups
|
||||
- --email-domain=*
|
||||
- --allowed-group=admin
|
||||
- --allowed-group=/admin
|
||||
- --allowed-group=maintenance
|
||||
- --allowed-group=/maintenance
|
||||
- --set-xauthrequest=true
|
||||
- --pass-access-token=true
|
||||
- --set-authorization-header=true
|
||||
- --cookie-secure=true
|
||||
- --cookie-samesite=lax
|
||||
- --cookie-refresh=20m
|
||||
- --cookie-expire=168h
|
||||
- --insecure-oidc-allow-unverified-email=true
|
||||
- --upstream=http://soteria.maintenance.svc.cluster.local
|
||||
- --http-address=0.0.0.0:4180
|
||||
- --skip-provider-button=true
|
||||
- --approval-prompt=auto
|
||||
- --skip-jwt-bearer-tokens=true
|
||||
- --oidc-groups-claim=groups
|
||||
- --cookie-domain=backup.bstein.dev
|
||||
ports:
|
||||
- containerPort: 4180
|
||||
name: http
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /ping
|
||||
port: 4180
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /ping
|
||||
port: 4180
|
||||
initialDelaySeconds: 20
|
||||
periodSeconds: 20
|
||||
resources:
|
||||
requests:
|
||||
cpu: 25m
|
||||
memory: 64Mi
|
||||
limits:
|
||||
cpu: 250m
|
||||
memory: 256Mi
|
||||
@ -1,13 +0,0 @@
|
||||
# services/maintenance/soteria-certificate.yaml
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: Certificate
|
||||
metadata:
|
||||
name: backup-tls
|
||||
namespace: maintenance
|
||||
spec:
|
||||
secretName: backup-tls
|
||||
issuerRef:
|
||||
kind: ClusterIssuer
|
||||
name: letsencrypt
|
||||
dnsNames:
|
||||
- backup.bstein.dev
|
||||
@ -1,14 +0,0 @@
|
||||
# services/maintenance/soteria-configmap.yaml
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: soteria
|
||||
namespace: maintenance
|
||||
data:
|
||||
SOTERIA_BACKUP_DRIVER: longhorn
|
||||
SOTERIA_LONGHORN_URL: http://longhorn-backend.longhorn-system.svc:9500
|
||||
SOTERIA_LONGHORN_BACKUP_MODE: incremental
|
||||
SOTERIA_AUTH_REQUIRED: "true"
|
||||
SOTERIA_ALLOWED_GROUPS: admin,maintenance
|
||||
SOTERIA_BACKUP_MAX_AGE_HOURS: "24"
|
||||
SOTERIA_METRICS_REFRESH_SECONDS: "300"
|
||||
@ -1,76 +0,0 @@
|
||||
# services/maintenance/soteria-deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: soteria
|
||||
namespace: maintenance
|
||||
labels:
|
||||
app: soteria
|
||||
spec:
|
||||
replicas: 1
|
||||
revisionHistoryLimit: 3
|
||||
selector:
|
||||
matchLabels:
|
||||
app: soteria
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: soteria
|
||||
spec:
|
||||
serviceAccountName: soteria
|
||||
nodeSelector:
|
||||
kubernetes.io/arch: arm64
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 90
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: hardware
|
||||
operator: In
|
||||
values: ["rpi5"]
|
||||
- weight: 50
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: hardware
|
||||
operator: In
|
||||
values: ["rpi4"]
|
||||
containers:
|
||||
- name: soteria
|
||||
image: registry.bstein.dev/bstein/soteria:0.1.0-21
|
||||
imagePullPolicy: Always
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: soteria
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 8080
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: http
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 2
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /readyz
|
||||
port: http
|
||||
initialDelaySeconds: 2
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 2
|
||||
resources:
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 64Mi
|
||||
limits:
|
||||
cpu: 200m
|
||||
memory: 256Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop: ["ALL"]
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65532
|
||||
|
||||
@ -1,27 +0,0 @@
|
||||
# services/maintenance/soteria-ingress.yaml
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: soteria
|
||||
namespace: maintenance
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: traefik
|
||||
traefik.ingress.kubernetes.io/router.entrypoints: websecure
|
||||
traefik.ingress.kubernetes.io/router.tls: "true"
|
||||
traefik.ingress.kubernetes.io/router.middlewares: ""
|
||||
spec:
|
||||
ingressClassName: traefik
|
||||
tls:
|
||||
- hosts: ["backup.bstein.dev"]
|
||||
secretName: backup-tls
|
||||
rules:
|
||||
- host: backup.bstein.dev
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: oauth2-proxy-soteria
|
||||
port:
|
||||
number: 80
|
||||
@ -1,21 +0,0 @@
|
||||
# services/maintenance/soteria-service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: soteria
|
||||
namespace: maintenance
|
||||
labels:
|
||||
app: soteria
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "80"
|
||||
prometheus.io/path: "/metrics"
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: soteria
|
||||
ports:
|
||||
- name: http
|
||||
port: 80
|
||||
targetPort: http
|
||||
|
||||
@ -14,8 +14,6 @@ spec:
|
||||
labels:
|
||||
app: maintenance-vault-sync
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
serviceAccountName: maintenance-vault-sync
|
||||
containers:
|
||||
- name: sync
|
||||
|
||||
@ -1970,7 +1970,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc(max by (namespace, pvc) (pvc_backup_age_hours or on(namespace, pvc) ((1 - pvc_backup_health) * 999)))",
|
||||
"expr": "sort_desc(max by (namespace, pvc) (pvc_backup_age_hours))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}/{{pvc}}",
|
||||
"instant": true
|
||||
@ -2034,7 +2034,7 @@
|
||||
"targetBlank": true
|
||||
}
|
||||
],
|
||||
"description": "Oldest successful backup age in hours by PVC. PVCs with missing or unhealthy backup state are forced to 999h so the red bars stay visible in the overview."
|
||||
"description": "Oldest backup age in hours by PVC. This panel is reserved for the upcoming PVC backup health feed and will show no data until those metrics are published."
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
@ -3175,7 +3175,7 @@
|
||||
},
|
||||
{
|
||||
"id": 22,
|
||||
"type": "timeseries",
|
||||
"type": "bargauge",
|
||||
"title": "Nodes Closest to Full Astraios Disks",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
@ -3189,36 +3189,68 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astraios\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astraios\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
|
||||
"expr": "sort_desc(topk(12, avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astraios\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astraios\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent"
|
||||
"unit": "percent",
|
||||
"min": 0,
|
||||
"max": 100,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 50
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 75
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 91.5
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"displayMode": "gradient",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"last"
|
||||
]
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
},
|
||||
"timeFrom": "1w",
|
||||
"links": [
|
||||
{
|
||||
"title": "Open atlas-storage dashboard",
|
||||
"url": "/d/atlas-storage",
|
||||
"targetBlank": true
|
||||
}
|
||||
],
|
||||
"transformations": [
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@ -1979,7 +1979,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc(max by (namespace, pvc) (pvc_backup_age_hours or on(namespace, pvc) ((1 - pvc_backup_health) * 999)))",
|
||||
"expr": "sort_desc(max by (namespace, pvc) (pvc_backup_age_hours))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}/{{pvc}}",
|
||||
"instant": true
|
||||
@ -2043,7 +2043,7 @@ data:
|
||||
"targetBlank": true
|
||||
}
|
||||
],
|
||||
"description": "Oldest successful backup age in hours by PVC. PVCs with missing or unhealthy backup state are forced to 999h so the red bars stay visible in the overview."
|
||||
"description": "Oldest backup age in hours by PVC. This panel is reserved for the upcoming PVC backup health feed and will show no data until those metrics are published."
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
@ -3184,7 +3184,7 @@ data:
|
||||
},
|
||||
{
|
||||
"id": 22,
|
||||
"type": "timeseries",
|
||||
"type": "bargauge",
|
||||
"title": "Nodes Closest to Full Astraios Disks",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
@ -3198,36 +3198,68 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astraios\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astraios\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
|
||||
"expr": "sort_desc(topk(12, avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astraios\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astraios\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent"
|
||||
"unit": "percent",
|
||||
"min": 0,
|
||||
"max": 100,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 50
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 75
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 91.5
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"displayMode": "gradient",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"last"
|
||||
]
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
},
|
||||
"timeFrom": "1w",
|
||||
"links": [
|
||||
{
|
||||
"title": "Open atlas-storage dashboard",
|
||||
"url": "/d/atlas-storage",
|
||||
"targetBlank": true
|
||||
}
|
||||
],
|
||||
"transformations": [
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@ -18,7 +18,6 @@ spec:
|
||||
spec:
|
||||
nodeSelector:
|
||||
hardware: rpi5
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
containers:
|
||||
- name: collabora
|
||||
image: collabora/code@sha256:3c58d0e9bae75e4647467d0c7d91cb66f261d3e814709aed590b5c334a04db26
|
||||
|
||||
@ -9,4 +9,3 @@ spec:
|
||||
spec:
|
||||
nodeSelector:
|
||||
hardware: rpi5
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user