feat: add Ariadne service and glue scheduling

This commit is contained in:
Brad Stein 2026-01-19 16:58:02 -03:00
parent 791108723e
commit bb41c219f6
21 changed files with 685 additions and 1 deletions

View File

@ -336,6 +336,10 @@ GLUE_MISSING_ACTIVE = f"({GLUE_MISSING} unless on(namespace,cronjob) {GLUE_SUSPE
# Glue CronJob roll-ups built from expressions defined earlier in this module.
# NOTE(review): PromQL binary `+` yields no sample when either operand is an
# empty vector (e.g. count() over nothing) — confirm both sides always return
# a value, otherwise the stale count goes blank rather than showing a number.
GLUE_STALE_COUNT = f"(sum({GLUE_STALE_ACTIVE}) + count({GLUE_MISSING_ACTIVE}))"
GLUE_MISSING_COUNT = f"count({GLUE_MISSING_ACTIVE})"
GLUE_SUSPENDED_COUNT = f"sum({GLUE_SUSPENDED})"
# Ariadne task-run counters: 24h increase of ariadne_task_runs_total, grouped
# by the `task` label and split on the `status` label ("error" vs "ok").
ARIADNE_TASK_ERRORS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[24h]))'
ARIADNE_TASK_SUCCESS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="ok"}[24h]))'
# Seconds elapsed since the recorded last-success timestamp, divided by 3600
# to express the age in hours.
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600"
# Raw counter series, not rate()/increase(); the value is the running total.
ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total"
# Node names joined with "|" to form a regex alternation
# (presumably used in a PromQL `=~` matcher — confirm at the use sites).
GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"]
GPU_NODE_REGEX = "|".join(GPU_NODES)
# Per-router request rate over a 5 minute window.
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
@ -2230,6 +2234,39 @@ def build_testing_dashboard():
instant=True,
)
)
panels.append(
table_panel(
7,
"Ariadne Task Errors (24h)",
ARIADNE_TASK_ERRORS_24H,
{"h": 6, "w": 12, "x": 0, "y": 12},
unit="none",
transformations=sort_desc,
instant=True,
)
)
panels.append(
table_panel(
8,
"Ariadne Schedule Last Success (hours ago)",
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS,
{"h": 6, "w": 12, "x": 12, "y": 12},
unit="h",
transformations=sort_desc,
instant=True,
)
)
panels.append(
table_panel(
9,
"Ariadne Access Requests",
ARIADNE_ACCESS_REQUESTS,
{"h": 4, "w": 24, "x": 0, "y": 18},
unit="none",
transformations=sort_desc,
instant=True,
)
)
return {
"uid": "atlas-testing",

View File

@ -8,6 +8,7 @@ metadata:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/15 * * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3

View File

@ -29,3 +29,17 @@ subjects:
- kind: ServiceAccount
name: bstein-dev-home
namespace: bstein-dev-home
---
# Grants the ariadne ServiceAccount (maintenance namespace) the permissions of
# the Role bstein-dev-home-firefly-user-sync inside the finance namespace.
# The Role itself is defined outside this hunk.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  namespace: finance
  name: ariadne-firefly-user-sync
subjects:
  - kind: ServiceAccount
    namespace: maintenance
    name: ariadne
roleRef:
  kind: Role
  apiGroup: rbac.authorization.k8s.io
  name: bstein-dev-home-firefly-user-sync

View File

@ -8,7 +8,7 @@ rules:
- apiGroups: ["batch"]
resources: ["cronjobs"]
verbs: ["get"]
resourceNames: ["wger-user-sync"]
resourceNames: ["wger-user-sync", "wger-admin-ensure"]
- apiGroups: ["batch"]
resources: ["jobs"]
verbs: ["create", "get", "list", "watch"]
@ -29,3 +29,17 @@ subjects:
- kind: ServiceAccount
name: bstein-dev-home
namespace: bstein-dev-home
---
# Grants the ariadne ServiceAccount (maintenance namespace) the permissions of
# the Role bstein-dev-home-wger-user-sync inside the health namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  namespace: health
  name: ariadne-wger-user-sync
subjects:
  - kind: ServiceAccount
    namespace: maintenance
    name: ariadne
roleRef:
  kind: Role
  apiGroup: rbac.authorization.k8s.io
  name: bstein-dev-home-wger-user-sync

View File

@ -8,6 +8,7 @@ metadata:
atlas.bstein.dev/glue: "true"
spec:
schedule: "15 3 * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3

View File

@ -331,6 +331,8 @@ spec:
# Ensure basic realm groups exist for provisioning.
ensure_group("dev")
ensure_group("admin")
ensure_group("demo")
ensure_group("test")
planka_group = ensure_group("planka-users")
if planka_group and planka_group.get("id"):

View File

@ -8,6 +8,7 @@ metadata:
atlas.bstein.dev/glue: "true"
spec:
schedule: "30 4 * * *"
suspend: true
concurrencyPolicy: Forbid
jobTemplate:
spec:

View File

@ -0,0 +1,181 @@
# services/maintenance/ariadne-deployment.yaml
# Deployment for the ariadne service: one replica running a uvicorn app on
# port 8080 in the maintenance namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
name: ariadne
namespace: maintenance
spec:
# NOTE(review): no `strategy` is set, so the default RollingUpdate applies and
# an old and a new pod can run side by side during a rollout. The
# ARIADNE_SCHEDULE_* variables below suggest the process runs its own
# cron-style scheduler — confirm overlapping instances cannot double-fire a
# schedule, or consider `strategy: {type: Recreate}`.
replicas: 1
revisionHistoryLimit: 3
selector:
matchLabels:
app: ariadne
template:
metadata:
labels:
app: ariadne
annotations:
# Scrape hints: metrics are served on port 8080 at /metrics.
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics"
# Vault Agent injection using Vault role "maintenance". The template below
# is rendered to the ariadne-env.sh secret file, which the container command
# sources from /vault/secrets/ariadne-env.sh before starting the app.
# NOTE(review): secret values are interpolated inside double quotes in a
# sourced shell script; a value containing `"`, `$`, a backtick or a
# backslash would be altered or break sourcing — confirm the stored values
# never contain those characters.
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/role: "maintenance"
vault.hashicorp.com/agent-inject-secret-ariadne-env.sh: "kv/data/atlas/portal/atlas-portal-db"
vault.hashicorp.com/agent-inject-template-ariadne-env.sh: |
{{ with secret "kv/data/atlas/portal/atlas-portal-db" }}
export PORTAL_DATABASE_URL="{{ .Data.data.PORTAL_DATABASE_URL }}"
{{ end }}
{{ with secret "kv/data/atlas/portal/bstein-dev-home-keycloak-admin" }}
export KEYCLOAK_ADMIN_CLIENT_SECRET="{{ .Data.data.client_secret }}"
{{ end }}
{{ with secret "kv/data/atlas/mailu/mailu-db-secret" }}
export MAILU_DB_NAME="{{ .Data.data.database }}"
export MAILU_DB_USER="{{ .Data.data.username }}"
export MAILU_DB_PASSWORD="{{ .Data.data.password }}"
{{ end }}
{{ with secret "kv/data/atlas/mailu/mailu-initial-account-secret" }}
export SMTP_HOST="mailu-front.mailu-mailserver.svc.cluster.local"
export SMTP_PORT="587"
export SMTP_STARTTLS="true"
export SMTP_USE_TLS="false"
export SMTP_USERNAME="no-reply-portal@bstein.dev"
export SMTP_PASSWORD="{{ .Data.data.password }}"
export SMTP_FROM="no-reply-portal@bstein.dev"
{{ end }}
spec:
# Pod spec: runs as the ariadne ServiceAccount and is pinned to arm64 worker
# nodes through the nodeSelector.
serviceAccountName: ariadne
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: ariadne
image: registry.bstein.dev/bstein/ariadne:0.1.0
imagePullPolicy: Always
# Source the Vault-rendered env file, then `exec` uvicorn so it replaces the
# shell as the container's main process.
command: ["/bin/sh", "-c"]
args:
- >-
. /vault/secrets/ariadne-env.sh
&& exec uvicorn ariadne.app:app --host 0.0.0.0 --port 8080
ports:
- name: http
containerPort: 8080
env:
# Keycloak: the public URL and issuer use sso.bstein.dev, while the JWKS and
# admin endpoints point at the in-cluster service over plain HTTP.
- name: KEYCLOAK_URL
value: https://sso.bstein.dev
- name: KEYCLOAK_REALM
value: atlas
- name: KEYCLOAK_CLIENT_ID
value: bstein-dev-home
- name: KEYCLOAK_ISSUER
value: https://sso.bstein.dev/realms/atlas
- name: KEYCLOAK_JWKS_URL
value: http://keycloak.sso.svc.cluster.local/realms/atlas/protocol/openid-connect/certs
- name: KEYCLOAK_ADMIN_URL
value: http://keycloak.sso.svc.cluster.local
- name: KEYCLOAK_ADMIN_REALM
value: atlas
- name: KEYCLOAK_ADMIN_CLIENT_ID
value: bstein-dev-home-admin
# Portal URL plus the user/group names used for access decisions.
- name: PORTAL_PUBLIC_BASE_URL
value: https://bstein.dev
- name: PORTAL_ADMIN_USERS
value: bstein
- name: PORTAL_ADMIN_GROUPS
value: admin
- name: ACCOUNT_ALLOWED_GROUPS
value: dev,admin
- name: ALLOWED_FLAG_GROUPS
value: demo,test
- name: DEFAULT_USER_GROUPS
value: dev
# Mailu: host/port are set here; database name, user and password are
# exported by the Vault-rendered env file above.
- name: MAILU_DOMAIN
value: bstein.dev
- name: MAILU_SYNC_URL
value: http://mailu-sync-listener.mailu-mailserver.svc.cluster.local:8080/events
- name: MAILU_MAILBOX_WAIT_TIMEOUT_SEC
value: "60"
- name: MAILU_DB_HOST
value: postgres-service.postgres.svc.cluster.local
- name: MAILU_DB_PORT
value: "5432"
# Namespaces and CronJob names for the Nextcloud, wger and Firefly syncs.
# The *_WAIT_TIMEOUT_SEC values are presumably how long to wait for a
# triggered Job — confirm against the application code.
- name: NEXTCLOUD_NAMESPACE
value: nextcloud
- name: NEXTCLOUD_MAIL_SYNC_CRONJOB
value: nextcloud-mail-sync
- name: NEXTCLOUD_MAIL_SYNC_WAIT_TIMEOUT_SEC
value: "90"
- name: NEXTCLOUD_MAIL_SYNC_JOB_TTL_SEC
value: "3600"
- name: WGER_NAMESPACE
value: health
- name: WGER_USER_SYNC_CRONJOB
value: wger-user-sync
- name: WGER_ADMIN_CRONJOB
value: wger-admin-ensure
- name: WGER_USER_SYNC_WAIT_TIMEOUT_SEC
value: "90"
- name: FIREFLY_NAMESPACE
value: finance
- name: FIREFLY_USER_SYNC_CRONJOB
value: firefly-user-sync
- name: FIREFLY_USER_SYNC_WAIT_TIMEOUT_SEC
value: "90"
# Vaultwarden: pod selector and port, service host, and the Secret name/key
# that hold the admin token, followed by session/back-off tuning values.
- name: VAULTWARDEN_NAMESPACE
value: vaultwarden
- name: VAULTWARDEN_POD_LABEL
value: app=vaultwarden
- name: VAULTWARDEN_POD_PORT
value: "80"
- name: VAULTWARDEN_SERVICE_HOST
value: vaultwarden-service.vaultwarden.svc.cluster.local
- name: VAULTWARDEN_ADMIN_SECRET_NAME
value: vaultwarden-admin
- name: VAULTWARDEN_ADMIN_SECRET_KEY
value: ADMIN_TOKEN
- name: VAULTWARDEN_ADMIN_SESSION_TTL_SEC
value: "900"
- name: VAULTWARDEN_ADMIN_RATE_LIMIT_BACKOFF_SEC
value: "600"
- name: VAULTWARDEN_RETRY_COOLDOWN_SEC
value: "1800"
- name: VAULTWARDEN_FAILURE_BAILOUT
value: "2"
# Ariadne's own polling intervals and schedules; the schedule values are
# five-field cron expressions.
- name: ARIADNE_PROVISION_POLL_INTERVAL_SEC
value: "5"
- name: ARIADNE_PROVISION_RETRY_COOLDOWN_SEC
value: "30"
- name: ARIADNE_SCHEDULE_TICK_SEC
value: "5"
- name: ARIADNE_SCHEDULE_MAILU_SYNC
value: "30 4 * * *"
- name: ARIADNE_SCHEDULE_NEXTCLOUD_SYNC
value: "0 5 * * *"
- name: ARIADNE_SCHEDULE_VAULTWARDEN_SYNC
value: "*/15 * * * *"
- name: ARIADNE_SCHEDULE_WGER_ADMIN
value: "15 3 * * *"
- name: WELCOME_EMAIL_ENABLED
value: "true"
- name: K8S_API_TIMEOUT_SEC
value: "5"
- name: METRICS_PATH
value: "/metrics"
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi
# Liveness and readiness both issue GET /health against the named http port.
livenessProbe:
httpGet:
path: /health
port: http
initialDelaySeconds: 10
periodSeconds: 10
readinessProbe:
httpGet:
path: /health
port: http
initialDelaySeconds: 5
periodSeconds: 10

View File

@ -0,0 +1,13 @@
# services/maintenance/ariadne-service.yaml
# Exposes pods labelled app=ariadne on port 80, forwarding to the container
# port named "http".
apiVersion: v1
kind: Service
metadata:
  namespace: maintenance
  name: ariadne
spec:
  ports:
    - name: http
      targetPort: http
      port: 80
  selector: {app: ariadne}

View File

@ -0,0 +1,8 @@
# services/maintenance/ariadne-serviceaccount.yaml
# Identity for the ariadne pods; pulls images with the harbor-regcred Secret.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: maintenance
  name: ariadne
imagePullSecrets:
  - {name: harbor-regcred}

View File

@ -3,10 +3,16 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- namespace.yaml
- secretproviderclass.yaml
- vault-serviceaccount.yaml
- vault-sync-deployment.yaml
- ariadne-serviceaccount.yaml
- disable-k3s-traefik-serviceaccount.yaml
- k3s-traefik-cleanup-rbac.yaml
- node-nofile-serviceaccount.yaml
- pod-cleaner-rbac.yaml
- ariadne-deployment.yaml
- ariadne-service.yaml
- disable-k3s-traefik-daemonset.yaml
- k3s-traefik-cleanup-job.yaml
- node-nofile-daemonset.yaml

View File

@ -0,0 +1,21 @@
# services/maintenance/secretproviderclass.yaml
# Reads the Harbor pull credential from Vault (role "maintenance") through the
# Secrets Store CSI driver and mirrors it into the harbor-regcred Secret of
# type kubernetes.io/dockerconfigjson.
apiVersion: secrets-store.csi.x-k8s.io/v1
kind: SecretProviderClass
metadata:
  namespace: maintenance
  name: maintenance-vault
spec:
  provider: vault
  # Mapping from the fetched object to the key of the synced Kubernetes Secret.
  secretObjects:
    - type: kubernetes.io/dockerconfigjson
      secretName: harbor-regcred
      data:
        - key: .dockerconfigjson
          objectName: harbor-pull__dockerconfigjson
  parameters:
    roleName: maintenance
    vaultAddress: http://vault.vault.svc.cluster.local:8200
    # `objects` is passed to the provider as a string, hence the literal block.
    objects: |
      - objectName: "harbor-pull__dockerconfigjson"
        secretPath: "kv/data/atlas/harbor-pull/maintenance"
        secretKey: "dockerconfigjson"

View File

@ -0,0 +1,6 @@
# services/maintenance/vault-serviceaccount.yaml
# ServiceAccount used by the maintenance-vault-sync pod.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: maintenance
  name: maintenance-vault-sync

View File

@ -0,0 +1,34 @@
# services/maintenance/vault-sync-deployment.yaml
# Idle pod whose only job is to keep the maintenance-vault SecretProviderClass
# mounted through the Secrets Store CSI driver; the container just sleeps.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: maintenance
  name: maintenance-vault-sync
spec:
  replicas: 1
  selector:
    matchLabels: {app: maintenance-vault-sync}
  template:
    metadata:
      labels: {app: maintenance-vault-sync}
    spec:
      serviceAccountName: maintenance-vault-sync
      volumes:
        - name: vault-secrets
          csi:
            driver: secrets-store.csi.k8s.io
            readOnly: true
            volumeAttributes:
              secretProviderClass: maintenance-vault
      containers:
        - name: sync
          image: alpine:3.20
          command:
            - /bin/sh
            - "-c"
          args:
            - sleep infinity
          volumeMounts:
            - name: vault-secrets
              mountPath: /vault/secrets
              readOnly: true

View File

@ -321,6 +321,156 @@
}
}
]
},
{
"id": 7,
"type": "table",
"title": "Ariadne Task Errors (24h)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
"w": 12,
"x": 0,
"y": 12
},
"targets": [
{
"expr": "sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[24h]))",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "none",
"custom": {
"filterable": true
}
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
},
{
"id": 8,
"type": "table",
"title": "Ariadne Schedule Last Success (hours ago)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
"w": 12,
"x": 12,
"y": 12
},
"targets": [
{
"expr": "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "h",
"custom": {
"filterable": true
}
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
},
{
"id": 9,
"type": "table",
"title": "Ariadne Access Requests",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 4,
"w": 24,
"x": 0,
"y": 18
},
"targets": [
{
"expr": "ariadne_access_requests_total",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "none",
"custom": {
"filterable": true
}
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
}
],
"time": {

View File

@ -330,6 +330,156 @@ data:
}
}
]
},
{
"id": 7,
"type": "table",
"title": "Ariadne Task Errors (24h)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
"w": 12,
"x": 0,
"y": 12
},
"targets": [
{
"expr": "sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[24h]))",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "none",
"custom": {
"filterable": true
}
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
},
{
"id": 8,
"type": "table",
"title": "Ariadne Schedule Last Success (hours ago)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
"w": 12,
"x": 12,
"y": 12
},
"targets": [
{
"expr": "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "h",
"custom": {
"filterable": true
}
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
},
{
"id": 9,
"type": "table",
"title": "Ariadne Access Requests",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 4,
"w": 24,
"x": 0,
"y": 18
},
"targets": [
{
"expr": "ariadne_access_requests_total",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "none",
"custom": {
"filterable": true
}
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
}
],
"time": {

View File

@ -8,6 +8,7 @@ metadata:
atlas.bstein.dev/glue: "true"
spec:
schedule: "0 5 * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 3
failedJobsHistoryLimit: 1

View File

@ -27,3 +27,16 @@ subjects:
- kind: ServiceAccount
name: bstein-dev-home
namespace: bstein-dev-home
---
# Grants the ariadne ServiceAccount (maintenance namespace) the permissions of
# the Role bstein-dev-home-nextcloud-mail-sync.
# The namespace is stated explicitly, matching the sibling ariadne bindings in
# finance and health, so the binding does not depend on an outer kustomize
# namespace override to land next to its Role.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: ariadne-nextcloud-mail-sync
  namespace: nextcloud
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: bstein-dev-home-nextcloud-mail-sync
subjects:
  - kind: ServiceAccount
    name: ariadne
    namespace: maintenance

View File

@ -230,6 +230,8 @@ write_policy_and_role "crypto" "crypto" "crypto-vault-sync" \
"crypto/* harbor-pull/crypto" ""
write_policy_and_role "health" "health" "health-vault-sync" \
"health/*" ""
# Vault policy/role "maintenance" for the maintenance namespace, with read
# access to the portal, mailu and harbor-pull paths listed below.
# (Argument order is presumably: role, namespace, service account(s), read
# paths, write paths — TODO confirm against write_policy_and_role.)
# NOTE(review): only the "ariadne" service account is named here, but the
# maintenance-vault SecretProviderClass (roleName "maintenance") is mounted by
# a pod running as "maintenance-vault-sync". Confirm that account can also
# authenticate to this role, otherwise the harbor-regcred sync will fail.
write_policy_and_role "maintenance" "maintenance" "ariadne" \
"portal/atlas-portal-db portal/bstein-dev-home-keycloak-admin mailu/mailu-db-secret mailu/mailu-initial-account-secret harbor-pull/maintenance" ""
write_policy_and_role "finance" "finance" "finance-vault" \
"finance/* shared/postmark-relay" ""
write_policy_and_role "finance-secrets" "finance" "finance-secrets-ensure" \

View File

@ -0,0 +1,28 @@
# services/vaultwarden/ariadne-rbac.yaml
# Role: read-only access for ariadne inside the vaultwarden namespace —
# `get` on the vaultwarden-admin Secret only, plus `get`/`list` on pods.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  namespace: vaultwarden
  name: ariadne-vaultwarden-admin-reader
rules:
  - apiGroups:
      - ""
    resources:
      - secrets
    resourceNames:
      - vaultwarden-admin
    verbs:
      - get
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - get
      - list
---
# Binds the Role above to the ariadne ServiceAccount in the maintenance
# namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  namespace: vaultwarden
  name: ariadne-vaultwarden-admin-reader
subjects:
  - kind: ServiceAccount
    namespace: maintenance
    name: ariadne
roleRef:
  kind: Role
  apiGroup: rbac.authorization.k8s.io
  name: ariadne-vaultwarden-admin-reader

View File

@ -5,6 +5,7 @@ namespace: vaultwarden
resources:
- namespace.yaml
- serviceaccount.yaml
- ariadne-rbac.yaml
- pvc.yaml
- deployment.yaml
- service.yaml