From 1fedb5ecbecd7eea132ce3f1a2beba326c5f89ec Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 20 Jan 2026 23:03:39 -0300 Subject: [PATCH] maintenance: wire ariadne db and dashboards --- scripts/dashboards_render_atlas.py | 25 +++ services/maintenance/ariadne-deployment.yaml | 165 ++++++++++++++---- services/maintenance/ariadne-rbac.yaml | 14 +- .../monitoring/dashboards/atlas-testing.json | 113 ++++++++++++ .../monitoring/grafana-dashboard-testing.yaml | 113 ++++++++++++ .../vault/scripts/vault_k8s_auth_configure.sh | 2 +- 6 files changed, 399 insertions(+), 33 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 116bf21..a3fb372 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -340,6 +340,8 @@ ARIADNE_TASK_ERRORS_24H = 'sum by (task) (increase(ariadne_task_runs_total{statu ARIADNE_TASK_SUCCESS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="ok"}[24h]))' ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600" ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total" +ARIADNE_CI_COVERAGE = 'ariadne_ci_coverage_percent{repo="ariadne"}' +ARIADNE_CI_TESTS = 'ariadne_ci_tests_total{repo="ariadne"}' GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"] GPU_NODE_REGEX = "|".join(GPU_NODES) TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))" @@ -2267,6 +2269,29 @@ def build_testing_dashboard(): instant=True, ) ) + panels.append( + stat_panel( + 10, + "Ariadne CI Coverage (%)", + ARIADNE_CI_COVERAGE, + {"h": 4, "w": 6, "x": 0, "y": 22}, + unit="percent", + decimals=1, + instant=True, + legend="{{branch}}", + ) + ) + panels.append( + table_panel( + 11, + "Ariadne CI Tests (latest)", + ARIADNE_CI_TESTS, + {"h": 6, "w": 18, "x": 6, "y": 22}, + unit="none", + transformations=sort_desc, + instant=True, + ) + ) return { "uid": "atlas-testing", diff --git a/services/maintenance/ariadne-deployment.yaml b/services/maintenance/ariadne-deployment.yaml index cd0d38c..57ce72b 100644 --- a/services/maintenance/ariadne-deployment.yaml +++ b/services/maintenance/ariadne-deployment.yaml @@ -20,14 +20,30 @@ spec: prometheus.io/path: "/metrics" vault.hashicorp.com/agent-inject: "true" vault.hashicorp.com/role: "maintenance" - vault.hashicorp.com/agent-inject-secret-ariadne-env.sh: "kv/data/atlas/portal/atlas-portal-db" + vault.hashicorp.com/agent-inject-secret-ariadne-env.sh: "kv/data/atlas/maintenance/ariadne-db" vault.hashicorp.com/agent-inject-template-ariadne-env.sh: | - {{ with secret "kv/data/atlas/portal/atlas-portal-db" }} - export PORTAL_DATABASE_URL="{{ .Data.data.PORTAL_DATABASE_URL }}" + {{ with secret "kv/data/atlas/maintenance/ariadne-db" }} + export PORTAL_DATABASE_URL="{{ .Data.data.database_url }}" {{ end }} {{ with secret "kv/data/atlas/portal/bstein-dev-home-keycloak-admin" }} export KEYCLOAK_ADMIN_CLIENT_SECRET="{{ .Data.data.client_secret }}" {{ end }} + {{ with secret "kv/data/atlas/nextcloud/nextcloud-db" }} + export NEXTCLOUD_DB_NAME="{{ .Data.data.database }}" + export NEXTCLOUD_DB_USER="{{ index .Data.data "db-username" }}" + export NEXTCLOUD_DB_PASSWORD="{{ index .Data.data "db-password" }}" + {{ end }} + {{ with secret "kv/data/atlas/nextcloud/nextcloud-admin" }} + export NEXTCLOUD_ADMIN_USER="{{ index .Data.data "admin-user" }}" + export NEXTCLOUD_ADMIN_PASSWORD="{{ index .Data.data "admin-password" }}" + {{ end }} + {{ with secret "kv/data/atlas/health/wger-admin" }} + export WGER_ADMIN_USERNAME="{{ .Data.data.username }}" + export WGER_ADMIN_PASSWORD="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/finance/firefly-secrets" }} + export FIREFLY_CRON_TOKEN="{{ .Data.data.STATIC_CRON_TOKEN }}" + {{ end }} {{ with secret "kv/data/atlas/mailu/mailu-db-secret" }} export MAILU_DB_NAME="{{ .Data.data.database }}" export MAILU_DB_USER="{{ .Data.data.username }}" @@ -42,6 +58,35 @@ spec: export SMTP_PASSWORD="{{ .Data.data.password }}" export SMTP_FROM="no-reply-portal@bstein.dev" {{ end }} + {{ with secret "kv/data/atlas/comms/mas-admin-client-runtime" }} + export COMMS_MAS_ADMIN_CLIENT_SECRET="{{ .Data.data.client_secret }}" + {{ end }} + {{ with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" }} + export COMMS_BOT_PASSWORD="{{ index .Data.data "bot-password" }}" + export COMMS_SEEDER_PASSWORD="{{ index .Data.data "seeder-password" }}" + {{ end }} + {{ with secret "kv/data/atlas/comms/synapse-db" }} + export COMMS_SYNAPSE_DB_PASSWORD="{{ .Data.data.POSTGRES_PASSWORD }}" + {{ end }} + {{ with secret "kv/data/atlas/vault/vault-oidc-config" }} + export VAULT_OIDC_DISCOVERY_URL="{{ .Data.data.discovery_url }}" + export VAULT_OIDC_CLIENT_ID="{{ .Data.data.client_id }}" + export VAULT_OIDC_CLIENT_SECRET="{{ .Data.data.client_secret }}" + export VAULT_OIDC_DEFAULT_ROLE="{{ .Data.data.default_role }}" + export VAULT_OIDC_SCOPES="{{ .Data.data.scopes }}" + export VAULT_OIDC_USER_CLAIM="{{ .Data.data.user_claim }}" + export VAULT_OIDC_GROUPS_CLAIM="{{ .Data.data.groups_claim }}" + export VAULT_OIDC_TOKEN_POLICIES="{{ .Data.data.token_policies }}" + export VAULT_OIDC_ADMIN_GROUP="{{ .Data.data.admin_group }}" + export VAULT_OIDC_ADMIN_POLICIES="{{ .Data.data.admin_policies }}" + export VAULT_OIDC_DEV_GROUP="{{ .Data.data.dev_group }}" + export VAULT_OIDC_DEV_POLICIES="{{ .Data.data.dev_policies }}" + export VAULT_OIDC_USER_GROUP="{{ .Data.data.user_group }}" + export VAULT_OIDC_USER_POLICIES="{{ .Data.data.user_policies }}" + export VAULT_OIDC_REDIRECT_URIS="{{ .Data.data.redirect_uris }}" + export VAULT_OIDC_BOUND_AUDIENCES="{{ .Data.data.bound_audiences }}" + export VAULT_OIDC_BOUND_CLAIMS_TYPE="{{ .Data.data.bound_claims_type }}" + {{ end }} spec: serviceAccountName: ariadne nodeSelector: @@ -92,6 +137,8 @@ spec: value: dev - name: MAILU_DOMAIN value: bstein.dev + - name: MAILU_HOST + value: mail.bstein.dev - name: MAILU_SYNC_URL value: http://mailu-sync-listener.mailu-mailserver.svc.cluster.local:8080/events - name: MAILU_MAILBOX_WAIT_TIMEOUT_SEC @@ -102,46 +149,84 @@ spec: value: "5432" - name: NEXTCLOUD_NAMESPACE value: nextcloud - - name: NEXTCLOUD_MAIL_SYNC_CRONJOB - value: nextcloud-mail-sync - - name: NEXTCLOUD_MAIL_SYNC_WAIT_TIMEOUT_SEC - value: "90" - - name: NEXTCLOUD_MAIL_SYNC_JOB_TTL_SEC - value: "3600" + - name: NEXTCLOUD_POD_LABEL + value: app=nextcloud + - name: NEXTCLOUD_CONTAINER + value: nextcloud + - name: NEXTCLOUD_EXEC_TIMEOUT_SEC + value: "120" + - name: NEXTCLOUD_URL + value: https://cloud.bstein.dev + - name: NEXTCLOUD_DB_HOST + value: postgres-service.postgres.svc.cluster.local + - name: NEXTCLOUD_DB_PORT + value: "5432" - name: WGER_NAMESPACE value: health - - name: WGER_USER_SYNC_CRONJOB - value: wger-user-sync - - name: WGER_ADMIN_CRONJOB - value: wger-admin-ensure - name: WGER_USER_SYNC_WAIT_TIMEOUT_SEC value: "90" + - name: WGER_POD_LABEL + value: app=wger + - name: WGER_CONTAINER + value: wger + - name: WGER_ADMIN_EMAIL + value: brad@bstein.dev - name: FIREFLY_NAMESPACE value: finance - - name: FIREFLY_USER_SYNC_CRONJOB - value: firefly-user-sync - name: FIREFLY_USER_SYNC_WAIT_TIMEOUT_SEC value: "90" + - name: FIREFLY_POD_LABEL + value: app=firefly + - name: FIREFLY_CONTAINER + value: firefly + - name: FIREFLY_CRON_BASE_URL + value: http://firefly.finance.svc.cluster.local/api/v1/cron + - name: FIREFLY_CRON_TIMEOUT_SEC + value: "30" - name: VAULT_NAMESPACE value: vault - - name: VAULT_K8S_AUTH_CRONJOB - value: vault-k8s-auth-config - - name: VAULT_OIDC_CRONJOB - value: vault-oidc-config - - name: VAULT_JOB_WAIT_TIMEOUT_SEC - value: "120" + - name: VAULT_ADDR + value: http://vault.vault.svc.cluster.local:8200 + - name: VAULT_K8S_ROLE + value: vault-admin + - name: VAULT_K8S_ROLE_TTL + value: 1h - name: COMMS_NAMESPACE value: comms - - name: COMMS_GUEST_NAME_CRONJOB - value: guest-name-randomizer - - name: COMMS_PIN_INVITE_CRONJOB - value: pin-othrys-invite - - name: COMMS_RESET_ROOM_CRONJOB - value: othrys-room-reset - - name: COMMS_SEED_ROOM_CRONJOB - value: seed-othrys-room - - name: COMMS_JOB_WAIT_TIMEOUT_SEC - value: "60" + - name: COMMS_SYNAPSE_BASE + value: http://othrys-synapse-matrix-synapse:8008 + - name: COMMS_AUTH_BASE + value: http://matrix-authentication-service:8080 + - name: COMMS_MAS_ADMIN_API_BASE + value: http://matrix-authentication-service:8081/api/admin/v1 + - name: COMMS_MAS_TOKEN_URL + value: http://matrix-authentication-service:8080/oauth2/token + - name: COMMS_MAS_ADMIN_CLIENT_ID + value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM + - name: COMMS_SERVER_NAME + value: live.bstein.dev + - name: COMMS_ROOM_ALIAS + value: "#othrys:live.bstein.dev" + - name: COMMS_ROOM_NAME + value: Othrys + - name: COMMS_PIN_MESSAGE + value: "Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join and choose 'Continue' -> 'Join as guest'." + - name: COMMS_SEEDER_USER + value: othrys-seeder + - name: COMMS_BOT_USER + value: atlasbot + - name: COMMS_SYNAPSE_DB_HOST + value: postgres-service.postgres.svc.cluster.local + - name: COMMS_SYNAPSE_DB_PORT + value: "5432" + - name: COMMS_SYNAPSE_DB_NAME + value: synapse + - name: COMMS_SYNAPSE_DB_USER + value: synapse + - name: COMMS_TIMEOUT_SEC + value: "30" + - name: COMMS_GUEST_STALE_DAYS + value: "14" - name: VAULTWARDEN_NAMESPACE value: vaultwarden - name: VAULTWARDEN_POD_LABEL @@ -172,10 +257,22 @@ spec: value: "30 4 * * *" - name: ARIADNE_SCHEDULE_NEXTCLOUD_SYNC value: "0 5 * * *" + - name: ARIADNE_SCHEDULE_NEXTCLOUD_CRON + value: "*/5 * * * *" + - name: ARIADNE_SCHEDULE_NEXTCLOUD_MAINTENANCE + value: "30 4 * * *" - name: ARIADNE_SCHEDULE_VAULTWARDEN_SYNC value: "*/15 * * * *" - name: ARIADNE_SCHEDULE_WGER_ADMIN value: "15 3 * * *" + - name: ARIADNE_SCHEDULE_FIREFLY_CRON + value: "0 3 * * *" + - name: ARIADNE_SCHEDULE_POD_CLEANER + value: "0 * * * *" + - name: ARIADNE_SCHEDULE_OPENSEARCH_PRUNE + value: "23 3 * * *" + - name: ARIADNE_SCHEDULE_IMAGE_SWEEPER + value: "30 4 * * 0" - name: ARIADNE_SCHEDULE_VAULT_K8S_AUTH value: "*/15 * * * *" - name: ARIADNE_SCHEDULE_VAULT_OIDC @@ -192,6 +289,12 @@ spec: value: "true" - name: K8S_API_TIMEOUT_SEC value: "5" + - name: OPENSEARCH_URL + value: http://opensearch-master.logging.svc.cluster.local:9200 + - name: OPENSEARCH_LIMIT_BYTES + value: "1099511627776" + - name: OPENSEARCH_INDEX_PATTERNS + value: kube-*,journald-*,trace-analytics-* - name: METRICS_PATH value: "/metrics" resources: diff --git a/services/maintenance/ariadne-rbac.yaml b/services/maintenance/ariadne-rbac.yaml index 8d2a2a9..e2f08c9 100644 --- a/services/maintenance/ariadne-rbac.yaml +++ b/services/maintenance/ariadne-rbac.yaml @@ -6,13 +6,25 @@ metadata: rules: - apiGroups: ["batch"] resources: - - cronjobs - jobs verbs: - get - list - watch - create + - apiGroups: [""] + resources: + - pods + verbs: + - get + - list + - watch + - delete + - apiGroups: [""] + resources: + - pods/exec + verbs: + - create --- apiVersion: rbac.authorization.k8s.io/v1 diff --git a/services/monitoring/dashboards/atlas-testing.json b/services/monitoring/dashboards/atlas-testing.json index c9c0c9a..b76f909 100644 --- a/services/monitoring/dashboards/atlas-testing.json +++ b/services/monitoring/dashboards/atlas-testing.json @@ -471,6 +471,119 @@ } } ] + }, + { + "id": 10, + "type": "stat", + "title": "Ariadne CI Coverage (%)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 22 + }, + "targets": [ + { + "expr": "ariadne_ci_coverage_percent{repo=\"ariadne\"}", + "refId": "A", + "legendFormat": "{{branch}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 11, + "type": "table", + "title": "Ariadne CI Tests (latest)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 18, + "x": 6, + "y": 22 + }, + "targets": [ + { + "expr": "ariadne_ci_tests_total{repo=\"ariadne\"}", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] } ], "time": { diff --git a/services/monitoring/grafana-dashboard-testing.yaml b/services/monitoring/grafana-dashboard-testing.yaml index 7746f16..09c29a4 100644 --- a/services/monitoring/grafana-dashboard-testing.yaml +++ b/services/monitoring/grafana-dashboard-testing.yaml @@ -480,6 +480,119 @@ data: } } ] + }, + { + "id": 10, + "type": "stat", + "title": "Ariadne CI Coverage (%)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 22 + }, + "targets": [ + { + "expr": "ariadne_ci_coverage_percent{repo=\"ariadne\"}", + "refId": "A", + "legendFormat": "{{branch}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 11, + "type": "table", + "title": "Ariadne CI Tests (latest)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 18, + "x": 6, + "y": 22 + }, + "targets": [ + { + "expr": "ariadne_ci_tests_total{repo=\"ariadne\"}", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] } ], "time": { diff --git a/services/vault/scripts/vault_k8s_auth_configure.sh b/services/vault/scripts/vault_k8s_auth_configure.sh index a5ccb61..c14c5ec 100644 --- a/services/vault/scripts/vault_k8s_auth_configure.sh +++ b/services/vault/scripts/vault_k8s_auth_configure.sh @@ -231,7 +231,7 @@ write_policy_and_role "crypto" "crypto" "crypto-vault-sync" \ write_policy_and_role "health" "health" "health-vault-sync" \ "health/*" "" write_policy_and_role "maintenance" "maintenance" "ariadne,maintenance-vault-sync" \ - "portal/atlas-portal-db portal/bstein-dev-home-keycloak-admin mailu/mailu-db-secret mailu/mailu-initial-account-secret shared/harbor-pull" "" + "maintenance/ariadne-db portal/bstein-dev-home-keycloak-admin mailu/mailu-db-secret mailu/mailu-initial-account-secret shared/harbor-pull" "" write_policy_and_role "finance" "finance" "finance-vault" \ "finance/* shared/postmark-relay" "" write_policy_and_role "finance-secrets" "finance" "finance-secrets-ensure" \