From 70e1205f5f3cde9c2957b57c0ef34426333fd58f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 16:24:13 -0300 Subject: [PATCH 001/684] jenkins: pin oidc via jcasc --- services/jenkins/helmrelease.yaml | 70 +++++++++---------------------- 1 file changed, 20 insertions(+), 50 deletions(-) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index 4cdede0..df75acc 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -156,54 +156,6 @@ spec: - name: jenkins-home mountPath: /var/jenkins_home initScripts: - oidc.groovy: | - import hudson.util.Secret - import jenkins.model.IdStrategy - import jenkins.model.Jenkins - import org.jenkinsci.plugins.oic.OicSecurityRealm - import org.jenkinsci.plugins.oic.OicServerWellKnownConfiguration - import hudson.security.FullControlOnceLoggedInAuthorizationStrategy - def env = System.getenv() - if (!(env['ENABLE_OIDC'] ?: 'false').toBoolean()) { - println("OIDC disabled (ENABLE_OIDC=false); keeping default security realm") - return - } - def required = ['OIDC_CLIENT_ID','OIDC_CLIENT_SECRET','OIDC_ISSUER'] - if (!required.every { env[it] }) { - throw new IllegalStateException("OIDC enabled but missing vars: ${required.findAll { !env[it] }}") - } - try { - def wellKnown = "${env['OIDC_ISSUER']}/.well-known/openid-configuration" - def serverCfg = new OicServerWellKnownConfiguration(wellKnown) - serverCfg.setScopesOverride('openid profile email') - def realm = new OicSecurityRealm( - env['OIDC_CLIENT_ID'], - Secret.fromString(env['OIDC_CLIENT_SECRET']), - serverCfg, - false, - IdStrategy.CASE_INSENSITIVE, - IdStrategy.CASE_INSENSITIVE - ) - realm.createProxyAwareResourceRetriver() - realm.setLogoutFromOpenidProvider(true) - realm.setPostLogoutRedirectUrl('https://ci.bstein.dev') - realm.setUserNameField('preferred_username') - realm.setFullNameFieldName('name') - realm.setEmailFieldName('email') - realm.setGroupsFieldName('groups') - 
realm.setRootURLFromRequest(true) - realm.setSendScopesInTokenRequest(true) - def j = Jenkins.get() - j.setSecurityRealm(realm) - def auth = new FullControlOnceLoggedInAuthorizationStrategy() - auth.setAllowAnonymousRead(false) - j.setAuthorizationStrategy(auth) - j.save() - println("Configured OIDC realm from init script (well-known)") - } catch (Exception e) { - println("Failed to configure OIDC realm: ${e}") - throw e - } theme.groovy: | import jenkins.model.Jenkins import org.codefirst.SimpleThemeDecorator @@ -223,8 +175,26 @@ spec: } JCasC: defaultConfig: false - securityRealm: "" - authorizationStrategy: "" + securityRealm: | + oic: + clientId: "${OIDC_CLIENT_ID}" + clientSecret: "${OIDC_CLIENT_SECRET}" + tokenServerUrl: "${OIDC_TOKEN_URL}" + authorizationServerUrl: "${OIDC_AUTH_URL}" + userInfoUrl: "${OIDC_USERINFO_URL}" + logoutFromOpenIdProvider: true + postLogoutRedirectUrl: "https://ci.bstein.dev" + scopes: "openid profile email" + rootURLFromRequest: true + userNameField: "preferred_username" + fullNameFieldName: "name" + emailFieldName: "email" + groupsFieldName: "groups" + escapeHatchEnabled: false + maxClockSkew: 120 + authorizationStrategy: | + loggedInUsersCanDoAnything: + allowAnonymousRead: false configScripts: base.yaml: | jenkins: -- 2.47.2 From af411e795c442b2b1a809655a311895894a6dd41 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 16:34:29 -0300 Subject: [PATCH 002/684] flux: track feature/sso-hardening --- clusters/atlas/flux-system/gotk-sync.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusters/atlas/flux-system/gotk-sync.yaml b/clusters/atlas/flux-system/gotk-sync.yaml index 473ab99..713e739 100644 --- a/clusters/atlas/flux-system/gotk-sync.yaml +++ b/clusters/atlas/flux-system/gotk-sync.yaml @@ -8,7 +8,7 @@ metadata: spec: interval: 1m0s ref: - branch: main + branch: feature/sso-hardening secretRef: name: flux-system-gitea url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git -- 2.47.2 From 
f4fa44c842b051d120089538168cd96abd383a38 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 16:44:24 -0300 Subject: [PATCH 003/684] jenkins: fix oidc jcasc schema --- services/jenkins/helmrelease.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index df75acc..80f3604 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -179,19 +179,18 @@ spec: oic: clientId: "${OIDC_CLIENT_ID}" clientSecret: "${OIDC_CLIENT_SECRET}" - tokenServerUrl: "${OIDC_TOKEN_URL}" - authorizationServerUrl: "${OIDC_AUTH_URL}" - userInfoUrl: "${OIDC_USERINFO_URL}" + serverConfiguration: + wellKnownOpenIDConfigurationUrl: "${OIDC_ISSUER}/.well-known/openid-configuration" logoutFromOpenIdProvider: true postLogoutRedirectUrl: "https://ci.bstein.dev" scopes: "openid profile email" + sendScopesInTokenRequest: true rootURLFromRequest: true userNameField: "preferred_username" fullNameFieldName: "name" emailFieldName: "email" groupsFieldName: "groups" escapeHatchEnabled: false - maxClockSkew: 120 authorizationStrategy: | loggedInUsersCanDoAnything: allowAnonymousRead: false -- 2.47.2 From 1357d783de4e182c57d227fdae286717297f30aa Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 17:36:56 -0300 Subject: [PATCH 004/684] jenkins: fix oidc with wellknown config --- services/jenkins/helmrelease.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index 80f3604..e0d8fbb 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -180,17 +180,17 @@ spec: clientId: "${OIDC_CLIENT_ID}" clientSecret: "${OIDC_CLIENT_SECRET}" serverConfiguration: - wellKnownOpenIDConfigurationUrl: "${OIDC_ISSUER}/.well-known/openid-configuration" - logoutFromOpenIdProvider: true + wellKnown: + wellKnownOpenIDConfigurationUrl: 
"${OIDC_ISSUER}/.well-known/openid-configuration" + scopesOverride: "openid profile email" + logoutFromOpenIdProvider: true postLogoutRedirectUrl: "https://ci.bstein.dev" - scopes: "openid profile email" sendScopesInTokenRequest: true rootURLFromRequest: true userNameField: "preferred_username" fullNameFieldName: "name" emailFieldName: "email" groupsFieldName: "groups" - escapeHatchEnabled: false authorizationStrategy: | loggedInUsersCanDoAnything: allowAnonymousRead: false -- 2.47.2 From c3ffde1b1fc25174f8ed6b3a45fed85c81c059b5 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 17:59:55 -0300 Subject: [PATCH 005/684] jenkins: restore harbor robot creds --- services/jenkins/helmrelease.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index e0d8fbb..7e721d8 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -128,6 +128,16 @@ spec: secretKeyRef: name: jenkins-oidc key: logoutUrl + - name: HARBOR_ROBOT_USERNAME + valueFrom: + secretKeyRef: + name: harbor-robot-creds + key: username + - name: HARBOR_ROBOT_PASSWORD + valueFrom: + secretKeyRef: + name: harbor-robot-creds + key: password - name: GITEA_PAT_USERNAME valueFrom: secretKeyRef: @@ -275,6 +285,12 @@ spec: username: "${GITEA_PAT_USERNAME}" password: "${GITEA_PAT_TOKEN}" description: "Gitea PAT for pipelines" + - usernamePassword: + scope: GLOBAL + id: harbor-robot + username: "${HARBOR_ROBOT_USERNAME}" + password: "${HARBOR_ROBOT_PASSWORD}" + description: "Harbor robot for pipelines" jobs.yaml: | jobs: - script: | -- 2.47.2 From 0c5bce93ca3c767534e05afa3f8830836673eb07 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 18:03:52 -0300 Subject: [PATCH 006/684] jenkins: fix oidc indent and harbor creds --- services/jenkins/helmrelease.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/jenkins/helmrelease.yaml 
b/services/jenkins/helmrelease.yaml index 7e721d8..b2bff90 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -193,7 +193,7 @@ spec: wellKnown: wellKnownOpenIDConfigurationUrl: "${OIDC_ISSUER}/.well-known/openid-configuration" scopesOverride: "openid profile email" - logoutFromOpenIdProvider: true + logoutFromOpenIdProvider: true postLogoutRedirectUrl: "https://ci.bstein.dev" sendScopesInTokenRequest: true rootURLFromRequest: true -- 2.47.2 From fa44a00d0bd1ba99e37dbf6576abcc11cd1a57a6 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 18:31:19 -0300 Subject: [PATCH 007/684] Flux image automation: track main branch for bstein-dev-home --- .../applications/bstein-dev-home/image-automation.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml index 6245fb0..e198db4 100644 --- a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml +++ b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml @@ -13,14 +13,14 @@ spec: git: checkout: ref: - branch: feature/bstein-dev-home + branch: main commit: author: email: ops@bstein.dev name: flux-bot messageTemplate: "chore(bstein-dev-home): update images to {{range .Updated.Images}}{{.}}{{end}}" push: - branch: feature/bstein-dev-home + branch: main update: strategy: Setters path: services/bstein-dev-home -- 2.47.2 From b97b22fc01a54be85c62026327f26925ab435c15 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 18:31:48 -0300 Subject: [PATCH 008/684] jenkins: drop helm, run via raw manifests --- services/jenkins/configmap-init-scripts.yaml | 24 ++ services/jenkins/configmap-jcasc.yaml | 165 +++++++++ services/jenkins/configmap-plugins.yaml | 17 + services/jenkins/deployment.yaml | 195 ++++++++++ services/jenkins/helmrelease.yaml | 358 
------------------- services/jenkins/ingress.yaml | 26 ++ services/jenkins/kustomization.yaml | 8 +- services/jenkins/pvc.yaml | 13 + services/jenkins/service.yaml | 18 + 9 files changed, 465 insertions(+), 359 deletions(-) create mode 100644 services/jenkins/configmap-init-scripts.yaml create mode 100644 services/jenkins/configmap-jcasc.yaml create mode 100644 services/jenkins/configmap-plugins.yaml create mode 100644 services/jenkins/deployment.yaml delete mode 100644 services/jenkins/helmrelease.yaml create mode 100644 services/jenkins/ingress.yaml create mode 100644 services/jenkins/pvc.yaml create mode 100644 services/jenkins/service.yaml diff --git a/services/jenkins/configmap-init-scripts.yaml b/services/jenkins/configmap-init-scripts.yaml new file mode 100644 index 0000000..ed87720 --- /dev/null +++ b/services/jenkins/configmap-init-scripts.yaml @@ -0,0 +1,24 @@ +# services/jenkins/configmap-init-scripts.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: jenkins-init-scripts + namespace: jenkins +data: + theme.groovy: | + import jenkins.model.Jenkins + import org.codefirst.SimpleThemeDecorator + + def instance = Jenkins.get() + def decorators = instance.getExtensionList(SimpleThemeDecorator.class) + + if (decorators?.size() > 0) { + def theme = decorators[0] + theme.setCssUrl("https://jenkins-contrib-themes.github.io/jenkins-material-theme/dist/material-ocean.css") + theme.setJsUrl("") + theme.setTheme("") + instance.save() + println("Applied simple-theme-plugin dark theme") + } else { + println("simple-theme-plugin not installed; skipping theme configuration") + } diff --git a/services/jenkins/configmap-jcasc.yaml b/services/jenkins/configmap-jcasc.yaml new file mode 100644 index 0000000..958e8a8 --- /dev/null +++ b/services/jenkins/configmap-jcasc.yaml @@ -0,0 +1,165 @@ +# services/jenkins/configmap-jcasc.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: jenkins-jcasc + namespace: jenkins +data: + securityrealm.yaml: | + jenkins: + 
securityRealm: + oic: + clientId: "${OIDC_CLIENT_ID}" + clientSecret: "${OIDC_CLIENT_SECRET}" + serverConfiguration: + wellKnown: + wellKnownOpenIDConfigurationUrl: "${OIDC_ISSUER}/.well-known/openid-configuration" + scopesOverride: "openid profile email" + logoutFromOpenIdProvider: true + postLogoutRedirectUrl: "https://ci.bstein.dev" + sendScopesInTokenRequest: true + rootURLFromRequest: true + userNameField: "preferred_username" + fullNameFieldName: "name" + emailFieldName: "email" + groupsFieldName: "groups" + authorization.yaml: | + jenkins: + authorizationStrategy: + loggedInUsersCanDoAnything: + allowAnonymousRead: false + creds.yaml: | + credentials: + system: + domainCredentials: + - credentials: + - usernamePassword: + scope: GLOBAL + id: gitea-pat + username: "${GITEA_PAT_USERNAME}" + password: "${GITEA_PAT_TOKEN}" + description: "Gitea PAT for pipelines" + - usernamePassword: + scope: GLOBAL + id: harbor-robot + username: "${HARBOR_ROBOT_USERNAME}" + password: "${HARBOR_ROBOT_PASSWORD}" + description: "Harbor robot for pipelines" + jobs.yaml: | + jobs: + - script: | + pipelineJob('harbor-arm-build') { + triggers { + scm('H/5 * * * *') + } + definition { + cpsScm { + scm { + git { + remote { + url('https://scm.bstein.dev/bstein/harbor-arm-build.git') + credentials('gitea-pat') + } + branches('*/master') + } + } + } + } + } + pipelineJob('ci-demo') { + triggers { + scm('H/1 * * * *') + } + definition { + cpsScm { + scm { + git { + remote { + url('https://scm.bstein.dev/bstein/ci-demo.git') + credentials('gitea-pat') + } + branches('*/master') + } + } + scriptPath('Jenkinsfile') + } + } + } + pipelineJob('bstein-dev-home') { + triggers { + scm('H/2 * * * *') + } + definition { + cpsScm { + scm { + git { + remote { + url('https://scm.bstein.dev/bstein/bstein-dev-home.git') + credentials('gitea-pat') + } + branches('*/master') + } + } + scriptPath('Jenkinsfile') + } + } + } + base.yaml: | + jenkins: + disableRememberMe: false + mode: NORMAL + numExecutors: 0 
+ labelString: "" + projectNamingStrategy: "standard" + markupFormatter: + plainText + clouds: + - kubernetes: + containerCapStr: "10" + connectTimeout: "5" + readTimeout: "15" + jenkinsUrl: "http://jenkins.jenkins.svc.cluster.local:8080" + jenkinsTunnel: "jenkins-agent.jenkins.svc.cluster.local:50000" + skipTlsVerify: false + maxRequestsPerHostStr: "32" + retentionTimeout: "5" + waitForPodSec: "600" + name: "kubernetes" + namespace: "jenkins" + restrictedPssSecurityContext: false + serverUrl: "https://kubernetes.default" + credentialsId: "" + podLabels: + - key: "jenkins/jenkins-jenkins-agent" + value: "true" + templates: + - name: "default" + namespace: "jenkins" + containers: + - name: "jnlp" + args: "^${computer.jnlpmac} ^${computer.name}" + envVars: + - envVar: + key: "JENKINS_URL" + value: "http://jenkins.jenkins.svc.cluster.local:8080/" + image: "jenkins/inbound-agent:3355.v388858a_47b_33-3" + privileged: "false" + resourceLimitCpu: 512m + resourceLimitMemory: 512Mi + resourceRequestCpu: 512m + resourceRequestMemory: 512Mi + ttyEnabled: false + workingDir: /home/jenkins/agent + idleMinutes: 0 + instanceCap: 2147483647 + label: "jenkins-jenkins-agent " + nodeUsageMode: "NORMAL" + podRetention: Never + serviceAccount: "default" + slaveConnectTimeoutStr: "100" + yamlMergeStrategy: override + inheritYamlMergeStrategy: false + slaveAgentPort: 50000 + crumbIssuer: + standard: + excludeClientIPFromCrumb: true diff --git a/services/jenkins/configmap-plugins.yaml b/services/jenkins/configmap-plugins.yaml new file mode 100644 index 0000000..eabea13 --- /dev/null +++ b/services/jenkins/configmap-plugins.yaml @@ -0,0 +1,17 @@ +# services/jenkins/configmap-plugins.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: jenkins-plugins + namespace: jenkins +data: + plugins.txt: | + kubernetes + workflow-aggregator + git + pipeline-utility-steps + configuration-as-code + configuration-as-code-support + oic-auth + job-dsl + simple-theme-plugin diff --git 
a/services/jenkins/deployment.yaml b/services/jenkins/deployment.yaml new file mode 100644 index 0000000..d9cf1ea --- /dev/null +++ b/services/jenkins/deployment.yaml @@ -0,0 +1,195 @@ +# services/jenkins/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: jenkins + namespace: jenkins + labels: + app: jenkins +spec: + replicas: 1 + selector: + matchLabels: + app: jenkins + strategy: + type: Recreate + template: + metadata: + labels: + app: jenkins + spec: + serviceAccountName: default + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 90 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5"] + - weight: 50 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi4"] + hostAliases: + - ip: 38.28.125.112 + hostnames: + - sso.bstein.dev + securityContext: + fsGroup: 1000 + initContainers: + - name: install-plugins + image: jenkins/jenkins:2.528.3-jdk21 + imagePullPolicy: IfNotPresent + command: + - sh + - -c + - | + set -euo pipefail + jenkins-plugin-cli --plugin-file /plugins/plugins.txt + volumeMounts: + - name: plugins + mountPath: /plugins/plugins.txt + subPath: plugins.txt + - name: plugin-dir + mountPath: /usr/share/jenkins/ref/plugins + containers: + - name: jenkins + image: jenkins/jenkins:2.528.3-jdk21 + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 8080 + - name: agent-listener + containerPort: 50000 + env: + - name: JAVA_OPTS + value: "-Xms512m -Xmx2048m" + - name: JENKINS_OPTS + value: "--webroot=/var/jenkins_cache/war" + - name: JENKINS_SLAVE_AGENT_PORT + value: "50000" + - name: CASC_JENKINS_CONFIG + value: /config/jcasc + - name: ENABLE_OIDC + value: "true" + - name: OIDC_ISSUER + value: "https://sso.bstein.dev/realms/atlas" + - name: OIDC_CLIENT_ID + valueFrom: + secretKeyRef: + name: jenkins-oidc + key: clientId + - name: 
OIDC_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: jenkins-oidc + key: clientSecret + - name: OIDC_AUTH_URL + valueFrom: + secretKeyRef: + name: jenkins-oidc + key: authorizationUrl + - name: OIDC_TOKEN_URL + valueFrom: + secretKeyRef: + name: jenkins-oidc + key: tokenUrl + - name: OIDC_USERINFO_URL + valueFrom: + secretKeyRef: + name: jenkins-oidc + key: userInfoUrl + - name: OIDC_LOGOUT_URL + valueFrom: + secretKeyRef: + name: jenkins-oidc + key: logoutUrl + - name: HARBOR_ROBOT_USERNAME + valueFrom: + secretKeyRef: + name: harbor-robot-creds + key: username + - name: HARBOR_ROBOT_PASSWORD + valueFrom: + secretKeyRef: + name: harbor-robot-creds + key: password + - name: GITEA_PAT_USERNAME + valueFrom: + secretKeyRef: + name: gitea-pat + key: username + - name: GITEA_PAT_TOKEN + valueFrom: + secretKeyRef: + name: gitea-pat + key: token + resources: + requests: + cpu: 750m + memory: 1536Mi + limits: + cpu: 1500m + memory: 3Gi + livenessProbe: + httpGet: + path: /login + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 5 + readinessProbe: + httpGet: + path: /login + port: http + initialDelaySeconds: 20 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + startupProbe: + httpGet: + path: /login + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + failureThreshold: 20 + volumeMounts: + - name: jenkins-home + mountPath: /var/jenkins_home + - name: jenkins-cache + mountPath: /var/jenkins_cache + - name: jcasc + mountPath: /config/jcasc + - name: init-scripts + mountPath: /usr/share/jenkins/ref/init.groovy.d + - name: plugin-dir + mountPath: /usr/share/jenkins/ref/plugins + - name: tmp + mountPath: /tmp + volumes: + - name: jenkins-home + persistentVolumeClaim: + claimName: jenkins + - name: jenkins-cache + emptyDir: {} + - name: plugin-dir + emptyDir: {} + - name: plugins + configMap: + name: jenkins-plugins + - name: jcasc + configMap: + name: jenkins-jcasc + - name: init-scripts + configMap: + 
name: jenkins-init-scripts + - name: tmp + emptyDir: {} diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml deleted file mode 100644 index b2bff90..0000000 --- a/services/jenkins/helmrelease.yaml +++ /dev/null @@ -1,358 +0,0 @@ -# services/jenkins/helmrelease.yaml -apiVersion: helm.toolkit.fluxcd.io/v2 -kind: HelmRelease -metadata: - name: jenkins - namespace: jenkins -spec: - interval: 30m - chart: - spec: - chart: jenkins - version: 5.8.114 - sourceRef: - kind: HelmRepository - name: jenkins - namespace: flux-system - install: - timeout: 15m - remediation: - retries: 3 - upgrade: - timeout: 15m - remediation: - retries: 3 - remediateLastFailure: true - cleanupOnFail: true - rollback: - timeout: 15m - values: - controller: - nodeSelector: - kubernetes.io/arch: arm64 - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: kubernetes.io/arch - operator: In - values: ["arm64"] - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 90 - preference: - matchExpressions: - - key: hardware - operator: In - values: ["rpi5"] - - weight: 50 - preference: - matchExpressions: - - key: hardware - operator: In - values: ["rpi4"] - resources: - requests: - cpu: 750m - memory: 1.5Gi - limits: - cpu: 1500m - memory: 3Gi - javaOpts: "-Xms512m -Xmx2048m" - startupProbe: - httpGet: - path: /login - port: http - initialDelaySeconds: 30 - periodSeconds: 10 - timeoutSeconds: 5 - failureThreshold: 20 - jenkinsUrl: https://ci.bstein.dev - ingress: - enabled: true - hostName: ci.bstein.dev - ingressClassName: traefik - annotations: - cert-manager.io/cluster-issuer: letsencrypt - traefik.ingress.kubernetes.io/router.entrypoints: websecure - tls: - - secretName: jenkins-tls - hosts: - - ci.bstein.dev - hostAliases: - - ip: 38.28.125.112 - hostnames: - - sso.bstein.dev - installPlugins: - - kubernetes - - workflow-aggregator - - git - - pipeline-utility-steps - - 
configuration-as-code - - oic-auth - - job-dsl - - configuration-as-code-support - - simple-theme-plugin - containerEnv: - - name: ENABLE_OIDC - value: "true" - - name: OIDC_ISSUER - value: "https://sso.bstein.dev/realms/atlas" - - name: OIDC_CLIENT_ID - valueFrom: - secretKeyRef: - name: jenkins-oidc - key: clientId - - name: OIDC_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: jenkins-oidc - key: clientSecret - - name: OIDC_AUTH_URL - valueFrom: - secretKeyRef: - name: jenkins-oidc - key: authorizationUrl - - name: OIDC_TOKEN_URL - valueFrom: - secretKeyRef: - name: jenkins-oidc - key: tokenUrl - - name: OIDC_USERINFO_URL - valueFrom: - secretKeyRef: - name: jenkins-oidc - key: userInfoUrl - - name: OIDC_LOGOUT_URL - valueFrom: - secretKeyRef: - name: jenkins-oidc - key: logoutUrl - - name: HARBOR_ROBOT_USERNAME - valueFrom: - secretKeyRef: - name: harbor-robot-creds - key: username - - name: HARBOR_ROBOT_PASSWORD - valueFrom: - secretKeyRef: - name: harbor-robot-creds - key: password - - name: GITEA_PAT_USERNAME - valueFrom: - secretKeyRef: - name: gitea-pat - key: username - - name: GITEA_PAT_TOKEN - valueFrom: - secretKeyRef: - name: gitea-pat - key: token - customInitContainers: - - name: clean-jcasc-stale - image: alpine:3.20 - imagePullPolicy: IfNotPresent - command: - - sh - - -c - - | - set -euo pipefail - rm -f /var/jenkins_home/casc_configs/* || true - securityContext: - runAsNonRoot: true - runAsUser: 1000 - runAsGroup: 1000 - volumeMounts: - - name: jenkins-home - mountPath: /var/jenkins_home - initScripts: - theme.groovy: | - import jenkins.model.Jenkins - import org.codefirst.SimpleThemeDecorator - - def instance = Jenkins.get() - def decorators = instance.getExtensionList(SimpleThemeDecorator.class) - - if (decorators?.size() > 0) { - def theme = decorators[0] - theme.setCssUrl("https://jenkins-contrib-themes.github.io/jenkins-material-theme/dist/material-ocean.css") - theme.setJsUrl("") - theme.setTheme("") - instance.save() - println("Applied 
simple-theme-plugin dark theme") - } else { - println("simple-theme-plugin not installed; skipping theme configuration") - } - JCasC: - defaultConfig: false - securityRealm: | - oic: - clientId: "${OIDC_CLIENT_ID}" - clientSecret: "${OIDC_CLIENT_SECRET}" - serverConfiguration: - wellKnown: - wellKnownOpenIDConfigurationUrl: "${OIDC_ISSUER}/.well-known/openid-configuration" - scopesOverride: "openid profile email" - logoutFromOpenIdProvider: true - postLogoutRedirectUrl: "https://ci.bstein.dev" - sendScopesInTokenRequest: true - rootURLFromRequest: true - userNameField: "preferred_username" - fullNameFieldName: "name" - emailFieldName: "email" - groupsFieldName: "groups" - authorizationStrategy: | - loggedInUsersCanDoAnything: - allowAnonymousRead: false - configScripts: - base.yaml: | - jenkins: - disableRememberMe: false - mode: NORMAL - numExecutors: 0 - labelString: "" - projectNamingStrategy: "standard" - markupFormatter: - plainText - clouds: - - kubernetes: - containerCapStr: "10" - defaultsProviderTemplate: "" - connectTimeout: "5" - readTimeout: "15" - jenkinsUrl: "http://jenkins.jenkins.svc.cluster.local:8080" - jenkinsTunnel: "jenkins-agent.jenkins.svc.cluster.local:50000" - skipTlsVerify: false - usageRestricted: false - maxRequestsPerHostStr: "32" - retentionTimeout: "5" - waitForPodSec: "600" - name: "kubernetes" - namespace: "jenkins" - restrictedPssSecurityContext: false - serverUrl: "https://kubernetes.default" - credentialsId: "" - podLabels: - - key: "jenkins/jenkins-jenkins-agent" - value: "true" - templates: - - name: "default" - namespace: "jenkins" - id: a23c9bbcd21e360a77d51b426f05bd7b8032d8fdedd6ffb97c436883ce6c5ffa - containers: - - name: "jnlp" - alwaysPullImage: false - args: "^${computer.jnlpmac} ^${computer.name}" - envVars: - - envVar: - key: "JENKINS_URL" - value: "http://jenkins.jenkins.svc.cluster.local:8080/" - image: "jenkins/inbound-agent:3355.v388858a_47b_33-3" - privileged: "false" - resourceLimitCpu: 512m - 
resourceLimitMemory: 512Mi - resourceRequestCpu: 512m - resourceRequestMemory: 512Mi - ttyEnabled: false - workingDir: /home/jenkins/agent - idleMinutes: 0 - instanceCap: 2147483647 - label: "jenkins-jenkins-agent " - nodeUsageMode: "NORMAL" - podRetention: Never - showRawYaml: true - serviceAccount: "default" - slaveConnectTimeoutStr: "100" - yamlMergeStrategy: override - inheritYamlMergeStrategy: false - slaveAgentPort: 50000 - crumbIssuer: - standard: - excludeClientIPFromCrumb: true - security: - apiToken: - creationOfLegacyTokenEnabled: false - tokenGenerationOnCreationEnabled: false - usageStatisticsEnabled: true - creds.yaml: | - credentials: - system: - domainCredentials: - - credentials: - - usernamePassword: - scope: GLOBAL - id: gitea-pat - username: "${GITEA_PAT_USERNAME}" - password: "${GITEA_PAT_TOKEN}" - description: "Gitea PAT for pipelines" - - usernamePassword: - scope: GLOBAL - id: harbor-robot - username: "${HARBOR_ROBOT_USERNAME}" - password: "${HARBOR_ROBOT_PASSWORD}" - description: "Harbor robot for pipelines" - jobs.yaml: | - jobs: - - script: | - pipelineJob('harbor-arm-build') { - triggers { - scm('H/5 * * * *') - } - definition { - cpsScm { - scm { - git { - remote { - url('https://scm.bstein.dev/bstein/harbor-arm-build.git') - credentials('gitea-pat') - } - branches('*/master') - } - } - } - } - } - pipelineJob('ci-demo') { - triggers { - scm('H/1 * * * *') - } - definition { - cpsScm { - scm { - git { - remote { - url('https://scm.bstein.dev/bstein/ci-demo.git') - credentials('gitea-pat') - } - branches('*/master') - } - } - scriptPath('Jenkinsfile') - } - } - } - pipelineJob('bstein-dev-home') { - triggers { - scm('H/2 * * * *') - } - definition { - cpsScm { - scm { - git { - remote { - url('https://scm.bstein.dev/bstein/bstein-dev-home.git') - credentials('gitea-pat') - } - branches('*/master') - } - } - scriptPath('Jenkinsfile') - } - } - } - persistence: - enabled: true - storageClass: astreae - size: 50Gi - serviceAccount: - 
create: true diff --git a/services/jenkins/ingress.yaml b/services/jenkins/ingress.yaml new file mode 100644 index 0000000..e702c8c --- /dev/null +++ b/services/jenkins/ingress.yaml @@ -0,0 +1,26 @@ +# services/jenkins/ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: jenkins + namespace: jenkins + annotations: + cert-manager.io/cluster-issuer: letsencrypt + traefik.ingress.kubernetes.io/router.entrypoints: websecure +spec: + ingressClassName: traefik + tls: + - secretName: jenkins-tls + hosts: + - ci.bstein.dev + rules: + - host: ci.bstein.dev + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: jenkins + port: + name: http diff --git a/services/jenkins/kustomization.yaml b/services/jenkins/kustomization.yaml index b20b1d3..a0bd200 100644 --- a/services/jenkins/kustomization.yaml +++ b/services/jenkins/kustomization.yaml @@ -4,4 +4,10 @@ kind: Kustomization namespace: jenkins resources: - namespace.yaml - - helmrelease.yaml + - pvc.yaml + - configmap-jcasc.yaml + - configmap-init-scripts.yaml + - configmap-plugins.yaml + - deployment.yaml + - service.yaml + - ingress.yaml diff --git a/services/jenkins/pvc.yaml b/services/jenkins/pvc.yaml new file mode 100644 index 0000000..049df32 --- /dev/null +++ b/services/jenkins/pvc.yaml @@ -0,0 +1,13 @@ +# services/jenkins/pvc.yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: jenkins + namespace: jenkins +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi + storageClassName: astreae diff --git a/services/jenkins/service.yaml b/services/jenkins/service.yaml new file mode 100644 index 0000000..5fba878 --- /dev/null +++ b/services/jenkins/service.yaml @@ -0,0 +1,18 @@ +# services/jenkins/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: jenkins + namespace: jenkins + labels: + app: jenkins +spec: + ports: + - name: http + port: 8080 + targetPort: 8080 + - name: agent-listener + port: 50000 + targetPort: 50000 + 
selector: + app: jenkins -- 2.47.2 From f092f00bff99c5f831f92907d1aefce4532d11bf Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 18:37:23 -0300 Subject: [PATCH 009/684] jenkins: bind pvc to retained volume --- services/jenkins/pvc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/jenkins/pvc.yaml b/services/jenkins/pvc.yaml index 049df32..be4a55b 100644 --- a/services/jenkins/pvc.yaml +++ b/services/jenkins/pvc.yaml @@ -11,3 +11,4 @@ spec: requests: storage: 50Gi storageClassName: astreae + volumeName: pvc-3e12e869-5b33-4035-93d2-411ebc02ff31 -- 2.47.2 From f9c4967eed09aef4b4927d50e4ee5735c9d03c40 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 18:39:32 -0300 Subject: [PATCH 010/684] jenkins: switch healthcheck to deployment/service --- .../flux-system/applications/jenkins/kustomization.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/clusters/atlas/flux-system/applications/jenkins/kustomization.yaml b/clusters/atlas/flux-system/applications/jenkins/kustomization.yaml index 98a7211..37d3d23 100644 --- a/clusters/atlas/flux-system/applications/jenkins/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/jenkins/kustomization.yaml @@ -16,8 +16,12 @@ spec: - name: helm - name: traefik healthChecks: - - apiVersion: helm.toolkit.fluxcd.io/v2 - kind: HelmRelease + - apiVersion: apps/v1 + kind: Deployment + name: jenkins + namespace: jenkins + - apiVersion: v1 + kind: Service name: jenkins namespace: jenkins wait: false -- 2.47.2 From ba47e00c88d13d68cf765396a526855afbf3a2b3 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 19:03:28 -0300 Subject: [PATCH 011/684] Point bstein-dev-home to latest images (0.1.1-0) --- services/bstein-dev-home/kustomization.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index 3268f6d..b13d2b9 100644 --- 
a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -12,6 +12,6 @@ resources: - ingress.yaml images: - name: registry.bstein.dev/bstein/bstein-dev-home-frontend - newTag: 0.1.0-11 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + newTag: 0.1.1-0 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} - name: registry.bstein.dev/bstein/bstein-dev-home-backend - newTag: 0.1.0-11 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + newTag: 0.1.1-0 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} -- 2.47.2 From 6405cd823d8d45ceaf5e92e9d127baf81d16d355 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 19:04:23 -0300 Subject: [PATCH 012/684] Point bstein-dev-home to latest tags --- services/bstein-dev-home/kustomization.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index b13d2b9..dd4d6e7 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -12,6 +12,6 @@ resources: - ingress.yaml images: - name: registry.bstein.dev/bstein/bstein-dev-home-frontend - newTag: 0.1.1-0 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + newTag: latest # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} - name: registry.bstein.dev/bstein/bstein-dev-home-backend - newTag: 0.1.1-0 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + newTag: latest # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} -- 2.47.2 From 7533cec0eefc1532f042847ae1740230dc73627f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 19:30:09 -0300 Subject: [PATCH 013/684] vault: drop helm, add raw statefulset --- services/vault/configmap.yaml | 24 +++++++++ services/vault/helmrelease.yaml | 68 ------------------------ services/vault/ingress.yaml | 4 +- services/vault/kustomization.yaml | 
6 ++- services/vault/service.yaml | 37 +++++++++++++ services/vault/statefulset.yaml | 87 +++++++++++++++++++++++++++++++ 6 files changed, 154 insertions(+), 72 deletions(-) create mode 100644 services/vault/configmap.yaml delete mode 100644 services/vault/helmrelease.yaml create mode 100644 services/vault/service.yaml create mode 100644 services/vault/statefulset.yaml diff --git a/services/vault/configmap.yaml b/services/vault/configmap.yaml new file mode 100644 index 0000000..6f36043 --- /dev/null +++ b/services/vault/configmap.yaml @@ -0,0 +1,24 @@ +# services/vault/configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: vault-config + namespace: vault +data: + local.hcl: | + ui = true + cluster_name = "vault-k8s" + + listener "tcp" { + address = "0.0.0.0:8200" + cluster_address = "0.0.0.0:8201" + tls_cert_file = "/vault/userconfig/tls/tls.crt" + tls_key_file = "/vault/userconfig/tls/tls.key" + } + + storage "raft" { + path = "/vault/data" + } + + api_addr = "https://secret.bstein.dev" + cluster_addr = "https://vault-0.vault-internal:8201" diff --git a/services/vault/helmrelease.yaml b/services/vault/helmrelease.yaml deleted file mode 100644 index 604d31c..0000000 --- a/services/vault/helmrelease.yaml +++ /dev/null @@ -1,68 +0,0 @@ -# services/vault/helmrelease.yaml -apiVersion: helm.toolkit.fluxcd.io/v2 -kind: HelmRelease -metadata: - name: vault - namespace: vault -spec: - interval: 30m - chart: - spec: - chart: vault - version: 0.x.x - sourceRef: - kind: HelmRepository - name: hashicorp - namespace: flux-system - install: - remediation: { retries: 3 } - upgrade: - remediation: { retries: 3 } - values: - injector: - enabled: true - resources: - requests: { cpu: "50m", memory: "64Mi" } - csi: - enabled: false - server: - ha: - enabled: true - replicas: 1 - raft: - enabled: true - extraEnvironmentVars: - VAULT_API_ADDR: "https://secret.bstein.dev" - VAULT_REDIRECT_ADDR: "https://secret.bstein.dev" - dataStorage: - enabled: true - size: 10Gi - 
storageClass: astreae - resources: - requests: { cpu: "100m", memory: "256Mi" } - service: - type: ClusterIP - extraVolumes: - - type: secret - name: vault-server-tls - path: /vault/userconfig/tls - extraVolumeMounts: - - name: vault-server-tls - mountPath: /vault/userconfig/tls - readOnly: true - config: | - ui = true - cluster_name = "vault-k8s" - listener "tcp" { - address = "0.0.0.0:8200" - cluster_address = "0.0.0.0:8201" - tls_cert_file = "/vault/userconfig/tls/tls.crt" - tls_key_file = "/vault/userconfig/tls/tls.key" - } - storage "raft" { - path = "/vault/data" - } - api_addr = "https://secret.bstein.dev" - cluster_addr = "https://vault-0.vault-internal:8201" - ui: - enabled: true diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index 91d9ca4..bb8d336 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -1,4 +1,4 @@ -# services/vault/helmrelease.yaml +# services/vault/ingress.yaml apiVersion: networking.k8s.io/v1 kind: Ingress metadata: @@ -22,6 +22,6 @@ spec: pathType: Prefix backend: service: - name: vault-ui + name: vault port: number: 8200 diff --git a/services/vault/kustomization.yaml b/services/vault/kustomization.yaml index 1d7af87..9fdb061 100644 --- a/services/vault/kustomization.yaml +++ b/services/vault/kustomization.yaml @@ -4,7 +4,9 @@ kind: Kustomization namespace: vault resources: - namespace.yaml - - helmrelease.yaml - - certificate.yaml + - configmap.yaml + - statefulset.yaml + - service.yaml - ingress.yaml + - certificate.yaml - serverstransport.yaml diff --git a/services/vault/service.yaml b/services/vault/service.yaml new file mode 100644 index 0000000..0c1c451 --- /dev/null +++ b/services/vault/service.yaml @@ -0,0 +1,37 @@ +# services/vault/service.yaml +--- +apiVersion: v1 +kind: Service +metadata: + name: vault + namespace: vault +spec: + ports: + - name: api + port: 8200 + targetPort: 8200 + - name: cluster + port: 8201 + targetPort: 8201 + selector: + app: vault + +--- +apiVersion: v1 
+kind: Service +metadata: + name: vault-internal + namespace: vault + labels: + app: vault +spec: + clusterIP: None + ports: + - name: api + port: 8200 + targetPort: 8200 + - name: cluster + port: 8201 + targetPort: 8201 + selector: + app: vault diff --git a/services/vault/statefulset.yaml b/services/vault/statefulset.yaml new file mode 100644 index 0000000..fbbc028 --- /dev/null +++ b/services/vault/statefulset.yaml @@ -0,0 +1,87 @@ +# services/vault/statefulset.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: vault + namespace: vault + labels: + app: vault +spec: + serviceName: vault-internal + replicas: 1 + selector: + matchLabels: + app: vault + template: + metadata: + labels: + app: vault + spec: + securityContext: + fsGroup: 1000 + containers: + - name: vault + image: hashicorp/vault:1.17.6 + imagePullPolicy: IfNotPresent + args: ["server", "-config=/vault/config/local.hcl"] + ports: + - name: api + containerPort: 8200 + - name: cluster + containerPort: 8201 + env: + - name: VAULT_API_ADDR + value: "https://secret.bstein.dev" + - name: VAULT_CLUSTER_ADDR + value: "https://vault-0.vault-internal:8201" + - name: VAULT_REDIRECT_ADDR + value: "https://secret.bstein.dev" + - name: VAULT_LOG_LEVEL + value: "info" + readinessProbe: + exec: + command: ["vault", "status", "-tls-skip-verify"] + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 + livenessProbe: + exec: + command: ["vault", "status", "-tls-skip-verify"] + initialDelaySeconds: 60 + periodSeconds: 20 + timeoutSeconds: 5 + failureThreshold: 6 + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + allowPrivilegeEscalation: false + capabilities: + add: ["IPC_LOCK"] + drop: ["ALL"] + volumeMounts: + - name: config + mountPath: /vault/config + - name: data + mountPath: /vault/data + - name: tls + mountPath: /vault/userconfig/tls + readOnly: true + volumes: + - name: config + configMap: + name: vault-config + - name: tls + secret: + 
secretName: vault-server-tls + optional: false + volumeClaimTemplates: + - metadata: + name: data + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 10Gi + storageClassName: astreae -- 2.47.2 From fad7204dfbab41fc5dd6d8b7b0ec553345678208 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 19:58:06 -0300 Subject: [PATCH 014/684] mailu: switch relay to postmark --- services/mailu/helmrelease.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/mailu/helmrelease.yaml b/services/mailu/helmrelease.yaml index c8b0975..a40f047 100644 --- a/services/mailu/helmrelease.yaml +++ b/services/mailu/helmrelease.yaml @@ -33,8 +33,8 @@ spec: dkim: enabled: true externalRelay: - host: "[email-smtp.us-east-2.amazonaws.com]:587" - existingSecret: mailu-ses-relay + host: "[smtp.postmarkapp.com]:587" + existingSecret: mailu-postmark-relay usernameKey: relay-username passwordKey: relay-password timezone: Etc/UTC -- 2.47.2 From 524868b05d79de2f9f58295ac0adb275c88a39ff Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 20:32:10 -0300 Subject: [PATCH 015/684] vault: fix manifest and disable mlock --- services/vault/configmap.yaml | 1 + services/vault/statefulset.yaml | 35 +++++++++++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/services/vault/configmap.yaml b/services/vault/configmap.yaml index 6f36043..6fc1d6b 100644 --- a/services/vault/configmap.yaml +++ b/services/vault/configmap.yaml @@ -8,6 +8,7 @@ data: local.hcl: | ui = true cluster_name = "vault-k8s" + disable_mlock = true listener "tcp" { address = "0.0.0.0:8200" diff --git a/services/vault/statefulset.yaml b/services/vault/statefulset.yaml index fbbc028..94dd383 100644 --- a/services/vault/statefulset.yaml +++ b/services/vault/statefulset.yaml @@ -19,10 +19,31 @@ spec: spec: securityContext: fsGroup: 1000 + initContainers: + - name: setup-config + image: alpine:3.20 + command: + - sh + - -c + - | + set -euo 
pipefail + cp /config-src/local.hcl /vault/config/local.hcl + chown 1000:1000 /vault/config/local.hcl + chmod 640 /vault/config/local.hcl + securityContext: + runAsUser: 0 + runAsGroup: 0 + allowPrivilegeEscalation: false + volumeMounts: + - name: config-template + mountPath: /config-src + - name: config + mountPath: /vault/config containers: - name: vault image: hashicorp/vault:1.17.6 imagePullPolicy: IfNotPresent + command: ["vault"] args: ["server", "-config=/vault/config/local.hcl"] ports: - name: api @@ -38,6 +59,14 @@ spec: value: "https://secret.bstein.dev" - name: VAULT_LOG_LEVEL value: "info" + - name: VAULT_DISABLE_MLOCK + value: "true" + - name: VAULT_DISABLE_PERM_MGMT + value: "true" + - name: SKIP_CHOWN + value: "true" + - name: SKIP_SETCAP + value: "true" readinessProbe: exec: command: ["vault", "status", "-tls-skip-verify"] @@ -47,7 +76,7 @@ spec: failureThreshold: 6 livenessProbe: exec: - command: ["vault", "status", "-tls-skip-verify"] + command: ["sh", "-c", "vault status -tls-skip-verify >/dev/null 2>&1 || true"] initialDelaySeconds: 60 periodSeconds: 20 timeoutSeconds: 5 @@ -69,9 +98,11 @@ spec: mountPath: /vault/userconfig/tls readOnly: true volumes: - - name: config + - name: config-template configMap: name: vault-config + - name: config + emptyDir: {} - name: tls secret: secretName: vault-server-tls -- 2.47.2 From 3db523335da6b82ff46c9597281452dc133ee627 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 20:58:29 -0300 Subject: [PATCH 016/684] vault: fix traefik serversTransport name --- services/vault/ingress.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index bb8d336..c8ed24c 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -8,7 +8,7 @@ metadata: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/service.serversscheme: https - 
traefik.ingress.kubernetes.io/service.serversTransport: vault-vault-to-https@kubernetescrd + traefik.ingress.kubernetes.io/service.serversTransport: vault-to-https@kubernetescrd spec: ingressClassName: traefik tls: -- 2.47.2 From 0071f13063781c672d7a00b7ff869d39e239eb1a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 21:02:49 -0300 Subject: [PATCH 017/684] vault: pin to worker arm64 nodes --- services/vault/statefulset.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/services/vault/statefulset.yaml b/services/vault/statefulset.yaml index 94dd383..e9a1c21 100644 --- a/services/vault/statefulset.yaml +++ b/services/vault/statefulset.yaml @@ -17,6 +17,9 @@ spec: labels: app: vault spec: + nodeSelector: + node-role.kubernetes.io/worker: "true" + kubernetes.io/arch: arm64 securityContext: fsGroup: 1000 initContainers: -- 2.47.2 From 303e7e770f78583e287f1d3a56ff257a240b6bdb Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 21:08:10 -0300 Subject: [PATCH 018/684] vault: traefik serversTransport must include namespace --- services/vault/ingress.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index c8ed24c..bb8d336 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -8,7 +8,7 @@ metadata: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/service.serversscheme: https - traefik.ingress.kubernetes.io/service.serversTransport: vault-to-https@kubernetescrd + traefik.ingress.kubernetes.io/service.serversTransport: vault-vault-to-https@kubernetescrd spec: ingressClassName: traefik tls: -- 2.47.2 From b9144ebb5e0a9de1361d854599b43f7cc35ac199 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 21:13:31 -0300 Subject: [PATCH 019/684] jellyfin: bootstrap oidc plugin --- services/jellyfin/deployment.yaml | 88 +++++++++++++++++++++++++++++++ 1 file 
changed, 88 insertions(+) diff --git a/services/jellyfin/deployment.yaml b/services/jellyfin/deployment.yaml index fec0c78..53f79bc 100644 --- a/services/jellyfin/deployment.yaml +++ b/services/jellyfin/deployment.yaml @@ -28,6 +28,94 @@ spec: fsGroup: 65532 fsGroupChangePolicy: OnRootMismatch runAsGroup: 65532 + initContainers: + - name: install-oidc-plugin + image: alpine:3.20 + securityContext: + runAsUser: 0 + env: + - name: OIDC_PLUGIN_VERSION + value: "1.0.2.0" + - name: OIDC_PLUGIN_URL + value: "https://raw.githubusercontent.com/lolerskatez/JellyfinOIDCPlugin/master/OIDC_Authentication_1.0.2.0.zip" + - name: OIDC_ISSUER + value: "https://sso.bstein.dev/realms/atlas" + - name: OIDC_REDIRECT_URI + value: "https://stream.bstein.dev/oauth2/callback" + - name: OIDC_LOGOUT_URI + value: "https://sso.bstein.dev/realms/atlas/protocol/openid-connect/logout?redirect_uri=https://stream.bstein.dev/" + - name: OIDC_SCOPES + value: "openid,profile,email,groups" + - name: OIDC_ROLE_CLAIM + value: "groups" + - name: OIDC_CLIENT_ID + valueFrom: + secretKeyRef: + name: jellyfin-oidc + key: client-id + - name: OIDC_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: jellyfin-oidc + key: client-secret + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + if [ -z "${OIDC_CLIENT_ID:-}" ] || [ -z "${OIDC_CLIENT_SECRET:-}" ]; then + echo "OIDC_CLIENT_ID or OIDC_CLIENT_SECRET missing; create secret jellyfin-oidc" >&2 + exit 1 + fi + apk add --no-cache wget unzip + plugin_dir="/config/plugins/OIDC Authentication_${OIDC_PLUGIN_VERSION}" + config_dir="/config/plugins/configurations" + tmp_zip="$(mktemp)" + echo "Downloading OIDC plugin ${OIDC_PLUGIN_VERSION} from ${OIDC_PLUGIN_URL}" + wget -O "${tmp_zip}" "${OIDC_PLUGIN_URL}" + rm -rf "${plugin_dir}" + mkdir -p "${plugin_dir}" "${config_dir}" + unzip -o "${tmp_zip}" -d "${plugin_dir}" + rm -f "${tmp_zip}" + cat >"${plugin_dir}/meta.json" <<'EOF' + { + "category": "Authentication", + "changelog": "OIDC SSO authentication 
plugin; auto user creation and role mapping", + "description": "OpenID Connect (OIDC) authentication provider for Jellyfin with SSO support.", + "guid": "a1b2c3d4-e5f6-47a8-b9c0-d1e2f3a4b5c6", + "name": "OIDC Authentication", + "overview": "Enable Single Sign-On (SSO) for Jellyfin using an OpenID Connect provider.", + "owner": "lolerskatez", + "targetAbi": "10.10.7.0", + "timestamp": "2025-12-17T04:00:00Z", + "version": "1.0.2.0", + "status": "Active", + "autoUpdate": false, + "imagePath": "", + "assemblies": [] + } + EOF + IFS=',' read -ra scopes <<< "${OIDC_SCOPES}" + { + echo '' + echo '' + echo " ${OIDC_ISSUER}" + echo " ${OIDC_CLIENT_ID}" + echo " ${OIDC_CLIENT_SECRET}" + echo ' ' + for s in "${scopes[@]}"; do + trimmed="$(echo "${s}" | xargs)" + [ -z "${trimmed}" ] && continue + echo " ${trimmed}" + done + echo ' ' + echo " ${OIDC_ROLE_CLAIM}" + echo " ${OIDC_REDIRECT_URI}" + echo " ${OIDC_LOGOUT_URI}" + echo ' ' + echo ' true' + echo ' false' + echo '' + } >"${config_dir}/OIDC Authentication.xml" runtimeClassName: nvidia containers: - name: jellyfin -- 2.47.2 From ed868a5faa0199e7772efc3fd463021def163dde Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 21:19:21 -0300 Subject: [PATCH 020/684] jellyfin: fix oidc installer script --- services/jellyfin/deployment.yaml | 44 +++++++++++++++---------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/services/jellyfin/deployment.yaml b/services/jellyfin/deployment.yaml index 53f79bc..961db93 100644 --- a/services/jellyfin/deployment.yaml +++ b/services/jellyfin/deployment.yaml @@ -94,28 +94,28 @@ spec: "assemblies": [] } EOF - IFS=',' read -ra scopes <<< "${OIDC_SCOPES}" - { - echo '' - echo '' - echo " ${OIDC_ISSUER}" - echo " ${OIDC_CLIENT_ID}" - echo " ${OIDC_CLIENT_SECRET}" - echo ' ' - for s in "${scopes[@]}"; do - trimmed="$(echo "${s}" | xargs)" - [ -z "${trimmed}" ] && continue - echo " ${trimmed}" - done - echo ' ' - echo " ${OIDC_ROLE_CLAIM}" - echo " 
${OIDC_REDIRECT_URI}" - echo " ${OIDC_LOGOUT_URI}" - echo ' ' - echo ' true' - echo ' false' - echo '' - } >"${config_dir}/OIDC Authentication.xml" + scope_lines="" + for s in $(echo "${OIDC_SCOPES}" | tr ',' ' '); do + trimmed="$(echo "${s}" | xargs)" + [ -z "${trimmed}" ] && continue + scope_lines="${scope_lines} ${trimmed}\n" + done + cat >"${config_dir}/OIDC Authentication.xml" < + + ${OIDC_ISSUER} + ${OIDC_CLIENT_ID} + ${OIDC_CLIENT_SECRET} + + $(printf "%b" "${scope_lines}") + ${OIDC_ROLE_CLAIM} + ${OIDC_REDIRECT_URI} + ${OIDC_LOGOUT_URI} + + true + false + + EOF runtimeClassName: nvidia containers: - name: jellyfin -- 2.47.2 From 1b65987dfe89adfe5215dd3c48971ede951d35d0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 21:30:04 -0300 Subject: [PATCH 021/684] jellyfin: upgrade to 10.11 and seed oidc plugin --- services/jellyfin/deployment.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/services/jellyfin/deployment.yaml b/services/jellyfin/deployment.yaml index 961db93..d58fd7b 100644 --- a/services/jellyfin/deployment.yaml +++ b/services/jellyfin/deployment.yaml @@ -58,6 +58,9 @@ spec: secretKeyRef: name: jellyfin-oidc key: client-secret + volumeMounts: + - name: config + mountPath: /config command: ["/bin/sh", "-c"] args: - | @@ -85,7 +88,7 @@ spec: "name": "OIDC Authentication", "overview": "Enable Single Sign-On (SSO) for Jellyfin using an OpenID Connect provider.", "owner": "lolerskatez", - "targetAbi": "10.10.7.0", + "targetAbi": "10.11.3.0", "timestamp": "2025-12-17T04:00:00Z", "version": "1.0.2.0", "status": "Active", @@ -116,10 +119,11 @@ spec: false EOF + chown -R 1000:65532 "${plugin_dir}" "${config_dir}/OIDC Authentication.xml" runtimeClassName: nvidia containers: - name: jellyfin - image: docker.io/jellyfin/jellyfin:10.10.7 + image: docker.io/jellyfin/jellyfin:10.11.5 imagePullPolicy: IfNotPresent ports: - name: http -- 2.47.2 From eb0db2ce81d99126692687bb69a93b8e83569de7 Mon Sep 17 00:00:00 2001 From: 
Brad Stein Date: Fri, 19 Dec 2025 21:32:40 -0300 Subject: [PATCH 022/684] jellyfin: clean old ldap plugin before oidc init --- services/jellyfin/deployment.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/jellyfin/deployment.yaml b/services/jellyfin/deployment.yaml index d58fd7b..afd5a74 100644 --- a/services/jellyfin/deployment.yaml +++ b/services/jellyfin/deployment.yaml @@ -69,6 +69,7 @@ spec: echo "OIDC_CLIENT_ID or OIDC_CLIENT_SECRET missing; create secret jellyfin-oidc" >&2 exit 1 fi + rm -rf "/config/plugins/LDAP Authentication_20.0.0.0" apk add --no-cache wget unzip plugin_dir="/config/plugins/OIDC Authentication_${OIDC_PLUGIN_VERSION}" config_dir="/config/plugins/configurations" -- 2.47.2 From 65f8b7c893fc48b6c4bc990b96407ea61dd201fe Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 23:16:20 -0300 Subject: [PATCH 023/684] vault: correct serversTransport reference --- services/vault/ingress.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index bb8d336..c8ed24c 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -8,7 +8,7 @@ metadata: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/service.serversscheme: https - traefik.ingress.kubernetes.io/service.serversTransport: vault-vault-to-https@kubernetescrd + traefik.ingress.kubernetes.io/service.serversTransport: vault-to-https@kubernetescrd spec: ingressClassName: traefik tls: -- 2.47.2 From af3d453e86597916df1f37c870eaf0784c3c5ecb Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 23:48:40 -0300 Subject: [PATCH 024/684] vault: let traefik speak http to service --- services/vault/ingress.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index c8ed24c..021f700 100644 --- a/services/vault/ingress.yaml +++ 
b/services/vault/ingress.yaml @@ -7,7 +7,6 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/service.serversscheme: https traefik.ingress.kubernetes.io/service.serversTransport: vault-to-https@kubernetescrd spec: ingressClassName: traefik -- 2.47.2 From 75b62e5ae2008380c4b4e1a6ba3729b736db73ed Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 23:49:34 -0300 Subject: [PATCH 025/684] vault: add traefik redirect middleware --- services/vault/ingress.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index 021f700..a4d9851 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,6 +7,7 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.middlewares: vault-vault-https-redirect@kubernetescrd traefik.ingress.kubernetes.io/service.serversTransport: vault-to-https@kubernetescrd spec: ingressClassName: traefik -- 2.47.2 From f243be21e6d0e5a3f9197be106e7a608c8b5acc1 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 23:50:44 -0300 Subject: [PATCH 026/684] vault: drop unused redirect middleware --- services/vault/ingress.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index a4d9851..021f700 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,7 +7,6 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.middlewares: vault-vault-https-redirect@kubernetescrd traefik.ingress.kubernetes.io/service.serversTransport: vault-to-https@kubernetescrd spec: ingressClassName: traefik -- 2.47.2 From c2dfba67c21a9057011e247216b8bd8eaa703609 Mon Sep 17 00:00:00 2001 From: 
Brad Stein Date: Fri, 19 Dec 2025 23:51:32 -0300 Subject: [PATCH 027/684] vault: remove serversTransport, speak http to service --- services/vault/ingress.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index 021f700..1d9d523 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,7 +7,6 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/service.serversTransport: vault-to-https@kubernetescrd spec: ingressClassName: traefik tls: -- 2.47.2 From d3ca57eabff7c2e0caafc02728bef800d7770917 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 23:52:19 -0300 Subject: [PATCH 028/684] vault: backend over https with serversTransport --- services/vault/ingress.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index 1d9d523..c8ed24c 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,6 +7,8 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/service.serversscheme: https + traefik.ingress.kubernetes.io/service.serversTransport: vault-to-https@kubernetescrd spec: ingressClassName: traefik tls: -- 2.47.2 From fa977a69f45cf8b0786da77424ad051f4503343c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 23:54:28 -0300 Subject: [PATCH 029/684] vault: run http inside cluster (tls terminated at ingress) --- services/vault/configmap.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/services/vault/configmap.yaml b/services/vault/configmap.yaml index 6fc1d6b..d4ffdb5 100644 --- a/services/vault/configmap.yaml +++ b/services/vault/configmap.yaml @@ -13,8 +13,7 @@ data: listener "tcp" { address = "0.0.0.0:8200" cluster_address = "0.0.0.0:8201" - tls_cert_file = 
"/vault/userconfig/tls/tls.crt" - tls_key_file = "/vault/userconfig/tls/tls.key" + tls_disable = true } storage "raft" { -- 2.47.2 From e354f8bc3f23c94d23c6ad8410448cb596d0f380 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 00:03:11 -0300 Subject: [PATCH 030/684] vault: keep probes HTTPS, drop ingress backend tweaks --- services/vault/ingress.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index c8ed24c..1d9d523 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,8 +7,6 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/service.serversscheme: https - traefik.ingress.kubernetes.io/service.serversTransport: vault-to-https@kubernetescrd spec: ingressClassName: traefik tls: -- 2.47.2 From dba8364c74bf723eb91ec5666af55ef6eb1c56da Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 00:09:44 -0300 Subject: [PATCH 031/684] vault: probes use http VAULT_ADDR for http listener --- services/vault/statefulset.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/services/vault/statefulset.yaml b/services/vault/statefulset.yaml index e9a1c21..d1b22c8 100644 --- a/services/vault/statefulset.yaml +++ b/services/vault/statefulset.yaml @@ -54,6 +54,8 @@ spec: - name: cluster containerPort: 8201 env: + - name: VAULT_ADDR + value: "http://127.0.0.1:8200" - name: VAULT_API_ADDR value: "https://secret.bstein.dev" - name: VAULT_CLUSTER_ADDR @@ -72,14 +74,14 @@ spec: value: "true" readinessProbe: exec: - command: ["vault", "status", "-tls-skip-verify"] + command: ["sh", "-c", "VAULT_ADDR=http://127.0.0.1:8200 vault status"] initialDelaySeconds: 10 periodSeconds: 10 timeoutSeconds: 5 failureThreshold: 6 livenessProbe: exec: - command: ["sh", "-c", "vault status -tls-skip-verify >/dev/null 2>&1 || true"] + command: ["sh", "-c", 
"VAULT_ADDR=http://127.0.0.1:8200 vault status >/dev/null 2>&1 || true"] initialDelaySeconds: 60 periodSeconds: 20 timeoutSeconds: 5 -- 2.47.2 From 5b0fbd344b1da24cdafcc7c9e6e544db97ca0f30 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 13:32:36 -0300 Subject: [PATCH 032/684] jellyfin: pull oidc plugin from streaming harbor and fix oidc redirect --- services/jellyfin/deployment.yaml | 87 +++- services/jellyfin/oidc/Jenkinsfile | 568 ++++++++++++++++++++++++++ services/jenkins/configmap-jcasc.yaml | 16 + 3 files changed, 659 insertions(+), 12 deletions(-) create mode 100644 services/jellyfin/oidc/Jenkinsfile diff --git a/services/jellyfin/deployment.yaml b/services/jellyfin/deployment.yaml index afd5a74..86e313a 100644 --- a/services/jellyfin/deployment.yaml +++ b/services/jellyfin/deployment.yaml @@ -29,6 +29,44 @@ spec: fsGroupChangePolicy: OnRootMismatch runAsGroup: 65532 initContainers: + - name: fetch-oidc-plugin + image: alpine:3.20 + securityContext: + runAsUser: 0 + env: + - name: OIDC_PLUGIN_REPO + value: "registry.bstein.dev/streaming/oidc-plugin" + - name: OIDC_PLUGIN_TAG + value: "10.11.5" + - name: ORAS_USERNAME + valueFrom: + secretKeyRef: + name: harbor-robot + key: username + optional: true + - name: ORAS_PASSWORD + valueFrom: + secretKeyRef: + name: harbor-robot + key: password + optional: true + volumeMounts: + - name: oidc-plugin + mountPath: /plugin-src + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + apk add --no-cache curl tar + ORAS_VERSION=1.2.0 + curl -sSL "https://github.com/oras-project/oras/releases/download/v${ORAS_VERSION}/oras_${ORAS_VERSION}_linux_amd64.tar.gz" | tar -xz -C /usr/local/bin oras + ref="${OIDC_PLUGIN_REPO}:${OIDC_PLUGIN_TAG}" + cd /plugin-src + if [ -n "${ORAS_USERNAME:-}" ] && [ -n "${ORAS_PASSWORD:-}" ]; then + oras login "$(echo "${OIDC_PLUGIN_REPO}" | cut -d/ -f1)" -u "${ORAS_USERNAME}" -p "${ORAS_PASSWORD}" + fi + oras pull --allow-path-traversal "${ref}" + ls -lh /plugin-src - name: 
install-oidc-plugin image: alpine:3.20 securityContext: @@ -36,8 +74,6 @@ spec: env: - name: OIDC_PLUGIN_VERSION value: "1.0.2.0" - - name: OIDC_PLUGIN_URL - value: "https://raw.githubusercontent.com/lolerskatez/JellyfinOIDCPlugin/master/OIDC_Authentication_1.0.2.0.zip" - name: OIDC_ISSUER value: "https://sso.bstein.dev/realms/atlas" - name: OIDC_REDIRECT_URI @@ -45,7 +81,7 @@ spec: - name: OIDC_LOGOUT_URI value: "https://sso.bstein.dev/realms/atlas/protocol/openid-connect/logout?redirect_uri=https://stream.bstein.dev/" - name: OIDC_SCOPES - value: "openid,profile,email,groups" + value: "openid,profile,email" - name: OIDC_ROLE_CLAIM value: "groups" - name: OIDC_CLIENT_ID @@ -61,6 +97,8 @@ spec: volumeMounts: - name: config mountPath: /config + - name: oidc-plugin + mountPath: /plugin-src command: ["/bin/sh", "-c"] args: - | @@ -70,16 +108,20 @@ spec: exit 1 fi rm -rf "/config/plugins/LDAP Authentication_20.0.0.0" - apk add --no-cache wget unzip + apk add --no-cache unzip plugin_dir="/config/plugins/OIDC Authentication_${OIDC_PLUGIN_VERSION}" config_dir="/config/plugins/configurations" - tmp_zip="$(mktemp)" - echo "Downloading OIDC plugin ${OIDC_PLUGIN_VERSION} from ${OIDC_PLUGIN_URL}" - wget -O "${tmp_zip}" "${OIDC_PLUGIN_URL}" + plugin_zip="/plugin-src/OIDC_Authentication_${OIDC_PLUGIN_VERSION}-net9.zip" + if [ ! 
-s "${plugin_zip}" ]; then + echo "Plugin zip missing at ${plugin_zip}" >&2 + echo "Contents of /plugin-src:" >&2 + ls -lah /plugin-src >&2 || true + exit 1 + fi rm -rf "${plugin_dir}" mkdir -p "${plugin_dir}" "${config_dir}" - unzip -o "${tmp_zip}" -d "${plugin_dir}" - rm -f "${tmp_zip}" + unzip -o "${plugin_zip}" -d "${plugin_dir}" + rm -f "${plugin_dir}"/Microsoft.Extensions.*.dll cat >"${plugin_dir}/meta.json" <<'EOF' { "category": "Authentication", @@ -89,7 +131,7 @@ spec: "name": "OIDC Authentication", "overview": "Enable Single Sign-On (SSO) for Jellyfin using an OpenID Connect provider.", "owner": "lolerskatez", - "targetAbi": "10.11.3.0", + "targetAbi": "10.11.5.0", "timestamp": "2025-12-17T04:00:00Z", "version": "1.0.2.0", "status": "Active", @@ -104,7 +146,8 @@ spec: [ -z "${trimmed}" ] && continue scope_lines="${scope_lines} ${trimmed}\n" done - cat >"${config_dir}/OIDC Authentication.xml" <"${config_file}" < ${OIDC_ISSUER} @@ -120,7 +163,7 @@ spec: false EOF - chown -R 1000:65532 "${plugin_dir}" "${config_dir}/OIDC Authentication.xml" + chown -R 1000:65532 "${plugin_dir}" "${config_file}" runtimeClassName: nvidia containers: - name: jellyfin @@ -140,6 +183,22 @@ spec: value: "65532" - name: UMASK value: "002" + lifecycle: + postStart: + exec: + command: + - /bin/sh + - -c + - | + set -e + target="/jellyfin/jellyfin-web/index.html" + marker='api/oidc/inject' + if grep -q "${marker}" "${target}"; then + exit 0 + fi + tmp="$(mktemp)" + awk -v marker="${marker}" 'BEGIN{inserted=0} /<\/head>/ && !inserted {print " "; inserted=1} {print}' "${target}" > "${tmp}" + cp "${tmp}" "${target}" resources: limits: nvidia.com/gpu: 1 @@ -157,6 +216,8 @@ spec: - name: media mountPath: /media securityContext: + runAsUser: 0 + runAsGroup: 0 allowPrivilegeEscalation: false readOnlyRootFilesystem: false volumes: @@ -169,3 +230,5 @@ spec: - name: media persistentVolumeClaim: claimName: jellyfin-media-asteria-new + - name: oidc-plugin + emptyDir: {} diff --git 
a/services/jellyfin/oidc/Jenkinsfile b/services/jellyfin/oidc/Jenkinsfile new file mode 100644 index 0000000..6886dc9 --- /dev/null +++ b/services/jellyfin/oidc/Jenkinsfile @@ -0,0 +1,568 @@ +pipeline { + agent { + kubernetes { + yaml """ +apiVersion: v1 +kind: Pod +spec: + restartPolicy: Never + containers: + - name: dotnet + image: mcr.microsoft.com/dotnet/sdk:9.0 + command: + - cat + tty: true +""" + } + } + options { + timestamps() + } + parameters { + string(name: 'HARBOR_REPO', defaultValue: 'registry.bstein.dev/streaming/oidc-plugin', description: 'OCI repository for the plugin artifact') + string(name: 'JELLYFIN_VERSION', defaultValue: '10.11.5', description: 'Jellyfin version to tag the plugin with') + string(name: 'PLUGIN_VERSION', defaultValue: '1.0.2.0', description: 'Plugin version') + } + environment { + ORAS_VERSION = "1.2.0" + DOTNET_CLI_TELEMETRY_OPTOUT = "1" + DOTNET_SKIP_FIRST_TIME_EXPERIENCE = "1" + } + stages { + stage('Checkout') { + steps { + container('dotnet') { + checkout scm + } + } + } + stage('Build plugin') { + steps { + container('dotnet') { + sh ''' + set -euo pipefail + apt-get update + apt-get install -y --no-install-recommends zip curl ca-certificates git + WORKDIR="$(pwd)/build" + SRC_DIR="${WORKDIR}/src" + DIST_DIR="${WORKDIR}/dist" + ART_DIR="${WORKDIR}/artifact" + rm -rf "${SRC_DIR}" "${DIST_DIR}" "${ART_DIR}" + mkdir -p "${SRC_DIR}" "${DIST_DIR}" "${ART_DIR}" + git clone https://github.com/lolerskatez/JellyfinOIDCPlugin.git "${SRC_DIR}" + cd "${SRC_DIR}" + # Override controllers to avoid DI version issues and add injection script + cat > Controllers/OidcController.cs <<'EOF' +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +using IdentityModel.OidcClient; +using MediaBrowser.Controller.Library; +using Microsoft.AspNetCore.Mvc; +using Microsoft.Extensions.DependencyInjection; + +namespace JellyfinOIDCPlugin.Controllers; + +#nullable enable + +[ApiController] 
+[Route("api/oidc")] +public class OidcController : ControllerBase +{ + private IUserManager UserManager => HttpContext.RequestServices.GetRequiredService(); + private static readonly Dictionary StateManager = new(); // Store AuthorizeState objects + + [HttpGet("start")] + public async Task Start() + { + var config = Plugin.Instance?.Configuration; + if (config == null) + { + return BadRequest("Plugin not initialized"); + } + + var options = new OidcClientOptions + { + Authority = config.OidEndpoint?.Trim(), + ClientId = config.OidClientId?.Trim(), + ClientSecret = config.OidSecret?.Trim(), + RedirectUri = GetRedirectUri(), + Scope = string.Join(" ", config.OidScopes) + }; + + try + { + var client = new OidcClient(options); + var result = await client.PrepareLoginAsync().ConfigureAwait(false); + + // Store the authorize state for the callback + var stateString = (string)result.GetType().GetProperty("State")?.GetValue(result); + if (!string.IsNullOrEmpty(stateString)) + { + StateManager[stateString] = result; + } + + var startUrl = (string)result.GetType().GetProperty("StartUrl")?.GetValue(result); + if (string.IsNullOrEmpty(startUrl)) + { + Console.WriteLine("OIDC: Could not get StartUrl from OIDC result"); + return BadRequest("OIDC initialization failed"); + } + + return Redirect(startUrl); + } + catch (Exception ex) + { + Console.WriteLine($"OIDC start error: {ex}"); + return BadRequest("OIDC error: " + ex.Message); + } + } + + [HttpGet("callback")] + public async Task Callback() + { + var config = Plugin.Instance?.Configuration; + if (config == null) + { + return BadRequest("Plugin not initialized"); + } + + try + { + var stateParam = Request.Query["state"].ToString(); + if (string.IsNullOrEmpty(stateParam) || !StateManager.TryGetValue(stateParam, out var storedState)) + { + Console.WriteLine($"OIDC: Invalid state {stateParam}"); + return BadRequest("Invalid state"); + } + + var options = new OidcClientOptions + { + Authority = config.OidEndpoint?.Trim(), + 
ClientId = config.OidClientId?.Trim(), + ClientSecret = config.OidSecret?.Trim(), + RedirectUri = GetRedirectUri(), + Scope = string.Join(" ", config.OidScopes) + }; + + var client = new OidcClient(options); + // Cast stored state to AuthorizeState - it's stored as object + var authorizeState = (AuthorizeState)storedState; + var result = await client.ProcessResponseAsync(Request.QueryString.Value, authorizeState).ConfigureAwait(false); + + if (result.IsError) + { + Console.WriteLine($"OIDC callback failed: {result.Error} - {result.ErrorDescription}"); + return BadRequest("OIDC authentication failed"); + } + + // Get email from claims + var email = result.User?.FindFirst("email")?.Value ?? + result.User?.FindFirst("preferred_username")?.Value ?? + result.User?.FindFirst("sub")?.Value; + + if (string.IsNullOrEmpty(email)) + { + Console.WriteLine("OIDC: No email/username found in OIDC response"); + return BadRequest("No email/username found in OIDC response"); + } + + // Get or create user + var user = UserManager.GetUserByName(email); + if (user == null) + { + Console.WriteLine($"OIDC: Creating new user {email}"); + user = await UserManager.CreateUserAsync(email).ConfigureAwait(false); + } + + // Set authentication provider + user.AuthenticationProviderId = "OIDC"; + + // Get roles from claims + var rolesClaimValue = result.User?.FindFirst(config.RoleClaim)?.Value; + var roles = string.IsNullOrEmpty(rolesClaimValue) + ? Array.Empty() + : rolesClaimValue.Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries); + + // Set permissions based on groups + var isAdmin = roles.Any(r => r.Equals("admin", StringComparison.OrdinalIgnoreCase)); + var isPowerUser = roles.Any(r => r.Equals("Power User", StringComparison.OrdinalIgnoreCase)) && !isAdmin; + + Console.WriteLine($"OIDC: User {email} authenticated. 
Admin: {isAdmin}, PowerUser: {isPowerUser}"); + + // Update user in database + await UserManager.UpdateUserAsync(user).ConfigureAwait(false); + + StateManager.Remove(stateParam); + + // Redirect to Jellyfin main page + return Redirect("/"); + } + catch (Exception ex) + { + Console.WriteLine($"OIDC callback error: {ex}"); + return BadRequest("OIDC error: " + ex.Message); + } + } + + [HttpPost("token")] + public async Task ExchangeToken([FromBody] TokenExchangeRequest request) + { + var config = Plugin.Instance?.Configuration; + if (config == null) + { + Console.WriteLine("OIDC: Plugin not initialized"); + return BadRequest("Plugin not initialized"); + } + + if (string.IsNullOrEmpty(request?.AccessToken)) + { + Console.WriteLine("OIDC: No access token provided"); + return BadRequest("Access token is required"); + } + + try + { + Console.WriteLine("OIDC: Processing token exchange request"); + + // Validate the token with the OIDC provider using UserInfo endpoint + var options = new OidcClientOptions + { + Authority = config.OidEndpoint?.Trim(), + ClientId = config.OidClientId?.Trim(), + ClientSecret = config.OidSecret?.Trim(), + Scope = string.Join(" ", config.OidScopes) + }; + + var client = new OidcClient(options); + + // Use the access token to get user info + var userInfoResult = await client.GetUserInfoAsync(request.AccessToken).ConfigureAwait(false); + + if (userInfoResult.IsError) + { + Console.WriteLine($"OIDC: Failed to get user info: {userInfoResult.Error}"); + return Unauthorized("Invalid access token"); + } + + // Extract email/username from user info + var email = userInfoResult.Claims.FirstOrDefault(c => c.Type == "email")?.Value ?? + userInfoResult.Claims.FirstOrDefault(c => c.Type == "preferred_username")?.Value ?? 
+ userInfoResult.Claims.FirstOrDefault(c => c.Type == "sub")?.Value; + + if (string.IsNullOrEmpty(email)) + { + Console.WriteLine("OIDC: No email/username found in token"); + return BadRequest("No email/username found in token"); + } + + // Get or create user + var user = UserManager.GetUserByName(email); + if (user == null) + { + if (!config.AutoCreateUser) + { + Console.WriteLine($"OIDC: User {email} not found and auto-create disabled"); + return Unauthorized("User does not exist and auto-creation is disabled"); + } + + Console.WriteLine($"OIDC: Creating new user from token {email}"); + user = await UserManager.CreateUserAsync(email).ConfigureAwait(false); + } + + // Update user authentication provider + user.AuthenticationProviderId = "OIDC"; + + // Get roles from claims + var rolesClaimName = config.RoleClaim ?? "groups"; + var rolesClaimValue = userInfoResult.Claims.FirstOrDefault(c => c.Type == rolesClaimName)?.Value; + var roles = string.IsNullOrEmpty(rolesClaimValue) + ? Array.Empty() + : rolesClaimValue.Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries); + + // Set permissions based on groups + var isAdmin = roles.Any(r => r.Equals("admin", StringComparison.OrdinalIgnoreCase)); + var isPowerUser = roles.Any(r => r.Equals("Power User", StringComparison.OrdinalIgnoreCase)) && !isAdmin; + + Console.WriteLine($"OIDC: Token exchange for {email} Admin:{isAdmin} Power:{isPowerUser}"); + + // Update user in database + await UserManager.UpdateUserAsync(user).ConfigureAwait(false); + + // Return success with user info + return Ok(new TokenExchangeResponse + { + Success = true, + UserId = user.Id.ToString(), + Username = user.Username, + Email = email, + IsAdmin = isAdmin, + Message = "User authenticated successfully" + }); + } + catch (Exception ex) + { + Console.WriteLine($"OIDC token exchange error: {ex}"); + return StatusCode(500, $"Token exchange failed: {ex.Message}"); + } + } + + private string GetRedirectUri() + { + var configured = 
Plugin.Instance?.Configuration?.RedirectUri; + if (!string.IsNullOrWhiteSpace(configured)) + { + return configured!; + } + + return $"{Request.Scheme}://{Request.Host}/api/oidc/callback"; + } +} + +public class TokenExchangeRequest +{ + public string? AccessToken { get; set; } + public string? IdToken { get; set; } +} + +public class TokenExchangeResponse +{ + public bool Success { get; set; } + public string? UserId { get; set; } + public string? Username { get; set; } + public string? Email { get; set; } + public bool IsAdmin { get; set; } + public string? Message { get; set; } +} +EOF + + cat > Controllers/OidcStaticController.cs <<'EOF' +using System; +using System.IO; +using System.Reflection; +using MediaBrowser.Common.Plugins; +using Microsoft.AspNetCore.Mvc; + +namespace JellyfinOIDCPlugin.Controllers; + +[ApiController] +[Route("api/oidc")] +public class OidcStaticController : ControllerBase +{ + [HttpGet("login.js")] + public IActionResult GetLoginScript() + { + try + { + var assembly = Assembly.GetExecutingAssembly(); + using var stream = assembly.GetManifestResourceStream("JellyfinOIDCPlugin.web.oidc-login.js"); + if (stream == null) + { + Console.WriteLine("OIDC: Login script resource not found"); + return NotFound(); + } + + using var reader = new StreamReader(stream); + var content = reader.ReadToEnd(); + + return Content(content, "application/javascript"); + } + catch (Exception ex) + { + Console.WriteLine($"OIDC: Error serving login script {ex}"); + return StatusCode(500, "Error loading login script"); + } + } + + [HttpGet("loader.js")] + public IActionResult GetLoader() + { + try + { + var assembly = Assembly.GetExecutingAssembly(); + using var stream = assembly.GetManifestResourceStream("JellyfinOIDCPlugin.web.oidc-loader.js"); + if (stream == null) + { + Console.WriteLine("OIDC: Loader script resource not found"); + return NotFound(); + } + + using var reader = new StreamReader(stream); + var content = reader.ReadToEnd(); + + return 
Content(content, "application/javascript"); + } + catch (Exception ex) + { + Console.WriteLine($"OIDC: Error serving loader script {ex}"); + return StatusCode(500, "Error loading loader script"); + } + } + + [HttpGet("inject")] + public IActionResult GetInject() + { + try + { + var script = @" +(function() { + console.log('[OIDC Plugin] Bootstrap inject started'); + + // Load oidc-loader.js dynamically + const loaderScript = document.createElement('script'); + loaderScript.src = '/api/oidc/loader.js'; + loaderScript.type = 'application/javascript'; + loaderScript.onerror = function() { + console.error('[OIDC Plugin] Failed to load loader.js'); + }; + loaderScript.onload = function() { + console.log('[OIDC Plugin] Loader.js loaded successfully'); + }; + + // Append to head or body + const target = document.head || document.documentElement; + target.appendChild(loaderScript); + + console.log('[OIDC Plugin] Bootstrap script appended to page'); +})(); +"; + return Content(script, "application/javascript"); + } + catch (Exception ex) + { + Console.WriteLine($"OIDC: Error serving inject script {ex}"); + return StatusCode(500, "Error loading inject script"); + } + } + + [HttpGet("global.js")] + public IActionResult GetGlobalInjector() + { + try + { + var assembly = Assembly.GetExecutingAssembly(); + using var stream = assembly.GetManifestResourceStream("JellyfinOIDCPlugin.web.oidc-global-injector.js"); + if (stream == null) + { + Console.WriteLine("OIDC: Global injector resource not found"); + return NotFound(); + } + + using var reader = new StreamReader(stream); + var content = reader.ReadToEnd(); + + return Content(content, "application/javascript"); + } + catch (Exception ex) + { + Console.WriteLine($"OIDC: Error serving global injector {ex}"); + return StatusCode(500, "Error loading global injector"); + } + } + + [HttpGet("config")] + public IActionResult GetConfigurationPage() + { + try + { + var assembly = Assembly.GetExecutingAssembly(); + using var stream = 
assembly.GetManifestResourceStream("JellyfinOIDCPlugin.web.configurationpage.html"); + if (stream == null) + { + Console.WriteLine("OIDC: Configuration page resource not found"); + return NotFound("Configuration page resource not found"); + } + + using var reader = new StreamReader(stream); + var content = reader.ReadToEnd(); + + return Content(content, "text/html"); + } + catch (Exception ex) + { + Console.WriteLine($"OIDC: Error serving configuration page {ex}"); + return StatusCode(500, $"Error loading configuration page: {ex.Message}"); + } + } +} +EOF + cat > JellyfinOIDCPlugin.csproj <<'EOF' + + + net9.0 + JellyfinOIDCPlugin.v2 + JellyfinOIDCPlugin + latest + enable + enable + 1.0.2.0 + 1.0.2.0 + false + + + + runtime + + + runtime + + + runtime + + + runtime + + + runtime + + + none + + + runtime + + + + + + + + +EOF + dotnet restore + dotnet publish -c Release --no-self-contained -o "${DIST_DIR}" + cd "${DIST_DIR}" + zip -r "${ART_DIR}/OIDC_Authentication_${PLUGIN_VERSION}-net9.zip" . 
+ ''' + } + } + } + stage('Push to Harbor') { + steps { + container('dotnet') { + withCredentials([usernamePassword(credentialsId: 'harbor-robot', usernameVariable: 'HARBOR_USERNAME', passwordVariable: 'HARBOR_PASSWORD')]) { + sh ''' + set -euo pipefail + WORKDIR="$(pwd)/build" + ORAS_BIN="/usr/local/bin/oras" + curl -sSL "https://github.com/oras-project/oras/releases/download/v${ORAS_VERSION}/oras_${ORAS_VERSION}_linux_amd64.tar.gz" | tar -xz -C /usr/local/bin oras + ref_host="$(echo "${HARBOR_REPO}" | cut -d/ -f1)" + "${ORAS_BIN}" login "${ref_host}" -u "${HARBOR_USERNAME}" -p "${HARBOR_PASSWORD}" + artifact="${WORKDIR}/artifact/OIDC_Authentication_${PLUGIN_VERSION}-net9.zip" + "${ORAS_BIN}" push "${HARBOR_REPO}:${JELLYFIN_VERSION}" "${artifact}:application/zip" --artifact-type application/zip + "${ORAS_BIN}" push "${HARBOR_REPO}:latest" "${artifact}:application/zip" --artifact-type application/zip + ''' + } + } + } + } + } + post { + always { + container('dotnet') { + archiveArtifacts artifacts: 'build/artifact/*.zip', allowEmptyArchive: true + } + } + } +} diff --git a/services/jenkins/configmap-jcasc.yaml b/services/jenkins/configmap-jcasc.yaml index 958e8a8..615412e 100644 --- a/services/jenkins/configmap-jcasc.yaml +++ b/services/jenkins/configmap-jcasc.yaml @@ -66,6 +66,22 @@ data: } } } + pipelineJob('jellyfin-oidc-plugin') { + definition { + cpsScm { + scm { + git { + remote { + url('https://scm.bstein.dev/bstein/titan-iac.git') + credentials('gitea-pat') + } + branches('*/main') + } + } + scriptPath('services/jellyfin/oidc/Jenkinsfile') + } + } + } pipelineJob('ci-demo') { triggers { scm('H/1 * * * *') -- 2.47.2 From f68668f987f390f397b7b5b22b3aee1e1abbc54a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 13:51:46 -0300 Subject: [PATCH 033/684] jellyfin: fix oidc redirect to api/oidc/callback --- services/jellyfin/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/jellyfin/deployment.yaml 
b/services/jellyfin/deployment.yaml index 86e313a..fdd244a 100644 --- a/services/jellyfin/deployment.yaml +++ b/services/jellyfin/deployment.yaml @@ -77,7 +77,7 @@ spec: - name: OIDC_ISSUER value: "https://sso.bstein.dev/realms/atlas" - name: OIDC_REDIRECT_URI - value: "https://stream.bstein.dev/oauth2/callback" + value: "https://stream.bstein.dev/api/oidc/callback" - name: OIDC_LOGOUT_URI value: "https://sso.bstein.dev/realms/atlas/protocol/openid-connect/logout?redirect_uri=https://stream.bstein.dev/" - name: OIDC_SCOPES -- 2.47.2 From c8adca5a5b1790f608a6103311130840d621ebbc Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 14:10:34 -0300 Subject: [PATCH 034/684] ai: add ollama service and wire chat backend --- .../applications/ai-llm/kustomization.yaml | 23 +++++ .../applications/kustomization.yaml | 1 + services/ai-llm/deployment.yaml | 84 +++++++++++++++++++ services/ai-llm/kustomization.yaml | 9 ++ services/ai-llm/namespace.yaml | 5 ++ services/ai-llm/pvc.yaml | 13 +++ services/ai-llm/service.yaml | 14 ++++ .../bstein-dev-home/backend-deployment.yaml | 7 ++ 8 files changed, 156 insertions(+) create mode 100644 clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml create mode 100644 services/ai-llm/deployment.yaml create mode 100644 services/ai-llm/kustomization.yaml create mode 100644 services/ai-llm/namespace.yaml create mode 100644 services/ai-llm/pvc.yaml create mode 100644 services/ai-llm/service.yaml diff --git a/clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml b/clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml new file mode 100644 index 0000000..3572a6c --- /dev/null +++ b/clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml @@ -0,0 +1,23 @@ +# clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: ai-llm + namespace: flux-system +spec: + interval: 10m + path: ./services/ai-llm + 
targetNamespace: ai + prune: true + sourceRef: + kind: GitRepository + name: flux-system + namespace: flux-system + wait: true + healthChecks: + - apiVersion: apps/v1 + kind: Deployment + name: ollama + namespace: ai + dependsOn: + - name: core diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index 5825734..b5a5e62 100644 --- a/clusters/atlas/flux-system/applications/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/kustomization.yaml @@ -22,3 +22,4 @@ resources: - jenkins/kustomization.yaml - ci-demo/kustomization.yaml - ci-demo/image-automation.yaml + - ai-llm/kustomization.yaml diff --git a/services/ai-llm/deployment.yaml b/services/ai-llm/deployment.yaml new file mode 100644 index 0000000..f9098db --- /dev/null +++ b/services/ai-llm/deployment.yaml @@ -0,0 +1,84 @@ +# services/ai-llm/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ollama + namespace: ai +spec: + replicas: 1 + revisionHistoryLimit: 2 + selector: + matchLabels: + app: ollama + template: + metadata: + labels: + app: ollama + spec: + nodeSelector: + kubernetes.io/hostname: titan-24 + runtimeClassName: nvidia + volumes: + - name: models + persistentVolumeClaim: + claimName: ollama-models + initContainers: + - name: warm-model + image: ollama/ollama:latest + env: + - name: OLLAMA_HOST + value: 0.0.0.0 + - name: NVIDIA_VISIBLE_DEVICES + value: all + - name: NVIDIA_DRIVER_CAPABILITIES + value: compute,utility + - name: OLLAMA_MODELS + value: /root/.ollama + - name: OLLAMA_MODEL + value: phi3:mini-4k-instruct-q4_0 + command: + - /bin/sh + - -c + - | + set -e + ollama serve >/tmp/ollama.log 2>&1 & + sleep 6 + ollama pull "${OLLAMA_MODEL}" + pkill ollama || true + volumeMounts: + - name: models + mountPath: /root/.ollama + resources: + requests: + cpu: 250m + memory: 1Gi + containers: + - name: ollama + image: ollama/ollama:latest + imagePullPolicy: IfNotPresent + ports: + - name: 
http + containerPort: 11434 + env: + - name: OLLAMA_HOST + value: 0.0.0.0 + - name: OLLAMA_KEEP_ALIVE + value: 6h + - name: OLLAMA_MODELS + value: /root/.ollama + - name: NVIDIA_VISIBLE_DEVICES + value: all + - name: NVIDIA_DRIVER_CAPABILITIES + value: compute,utility + volumeMounts: + - name: models + mountPath: /root/.ollama + resources: + requests: + cpu: "2" + memory: 8Gi + nvidia.com/gpu: 1 + limits: + cpu: "4" + memory: 12Gi + nvidia.com/gpu: 1 diff --git a/services/ai-llm/kustomization.yaml b/services/ai-llm/kustomization.yaml new file mode 100644 index 0000000..46ea286 --- /dev/null +++ b/services/ai-llm/kustomization.yaml @@ -0,0 +1,9 @@ +# services/ai-llm/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: ai +resources: + - namespace.yaml + - pvc.yaml + - deployment.yaml + - service.yaml diff --git a/services/ai-llm/namespace.yaml b/services/ai-llm/namespace.yaml new file mode 100644 index 0000000..96f5a81 --- /dev/null +++ b/services/ai-llm/namespace.yaml @@ -0,0 +1,5 @@ +# services/ai-llm/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: ai diff --git a/services/ai-llm/pvc.yaml b/services/ai-llm/pvc.yaml new file mode 100644 index 0000000..51c0384 --- /dev/null +++ b/services/ai-llm/pvc.yaml @@ -0,0 +1,13 @@ +# services/ai-llm/pvc.yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: ollama-models + namespace: ai +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 30Gi + storageClassName: astreae diff --git a/services/ai-llm/service.yaml b/services/ai-llm/service.yaml new file mode 100644 index 0000000..f086a90 --- /dev/null +++ b/services/ai-llm/service.yaml @@ -0,0 +1,14 @@ +# services/ai-llm/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: ollama + namespace: ai +spec: + type: ClusterIP + selector: + app: ollama + ports: + - name: http + port: 11434 + targetPort: 11434 diff --git a/services/bstein-dev-home/backend-deployment.yaml 
b/services/bstein-dev-home/backend-deployment.yaml index 1159487..c80a9ac 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,6 +24,13 @@ spec: - name: backend image: registry.bstein.dev/bstein/bstein-dev-home-backend:latest imagePullPolicy: Always + env: + - name: AI_CHAT_API + value: http://ollama.ai.svc.cluster.local:11434 + - name: AI_CHAT_MODEL + value: phi3:mini-4k-instruct-q4_0 + - name: AI_CHAT_TIMEOUT_SEC + value: "20" ports: - name: http containerPort: 8080 -- 2.47.2 From 16ab7a963d88107a87c4402f768c078730a051e0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 14:16:22 -0300 Subject: [PATCH 035/684] ai: allow ollama to share titan-24 gpu --- services/ai-llm/deployment.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/services/ai-llm/deployment.yaml b/services/ai-llm/deployment.yaml index f9098db..71a54ed 100644 --- a/services/ai-llm/deployment.yaml +++ b/services/ai-llm/deployment.yaml @@ -77,8 +77,6 @@ spec: requests: cpu: "2" memory: 8Gi - nvidia.com/gpu: 1 limits: cpu: "4" memory: 12Gi - nvidia.com/gpu: 1 -- 2.47.2 From 39a914effd8ae9d35f9726befacc19894bbe472c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 14:24:52 -0300 Subject: [PATCH 036/684] ai-llm: use phi3 mini model --- services/ai-llm/deployment.yaml | 2 +- services/bstein-dev-home/backend-deployment.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/ai-llm/deployment.yaml b/services/ai-llm/deployment.yaml index 71a54ed..25a7f2f 100644 --- a/services/ai-llm/deployment.yaml +++ b/services/ai-llm/deployment.yaml @@ -35,7 +35,7 @@ spec: - name: OLLAMA_MODELS value: /root/.ollama - name: OLLAMA_MODEL - value: phi3:mini-4k-instruct-q4_0 + value: phi3:mini command: - /bin/sh - -c diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index c80a9ac..4044efe 100644 --- 
a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -28,7 +28,7 @@ spec: - name: AI_CHAT_API value: http://ollama.ai.svc.cluster.local:11434 - name: AI_CHAT_MODEL - value: phi3:mini-4k-instruct-q4_0 + value: phi3:mini - name: AI_CHAT_TIMEOUT_SEC value: "20" ports: -- 2.47.2 From 9162f5789f7c134ee38e59bc5ea8f829a0e3bda5 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 15:19:03 -0300 Subject: [PATCH 037/684] ai-llm: GPU qwen2.5-coder on titan-24; add chat.ai host --- services/ai-llm/deployment.yaml | 7 ++++++- services/bstein-dev-home/ingress.yaml | 19 +++++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/services/ai-llm/deployment.yaml b/services/ai-llm/deployment.yaml index 25a7f2f..d78315b 100644 --- a/services/ai-llm/deployment.yaml +++ b/services/ai-llm/deployment.yaml @@ -35,7 +35,7 @@ spec: - name: OLLAMA_MODELS value: /root/.ollama - name: OLLAMA_MODEL - value: phi3:mini + value: qwen2.5-coder:7b-instruct-q4_0 command: - /bin/sh - -c @@ -52,6 +52,9 @@ spec: requests: cpu: 250m memory: 1Gi + nvidia.com/gpu: 1 + limits: + nvidia.com/gpu: 1 containers: - name: ollama image: ollama/ollama:latest @@ -77,6 +80,8 @@ spec: requests: cpu: "2" memory: 8Gi + nvidia.com/gpu: 1 limits: cpu: "4" memory: 12Gi + nvidia.com/gpu: 1 diff --git a/services/bstein-dev-home/ingress.yaml b/services/bstein-dev-home/ingress.yaml index 471f1bc..7e92941 100644 --- a/services/bstein-dev-home/ingress.yaml +++ b/services/bstein-dev-home/ingress.yaml @@ -9,9 +9,9 @@ metadata: traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" cert-manager.io/cluster-issuer: letsencrypt -spec: + spec: tls: - - hosts: [ "bstein.dev" ] + - hosts: [ "bstein.dev", "chat.ai.bstein.dev" ] secretName: bstein-dev-home-tls rules: - host: bstein.dev @@ -29,3 +29,18 @@ spec: service: name: bstein-dev-home-frontend port: { number: 80 } + - host: 
chat.ai.bstein.dev + http: + paths: + - path: /api + pathType: Prefix + backend: + service: + name: bstein-dev-home-backend + port: { number: 80 } + - path: / + pathType: Prefix + backend: + service: + name: bstein-dev-home-frontend + port: { number: 80 } -- 2.47.2 From 610ef7a5520267886186d524f46ebc82ed0a20ed Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 15:20:31 -0300 Subject: [PATCH 038/684] bstein-dev-home: fix ingress indent for chat.ai host --- services/bstein-dev-home/ingress.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/ingress.yaml b/services/bstein-dev-home/ingress.yaml index 7e92941..872a0df 100644 --- a/services/bstein-dev-home/ingress.yaml +++ b/services/bstein-dev-home/ingress.yaml @@ -9,7 +9,7 @@ metadata: traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" cert-manager.io/cluster-issuer: letsencrypt - spec: +spec: tls: - hosts: [ "bstein.dev", "chat.ai.bstein.dev" ] secretName: bstein-dev-home-tls -- 2.47.2 From a99293944a8665bcf260fa55b92111e91b713617 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 15:22:05 -0300 Subject: [PATCH 039/684] bstein-dev-home: default chat model to qwen2.5-coder --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 4044efe..5d8fab5 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -28,7 +28,7 @@ spec: - name: AI_CHAT_API value: http://ollama.ai.svc.cluster.local:11434 - name: AI_CHAT_MODEL - value: phi3:mini + value: qwen2.5-coder:7b-instruct-q4_0 - name: AI_CHAT_TIMEOUT_SEC value: "20" ports: -- 2.47.2 From 1e72f2e3711b8842dbe635e15dbf6aa6b5a9301e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 18:08:30 -0300 Subject: [PATCH 
040/684] jenkins: add RBAC serviceaccount and use for agents --- services/jenkins/configmap-jcasc.yaml | 2 +- services/jenkins/deployment.yaml | 2 +- services/jenkins/kustomization.yaml | 1 + services/jenkins/serviceaccount.yaml | 41 +++++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 services/jenkins/serviceaccount.yaml diff --git a/services/jenkins/configmap-jcasc.yaml b/services/jenkins/configmap-jcasc.yaml index 615412e..99dadd8 100644 --- a/services/jenkins/configmap-jcasc.yaml +++ b/services/jenkins/configmap-jcasc.yaml @@ -171,7 +171,7 @@ data: label: "jenkins-jenkins-agent " nodeUsageMode: "NORMAL" podRetention: Never - serviceAccount: "default" + serviceAccount: "jenkins" slaveConnectTimeoutStr: "100" yamlMergeStrategy: override inheritYamlMergeStrategy: false diff --git a/services/jenkins/deployment.yaml b/services/jenkins/deployment.yaml index d9cf1ea..ec749e8 100644 --- a/services/jenkins/deployment.yaml +++ b/services/jenkins/deployment.yaml @@ -18,7 +18,7 @@ spec: labels: app: jenkins spec: - serviceAccountName: default + serviceAccountName: jenkins nodeSelector: kubernetes.io/arch: arm64 node-role.kubernetes.io/worker: "true" diff --git a/services/jenkins/kustomization.yaml b/services/jenkins/kustomization.yaml index a0bd200..c183a4f 100644 --- a/services/jenkins/kustomization.yaml +++ b/services/jenkins/kustomization.yaml @@ -4,6 +4,7 @@ kind: Kustomization namespace: jenkins resources: - namespace.yaml + - serviceaccount.yaml - pvc.yaml - configmap-jcasc.yaml - configmap-init-scripts.yaml diff --git a/services/jenkins/serviceaccount.yaml b/services/jenkins/serviceaccount.yaml new file mode 100644 index 0000000..27caeed --- /dev/null +++ b/services/jenkins/serviceaccount.yaml @@ -0,0 +1,41 @@ +# services/jenkins/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: jenkins + namespace: jenkins + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: 
jenkins-agent + namespace: jenkins +rules: + - apiGroups: [""] + resources: + - pods + - pods/exec + - pods/log + - pods/portforward + - services + - endpoints + - persistentvolumeclaims + - configmaps + - secrets + verbs: ["get", "list", "watch", "create", "delete"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: jenkins-agent + namespace: jenkins +subjects: + - kind: ServiceAccount + name: jenkins + namespace: jenkins +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: jenkins-agent -- 2.47.2 From 4491a3681a4e03ed6262c1b95706594cc249016a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 18:42:16 -0300 Subject: [PATCH 041/684] jenkins: use main service for tunnel --- services/jenkins/configmap-jcasc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/jenkins/configmap-jcasc.yaml b/services/jenkins/configmap-jcasc.yaml index 99dadd8..f72f6aa 100644 --- a/services/jenkins/configmap-jcasc.yaml +++ b/services/jenkins/configmap-jcasc.yaml @@ -135,7 +135,7 @@ data: connectTimeout: "5" readTimeout: "15" jenkinsUrl: "http://jenkins.jenkins.svc.cluster.local:8080" - jenkinsTunnel: "jenkins-agent.jenkins.svc.cluster.local:50000" + jenkinsTunnel: "jenkins.jenkins.svc.cluster.local:50000" skipTlsVerify: false maxRequestsPerHostStr: "32" retentionTimeout: "5" -- 2.47.2 From eaab2b79881d87b852488a2fb359b1fab2721840 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 23:22:08 -0300 Subject: [PATCH 042/684] flux: run bstein-dev-home image automation on sso-hardening --- .../applications/bstein-dev-home/image-automation.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml index e198db4..58098a2 100644 --- a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml +++ 
b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml @@ -13,14 +13,14 @@ spec: git: checkout: ref: - branch: main + branch: feature/sso-hardening commit: author: email: ops@bstein.dev name: flux-bot messageTemplate: "chore(bstein-dev-home): update images to {{range .Updated.Images}}{{.}}{{end}}" push: - branch: main + branch: feature/sso-hardening update: strategy: Setters path: services/bstein-dev-home -- 2.47.2 From f23641be50301f4aae503016e157c618ff7016ba Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 23:24:29 -0300 Subject: [PATCH 043/684] flux: let bstein-dev-home automation read policies in app ns --- .../applications/bstein-dev-home/image-automation.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml index 58098a2..89e78c8 100644 --- a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml +++ b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml @@ -22,5 +22,6 @@ spec: push: branch: feature/sso-hardening update: - strategy: Setters path: services/bstein-dev-home + policyNamespace: bstein-dev-home + strategy: Setters -- 2.47.2 From 3d19b54b129db85d978d1f5fd4ac6e4402f8c392 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 23:25:56 -0300 Subject: [PATCH 044/684] flux: place bstein-dev-home image automation in app namespace --- .../applications/bstein-dev-home/image-automation.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml index 89e78c8..076c4b3 100644 --- a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml +++ 
b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml @@ -3,7 +3,7 @@ apiVersion: image.toolkit.fluxcd.io/v1 kind: ImageUpdateAutomation metadata: name: bstein-dev-home - namespace: flux-system + namespace: bstein-dev-home spec: interval: 1m0s sourceRef: @@ -22,6 +22,5 @@ spec: push: branch: feature/sso-hardening update: - path: services/bstein-dev-home - policyNamespace: bstein-dev-home strategy: Setters + path: services/bstein-dev-home -- 2.47.2 From de317a3396f3d9a2f3adfc2fc9fce0ba1f69b7cc Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 23:26:42 -0300 Subject: [PATCH 045/684] flux: fix bstein-dev-home automation template --- .../applications/bstein-dev-home/image-automation.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml index 076c4b3..a562bf6 100644 --- a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml +++ b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml @@ -18,7 +18,7 @@ spec: author: email: ops@bstein.dev name: flux-bot - messageTemplate: "chore(bstein-dev-home): update images to {{range .Updated.Images}}{{.}}{{end}}" + messageTemplate: "chore(bstein-dev-home): update images to {{range .Changed.Images}}{{.}}{{end}}" push: branch: feature/sso-hardening update: -- 2.47.2 From ba3b3a3d9f321ee4a4c40c4256b696791dcce563 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 23:27:24 -0300 Subject: [PATCH 046/684] flux: simplify bstein-dev-home image update message --- .../applications/bstein-dev-home/image-automation.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml index a562bf6..ddd55a1 100644 --- 
a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml +++ b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml @@ -18,7 +18,7 @@ spec: author: email: ops@bstein.dev name: flux-bot - messageTemplate: "chore(bstein-dev-home): update images to {{range .Changed.Images}}{{.}}{{end}}" + messageTemplate: "chore(bstein-dev-home): automated image update" push: branch: feature/sso-hardening update: -- 2.47.2 From 05fa4735822a37b95b5fd87dca83d24b76108677 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 02:27:44 +0000 Subject: [PATCH 047/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/kustomization.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index dd4d6e7..b69e4fd 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -12,6 +12,6 @@ resources: - ingress.yaml images: - name: registry.bstein.dev/bstein/bstein-dev-home-frontend - newTag: latest # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + newTag: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-16 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} - name: registry.bstein.dev/bstein/bstein-dev-home-backend - newTag: latest # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + newTag: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-16 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} -- 2.47.2 From ba7563b0e50d3ea42cd4af35f75cf2bfa76fbb0b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 23:29:40 -0300 Subject: [PATCH 048/684] bstein-dev-home: fix image tags, pause automation --- .../applications/bstein-dev-home/image-automation.yaml | 1 + services/bstein-dev-home/kustomization.yaml | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git 
a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml index ddd55a1..ab05c84 100644 --- a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml +++ b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml @@ -6,6 +6,7 @@ metadata: namespace: bstein-dev-home spec: interval: 1m0s + suspend: true sourceRef: kind: GitRepository name: flux-system diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index b69e4fd..e4509eb 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -12,6 +12,6 @@ resources: - ingress.yaml images: - name: registry.bstein.dev/bstein/bstein-dev-home-frontend - newTag: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-16 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + newTag: 0.1.1-16 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} - name: registry.bstein.dev/bstein/bstein-dev-home-backend - newTag: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-16 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + newTag: 0.1.1-16 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} -- 2.47.2 From b1ac53e1a1088303ca7e6572ff93a8106ac1239c Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 02:29:58 +0000 Subject: [PATCH 049/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/kustomization.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index e4509eb..b69e4fd 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -12,6 +12,6 @@ resources: - ingress.yaml images: - name: registry.bstein.dev/bstein/bstein-dev-home-frontend - newTag: 
0.1.1-16 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + newTag: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-16 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} - name: registry.bstein.dev/bstein/bstein-dev-home-backend - newTag: 0.1.1-16 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + newTag: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-16 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} -- 2.47.2 From a4a59042015e0f1e874be3808a451bfdea06e7ba Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 23:32:17 -0300 Subject: [PATCH 050/684] bstein-dev-home: pin images and stop automation churn --- services/bstein-dev-home/kustomization.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index b69e4fd..a79f948 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -12,6 +12,6 @@ resources: - ingress.yaml images: - name: registry.bstein.dev/bstein/bstein-dev-home-frontend - newTag: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-16 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + newTag: 0.1.1-16 - name: registry.bstein.dev/bstein/bstein-dev-home-backend - newTag: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-16 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + newTag: 0.1.1-16 -- 2.47.2 From b6acab8ee6474100b4629b3647bbacb5cb97d80a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 20 Dec 2025 23:59:31 -0300 Subject: [PATCH 051/684] bstein-dev-home: re-enable image automation --- .../applications/bstein-dev-home/image-automation.yaml | 1 - services/bstein-dev-home/kustomization.yaml | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml 
b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml index ab05c84..ddd55a1 100644 --- a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml +++ b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml @@ -6,7 +6,6 @@ metadata: namespace: bstein-dev-home spec: interval: 1m0s - suspend: true sourceRef: kind: GitRepository name: flux-system diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index a79f948..dd4d6e7 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -12,6 +12,6 @@ resources: - ingress.yaml images: - name: registry.bstein.dev/bstein/bstein-dev-home-frontend - newTag: 0.1.1-16 + newTag: latest # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} - name: registry.bstein.dev/bstein/bstein-dev-home-backend - newTag: 0.1.1-16 + newTag: latest # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} -- 2.47.2 From 204e92c1e83414baf321215d495680dbd2c11578 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 02:59:51 +0000 Subject: [PATCH 052/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/kustomization.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index dd4d6e7..0bf4e6a 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -12,6 +12,6 @@ resources: - ingress.yaml images: - name: registry.bstein.dev/bstein/bstein-dev-home-frontend - newTag: latest # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + newTag: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-17 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} - name: registry.bstein.dev/bstein/bstein-dev-home-backend - newTag: latest # {"$imagepolicy": 
"bstein-dev-home:bstein-dev-home-backend"} + newTag: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-17 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} -- 2.47.2 From 9043d735f14bd7a5f06c42882f6703a8934b2588 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 03:02:41 +0000 Subject: [PATCH 053/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/kustomization.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index 0bf4e6a..da8fa0f 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -12,6 +12,6 @@ resources: - ingress.yaml images: - name: registry.bstein.dev/bstein/bstein-dev-home-frontend - newTag: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-17 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + newTag: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-18 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} - name: registry.bstein.dev/bstein/bstein-dev-home-backend newTag: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-17 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} -- 2.47.2 From 9c90cfcc9cdc886b51b5ae4063f42c8cd18de1a2 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 03:03:45 +0000 Subject: [PATCH 054/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/kustomization.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index da8fa0f..d001234 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -14,4 +14,4 @@ images: - name: registry.bstein.dev/bstein/bstein-dev-home-frontend newTag: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-18 # {"$imagepolicy": 
"bstein-dev-home:bstein-dev-home-frontend"} - name: registry.bstein.dev/bstein/bstein-dev-home-backend - newTag: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-17 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + newTag: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-18 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} -- 2.47.2 From 0d1e3e8666de4137e47b1fabdf49dbac86379369 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 21 Dec 2025 00:03:45 -0300 Subject: [PATCH 055/684] fix(bstein-dev-home): patch images via policies directly --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/frontend-deployment.yaml | 2 +- services/bstein-dev-home/kustomization.yaml | 5 ----- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 5d8fab5..8c9968b 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:latest + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-17 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 7189bee..1dde914 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:latest + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-17 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http diff --git 
a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index d001234..80b6be0 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -10,8 +10,3 @@ resources: - backend-deployment.yaml - backend-service.yaml - ingress.yaml -images: - - name: registry.bstein.dev/bstein/bstein-dev-home-frontend - newTag: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-18 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} - - name: registry.bstein.dev/bstein/bstein-dev-home-backend - newTag: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-18 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} -- 2.47.2 From 8a4e1993eccb29e803f873fc1359394fe814897d Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 03:05:57 +0000 Subject: [PATCH 056/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 8c9968b..da10665 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-17 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-18 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 1dde914..45adb83 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 
+22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-17 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-18 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From ef372bf8f2e6d1bfa2b4b3703e00e8f17da69ecf Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 21 Dec 2025 00:17:08 -0300 Subject: [PATCH 057/684] chore(bstein-dev-home): scale to 1 replica and pass ai meta env --- services/bstein-dev-home/backend-deployment.yaml | 9 ++++++++- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index da10665..1243faf 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -5,7 +5,7 @@ metadata: name: bstein-dev-home-backend namespace: bstein-dev-home spec: - replicas: 2 + replicas: 1 revisionHistoryLimit: 3 selector: matchLabels: @@ -31,6 +31,13 @@ spec: value: qwen2.5-coder:7b-instruct-q4_0 - name: AI_CHAT_TIMEOUT_SEC value: "20" + - name: AI_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: AI_NODE_GPU_MAP + value: | + {"titan-24": "RTX 3080 8GB (local GPU)", "titan-22": "RTX 3050 8GB (local GPU)"} ports: - name: http containerPort: 8080 diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 45adb83..b4a9af6 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -5,7 +5,7 @@ metadata: name: bstein-dev-home-frontend namespace: bstein-dev-home spec: - replicas: 2 + replicas: 1 revisionHistoryLimit: 3 selector: matchLabels: -- 2.47.2 From 
b519ef08bc6fe62ab912b4ce5a005f8fe7afbf2e Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 03:19:43 +0000 Subject: [PATCH 058/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index b4a9af6..05d9a2c 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-18 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-19 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 05c84daf2a7924227f5c85c1031547f54f27c2bc Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 03:20:46 +0000 Subject: [PATCH 059/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 1243faf..f112cb7 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-18 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-19 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 4cf12144e910dccdacc458b2cb230ad11e0cd495 Mon Sep 17 00:00:00 2001 From: flux-bot 
Date: Sun, 21 Dec 2025 03:24:43 +0000 Subject: [PATCH 060/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 05d9a2c..6179fb9 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-19 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-20 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From d6f44330c92cf4401620e6d1d0d8f04b69f30a2b Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 03:25:46 +0000 Subject: [PATCH 061/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index f112cb7..050fa06 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-19 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-20 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 60f2c65ad3765e4d13b7058dec4c24e62f46fa8e Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 03:35:44 +0000 Subject: [PATCH 062/684] 
chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 6179fb9..af15a73 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-20 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-21 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 24532fbdd5b929be1f78cc0caf314e81790a6f8f Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 03:36:47 +0000 Subject: [PATCH 063/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 050fa06..31b3f7c 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-20 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-21 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From de693bafbe8e71864250ecd5ed3045d788830e41 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 21 Dec 2025 00:46:09 -0300 Subject: [PATCH 064/684] feat(bstein-dev-home): add SA/RBAC for ai pod discovery --- 
.../bstein-dev-home/backend-deployment.yaml | 1 + services/bstein-dev-home/kustomization.yaml | 1 + services/bstein-dev-home/rbac.yaml | 29 +++++++++++++++++++ 3 files changed, 31 insertions(+) create mode 100644 services/bstein-dev-home/rbac.yaml diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 31b3f7c..50af5cd 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -15,6 +15,7 @@ spec: labels: app: bstein-dev-home-backend spec: + serviceAccountName: bstein-dev-home nodeSelector: kubernetes.io/arch: arm64 node-role.kubernetes.io/worker: "true" diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index 80b6be0..e15af3e 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -5,6 +5,7 @@ namespace: bstein-dev-home resources: - namespace.yaml - image.yaml + - rbac.yaml - frontend-deployment.yaml - frontend-service.yaml - backend-deployment.yaml diff --git a/services/bstein-dev-home/rbac.yaml b/services/bstein-dev-home/rbac.yaml new file mode 100644 index 0000000..a6fcd03 --- /dev/null +++ b/services/bstein-dev-home/rbac.yaml @@ -0,0 +1,29 @@ +# services/bstein-dev-home/rbac.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: bstein-dev-home + namespace: bstein-dev-home +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: bstein-dev-home-ai-reader +rules: + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] + resourceNames: [] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: bstein-dev-home-ai-reader +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: bstein-dev-home-ai-reader +subjects: + - kind: ServiceAccount + name: bstein-dev-home + namespace: bstein-dev-home -- 2.47.2 From 
c6bae35bc6f7749aacbe6215626ad6308fcfd0a4 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 21 Dec 2025 00:47:57 -0300 Subject: [PATCH 065/684] chore(ai-llm): annotate pod with model and gpu --- services/ai-llm/deployment.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/services/ai-llm/deployment.yaml b/services/ai-llm/deployment.yaml index d78315b..fb0d0e7 100644 --- a/services/ai-llm/deployment.yaml +++ b/services/ai-llm/deployment.yaml @@ -14,6 +14,9 @@ spec: metadata: labels: app: ollama + annotations: + ai.bstein.dev/model: qwen2.5-coder:7b-instruct-q4_0 + ai.bstein.dev/gpu: RTX 3080 8GB (titan-24) spec: nodeSelector: kubernetes.io/hostname: titan-24 -- 2.47.2 From b597613dc34b5f5c45933bd5601ee4326ecd33a0 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 03:49:45 +0000 Subject: [PATCH 066/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index af15a73..9bff4d1 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-21 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-22 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 85580ea12862403871e2c8acfb94decb1595e9a8 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 03:50:48 +0000 Subject: [PATCH 067/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 50af5cd..1b0360d 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-21 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-22 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From f37ce6fb85720dadf9d89a681d7ef4035bf6ebd3 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 21 Dec 2025 01:13:35 -0300 Subject: [PATCH 068/684] fix(ai): ensure backend token mount and annotate ollama pods --- services/bstein-dev-home/backend-deployment.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 1b0360d..ee1e8d0 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -15,6 +15,7 @@ spec: labels: app: bstein-dev-home-backend spec: + automountServiceAccountToken: true serviceAccountName: bstein-dev-home nodeSelector: kubernetes.io/arch: arm64 -- 2.47.2 From a9bf9178e649459e497b6514cb17dd06524238a3 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 04:21:47 +0000 Subject: [PATCH 069/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 9bff4d1..cb65762 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot 
containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-22 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-23 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 5f64778eebf62b67485e47c73bef849ec0747874 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 04:22:50 +0000 Subject: [PATCH 070/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index ee1e8d0..be8f4e4 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-22 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-23 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From e7abd30b1d535ed2d7c6b0b98f814df0e57db510 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 21 Dec 2025 01:31:03 -0300 Subject: [PATCH 071/684] fix(ai): increase chat timeout to 60s --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index be8f4e4..57c781e 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -32,7 +32,7 @@ spec: - name: AI_CHAT_MODEL value: qwen2.5-coder:7b-instruct-q4_0 - name: AI_CHAT_TIMEOUT_SEC - value: "20" + value: "60" - 
name: AI_NODE_NAME valueFrom: fieldRef: -- 2.47.2 From 5a06496fbef6b61b01361f8a56d770ebdd0ca8cf Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 04:31:48 +0000 Subject: [PATCH 072/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index cb65762..597be06 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-23 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-24 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 11463f63d12a4672cb9bf0f75a8ab6dd85bbc1de Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 04:32:51 +0000 Subject: [PATCH 073/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 57c781e..2fb624c 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-23 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-24 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 
b05df744f21969e1fde3f397e26f547d62235730 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 04:39:48 +0000 Subject: [PATCH 074/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 597be06..26267dd 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-24 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-25 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 8d6d2fc8fcac9ca97f94e753e1aad8ed759089d8 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 21 Dec 2025 04:40:52 +0000 Subject: [PATCH 075/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 2fb624c..d0a584d 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-24 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-25 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 1526906d7e7f9263138abbc0e58a19aa563a5b3b Mon Sep 17 00:00:00 2001 From: Brad Stein 
Date: Mon, 22 Dec 2025 11:24:44 -0300 Subject: [PATCH 076/684] jitsi: enable pods and fix colibri ws --- services/jitsi/deployment.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/services/jitsi/deployment.yaml b/services/jitsi/deployment.yaml index ff81b33..0bb9c51 100644 --- a/services/jitsi/deployment.yaml +++ b/services/jitsi/deployment.yaml @@ -5,7 +5,7 @@ metadata: name: jitsi-prosody namespace: jitsi spec: - replicas: 0 + replicas: 1 selector: matchLabels: { app: jitsi-prosody } template: @@ -51,7 +51,7 @@ metadata: name: jitsi-jicofo namespace: jitsi spec: - replicas: 0 + replicas: 1 selector: matchLabels: { app: jitsi-jicofo } template: @@ -90,7 +90,7 @@ metadata: name: jitsi-jvb namespace: jitsi spec: - replicas: 0 + replicas: 1 selector: matchLabels: { app: jitsi-jvb } template: @@ -121,8 +121,8 @@ spec: - { name: ENABLE_COLIBRI_WEBSOCKET, value: "1" } # enables /colibri-ws # - { name: JVB_STUN_SERVERS, value: "stun.l.google.com:19302,stun1.l.google.com:19302,meet-jit-si-turnrelay.jitsi.net:443" } - { name: JVB_ENABLE_APIS, value: "rest,colibri" } - - { name: JVB_WS_DOMAIN, value: "meet.bstein.dev:443" } - - { name: JVB_WS_TLS, value: "true" } + - { name: JVB_WS_DOMAIN, value: "meet.bstein.dev" } + - { name: JVB_WS_TLS, value: "false" } # TLS ends at Traefik; keep WS HTTP - { name: JVB_ADVERTISE_IPS, value: "38.28.125.112" } - { name: JVB_TCP_HARVESTER_DISABLED, value: "false" } - { name: JVB_TCP_PORT, value: "4443" } @@ -140,7 +140,7 @@ metadata: name: jitsi-web namespace: jitsi spec: - replicas: 0 + replicas: 1 selector: matchLabels: { app: jitsi-web } template: -- 2.47.2 From d436ed73bccf26391c1fe26ab3cc6f63e720273f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 22 Dec 2025 11:37:49 -0300 Subject: [PATCH 077/684] jitsi: advertise wss colibri --- services/jitsi/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/jitsi/deployment.yaml b/services/jitsi/deployment.yaml index 
0bb9c51..53c5dc4 100644 --- a/services/jitsi/deployment.yaml +++ b/services/jitsi/deployment.yaml @@ -122,7 +122,7 @@ spec: # - { name: JVB_STUN_SERVERS, value: "stun.l.google.com:19302,stun1.l.google.com:19302,meet-jit-si-turnrelay.jitsi.net:443" } - { name: JVB_ENABLE_APIS, value: "rest,colibri" } - { name: JVB_WS_DOMAIN, value: "meet.bstein.dev" } - - { name: JVB_WS_TLS, value: "false" } # TLS ends at Traefik; keep WS HTTP + - { name: JVB_WS_TLS, value: "true" } # advertise wss:// for bridge channel - { name: JVB_ADVERTISE_IPS, value: "38.28.125.112" } - { name: JVB_TCP_HARVESTER_DISABLED, value: "false" } - { name: JVB_TCP_PORT, value: "4443" } -- 2.47.2 From 752e75dca4c554fc612bef48e253ab7743729445 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 22 Dec 2025 11:49:31 -0300 Subject: [PATCH 078/684] jitsi: use recreate for hostPort rollout --- services/jitsi/deployment.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/jitsi/deployment.yaml b/services/jitsi/deployment.yaml index 53c5dc4..eaf7afa 100644 --- a/services/jitsi/deployment.yaml +++ b/services/jitsi/deployment.yaml @@ -91,6 +91,8 @@ metadata: namespace: jitsi spec: replicas: 1 + strategy: + type: Recreate selector: matchLabels: { app: jitsi-jvb } template: -- 2.47.2 From 356f0de253833a69b83be7829f22163c9060361e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 22 Dec 2025 12:27:26 -0300 Subject: [PATCH 079/684] jitsi: advertise lan and public ips --- services/jitsi/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/jitsi/deployment.yaml b/services/jitsi/deployment.yaml index eaf7afa..0bf45e0 100644 --- a/services/jitsi/deployment.yaml +++ b/services/jitsi/deployment.yaml @@ -125,7 +125,7 @@ spec: - { name: JVB_ENABLE_APIS, value: "rest,colibri" } - { name: JVB_WS_DOMAIN, value: "meet.bstein.dev" } - { name: JVB_WS_TLS, value: "true" } # advertise wss:// for bridge channel - - { name: JVB_ADVERTISE_IPS, value: "38.28.125.112" } + - { name: 
JVB_ADVERTISE_IPS, value: "38.28.125.112,192.168.22.22" } - { name: JVB_TCP_HARVESTER_DISABLED, value: "false" } - { name: JVB_TCP_PORT, value: "4443" } volumeMounts: -- 2.47.2 From 3330eb75c7ba9778799fe37579a39ef661f83742 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 22 Dec 2025 13:43:55 -0300 Subject: [PATCH 080/684] jitsi: add tcp harvester config for 4443 --- services/jitsi/deployment.yaml | 4 ++++ services/jitsi/kustomization.yaml | 1 + 2 files changed, 5 insertions(+) diff --git a/services/jitsi/deployment.yaml b/services/jitsi/deployment.yaml index 0bf45e0..434cd77 100644 --- a/services/jitsi/deployment.yaml +++ b/services/jitsi/deployment.yaml @@ -130,9 +130,13 @@ spec: - { name: JVB_TCP_PORT, value: "4443" } volumeMounts: - { name: cfg, mountPath: /config } + - { name: jvb-custom, mountPath: /config/custom-jvb.conf, subPath: custom-jvb.conf } volumes: - name: cfg persistentVolumeClaim: { claimName: jitsi-jvb-config } + - name: jvb-custom + configMap: + name: jitsi-jvb-custom-config --- diff --git a/services/jitsi/kustomization.yaml b/services/jitsi/kustomization.yaml index 8864598..c0f5ebb 100644 --- a/services/jitsi/kustomization.yaml +++ b/services/jitsi/kustomization.yaml @@ -8,3 +8,4 @@ resources: - pvc.yaml - ingress.yaml - secret.yaml + - jvb-configmap.yaml -- 2.47.2 From 638b37cb37fec30250aa9fb97bd21ec6aa2155b0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 22 Dec 2025 13:44:07 -0300 Subject: [PATCH 081/684] jitsi: add tcp harvester config for 4443 (configmap) --- services/jitsi/jvb-configmap.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 services/jitsi/jvb-configmap.yaml diff --git a/services/jitsi/jvb-configmap.yaml b/services/jitsi/jvb-configmap.yaml new file mode 100644 index 0000000..d327a9e --- /dev/null +++ b/services/jitsi/jvb-configmap.yaml @@ -0,0 +1,17 @@ +# services/jitsi/jvb-configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: jitsi-jvb-custom-config + namespace: jitsi +data: + 
custom-jvb.conf: | + videobridge { + ice { + tcp { + enabled = true + port = 4443 + mapped-port = 4443 + } + } + } -- 2.47.2 From 5baf62c915d816f9058a5ffb6f3913fd9c2f73ed Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 22 Dec 2025 13:45:50 -0300 Subject: [PATCH 082/684] jitsi: copy tcp custom config via init --- services/jitsi/deployment.yaml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/services/jitsi/deployment.yaml b/services/jitsi/deployment.yaml index 434cd77..3af4ed8 100644 --- a/services/jitsi/deployment.yaml +++ b/services/jitsi/deployment.yaml @@ -95,6 +95,18 @@ spec: type: Recreate selector: matchLabels: { app: jitsi-jvb } + initContainers: + - name: jvb-custom-config + image: busybox:1.36 + command: + - /bin/sh + - -c + - | + set -euo pipefail + cp /custom-config/custom-jvb.conf /config/custom-jvb.conf + volumeMounts: + - { name: cfg, mountPath: /config } + - { name: jvb-custom, mountPath: /custom-config } template: metadata: labels: { app: jitsi-jvb } @@ -130,7 +142,6 @@ spec: - { name: JVB_TCP_PORT, value: "4443" } volumeMounts: - { name: cfg, mountPath: /config } - - { name: jvb-custom, mountPath: /config/custom-jvb.conf, subPath: custom-jvb.conf } volumes: - name: cfg persistentVolumeClaim: { claimName: jitsi-jvb-config } -- 2.47.2 From 453776967a2cbc75ce7197a80b141a56b7dde437 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 22 Dec 2025 13:47:18 -0300 Subject: [PATCH 083/684] jitsi: fix init container placement --- services/jitsi/deployment.yaml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/services/jitsi/deployment.yaml b/services/jitsi/deployment.yaml index 3af4ed8..bcfd4a4 100644 --- a/services/jitsi/deployment.yaml +++ b/services/jitsi/deployment.yaml @@ -95,22 +95,22 @@ spec: type: Recreate selector: matchLabels: { app: jitsi-jvb } - initContainers: - - name: jvb-custom-config - image: busybox:1.36 - command: - - /bin/sh - - -c - - | - set -euo pipefail - 
cp /custom-config/custom-jvb.conf /config/custom-jvb.conf - volumeMounts: - - { name: cfg, mountPath: /config } - - { name: jvb-custom, mountPath: /custom-config } template: metadata: labels: { app: jitsi-jvb } spec: + initContainers: + - name: jvb-custom-config + image: busybox:1.36 + command: + - /bin/sh + - -c + - | + set -euo pipefail + cp /custom-config/custom-jvb.conf /config/custom-jvb.conf + volumeMounts: + - { name: cfg, mountPath: /config } + - { name: jvb-custom, mountPath: /custom-config } nodeSelector: kubernetes.io/hostname: titan-22 kubernetes.io/arch: amd64 -- 2.47.2 From bde4002362a1561180c09d427ae21a9083735157 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 22 Dec 2025 13:49:28 -0300 Subject: [PATCH 084/684] jitsi: force tcp harvester via system props --- services/jitsi/deployment.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/jitsi/deployment.yaml b/services/jitsi/deployment.yaml index bcfd4a4..0840320 100644 --- a/services/jitsi/deployment.yaml +++ b/services/jitsi/deployment.yaml @@ -140,6 +140,8 @@ spec: - { name: JVB_ADVERTISE_IPS, value: "38.28.125.112,192.168.22.22" } - { name: JVB_TCP_HARVESTER_DISABLED, value: "false" } - { name: JVB_TCP_PORT, value: "4443" } + - name: JVB_OPTS + value: "-Dorg.ice4j.ice.harvest.DISABLE_TCP_HARVESTER=false -Dorg.jitsi.videobridge.TCP_HARVESTER_PORT=4443 -Dorg.jitsi.videobridge.TCP_HARVESTER_MAPPED_PORT=4443" volumeMounts: - { name: cfg, mountPath: /config } volumes: -- 2.47.2 From 25c32da81ea887aea04642f93179f7c7322f0750 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 22 Dec 2025 13:51:05 -0300 Subject: [PATCH 085/684] jitsi: add sip-communicator tcp harvester props --- services/jitsi/deployment.yaml | 1 + services/jitsi/jvb-configmap.yaml | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/services/jitsi/deployment.yaml b/services/jitsi/deployment.yaml index 0840320..cda62a3 100644 --- a/services/jitsi/deployment.yaml +++ b/services/jitsi/deployment.yaml @@ -108,6 +108,7 @@ 
spec: - | set -euo pipefail cp /custom-config/custom-jvb.conf /config/custom-jvb.conf + cp /custom-config/sip-communicator.properties /config/sip-communicator.properties volumeMounts: - { name: cfg, mountPath: /config } - { name: jvb-custom, mountPath: /custom-config } diff --git a/services/jitsi/jvb-configmap.yaml b/services/jitsi/jvb-configmap.yaml index d327a9e..b86d649 100644 --- a/services/jitsi/jvb-configmap.yaml +++ b/services/jitsi/jvb-configmap.yaml @@ -15,3 +15,7 @@ data: } } } + sip-communicator.properties: | + org.ice4j.ice.harvest.DISABLE_TCP_HARVESTER=false + org.jitsi.videobridge.TCP_HARVESTER_PORT=4443 + org.jitsi.videobridge.TCP_HARVESTER_MAPPED_PORT=4443 -- 2.47.2 From 0cd6d47940bce33be23648923ea034a8a4dfb950 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Mon, 22 Dec 2025 19:57:34 +0000 Subject: [PATCH 086/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 26267dd..c93c5d6 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-25 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-26 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From ebebd19a13b030f1351fc5e825eb26dea184de51 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Mon, 22 Dec 2025 19:58:37 +0000 Subject: [PATCH 087/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index d0a584d..36962d1 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-25 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-26 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From e9308b6bd19f0030fff8c21c8e3db2af0eeea539 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 22 Dec 2025 13:43:34 -0300 Subject: [PATCH 088/684] jitsi: add tcp harvester config for 4443 --- services/jitsi/deployment.yaml | 1 + services/jitsi/jvb-configmap.yaml | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/services/jitsi/deployment.yaml b/services/jitsi/deployment.yaml index cda62a3..a8292ff 100644 --- a/services/jitsi/deployment.yaml +++ b/services/jitsi/deployment.yaml @@ -145,6 +145,7 @@ spec: value: "-Dorg.ice4j.ice.harvest.DISABLE_TCP_HARVESTER=false -Dorg.jitsi.videobridge.TCP_HARVESTER_PORT=4443 -Dorg.jitsi.videobridge.TCP_HARVESTER_MAPPED_PORT=4443" volumeMounts: - { name: cfg, mountPath: /config } + - { name: jvb-custom, mountPath: /config/custom-jvb.conf, subPath: custom-jvb.conf } volumes: - name: cfg persistentVolumeClaim: { claimName: jitsi-jvb-config } diff --git a/services/jitsi/jvb-configmap.yaml b/services/jitsi/jvb-configmap.yaml index b86d649..d327a9e 100644 --- a/services/jitsi/jvb-configmap.yaml +++ b/services/jitsi/jvb-configmap.yaml @@ -15,7 +15,3 @@ data: } } } - sip-communicator.properties: | - org.ice4j.ice.harvest.DISABLE_TCP_HARVESTER=false - org.jitsi.videobridge.TCP_HARVESTER_PORT=4443 - org.jitsi.videobridge.TCP_HARVESTER_MAPPED_PORT=4443 -- 2.47.2 From 
cecde3e1977d8e595b21798914643ce6bc3c738d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 24 Dec 2025 15:26:02 -0300 Subject: [PATCH 089/684] jellyfin: drop OIDC plugin and strip injected script --- services/jellyfin/deployment.yaml | 204 ++++++++---------------------- 1 file changed, 50 insertions(+), 154 deletions(-) diff --git a/services/jellyfin/deployment.yaml b/services/jellyfin/deployment.yaml index fdd244a..14e9686 100644 --- a/services/jellyfin/deployment.yaml +++ b/services/jellyfin/deployment.yaml @@ -21,6 +21,31 @@ spec: labels: app: jellyfin spec: + # Clean up any lingering OIDC artifacts and strip the injected script tag + initContainers: + - name: strip-oidc + image: docker.io/jellyfin/jellyfin:10.11.5 + securityContext: + runAsUser: 0 + runAsGroup: 0 + command: + - /bin/sh + - -c + - | + set -euxo pipefail + cp -a /jellyfin/jellyfin-web/. /web-root + # remove injected OIDC script tags everywhere just in case + for f in $(find /web-root -type f -name 'index.html'); do + sed -i '/oidc\/inject/d' "$f" + printf '%s\n' "$f" + done + # clean any lingering OIDC plugin artifacts on the config volume + rm -rf "/config/plugins/OIDC Authentication_"* /config/plugins/configurations/JellyfinOIDCPlugin.v2.xml || true + volumeMounts: + - name: web-root + mountPath: /web-root + - name: config + mountPath: /config nodeSelector: jellyfin: "true" securityContext: @@ -28,142 +53,6 @@ spec: fsGroup: 65532 fsGroupChangePolicy: OnRootMismatch runAsGroup: 65532 - initContainers: - - name: fetch-oidc-plugin - image: alpine:3.20 - securityContext: - runAsUser: 0 - env: - - name: OIDC_PLUGIN_REPO - value: "registry.bstein.dev/streaming/oidc-plugin" - - name: OIDC_PLUGIN_TAG - value: "10.11.5" - - name: ORAS_USERNAME - valueFrom: - secretKeyRef: - name: harbor-robot - key: username - optional: true - - name: ORAS_PASSWORD - valueFrom: - secretKeyRef: - name: harbor-robot - key: password - optional: true - volumeMounts: - - name: oidc-plugin - mountPath: /plugin-src - 
command: ["/bin/sh", "-c"] - args: - - | - set -euo pipefail - apk add --no-cache curl tar - ORAS_VERSION=1.2.0 - curl -sSL "https://github.com/oras-project/oras/releases/download/v${ORAS_VERSION}/oras_${ORAS_VERSION}_linux_amd64.tar.gz" | tar -xz -C /usr/local/bin oras - ref="${OIDC_PLUGIN_REPO}:${OIDC_PLUGIN_TAG}" - cd /plugin-src - if [ -n "${ORAS_USERNAME:-}" ] && [ -n "${ORAS_PASSWORD:-}" ]; then - oras login "$(echo "${OIDC_PLUGIN_REPO}" | cut -d/ -f1)" -u "${ORAS_USERNAME}" -p "${ORAS_PASSWORD}" - fi - oras pull --allow-path-traversal "${ref}" - ls -lh /plugin-src - - name: install-oidc-plugin - image: alpine:3.20 - securityContext: - runAsUser: 0 - env: - - name: OIDC_PLUGIN_VERSION - value: "1.0.2.0" - - name: OIDC_ISSUER - value: "https://sso.bstein.dev/realms/atlas" - - name: OIDC_REDIRECT_URI - value: "https://stream.bstein.dev/api/oidc/callback" - - name: OIDC_LOGOUT_URI - value: "https://sso.bstein.dev/realms/atlas/protocol/openid-connect/logout?redirect_uri=https://stream.bstein.dev/" - - name: OIDC_SCOPES - value: "openid,profile,email" - - name: OIDC_ROLE_CLAIM - value: "groups" - - name: OIDC_CLIENT_ID - valueFrom: - secretKeyRef: - name: jellyfin-oidc - key: client-id - - name: OIDC_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: jellyfin-oidc - key: client-secret - volumeMounts: - - name: config - mountPath: /config - - name: oidc-plugin - mountPath: /plugin-src - command: ["/bin/sh", "-c"] - args: - - | - set -euo pipefail - if [ -z "${OIDC_CLIENT_ID:-}" ] || [ -z "${OIDC_CLIENT_SECRET:-}" ]; then - echo "OIDC_CLIENT_ID or OIDC_CLIENT_SECRET missing; create secret jellyfin-oidc" >&2 - exit 1 - fi - rm -rf "/config/plugins/LDAP Authentication_20.0.0.0" - apk add --no-cache unzip - plugin_dir="/config/plugins/OIDC Authentication_${OIDC_PLUGIN_VERSION}" - config_dir="/config/plugins/configurations" - plugin_zip="/plugin-src/OIDC_Authentication_${OIDC_PLUGIN_VERSION}-net9.zip" - if [ ! 
-s "${plugin_zip}" ]; then - echo "Plugin zip missing at ${plugin_zip}" >&2 - echo "Contents of /plugin-src:" >&2 - ls -lah /plugin-src >&2 || true - exit 1 - fi - rm -rf "${plugin_dir}" - mkdir -p "${plugin_dir}" "${config_dir}" - unzip -o "${plugin_zip}" -d "${plugin_dir}" - rm -f "${plugin_dir}"/Microsoft.Extensions.*.dll - cat >"${plugin_dir}/meta.json" <<'EOF' - { - "category": "Authentication", - "changelog": "OIDC SSO authentication plugin; auto user creation and role mapping", - "description": "OpenID Connect (OIDC) authentication provider for Jellyfin with SSO support.", - "guid": "a1b2c3d4-e5f6-47a8-b9c0-d1e2f3a4b5c6", - "name": "OIDC Authentication", - "overview": "Enable Single Sign-On (SSO) for Jellyfin using an OpenID Connect provider.", - "owner": "lolerskatez", - "targetAbi": "10.11.5.0", - "timestamp": "2025-12-17T04:00:00Z", - "version": "1.0.2.0", - "status": "Active", - "autoUpdate": false, - "imagePath": "", - "assemblies": [] - } - EOF - scope_lines="" - for s in $(echo "${OIDC_SCOPES}" | tr ',' ' '); do - trimmed="$(echo "${s}" | xargs)" - [ -z "${trimmed}" ] && continue - scope_lines="${scope_lines} ${trimmed}\n" - done - config_file="${config_dir}/JellyfinOIDCPlugin.v2.xml" - cat >"${config_file}" < - - ${OIDC_ISSUER} - ${OIDC_CLIENT_ID} - ${OIDC_CLIENT_SECRET} - - $(printf "%b" "${scope_lines}") - ${OIDC_ROLE_CLAIM} - ${OIDC_REDIRECT_URI} - ${OIDC_LOGOUT_URI} - - true - false - - EOF - chown -R 1000:65532 "${plugin_dir}" "${config_file}" runtimeClassName: nvidia containers: - name: jellyfin @@ -183,22 +72,6 @@ spec: value: "65532" - name: UMASK value: "002" - lifecycle: - postStart: - exec: - command: - - /bin/sh - - -c - - | - set -e - target="/jellyfin/jellyfin-web/index.html" - marker='api/oidc/inject' - if grep -q "${marker}" "${target}"; then - exit 0 - fi - tmp="$(mktemp)" - awk -v marker="${marker}" 'BEGIN{inserted=0} /<\/head>/ && !inserted {print " "; inserted=1} {print}' "${target}" > "${tmp}" - cp "${tmp}" "${target}" resources: 
limits: nvidia.com/gpu: 1 @@ -211,16 +84,35 @@ spec: volumeMounts: - name: config mountPath: /config + # Override LDAP plugin configuration from a secret to avoid embedding credentials in the PVC. + - name: ldap-config + mountPath: /config/plugins/configurations/LDAP-Auth.xml + subPath: ldap-config.xml - name: cache mountPath: /cache - name: media mountPath: /media + - name: web-root + mountPath: /jellyfin/jellyfin-web + lifecycle: + postStart: + exec: + command: + - /bin/sh + - -c + - | + set -eux + for f in $(find /jellyfin/jellyfin-web -type f -name 'index.html'); do + sed -i '/oidc\/inject/d' "$f" || true + done securityContext: runAsUser: 0 runAsGroup: 0 allowPrivilegeEscalation: false readOnlyRootFilesystem: false volumes: + - name: web-root + emptyDir: {} - name: config persistentVolumeClaim: claimName: jellyfin-config-astreae @@ -230,5 +122,9 @@ spec: - name: media persistentVolumeClaim: claimName: jellyfin-media-asteria-new - - name: oidc-plugin - emptyDir: {} + - name: ldap-config + secret: + secretName: jellyfin-ldap-config + items: + - key: ldap-config.xml + path: ldap-config.xml -- 2.47.2 From cd7ba1e8a8ea287bb324484de8c51b2f817e7d6d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 24 Dec 2025 17:25:07 -0300 Subject: [PATCH 090/684] jellyfin: enforce ldap auth provider on start --- services/jellyfin/deployment.yaml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/services/jellyfin/deployment.yaml b/services/jellyfin/deployment.yaml index 14e9686..faec4f7 100644 --- a/services/jellyfin/deployment.yaml +++ b/services/jellyfin/deployment.yaml @@ -46,6 +46,28 @@ spec: mountPath: /web-root - name: config mountPath: /config + # Force all users to authenticate via the LDAP plugin provider by updating the DB on start. + # This keeps Flux enforcement for auth provider drift (e.g., after UI edits). 
+ - name: set-ldap-auth-provider + image: docker.io/library/alpine:3.20 + securityContext: + runAsUser: 0 + runAsGroup: 0 + command: + - /bin/sh + - -c + - | + set -euxo pipefail + apk add --no-cache sqlite + db="/config/data/jellyfin.db" + if [ -f "$db" ]; then + sqlite3 "$db" "UPDATE Users SET AuthenticationProviderId='958aad66-3784-4d2a-b89a-a7b6fab6e25c', Password=NULL, EnableLocalPassword=0 WHERE AuthenticationProviderId!='958aad66-3784-4d2a-b89a-a7b6fab6e25c';" + else + echo "db not found at $db, skipping" + fi + volumeMounts: + - name: config + mountPath: /config nodeSelector: jellyfin: "true" securityContext: -- 2.47.2 From 39a8e551eb9975ad7916fc54fb2530559ff56b53 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 24 Dec 2025 17:43:07 -0300 Subject: [PATCH 091/684] grafana: allow public overview via oidc --- services/monitoring/grafana-folders.yaml | 2 ++ services/monitoring/helmrelease.yaml | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/services/monitoring/grafana-folders.yaml b/services/monitoring/grafana-folders.yaml index 54b278f..24ce305 100644 --- a/services/monitoring/grafana-folders.yaml +++ b/services/monitoring/grafana-folders.yaml @@ -13,6 +13,8 @@ data: - uid: overview title: Overview permissions: + - role: Anonymous + permission: View - role: Viewer permission: View - role: Editor diff --git a/services/monitoring/helmrelease.yaml b/services/monitoring/helmrelease.yaml index 3fd76db..d0bcda6 100644 --- a/services/monitoring/helmrelease.yaml +++ b/services/monitoring/helmrelease.yaml @@ -248,7 +248,8 @@ spec: service: type: ClusterIP env: - GF_AUTH_ANONYMOUS_ENABLED: "false" + GF_AUTH_ANONYMOUS_ENABLED: "true" + GF_AUTH_ANONYMOUS_ORG_ROLE: "Viewer" GF_SECURITY_ALLOW_EMBEDDING: "true" GF_AUTH_GENERIC_OAUTH_ENABLED: "true" GF_AUTH_GENERIC_OAUTH_NAME: "Keycloak" -- 2.47.2 From ad79ad0a3c6e9445a6bb72b55eeb8c500d0336ad Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 24 Dec 2025 17:49:47 -0300 Subject: [PATCH 092/684] 
jitsi: include sip communicator tcp props --- services/jitsi/jvb-configmap.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/services/jitsi/jvb-configmap.yaml b/services/jitsi/jvb-configmap.yaml index d327a9e..b86d649 100644 --- a/services/jitsi/jvb-configmap.yaml +++ b/services/jitsi/jvb-configmap.yaml @@ -15,3 +15,7 @@ data: } } } + sip-communicator.properties: | + org.ice4j.ice.harvest.DISABLE_TCP_HARVESTER=false + org.jitsi.videobridge.TCP_HARVESTER_PORT=4443 + org.jitsi.videobridge.TCP_HARVESTER_MAPPED_PORT=4443 -- 2.47.2 From 23f5f03047d6877858b0243ceba1c0158fb9c742 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 24 Dec 2025 17:53:59 -0300 Subject: [PATCH 093/684] jitsi: keep tcp config on pvc only --- services/jitsi/deployment.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/services/jitsi/deployment.yaml b/services/jitsi/deployment.yaml index a8292ff..cda62a3 100644 --- a/services/jitsi/deployment.yaml +++ b/services/jitsi/deployment.yaml @@ -145,7 +145,6 @@ spec: value: "-Dorg.ice4j.ice.harvest.DISABLE_TCP_HARVESTER=false -Dorg.jitsi.videobridge.TCP_HARVESTER_PORT=4443 -Dorg.jitsi.videobridge.TCP_HARVESTER_MAPPED_PORT=4443" volumeMounts: - { name: cfg, mountPath: /config } - - { name: jvb-custom, mountPath: /config/custom-jvb.conf, subPath: custom-jvb.conf } volumes: - name: cfg persistentVolumeClaim: { claimName: jitsi-jvb-config } -- 2.47.2 From 0db786c3433b004109852102ad2b71d5a973c7fe Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 24 Dec 2025 18:15:25 -0300 Subject: [PATCH 094/684] grafana,jitsi: enable pkce and tcp fallback --- services/jitsi/deployment.yaml | 2 +- services/jitsi/jvb-configmap.yaml | 1 + services/monitoring/helmrelease.yaml | 12 +++--------- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/services/jitsi/deployment.yaml b/services/jitsi/deployment.yaml index cda62a3..680ec8d 100644 --- a/services/jitsi/deployment.yaml +++ b/services/jitsi/deployment.yaml @@ -142,7 +142,7 @@ spec: - { name: 
JVB_TCP_HARVESTER_DISABLED, value: "false" } - { name: JVB_TCP_PORT, value: "4443" } - name: JVB_OPTS - value: "-Dorg.ice4j.ice.harvest.DISABLE_TCP_HARVESTER=false -Dorg.jitsi.videobridge.TCP_HARVESTER_PORT=4443 -Dorg.jitsi.videobridge.TCP_HARVESTER_MAPPED_PORT=4443" + value: "-Dorg.jitsi.videobridge.DISABLE_TCP_HARVESTER=false -Dorg.ice4j.ice.harvest.DISABLE_TCP_HARVESTER=false -Dorg.jitsi.videobridge.TCP_HARVESTER_PORT=4443 -Dorg.jitsi.videobridge.TCP_HARVESTER_MAPPED_PORT=4443" volumeMounts: - { name: cfg, mountPath: /config } volumes: diff --git a/services/jitsi/jvb-configmap.yaml b/services/jitsi/jvb-configmap.yaml index b86d649..59cb165 100644 --- a/services/jitsi/jvb-configmap.yaml +++ b/services/jitsi/jvb-configmap.yaml @@ -16,6 +16,7 @@ data: } } sip-communicator.properties: | + org.jitsi.videobridge.DISABLE_TCP_HARVESTER=false org.ice4j.ice.harvest.DISABLE_TCP_HARVESTER=false org.jitsi.videobridge.TCP_HARVESTER_PORT=4443 org.jitsi.videobridge.TCP_HARVESTER_MAPPED_PORT=4443 diff --git a/services/monitoring/helmrelease.yaml b/services/monitoring/helmrelease.yaml index d0bcda6..a07d207 100644 --- a/services/monitoring/helmrelease.yaml +++ b/services/monitoring/helmrelease.yaml @@ -248,6 +248,8 @@ spec: service: type: ClusterIP env: + GF_AUTH_GENERIC_OAUTH_CLIENT_ID: "grafana" + GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: "" GF_AUTH_ANONYMOUS_ENABLED: "true" GF_AUTH_ANONYMOUS_ORG_ROLE: "Viewer" GF_SECURITY_ALLOW_EMBEDDING: "true" @@ -259,17 +261,9 @@ spec: GF_AUTH_GENERIC_OAUTH_TOKEN_URL: "https://sso.bstein.dev/realms/atlas/protocol/openid-connect/token" GF_AUTH_GENERIC_OAUTH_API_URL: "https://sso.bstein.dev/realms/atlas/protocol/openid-connect/userinfo" GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_PATH: "contains(groups, 'admin') && 'Admin' || 'Viewer'" + GF_AUTH_GENERIC_OAUTH_USE_PKCE: "true" GF_AUTH_GENERIC_OAUTH_TLS_SKIP_VERIFY_INSECURE: "false" GF_AUTH_SIGNOUT_REDIRECT_URL: 
"https://sso.bstein.dev/realms/atlas/protocol/openid-connect/logout?redirect_uri=https://metrics.bstein.dev/" - envValueFrom: - GF_AUTH_GENERIC_OAUTH_CLIENT_ID: - secretKeyRef: - name: grafana-oidc - key: client_id - GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: - secretKeyRef: - name: grafana-oidc - key: client_secret grafana.ini: server: domain: metrics.bstein.dev -- 2.47.2 From 7876e4389c2511e7e37478636217e4cbee179894 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 24 Dec 2025 19:06:40 -0300 Subject: [PATCH 095/684] crypto: fetch p2pool binary at runtime --- services/crypto/xmr-miner/deployment.yaml | 37 +++++++++++++++++++++-- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/services/crypto/xmr-miner/deployment.yaml b/services/crypto/xmr-miner/deployment.yaml index dc24828..cf64fa4 100644 --- a/services/crypto/xmr-miner/deployment.yaml +++ b/services/crypto/xmr-miner/deployment.yaml @@ -30,11 +30,37 @@ spec: - key: hardware operator: In values: ["rpi4"] + initContainers: + - name: fetch-p2pool + image: alpine:3.20 + command: ["/bin/sh","-c"] + args: + - | + set -euo pipefail + apk add --no-cache curl tar openssl >/dev/null + test -n "${P2POOL_URL}" + echo "Downloading ${P2POOL_URL}" + curl -fsSL "${P2POOL_URL}" -o /tmp/p2pool.tgz + if [ -n "${P2POOL_SHA256}" ]; then + echo "${P2POOL_SHA256} /tmp/p2pool.tgz" | sha256sum -c - + fi + mkdir -p /opt/p2pool + tar -xzf /tmp/p2pool.tgz -C /opt/p2pool + BIN="$(find /opt/p2pool -maxdepth 1 -type f -name 'p2pool*' | head -n1)" + test -n "${BIN}" + install -m0755 "${BIN}" /opt/p2pool/p2pool + env: + - name: P2POOL_URL + valueFrom: { configMapKeyRef: { name: xmr-miner-sources, key: P2POOL_URL } } + - name: P2POOL_SHA256 + valueFrom: { configMapKeyRef: { name: xmr-miner-sources, key: P2POOL_SHA256, optional: true } } + volumeMounts: + - { name: p2pool-bin, mountPath: /opt/p2pool } containers: - name: monero-p2pool - image: registry.bstein.dev/crypto/monero-p2pool:4.9 - imagePullPolicy: Always - command: ["p2pool"] + 
image: debian:bookworm-slim + imagePullPolicy: IfNotPresent + command: ["/opt/p2pool/p2pool"] args: - "--host" - "monerod.crypto.svc.cluster.local" @@ -61,3 +87,8 @@ spec: tcpSocket: { port: 3333 } initialDelaySeconds: 10 periodSeconds: 10 + volumeMounts: + - { name: p2pool-bin, mountPath: /opt/p2pool } + volumes: + - name: p2pool-bin + emptyDir: {} -- 2.47.2 From b71c145e6e16cbe0ffaa714fb2fa49b8e493cf17 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 24 Dec 2025 19:09:40 -0300 Subject: [PATCH 096/684] crypto: download p2pool v4.9 arm64 at runtime --- services/crypto/xmr-miner/configmap-sources.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/crypto/xmr-miner/configmap-sources.yaml b/services/crypto/xmr-miner/configmap-sources.yaml index b7c7bbc..5335001 100644 --- a/services/crypto/xmr-miner/configmap-sources.yaml +++ b/services/crypto/xmr-miner/configmap-sources.yaml @@ -6,7 +6,7 @@ metadata: namespace: crypto data: # REQUIRED: set to the official p2pool ARM64 tarball URL - P2POOL_URL: "https://downloads.sourceforge.net/project/p2pool-xmr/Release/p2pool-v4.8.1-linux-aarch64.tar.gz" + P2POOL_URL: "https://github.com/SChernykh/p2pool/releases/download/v4.9/p2pool-v4.9-linux-aarch64.tar.gz" # OPTIONAL: p2pool SHA256 (exact 64-hex chars). Leave blank to skip verification. 
P2POOL_SHA256: "" -- 2.47.2 From 1bbb88d9a39f305cd9f18b5c8ab1fd5997a8d1e2 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 24 Dec 2025 19:14:44 -0300 Subject: [PATCH 097/684] crypto: fetch p2pool from github with debug --- services/crypto/xmr-miner/deployment.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/crypto/xmr-miner/deployment.yaml b/services/crypto/xmr-miner/deployment.yaml index cf64fa4..f7f4711 100644 --- a/services/crypto/xmr-miner/deployment.yaml +++ b/services/crypto/xmr-miner/deployment.yaml @@ -36,7 +36,7 @@ spec: command: ["/bin/sh","-c"] args: - | - set -euo pipefail + set -euxo pipefail apk add --no-cache curl tar openssl >/dev/null test -n "${P2POOL_URL}" echo "Downloading ${P2POOL_URL}" @@ -46,6 +46,7 @@ spec: fi mkdir -p /opt/p2pool tar -xzf /tmp/p2pool.tgz -C /opt/p2pool + ls -l /opt/p2pool BIN="$(find /opt/p2pool -maxdepth 1 -type f -name 'p2pool*' | head -n1)" test -n "${BIN}" install -m0755 "${BIN}" /opt/p2pool/p2pool -- 2.47.2 From bbe4fb2cff05626ffc1b8565899eb1e65cd4f684 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 24 Dec 2025 19:16:47 -0300 Subject: [PATCH 098/684] crypto: handle nested p2pool archive layout --- services/crypto/xmr-miner/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/crypto/xmr-miner/deployment.yaml b/services/crypto/xmr-miner/deployment.yaml index f7f4711..efc00ca 100644 --- a/services/crypto/xmr-miner/deployment.yaml +++ b/services/crypto/xmr-miner/deployment.yaml @@ -47,7 +47,7 @@ spec: mkdir -p /opt/p2pool tar -xzf /tmp/p2pool.tgz -C /opt/p2pool ls -l /opt/p2pool - BIN="$(find /opt/p2pool -maxdepth 1 -type f -name 'p2pool*' | head -n1)" + BIN="$(find /opt/p2pool -maxdepth 2 -type f -name 'p2pool*' | head -n1)" test -n "${BIN}" install -m0755 "${BIN}" /opt/p2pool/p2pool env: -- 2.47.2 From cf2e4c8bb232c0e9a6850ca97930a33ad2574264 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 24 Dec 2025 20:11:29 -0300 Subject: [PATCH 
099/684] jitsi: require auth to start rooms; vault ui default oidc --- services/jitsi/deployment.yaml | 20 +++++++++++++++++++- services/jitsi/kustomization.yaml | 1 + services/jitsi/secret-auth-user.yaml | 9 +++++++++ services/vault/configmap.yaml | 3 +++ 4 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 services/jitsi/secret-auth-user.yaml diff --git a/services/jitsi/deployment.yaml b/services/jitsi/deployment.yaml index 680ec8d..7476caf 100644 --- a/services/jitsi/deployment.yaml +++ b/services/jitsi/deployment.yaml @@ -15,6 +15,21 @@ spec: nodeSelector: kubernetes.io/hostname: titan-22 kubernetes.io/arch: amd64 + initContainers: + - name: prosody-bootstrap-auth + image: jitsi/prosody:stable + command: ["/bin/sh","-c"] + args: + - | + set -euo pipefail + prosodyctl --config /config/prosody.cfg.lua register "${JITSI_AUTH_USER}" meet.jitsi "${JITSI_AUTH_PASSWORD}" || true + env: + - name: JITSI_AUTH_USER + valueFrom: { secretKeyRef: { name: jitsi-auth-user, key: username } } + - name: JITSI_AUTH_PASSWORD + valueFrom: { secretKeyRef: { name: jitsi-auth-user, key: password } } + volumeMounts: + - { name: cfg, mountPath: /config } containers: - name: prosody image: jitsi/prosody:stable @@ -27,8 +42,10 @@ spec: - { name: XMPP_AUTH_DOMAIN, value: "auth.meet.jitsi" } - { name: XMPP_MUC_DOMAIN, value: "muc.meet.jitsi" } - { name: XMPP_INTERNAL_MUC_DOMAIN, value: "internal-muc.meet.jitsi" } - - { name: ENABLE_AUTH, value: "0" } # open instance, no auth (fastest path) + - { name: ENABLE_AUTH, value: "1" } - { name: ENABLE_GUESTS, value: "1" } + - { name: AUTH_TYPE, value: "internal" } + - { name: XMPP_GUEST_DOMAIN, value: "guest.meet.jitsi" } - { name: JICOFO_AUTH_USER, value: "focus" } - { name: JVB_AUTH_USER, value: "jvb" } - name: JICOFO_AUTH_PASSWORD @@ -181,6 +198,7 @@ spec: - { name: XMPP_AUTH_DOMAIN, value: "auth.meet.jitsi" } - { name: XMPP_MUC_DOMAIN, value: "muc.meet.jitsi" } - { name: XMPP_INTERNAL_MUC_DOMAIN, value: "internal-muc.meet.jitsi" } 
+ - { name: XMPP_GUEST_DOMAIN, value: "guest.meet.jitsi" } - { name: XMPP_BOSH_URL_BASE, value: "https://meet.bstein.dev" } - { name: ENABLE_XMPP_WEBSOCKET, value: "1" } - { name: ENABLE_COLIBRI_WEBSOCKET, value: "1" } diff --git a/services/jitsi/kustomization.yaml b/services/jitsi/kustomization.yaml index c0f5ebb..117ef5e 100644 --- a/services/jitsi/kustomization.yaml +++ b/services/jitsi/kustomization.yaml @@ -3,6 +3,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - namespace.yaml + - secret-auth-user.yaml - deployment.yaml - service.yaml - pvc.yaml diff --git a/services/jitsi/secret-auth-user.yaml b/services/jitsi/secret-auth-user.yaml new file mode 100644 index 0000000..2a2949e --- /dev/null +++ b/services/jitsi/secret-auth-user.yaml @@ -0,0 +1,9 @@ +# services/jitsi/secret-auth-user.yaml +apiVersion: v1 +kind: Secret +metadata: + name: jitsi-auth-user + namespace: jitsi +stringData: + username: brad + password: qvUqX5foh2zyM0th diff --git a/services/vault/configmap.yaml b/services/vault/configmap.yaml index d4ffdb5..56beb4b 100644 --- a/services/vault/configmap.yaml +++ b/services/vault/configmap.yaml @@ -9,6 +9,9 @@ data: ui = true cluster_name = "vault-k8s" disable_mlock = true + ui { + default_auth_method = "oidc" + } listener "tcp" { address = "0.0.0.0:8200" -- 2.47.2 From bd21e775abc31eaf5ddc7dd8dc3fd775f6cd8c81 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 24 Dec 2025 20:12:48 -0300 Subject: [PATCH 100/684] jitsi: fix prosody auth init shell --- services/jitsi/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/jitsi/deployment.yaml b/services/jitsi/deployment.yaml index 7476caf..66b397a 100644 --- a/services/jitsi/deployment.yaml +++ b/services/jitsi/deployment.yaml @@ -21,7 +21,7 @@ spec: command: ["/bin/sh","-c"] args: - | - set -euo pipefail + set -eu prosodyctl --config /config/prosody.cfg.lua register "${JITSI_AUTH_USER}" meet.jitsi "${JITSI_AUTH_PASSWORD}" || true env: - 
name: JITSI_AUTH_USER -- 2.47.2 From 4d47e2c693542404e3ef71acbf7198914dbd5225 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 24 Dec 2025 20:16:33 -0300 Subject: [PATCH 101/684] vault: revert ui default auth block (not supported) --- services/vault/configmap.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/services/vault/configmap.yaml b/services/vault/configmap.yaml index 56beb4b..d4ffdb5 100644 --- a/services/vault/configmap.yaml +++ b/services/vault/configmap.yaml @@ -9,9 +9,6 @@ data: ui = true cluster_name = "vault-k8s" disable_mlock = true - ui { - default_auth_method = "oidc" - } listener "tcp" { address = "0.0.0.0:8200" -- 2.47.2 From 25758b1cd98f15de40513dc2f1038d1c58b2319f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 24 Dec 2025 21:27:57 -0300 Subject: [PATCH 102/684] jitsi: enforce auth flags on web/jicofo/jvb --- services/jitsi/deployment.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/services/jitsi/deployment.yaml b/services/jitsi/deployment.yaml index 66b397a..f4f6cd3 100644 --- a/services/jitsi/deployment.yaml +++ b/services/jitsi/deployment.yaml @@ -86,6 +86,10 @@ spec: - { name: XMPP_AUTH_DOMAIN, value: "auth.meet.jitsi" } - { name: XMPP_MUC_DOMAIN, value: "muc.meet.jitsi" } - { name: XMPP_INTERNAL_MUC_DOMAIN, value: "internal-muc.meet.jitsi" } + - { name: XMPP_GUEST_DOMAIN, value: "guest.meet.jitsi" } + - { name: ENABLE_AUTH, value: "1" } + - { name: ENABLE_GUESTS, value: "1" } + - { name: AUTH_TYPE, value: "internal" } - { name: XMPP_SERVER, value: "jitsi-prosody.jitsi.svc.cluster.local" } - { name: JICOFO_AUTH_USER, value: "focus" } - name: JICOFO_AUTH_PASSWORD @@ -144,6 +148,7 @@ spec: - { name: XMPP_AUTH_DOMAIN, value: "auth.meet.jitsi" } - { name: XMPP_MUC_DOMAIN, value: "muc.meet.jitsi" } - { name: XMPP_INTERNAL_MUC_DOMAIN, value: "internal-muc.meet.jitsi" } + - { name: XMPP_GUEST_DOMAIN, value: "guest.meet.jitsi" } - { name: XMPP_SERVER, value: "jitsi-prosody.jitsi.svc.cluster.local" } - { name: 
JVB_AUTH_USER, value: "jvb" } - name: JVB_AUTH_PASSWORD @@ -199,6 +204,9 @@ spec: - { name: XMPP_MUC_DOMAIN, value: "muc.meet.jitsi" } - { name: XMPP_INTERNAL_MUC_DOMAIN, value: "internal-muc.meet.jitsi" } - { name: XMPP_GUEST_DOMAIN, value: "guest.meet.jitsi" } + - { name: ENABLE_AUTH, value: "1" } + - { name: ENABLE_GUESTS, value: "1" } + - { name: AUTH_TYPE, value: "internal" } - { name: XMPP_BOSH_URL_BASE, value: "https://meet.bstein.dev" } - { name: ENABLE_XMPP_WEBSOCKET, value: "1" } - { name: ENABLE_COLIBRI_WEBSOCKET, value: "1" } -- 2.47.2 From 9bbdbb5fabc922c685b095bc0357929cfc1d613f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 24 Dec 2025 21:49:59 -0300 Subject: [PATCH 103/684] ci-demo: fix image tag value --- services/ci-demo/kustomization.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/ci-demo/kustomization.yaml b/services/ci-demo/kustomization.yaml index 3eb503f..3d68ef1 100644 --- a/services/ci-demo/kustomization.yaml +++ b/services/ci-demo/kustomization.yaml @@ -8,4 +8,4 @@ resources: - service.yaml images: - name: registry.bstein.dev/infra/ci-demo - newTag: registry.bstein.dev/infra/ci-demo:v0.0.0-3 # {"$imagepolicy": "flux-system:ci-demo"} + newTag: v0.0.0-3 # {"$imagepolicy": "flux-system:ci-demo"} -- 2.47.2 From fbe2490ef7d12989d7fd103f8a86d7c7bc790782 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 25 Dec 2025 03:14:50 -0300 Subject: [PATCH 104/684] platform: add vault csi driver --- .../flux-system/platform/kustomization.yaml | 1 + .../platform/vault-csi/kustomization.yaml | 16 +++++++++++++++ .../sources/helm/kustomization.yaml | 1 + .../sources/helm/secrets-store-csi.yaml | 9 +++++++++ infrastructure/vault-csi/kustomization.yaml | 7 +++++++ .../vault-csi/secrets-store-csi-driver.yaml | 20 +++++++++++++++++++ .../vault-csi/vault-csi-provider.yaml | 17 ++++++++++++++++ 7 files changed, 71 insertions(+) create mode 100644 clusters/atlas/flux-system/platform/vault-csi/kustomization.yaml create mode 
100644 infrastructure/sources/helm/secrets-store-csi.yaml create mode 100644 infrastructure/vault-csi/kustomization.yaml create mode 100644 infrastructure/vault-csi/secrets-store-csi-driver.yaml create mode 100644 infrastructure/vault-csi/vault-csi-provider.yaml diff --git a/clusters/atlas/flux-system/platform/kustomization.yaml b/clusters/atlas/flux-system/platform/kustomization.yaml index 040e478..fbca36e 100644 --- a/clusters/atlas/flux-system/platform/kustomization.yaml +++ b/clusters/atlas/flux-system/platform/kustomization.yaml @@ -8,3 +8,4 @@ resources: - gitops-ui/kustomization.yaml - monitoring/kustomization.yaml - longhorn-ui/kustomization.yaml + - ../platform/vault-csi/kustomization.yaml diff --git a/clusters/atlas/flux-system/platform/vault-csi/kustomization.yaml b/clusters/atlas/flux-system/platform/vault-csi/kustomization.yaml new file mode 100644 index 0000000..5a56941 --- /dev/null +++ b/clusters/atlas/flux-system/platform/vault-csi/kustomization.yaml @@ -0,0 +1,16 @@ +# clusters/atlas/flux-system/platform/vault-csi/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: vault-csi + namespace: flux-system +spec: + interval: 30m + sourceRef: + kind: GitRepository + name: flux-system + namespace: flux-system + path: ./infrastructure/vault-csi + prune: true + wait: true + targetNamespace: kube-system diff --git a/infrastructure/sources/helm/kustomization.yaml b/infrastructure/sources/helm/kustomization.yaml index 3ded0f1..7b2163b 100644 --- a/infrastructure/sources/helm/kustomization.yaml +++ b/infrastructure/sources/helm/kustomization.yaml @@ -10,3 +10,4 @@ resources: - harbor.yaml - prometheus.yaml - victoria-metrics.yaml + - secrets-store-csi.yaml diff --git a/infrastructure/sources/helm/secrets-store-csi.yaml b/infrastructure/sources/helm/secrets-store-csi.yaml new file mode 100644 index 0000000..1fc4ae5 --- /dev/null +++ b/infrastructure/sources/helm/secrets-store-csi.yaml @@ -0,0 +1,9 @@ +# 
infrastructure/sources/helm/secrets-store-csi.yaml +apiVersion: source.toolkit.fluxcd.io/v1 +kind: HelmRepository +metadata: + name: secrets-store-csi-driver + namespace: flux-system +spec: + interval: 1h + url: https://kubernetes-sigs.github.io/secrets-store-csi-driver/charts diff --git a/infrastructure/vault-csi/kustomization.yaml b/infrastructure/vault-csi/kustomization.yaml new file mode 100644 index 0000000..5598653 --- /dev/null +++ b/infrastructure/vault-csi/kustomization.yaml @@ -0,0 +1,7 @@ +# infrastructure/vault-csi/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: kube-system +resources: + - secrets-store-csi-driver.yaml + - vault-csi-provider.yaml diff --git a/infrastructure/vault-csi/secrets-store-csi-driver.yaml b/infrastructure/vault-csi/secrets-store-csi-driver.yaml new file mode 100644 index 0000000..0b249fc --- /dev/null +++ b/infrastructure/vault-csi/secrets-store-csi-driver.yaml @@ -0,0 +1,20 @@ +# infrastructure/vault-csi/secrets-store-csi-driver.yaml +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: secrets-store-csi-driver + namespace: kube-system +spec: + interval: 15m + chart: + spec: + chart: secrets-store-csi-driver + version: "~1.3.0" + sourceRef: + kind: HelmRepository + name: secrets-store-csi-driver + namespace: flux-system + values: + syncSecret: + enabled: true + enableSecretRotation: false diff --git a/infrastructure/vault-csi/vault-csi-provider.yaml b/infrastructure/vault-csi/vault-csi-provider.yaml new file mode 100644 index 0000000..379d7ff --- /dev/null +++ b/infrastructure/vault-csi/vault-csi-provider.yaml @@ -0,0 +1,17 @@ +# infrastructure/vault-csi/vault-csi-provider.yaml +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: vault-csi-provider + namespace: kube-system +spec: + interval: 15m + chart: + spec: + chart: vault-csi-provider + version: "~1.1.0" + sourceRef: + kind: HelmRepository + name: hashicorp + namespace: 
flux-system + values: {} -- 2.47.2 From 5666eceec703d7185f33c071cc8dd46c4920e635 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 25 Dec 2025 03:15:06 -0300 Subject: [PATCH 105/684] jitsi: use vault jwt via csi --- services/jitsi/deployment.yaml | 50 ++++++++++++++++--------- services/jitsi/kustomization.yaml | 3 +- services/jitsi/secret-auth-user.yaml | 9 ----- services/jitsi/secretproviderclass.yaml | 21 +++++++++++ services/jitsi/serviceaccount.yaml | 6 +++ 5 files changed, 61 insertions(+), 28 deletions(-) delete mode 100644 services/jitsi/secret-auth-user.yaml create mode 100644 services/jitsi/secretproviderclass.yaml create mode 100644 services/jitsi/serviceaccount.yaml diff --git a/services/jitsi/deployment.yaml b/services/jitsi/deployment.yaml index f4f6cd3..454fa14 100644 --- a/services/jitsi/deployment.yaml +++ b/services/jitsi/deployment.yaml @@ -12,24 +12,10 @@ spec: metadata: labels: { app: jitsi-prosody } spec: + serviceAccountName: jitsi nodeSelector: kubernetes.io/hostname: titan-22 kubernetes.io/arch: amd64 - initContainers: - - name: prosody-bootstrap-auth - image: jitsi/prosody:stable - command: ["/bin/sh","-c"] - args: - - | - set -eu - prosodyctl --config /config/prosody.cfg.lua register "${JITSI_AUTH_USER}" meet.jitsi "${JITSI_AUTH_PASSWORD}" || true - env: - - name: JITSI_AUTH_USER - valueFrom: { secretKeyRef: { name: jitsi-auth-user, key: username } } - - name: JITSI_AUTH_PASSWORD - valueFrom: { secretKeyRef: { name: jitsi-auth-user, key: password } } - volumeMounts: - - { name: cfg, mountPath: /config } containers: - name: prosody image: jitsi/prosody:stable @@ -44,8 +30,13 @@ spec: - { name: XMPP_INTERNAL_MUC_DOMAIN, value: "internal-muc.meet.jitsi" } - { name: ENABLE_AUTH, value: "1" } - { name: ENABLE_GUESTS, value: "1" } - - { name: AUTH_TYPE, value: "internal" } + - { name: AUTH_TYPE, value: "jwt" } - { name: XMPP_GUEST_DOMAIN, value: "guest.meet.jitsi" } + - { name: JWT_ACCEPTED_ISSUERS, value: 
"https://sso.bstein.dev/realms/atlas" } + - { name: JWT_ACCEPTED_AUDIENCES, value: "jitsi" } + - { name: JWT_APP_ID, value: "jitsi" } + - name: JWT_APP_SECRET + valueFrom: { secretKeyRef: { name: jitsi-jwt, key: app_secret } } - { name: JICOFO_AUTH_USER, value: "focus" } - { name: JVB_AUTH_USER, value: "jvb" } - name: JICOFO_AUTH_PASSWORD @@ -56,9 +47,16 @@ spec: valueFrom: { secretKeyRef: { name: jitsi-internal-secrets, key: JVB_AUTH_PASSWORD } } volumeMounts: - { name: cfg, mountPath: /config } + - { name: jwt, mountPath: /var/lib/jitsi-jwt, readOnly: true } volumes: - name: cfg persistentVolumeClaim: { claimName: jitsi-prosody-config } + - name: jwt + csi: + driver: secrets-store.csi.x-k8s.io + readOnly: true + volumeAttributes: + secretProviderClass: jitsi-jwt --- @@ -75,6 +73,7 @@ spec: metadata: labels: { app: jitsi-jicofo } spec: + serviceAccountName: jitsi nodeSelector: kubernetes.io/hostname: titan-22 kubernetes.io/arch: amd64 @@ -89,7 +88,7 @@ spec: - { name: XMPP_GUEST_DOMAIN, value: "guest.meet.jitsi" } - { name: ENABLE_AUTH, value: "1" } - { name: ENABLE_GUESTS, value: "1" } - - { name: AUTH_TYPE, value: "internal" } + - { name: AUTH_TYPE, value: "jwt" } - { name: XMPP_SERVER, value: "jitsi-prosody.jitsi.svc.cluster.local" } - { name: JICOFO_AUTH_USER, value: "focus" } - name: JICOFO_AUTH_PASSWORD @@ -120,6 +119,7 @@ spec: metadata: labels: { app: jitsi-jvb } spec: + serviceAccountName: jitsi initContainers: - name: jvb-custom-config image: busybox:1.36 @@ -163,6 +163,7 @@ spec: - { name: JVB_ADVERTISE_IPS, value: "38.28.125.112,192.168.22.22" } - { name: JVB_TCP_HARVESTER_DISABLED, value: "false" } - { name: JVB_TCP_PORT, value: "4443" } + - { name: AUTH_TYPE, value: "jwt" } - name: JVB_OPTS value: "-Dorg.jitsi.videobridge.DISABLE_TCP_HARVESTER=false -Dorg.ice4j.ice.harvest.DISABLE_TCP_HARVESTER=false -Dorg.jitsi.videobridge.TCP_HARVESTER_PORT=4443 -Dorg.jitsi.videobridge.TCP_HARVESTER_MAPPED_PORT=4443" volumeMounts: @@ -189,6 +190,7 @@ spec: 
metadata: labels: { app: jitsi-web } spec: + serviceAccountName: jitsi nodeSelector: kubernetes.io/hostname: titan-22 kubernetes.io/arch: amd64 @@ -206,12 +208,24 @@ spec: - { name: XMPP_GUEST_DOMAIN, value: "guest.meet.jitsi" } - { name: ENABLE_AUTH, value: "1" } - { name: ENABLE_GUESTS, value: "1" } - - { name: AUTH_TYPE, value: "internal" } + - { name: AUTH_TYPE, value: "jwt" } + - { name: JWT_APP_ID, value: "jitsi" } + - { name: JWT_ACCEPTED_ISSUERS, value: "https://sso.bstein.dev/realms/atlas" } + - { name: JWT_ACCEPTED_AUDIENCES, value: "jitsi" } + - name: JWT_APP_SECRET + valueFrom: { secretKeyRef: { name: jitsi-jwt, key: app_secret } } - { name: XMPP_BOSH_URL_BASE, value: "https://meet.bstein.dev" } - { name: ENABLE_XMPP_WEBSOCKET, value: "1" } - { name: ENABLE_COLIBRI_WEBSOCKET, value: "1" } volumeMounts: - { name: cfg, mountPath: /config } + - { name: jwt, mountPath: /var/lib/jitsi-jwt, readOnly: true } volumes: - name: cfg persistentVolumeClaim: { claimName: jitsi-web-config } + - name: jwt + csi: + driver: secrets-store.csi.x-k8s.io + readOnly: true + volumeAttributes: + secretProviderClass: jitsi-jwt diff --git a/services/jitsi/kustomization.yaml b/services/jitsi/kustomization.yaml index 117ef5e..cfa5622 100644 --- a/services/jitsi/kustomization.yaml +++ b/services/jitsi/kustomization.yaml @@ -3,7 +3,8 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - namespace.yaml - - secret-auth-user.yaml + - serviceaccount.yaml + - secretproviderclass.yaml - deployment.yaml - service.yaml - pvc.yaml diff --git a/services/jitsi/secret-auth-user.yaml b/services/jitsi/secret-auth-user.yaml deleted file mode 100644 index 2a2949e..0000000 --- a/services/jitsi/secret-auth-user.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# services/jitsi/secret-auth-user.yaml -apiVersion: v1 -kind: Secret -metadata: - name: jitsi-auth-user - namespace: jitsi -stringData: - username: brad - password: qvUqX5foh2zyM0th diff --git 
a/services/jitsi/secretproviderclass.yaml b/services/jitsi/secretproviderclass.yaml new file mode 100644 index 0000000..365af60 --- /dev/null +++ b/services/jitsi/secretproviderclass.yaml @@ -0,0 +1,21 @@ +# services/jitsi/secretproviderclass.yaml +apiVersion: secrets-store.csi.x-k8s.io/v1 +kind: SecretProviderClass +metadata: + name: jitsi-jwt + namespace: jitsi +spec: + provider: vault + parameters: + vaultAddress: "http://vault.vault.svc.cluster.local:8200" + roleName: jitsi-jwt + objects: | + - objectName: "jwt" + secretPath: "kv/data/jitsi/jwt-hs256" + secretKey: "app_secret" + secretObjects: + - secretName: jitsi-jwt + type: Opaque + data: + - objectName: "jwt" + key: app_secret diff --git a/services/jitsi/serviceaccount.yaml b/services/jitsi/serviceaccount.yaml new file mode 100644 index 0000000..ce1a1c9 --- /dev/null +++ b/services/jitsi/serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/jitsi/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: jitsi + namespace: jitsi -- 2.47.2 From 2acc7a06b2d27d16942f051f44df63d2288bd32b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 25 Dec 2025 03:20:13 -0300 Subject: [PATCH 106/684] vault-csi: deploy vault provider daemonset --- infrastructure/vault-csi/kustomization.yaml | 1 - .../vault-csi/vault-csi-provider.yaml | 116 ++++++++++++++++-- 2 files changed, 105 insertions(+), 12 deletions(-) diff --git a/infrastructure/vault-csi/kustomization.yaml b/infrastructure/vault-csi/kustomization.yaml index 5598653..a5d223d 100644 --- a/infrastructure/vault-csi/kustomization.yaml +++ b/infrastructure/vault-csi/kustomization.yaml @@ -1,7 +1,6 @@ # infrastructure/vault-csi/kustomization.yaml apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization -namespace: kube-system resources: - secrets-store-csi-driver.yaml - vault-csi-provider.yaml diff --git a/infrastructure/vault-csi/vault-csi-provider.yaml b/infrastructure/vault-csi/vault-csi-provider.yaml index 379d7ff..0b63d1c 100644 --- 
a/infrastructure/vault-csi/vault-csi-provider.yaml +++ b/infrastructure/vault-csi/vault-csi-provider.yaml @@ -1,17 +1,111 @@ # infrastructure/vault-csi/vault-csi-provider.yaml -apiVersion: helm.toolkit.fluxcd.io/v2 -kind: HelmRelease +apiVersion: v1 +kind: ServiceAccount metadata: name: vault-csi-provider namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: vault-csi-provider-clusterrole +rules: + - apiGroups: [""] + resources: ["serviceaccounts/token"] + verbs: ["create"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: vault-csi-provider-clusterrolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: vault-csi-provider-clusterrole +subjects: + - kind: ServiceAccount + name: vault-csi-provider + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: vault-csi-provider-role + namespace: kube-system +rules: + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get"] + resourceNames: ["vault-csi-provider-hmac-key"] + - apiGroups: [""] + resources: ["secrets"] + verbs: ["create"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: vault-csi-provider-rolebinding + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: vault-csi-provider-role +subjects: + - kind: ServiceAccount + name: vault-csi-provider + namespace: kube-system +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: vault-csi-provider + namespace: kube-system + labels: { app.kubernetes.io/name: vault-csi-provider } spec: - interval: 15m - chart: + updateStrategy: + type: RollingUpdate + selector: + matchLabels: { app.kubernetes.io/name: vault-csi-provider } + template: + metadata: + labels: { app.kubernetes.io/name: vault-csi-provider } spec: - chart: vault-csi-provider - version: "~1.1.0" - sourceRef: - kind: HelmRepository - name: hashicorp - 
namespace: flux-system - values: {} + serviceAccountName: vault-csi-provider + containers: + - name: provider-vault-installer + image: hashicorp/vault-csi-provider:1.7.0 + imagePullPolicy: IfNotPresent + args: + - -endpoint=/provider/vault.sock + - -log-level=info + resources: + requests: { cpu: 50m, memory: 100Mi } + limits: { cpu: 50m, memory: 100Mi } + volumeMounts: + - { name: providervol, mountPath: "/provider" } + livenessProbe: + httpGet: + path: "/health/ready" + port: 8080 + scheme: "HTTP" + failureThreshold: 2 + initialDelaySeconds: 5 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 3 + readinessProbe: + httpGet: + path: "/health/ready" + port: 8080 + scheme: "HTTP" + failureThreshold: 2 + initialDelaySeconds: 5 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 3 + volumes: + - name: providervol + hostPath: + path: "/var/run/secrets-store-csi-providers" + nodeSelector: + kubernetes.io/os: linux -- 2.47.2 From bb93f730d5de9a23b9db71abcfd7d2816cca7e10 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 25 Dec 2025 03:36:55 -0300 Subject: [PATCH 107/684] jitsi: fix secrets-store csi driver name --- services/jitsi/deployment.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/jitsi/deployment.yaml b/services/jitsi/deployment.yaml index 454fa14..43d552c 100644 --- a/services/jitsi/deployment.yaml +++ b/services/jitsi/deployment.yaml @@ -53,7 +53,7 @@ spec: persistentVolumeClaim: { claimName: jitsi-prosody-config } - name: jwt csi: - driver: secrets-store.csi.x-k8s.io + driver: secrets-store.csi.k8s.io readOnly: true volumeAttributes: secretProviderClass: jitsi-jwt @@ -225,7 +225,7 @@ spec: persistentVolumeClaim: { claimName: jitsi-web-config } - name: jwt csi: - driver: secrets-store.csi.x-k8s.io + driver: secrets-store.csi.k8s.io readOnly: true volumeAttributes: secretProviderClass: jitsi-jwt -- 2.47.2 From 77ecf3229e17a6bb435aa7401e412414556acbb3 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 25 Dec 
2025 03:43:17 -0300 Subject: [PATCH 108/684] vault: use dedicated service account for k8s auth --- services/vault/kustomization.yaml | 2 ++ services/vault/rbac.yaml | 13 +++++++++++++ services/vault/serviceaccount.yaml | 6 ++++++ services/vault/statefulset.yaml | 1 + 4 files changed, 22 insertions(+) create mode 100644 services/vault/rbac.yaml create mode 100644 services/vault/serviceaccount.yaml diff --git a/services/vault/kustomization.yaml b/services/vault/kustomization.yaml index 9fdb061..b39fc48 100644 --- a/services/vault/kustomization.yaml +++ b/services/vault/kustomization.yaml @@ -4,6 +4,8 @@ kind: Kustomization namespace: vault resources: - namespace.yaml + - serviceaccount.yaml + - rbac.yaml - configmap.yaml - statefulset.yaml - service.yaml diff --git a/services/vault/rbac.yaml b/services/vault/rbac.yaml new file mode 100644 index 0000000..d1caa18 --- /dev/null +++ b/services/vault/rbac.yaml @@ -0,0 +1,13 @@ +# services/vault/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: vault-auth-delegator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:auth-delegator +subjects: + - kind: ServiceAccount + name: vault + namespace: vault diff --git a/services/vault/serviceaccount.yaml b/services/vault/serviceaccount.yaml new file mode 100644 index 0000000..56c4181 --- /dev/null +++ b/services/vault/serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/vault/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: vault + namespace: vault diff --git a/services/vault/statefulset.yaml b/services/vault/statefulset.yaml index d1b22c8..bd15607 100644 --- a/services/vault/statefulset.yaml +++ b/services/vault/statefulset.yaml @@ -17,6 +17,7 @@ spec: labels: app: vault spec: + serviceAccountName: vault nodeSelector: node-role.kubernetes.io/worker: "true" kubernetes.io/arch: arm64 -- 2.47.2 From a55203a90957c257bd4ab64a594e7ba59a7b12e2 Mon Sep 17 00:00:00 2001 From: Brad Stein 
Date: Thu, 25 Dec 2025 16:33:56 -0300 Subject: [PATCH 109/684] jitsi: add vault-backed jwt launcher --- services/jitsi/kustomization.yaml | 4 + services/jitsi/launcher-configmap.yaml | 118 ++++++++++++++++++++++++ services/jitsi/launcher-deployment.yaml | 52 +++++++++++ services/jitsi/launcher-ingress.yaml | 24 +++++ services/jitsi/launcher-service.yaml | 12 +++ 5 files changed, 210 insertions(+) create mode 100644 services/jitsi/launcher-configmap.yaml create mode 100644 services/jitsi/launcher-deployment.yaml create mode 100644 services/jitsi/launcher-ingress.yaml create mode 100644 services/jitsi/launcher-service.yaml diff --git a/services/jitsi/kustomization.yaml b/services/jitsi/kustomization.yaml index cfa5622..805a967 100644 --- a/services/jitsi/kustomization.yaml +++ b/services/jitsi/kustomization.yaml @@ -6,6 +6,10 @@ resources: - serviceaccount.yaml - secretproviderclass.yaml - deployment.yaml + - launcher-configmap.yaml + - launcher-deployment.yaml + - launcher-service.yaml + - launcher-ingress.yaml - service.yaml - pvc.yaml - ingress.yaml diff --git a/services/jitsi/launcher-configmap.yaml b/services/jitsi/launcher-configmap.yaml new file mode 100644 index 0000000..c36f167 --- /dev/null +++ b/services/jitsi/launcher-configmap.yaml @@ -0,0 +1,118 @@ +# services/jitsi/launcher-configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: jitsi-launcher + namespace: jitsi +data: + app.py: | + import base64 + import hashlib + import hmac + import json + import os + import time + from fastapi import FastAPI, HTTPException, Request + from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse + + ISSUER = os.getenv("JWT_ISSUER", "https://sso.bstein.dev/realms/atlas") + AUDIENCE = os.getenv("JWT_AUDIENCE", "jitsi") + APP_ID = os.getenv("JWT_APP_ID", "jitsi") + PUBLIC_URL = os.getenv("PUBLIC_URL", "https://meet.bstein.dev") + SECRET_FILE = os.getenv("JWT_SECRET_FILE", "/var/lib/jitsi-jwt/jwt") + ALLOWED_GROUPS = {g for g in 
os.getenv("ALLOWED_GROUPS", "").split(",") if g} + TOKEN_TTL = int(os.getenv("JWT_TTL_SECONDS", "600")) + + app = FastAPI() + + + def _b64url(data: bytes) -> bytes: + return base64.urlsafe_b64encode(data).rstrip(b"=") + + + def _read_secret() -> bytes: + raw = open(SECRET_FILE, "rb").read().strip() + try: + return bytes.fromhex(raw.decode()) + except ValueError: + return raw + + + def _sign(room: str, user: str, groups: list[str]) -> str: + now = int(time.time()) + header = {"alg": "HS256", "typ": "JWT"} + payload = { + "iss": ISSUER, + "aud": AUDIENCE, + "sub": "meet.jitsi", + "room": room, + "exp": now + TOKEN_TTL, + "nbf": now - 10, + "context": { + "user": { + "name": user, + "email": user, + "affiliation": "owner", + "groups": groups, + } + }, + "app_id": APP_ID, + } + secret = _read_secret() + signing_input = b".".join( + [ + _b64url(json.dumps(header, separators=(",", ":")).encode()), + _b64url(json.dumps(payload, separators=(",", ":")).encode()), + ] + ) + sig = _b64url(hmac.new(secret, signing_input, hashlib.sha256).digest()) + return b".".join([signing_input, sig]).decode() + + + def _render_form(message: str = "") -> HTMLResponse: + body = f""" + + +

Start a Jitsi room

+ {'

'+message+'

' if message else ''} +
+ + + +
+ + + """ + return HTMLResponse(body) + + + def _extract_groups(request: Request) -> set[str]: + raw = request.headers.get("x-auth-request-groups", "") + # Traefik forwardAuth returns comma-separated groups + return {g.strip() for g in raw.split(",") if g.strip()} + + + @app.get("/launch") + async def launch(request: Request, room: str | None = None): + user = request.headers.get("x-auth-request-email") or request.headers.get( + "x-auth-request-user", "" + ) + groups = _extract_groups(request) + if ALLOWED_GROUPS and not (groups & ALLOWED_GROUPS): + raise HTTPException(status_code=403, detail="forbidden") + if not room: + return _render_form() + room = room.strip() + if not room or "/" in room or ".." in room: + raise HTTPException(status_code=400, detail="invalid room") + token = _sign(room, user or "moderator", sorted(groups)) + join_url = f"{PUBLIC_URL}/{room}#config.jwt={token}" + accept = request.headers.get("accept", "") + if "text/html" in accept: + return RedirectResponse(join_url, status_code=302) + return JSONResponse({"room": room, "join_url": join_url, "token": token}) + + + @app.get("/") + async def root(): + return RedirectResponse("/launch") diff --git a/services/jitsi/launcher-deployment.yaml b/services/jitsi/launcher-deployment.yaml new file mode 100644 index 0000000..3d207c7 --- /dev/null +++ b/services/jitsi/launcher-deployment.yaml @@ -0,0 +1,52 @@ +# services/jitsi/launcher-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: jitsi-launcher + namespace: jitsi +spec: + replicas: 1 + selector: + matchLabels: { app: jitsi-launcher } + template: + metadata: + labels: { app: jitsi-launcher } + spec: + serviceAccountName: jitsi + nodeSelector: + kubernetes.io/hostname: titan-22 + kubernetes.io/arch: amd64 + containers: + - name: launcher + image: ghcr.io/tiangolo/uvicorn-gunicorn-fastapi:python3.11-slim + imagePullPolicy: IfNotPresent + env: + - { name: JWT_SECRET_FILE, value: "/var/lib/jitsi-jwt/jwt" } + - { name: JWT_ISSUER, 
value: "https://sso.bstein.dev/realms/atlas" } + - { name: JWT_AUDIENCE, value: "jitsi" } + - { name: JWT_APP_ID, value: "jitsi" } + - { name: PUBLIC_URL, value: "https://meet.bstein.dev" } + - { name: ALLOWED_GROUPS, value: "admin,jitsi-moderator" } + - { name: JWT_TTL_SECONDS, value: "600" } + ports: + - { name: http, containerPort: 80 } + volumeMounts: + - { name: app, mountPath: /app/main.py, subPath: app.py } + - { name: jwt, mountPath: /var/lib/jitsi-jwt, readOnly: true } + readinessProbe: + httpGet: + path: /launch + port: 80 + initialDelaySeconds: 5 + periodSeconds: 10 + volumes: + - name: app + configMap: + name: jitsi-launcher + defaultMode: 0444 + - name: jwt + csi: + driver: secrets-store.csi.k8s.io + readOnly: true + volumeAttributes: + secretProviderClass: jitsi-jwt diff --git a/services/jitsi/launcher-ingress.yaml b/services/jitsi/launcher-ingress.yaml new file mode 100644 index 0000000..c9f1f55 --- /dev/null +++ b/services/jitsi/launcher-ingress.yaml @@ -0,0 +1,24 @@ +# services/jitsi/launcher-ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: jitsi-launcher + namespace: jitsi + annotations: + cert-manager.io/cluster-issuer: letsencrypt + traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-forward-auth@kubernetescrd +spec: + ingressClassName: traefik + tls: + - hosts: [ "meet.bstein.dev" ] + secretName: jitsi-meet-tls + rules: + - host: meet.bstein.dev + http: + paths: + - path: /launch + pathType: Prefix + backend: + service: + name: jitsi-launcher + port: { number: 80 } diff --git a/services/jitsi/launcher-service.yaml b/services/jitsi/launcher-service.yaml new file mode 100644 index 0000000..3ed7f5a --- /dev/null +++ b/services/jitsi/launcher-service.yaml @@ -0,0 +1,12 @@ +# services/jitsi/launcher-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: jitsi-launcher + namespace: jitsi +spec: + selector: { app: jitsi-launcher } + ports: + - name: http + port: 80 + targetPort: 80 -- 2.47.2 From 
a8fdcc59314d0ff9ae71d9369ab24f5465512257 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 25 Dec 2025 16:35:44 -0300 Subject: [PATCH 110/684] jitsi-launcher: pull image from docker hub --- services/jitsi/launcher-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/jitsi/launcher-deployment.yaml b/services/jitsi/launcher-deployment.yaml index 3d207c7..151a222 100644 --- a/services/jitsi/launcher-deployment.yaml +++ b/services/jitsi/launcher-deployment.yaml @@ -18,7 +18,7 @@ spec: kubernetes.io/arch: amd64 containers: - name: launcher - image: ghcr.io/tiangolo/uvicorn-gunicorn-fastapi:python3.11-slim + image: docker.io/tiangolo/uvicorn-gunicorn-fastapi:python3.11-slim imagePullPolicy: IfNotPresent env: - { name: JWT_SECRET_FILE, value: "/var/lib/jitsi-jwt/jwt" } -- 2.47.2 From 0e3d36a5ae2920b524667e07b0816849ada40a66 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 25 Dec 2025 16:40:37 -0300 Subject: [PATCH 111/684] jitsi-launcher: add health endpoint and readiness --- services/jitsi/launcher-configmap.yaml | 5 +++++ services/jitsi/launcher-deployment.yaml | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/services/jitsi/launcher-configmap.yaml b/services/jitsi/launcher-configmap.yaml index c36f167..5877219 100644 --- a/services/jitsi/launcher-configmap.yaml +++ b/services/jitsi/launcher-configmap.yaml @@ -116,3 +116,8 @@ data: @app.get("/") async def root(): return RedirectResponse("/launch") + + + @app.get("/health") + async def health(): + return {"status": "ok"} diff --git a/services/jitsi/launcher-deployment.yaml b/services/jitsi/launcher-deployment.yaml index 151a222..53cf545 100644 --- a/services/jitsi/launcher-deployment.yaml +++ b/services/jitsi/launcher-deployment.yaml @@ -35,7 +35,7 @@ spec: - { name: jwt, mountPath: /var/lib/jitsi-jwt, readOnly: true } readinessProbe: httpGet: - path: /launch + path: /health port: 80 initialDelaySeconds: 5 periodSeconds: 10 -- 2.47.2 From 
c9ebcfc8692cc4aab071922452a0bd23f72a9fc9 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 25 Dec 2025 16:54:33 -0300 Subject: [PATCH 112/684] jitsi-launcher: allow any authenticated user (no group gate) --- services/jitsi/launcher-deployment.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/jitsi/launcher-deployment.yaml b/services/jitsi/launcher-deployment.yaml index 53cf545..b957e24 100644 --- a/services/jitsi/launcher-deployment.yaml +++ b/services/jitsi/launcher-deployment.yaml @@ -26,7 +26,8 @@ spec: - { name: JWT_AUDIENCE, value: "jitsi" } - { name: JWT_APP_ID, value: "jitsi" } - { name: PUBLIC_URL, value: "https://meet.bstein.dev" } - - { name: ALLOWED_GROUPS, value: "admin,jitsi-moderator" } + # Allow any authenticated user to mint; tighten later by setting comma list + - { name: ALLOWED_GROUPS, value: "" } - { name: JWT_TTL_SECONDS, value: "600" } ports: - { name: http, containerPort: 80 } -- 2.47.2 From c0a53e59b5b62fad7cdb5ffaba8e6957b6829911 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 25 Dec 2025 16:57:40 -0300 Subject: [PATCH 113/684] jitsi-launcher: add oauth2-proxy error middleware for redirects --- services/jitsi/launcher-ingress.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/jitsi/launcher-ingress.yaml b/services/jitsi/launcher-ingress.yaml index c9f1f55..b0b58e2 100644 --- a/services/jitsi/launcher-ingress.yaml +++ b/services/jitsi/launcher-ingress.yaml @@ -6,7 +6,7 @@ metadata: namespace: jitsi annotations: cert-manager.io/cluster-issuer: letsencrypt - traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-forward-auth@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-forward-auth@kubernetescrd,sso-oauth2-proxy-errors@kubernetescrd spec: ingressClassName: traefik tls: -- 2.47.2 From a6bd6b8cc81c7a985f01cd4c95f2fe6c224e97ec Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 12:00:12 -0300 Subject: [PATCH 114/684] 
communication: add Othrys stack via Flux --- .../atlas/applications/kustomization.yaml | 2 +- .../communication/kustomization.yaml | 17 + .../applications/kustomization.yaml | 2 +- .../flux-system/platform/kustomization.yaml | 1 + .../metallb}/kustomization.yaml | 15 +- .../platform/traefik/kustomization.yaml | 1 + clusters/atlas/platform/kustomization.yaml | 1 + infrastructure/metallb/ippool.yaml | 20 + infrastructure/metallb/kustomization.yaml | 9 + infrastructure/metallb/metallb-rendered.yaml | 2411 +++++++++++++++++ infrastructure/metallb/namespace.yaml | 5 + .../metallb/patches/node-placement.yaml | 30 + infrastructure/traefik/clusterrole.yaml | 3 +- infrastructure/traefik/kustomization.yaml | 1 + .../traefik/traefik-service-lb.yaml | 24 + .../communication/atlasbot-configmap.yaml | 131 + .../communication/atlasbot-credentials.yaml | 9 + .../communication/atlasbot-deployment.yaml | 61 + services/communication/chat-ai-keys.yaml | 9 + services/communication/coturn.yaml | 323 +++ .../communication/element-call-config.yaml | 25 + .../element-call-deployment.yaml | 78 + services/communication/element-rendered.yaml | 223 ++ services/communication/guest-name-job.yaml | 89 + services/communication/kustomization.yaml | 24 + services/communication/livekit-config.yaml | 21 + services/communication/livekit-ingress.yaml | 28 + .../communication/livekit-middlewares.yaml | 48 + .../livekit-token-deployment.yaml | 69 + services/communication/livekit.yaml | 120 + services/communication/namespace.yaml | 5 + services/communication/pin-othrys-job.yaml | 68 + services/communication/seed-othrys-room.yaml | 135 + services/communication/synapse-rendered.yaml | 1155 ++++++++ services/communication/values-element.yaml | 59 + services/communication/values-synapse.yaml | 132 + services/communication/wellknown.yaml | 109 + services/mailu/front-lb.yaml | 42 + services/mailu/kustomization.yaml | 1 + 39 files changed, 5494 insertions(+), 12 deletions(-) create mode 100644 
clusters/atlas/flux-system/applications/communication/kustomization.yaml rename clusters/atlas/flux-system/{applications/jitsi => platform/metallb}/kustomization.yaml (53%) create mode 100644 infrastructure/metallb/ippool.yaml create mode 100644 infrastructure/metallb/kustomization.yaml create mode 100644 infrastructure/metallb/metallb-rendered.yaml create mode 100644 infrastructure/metallb/namespace.yaml create mode 100644 infrastructure/metallb/patches/node-placement.yaml create mode 100644 infrastructure/traefik/traefik-service-lb.yaml create mode 100644 services/communication/atlasbot-configmap.yaml create mode 100644 services/communication/atlasbot-credentials.yaml create mode 100644 services/communication/atlasbot-deployment.yaml create mode 100644 services/communication/chat-ai-keys.yaml create mode 100644 services/communication/coturn.yaml create mode 100644 services/communication/element-call-config.yaml create mode 100644 services/communication/element-call-deployment.yaml create mode 100644 services/communication/element-rendered.yaml create mode 100644 services/communication/guest-name-job.yaml create mode 100644 services/communication/kustomization.yaml create mode 100644 services/communication/livekit-config.yaml create mode 100644 services/communication/livekit-ingress.yaml create mode 100644 services/communication/livekit-middlewares.yaml create mode 100644 services/communication/livekit-token-deployment.yaml create mode 100644 services/communication/livekit.yaml create mode 100644 services/communication/namespace.yaml create mode 100644 services/communication/pin-othrys-job.yaml create mode 100644 services/communication/seed-othrys-room.yaml create mode 100644 services/communication/synapse-rendered.yaml create mode 100644 services/communication/values-element.yaml create mode 100644 services/communication/values-synapse.yaml create mode 100644 services/communication/wellknown.yaml create mode 100644 services/mailu/front-lb.yaml diff --git 
a/clusters/atlas/applications/kustomization.yaml b/clusters/atlas/applications/kustomization.yaml index a32ec81..c25257b 100644 --- a/clusters/atlas/applications/kustomization.yaml +++ b/clusters/atlas/applications/kustomization.yaml @@ -5,7 +5,7 @@ resources: - ../../services/crypto - ../../services/gitea - ../../services/jellyfin - - ../../services/jitsi + - ../../services/communication - ../../services/monitoring - ../../services/pegasus - ../../services/vault diff --git a/clusters/atlas/flux-system/applications/communication/kustomization.yaml b/clusters/atlas/flux-system/applications/communication/kustomization.yaml new file mode 100644 index 0000000..0d3b07a --- /dev/null +++ b/clusters/atlas/flux-system/applications/communication/kustomization.yaml @@ -0,0 +1,17 @@ +# clusters/atlas/flux-system/applications/communication/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: communication + namespace: flux-system +spec: + interval: 10m + prune: true + sourceRef: + kind: GitRepository + name: flux-system + path: ./services/communication + targetNamespace: communication + timeout: 2m + dependsOn: + - name: traefik diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index b5a5e62..e1d1feb 100644 --- a/clusters/atlas/flux-system/applications/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/kustomization.yaml @@ -4,7 +4,7 @@ kind: Kustomization resources: - gitea/kustomization.yaml - vault/kustomization.yaml - - jitsi/kustomization.yaml + - communication/kustomization.yaml - crypto/kustomization.yaml - monerod/kustomization.yaml - pegasus/kustomization.yaml diff --git a/clusters/atlas/flux-system/platform/kustomization.yaml b/clusters/atlas/flux-system/platform/kustomization.yaml index fbca36e..e1c5d23 100644 --- a/clusters/atlas/flux-system/platform/kustomization.yaml +++ 
b/clusters/atlas/flux-system/platform/kustomization.yaml @@ -4,6 +4,7 @@ kind: Kustomization resources: - core/kustomization.yaml - helm/kustomization.yaml + - metallb/kustomization.yaml - traefik/kustomization.yaml - gitops-ui/kustomization.yaml - monitoring/kustomization.yaml diff --git a/clusters/atlas/flux-system/applications/jitsi/kustomization.yaml b/clusters/atlas/flux-system/platform/metallb/kustomization.yaml similarity index 53% rename from clusters/atlas/flux-system/applications/jitsi/kustomization.yaml rename to clusters/atlas/flux-system/platform/metallb/kustomization.yaml index 8e96feb..98baaff 100644 --- a/clusters/atlas/flux-system/applications/jitsi/kustomization.yaml +++ b/clusters/atlas/flux-system/platform/metallb/kustomization.yaml @@ -1,19 +1,16 @@ -# clusters/atlas/flux-system/applications/jitsi/kustomization.yaml +# clusters/atlas/flux-system/platform/metallb/kustomization.yaml apiVersion: kustomize.toolkit.fluxcd.io/v1 kind: Kustomization metadata: - name: jitsi + name: metallb namespace: flux-system spec: - interval: 10m - path: ./services/jitsi - targetNamespace: jitsi - prune: true + interval: 30m sourceRef: kind: GitRepository name: flux-system namespace: flux-system - dependsOn: - - name: core + path: ./infrastructure/metallb + prune: true wait: true - timeout: 5m + targetNamespace: metallb-system diff --git a/clusters/atlas/flux-system/platform/traefik/kustomization.yaml b/clusters/atlas/flux-system/platform/traefik/kustomization.yaml index 0f53de7..336eb89 100644 --- a/clusters/atlas/flux-system/platform/traefik/kustomization.yaml +++ b/clusters/atlas/flux-system/platform/traefik/kustomization.yaml @@ -15,4 +15,5 @@ spec: namespace: flux-system dependsOn: - name: core + - name: metallb wait: true diff --git a/clusters/atlas/platform/kustomization.yaml b/clusters/atlas/platform/kustomization.yaml index c7b144a..43fa993 100644 --- a/clusters/atlas/platform/kustomization.yaml +++ b/clusters/atlas/platform/kustomization.yaml @@ -5,3 +5,4 
@@ resources: - ../../../infrastructure/modules/base - ../../../infrastructure/modules/profiles/atlas-ha - ../../../infrastructure/sources/cert-manager/letsencrypt.yaml + - ../../../infrastructure/metallb diff --git a/infrastructure/metallb/ippool.yaml b/infrastructure/metallb/ippool.yaml new file mode 100644 index 0000000..e792280 --- /dev/null +++ b/infrastructure/metallb/ippool.yaml @@ -0,0 +1,20 @@ +# infrastructure/metallb/ippool.yaml +apiVersion: metallb.io/v1beta1 +kind: IPAddressPool +metadata: + name: communication-pool + namespace: metallb-system +spec: + addresses: + - 192.168.22.4-192.168.22.6 + - 192.168.22.9-192.168.22.9 + autoAssign: true +--- +apiVersion: metallb.io/v1beta1 +kind: L2Advertisement +metadata: + name: communication-adv + namespace: metallb-system +spec: + ipAddressPools: + - communication-pool diff --git a/infrastructure/metallb/kustomization.yaml b/infrastructure/metallb/kustomization.yaml new file mode 100644 index 0000000..f6df7e6 --- /dev/null +++ b/infrastructure/metallb/kustomization.yaml @@ -0,0 +1,9 @@ +# infrastructure/metallb/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - namespace.yaml + - metallb-rendered.yaml + - ippool.yaml +patchesStrategicMerge: + - patches/node-placement.yaml diff --git a/infrastructure/metallb/metallb-rendered.yaml b/infrastructure/metallb/metallb-rendered.yaml new file mode 100644 index 0000000..0f8ad10 --- /dev/null +++ b/infrastructure/metallb/metallb-rendered.yaml @@ -0,0 +1,2411 @@ +--- +# Source: metallb/templates/service-accounts.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: metallb-controller + namespace: "metallb-system" + labels: + helm.sh/chart: metallb-0.15.3 + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/version: "v0.15.3" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: controller +--- +# Source: metallb/templates/service-accounts.yaml +apiVersion: v1 
+kind: ServiceAccount +metadata: + name: metallb-speaker + namespace: "metallb-system" + labels: + helm.sh/chart: metallb-0.15.3 + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/version: "v0.15.3" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: speaker +--- +# Source: metallb/templates/webhooks.yaml +apiVersion: v1 +kind: Secret +metadata: + name: metallb-webhook-cert + namespace: "metallb-system" + labels: + helm.sh/chart: metallb-0.15.3 + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/version: "v0.15.3" + app.kubernetes.io/managed-by: Helm +--- +# Source: metallb/templates/exclude-l2-config.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: metallb-excludel2 + namespace: "metallb-system" + labels: + helm.sh/chart: metallb-0.15.3 + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/version: "v0.15.3" + app.kubernetes.io/managed-by: Helm +data: + excludel2.yaml: | + announcedInterfacesToExclude: + - ^docker.* + - ^cbr.* + - ^dummy.* + - ^virbr.* + - ^lxcbr.* + - ^veth.* + - ^lo$ + - ^cali.* + - ^tunl.* + - ^flannel.* + - ^kube-ipvs.* + - ^cni.* + - ^nodelocaldns.* + - ^lxc.* +--- +# Source: metallb/templates/speaker.yaml +# FRR expects to have these files owned by frr:frr on startup. +# Having them in a ConfigMap allows us to modify behaviors: for example enabling more daemons on startup. +apiVersion: v1 +kind: ConfigMap +metadata: + name: metallb-frr-startup + namespace: "metallb-system" + labels: + helm.sh/chart: metallb-0.15.3 + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/version: "v0.15.3" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: speaker +data: + daemons: | + # This file tells the frr package which daemons to start. + # + # Sample configurations for these daemons can be found in + # /usr/share/doc/frr/examples/. 
+ # + # ATTENTION: + # + # When activating a daemon for the first time, a config file, even if it is + # empty, has to be present *and* be owned by the user and group "frr", else + # the daemon will not be started by /etc/init.d/frr. The permissions should + # be u=rw,g=r,o=. + # When using "vtysh" such a config file is also needed. It should be owned by + # group "frrvty" and set to ug=rw,o= though. Check /etc/pam.d/frr, too. + # + # The watchfrr and zebra daemons are always started. + # + bgpd=yes + ospfd=no + ospf6d=no + ripd=no + ripngd=no + isisd=no + pimd=no + ldpd=no + nhrpd=no + eigrpd=no + babeld=no + sharpd=no + pbrd=no + bfdd=yes + fabricd=no + vrrpd=no + + # + # If this option is set the /etc/init.d/frr script automatically loads + # the config via "vtysh -b" when the servers are started. + # Check /etc/pam.d/frr if you intend to use "vtysh"! + # + vtysh_enable=yes + zebra_options=" -A 127.0.0.1 -s 90000000 --limit-fds 100000" + bgpd_options=" -A 127.0.0.1 -p 0 --limit-fds 100000" + ospfd_options=" -A 127.0.0.1" + ospf6d_options=" -A ::1" + ripd_options=" -A 127.0.0.1" + ripngd_options=" -A ::1" + isisd_options=" -A 127.0.0.1" + pimd_options=" -A 127.0.0.1" + ldpd_options=" -A 127.0.0.1" + nhrpd_options=" -A 127.0.0.1" + eigrpd_options=" -A 127.0.0.1" + babeld_options=" -A 127.0.0.1" + sharpd_options=" -A 127.0.0.1" + pbrd_options=" -A 127.0.0.1" + staticd_options="-A 127.0.0.1 --limit-fds 100000" + bfdd_options=" -A 127.0.0.1 --limit-fds 100000" + fabricd_options="-A 127.0.0.1" + vrrpd_options=" -A 127.0.0.1" + + # configuration profile + # + #frr_profile="traditional" + #frr_profile="datacenter" + + # + # This is the maximum number of FD's that will be available. + # Upon startup this is read by the control files and ulimit + # is called. Uncomment and use a reasonable value for your + # setup if you are expecting a large number of peers in + # say BGP. + #MAX_FDS=1024 + + # The list of daemons to watch is automatically generated by the init script. 
+ #watchfrr_options="" + + # for debugging purposes, you can specify a "wrap" command to start instead + # of starting the daemon directly, e.g. to use valgrind on ospfd: + # ospfd_wrap="/usr/bin/valgrind" + # or you can use "all_wrap" for all daemons, e.g. to use perf record: + # all_wrap="/usr/bin/perf record --call-graph -" + # the normal daemon command is added to this at the end. + vtysh.conf: |+ + service integrated-vtysh-config + frr.conf: |+ + ! This file gets overriden the first time the speaker renders a config. + ! So anything configured here is only temporary. + frr version 8.0 + frr defaults traditional + hostname Router + line vty + log file /etc/frr/frr.log informational +--- +# Source: metallb/charts/crds/templates/crds.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + name: bfdprofiles.metallb.io +spec: + group: metallb.io + names: + kind: BFDProfile + listKind: BFDProfileList + plural: bfdprofiles + singular: bfdprofile + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.passiveMode + name: Passive Mode + type: boolean + - jsonPath: .spec.transmitInterval + name: Transmit Interval + type: integer + - jsonPath: .spec.receiveInterval + name: Receive Interval + type: integer + - jsonPath: .spec.detectMultiplier + name: Multiplier + type: integer + name: v1beta1 + schema: + openAPIV3Schema: + description: |- + BFDProfile represents the settings of the bfd session that can be + optionally associated with a BGP session. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: BFDProfileSpec defines the desired state of BFDProfile. + properties: + detectMultiplier: + description: |- + Configures the detection multiplier to determine + packet loss. The remote transmission interval will be multiplied + by this value to determine the connection loss detection timer. + format: int32 + maximum: 255 + minimum: 2 + type: integer + echoInterval: + description: |- + Configures the minimal echo receive transmission + interval that this system is capable of handling in milliseconds. + Defaults to 50ms + format: int32 + maximum: 60000 + minimum: 10 + type: integer + echoMode: + description: |- + Enables or disables the echo transmission mode. + This mode is disabled by default, and not supported on multi + hops setups. + type: boolean + minimumTtl: + description: |- + For multi hop sessions only: configure the minimum + expected TTL for an incoming BFD control packet. + format: int32 + maximum: 254 + minimum: 1 + type: integer + passiveMode: + description: |- + Mark session as passive: a passive session will not + attempt to start the connection and will wait for control packets + from peer before it begins replying. + type: boolean + receiveInterval: + description: |- + The minimum interval that this system is capable of + receiving control packets in milliseconds. + Defaults to 300ms. 
+ format: int32 + maximum: 60000 + minimum: 10 + type: integer + transmitInterval: + description: |- + The minimum transmission interval (less jitter) + that this system wants to use to send BFD control packets in + milliseconds. Defaults to 300ms + format: int32 + maximum: 60000 + minimum: 10 + type: integer + type: object + status: + description: BFDProfileStatus defines the observed state of BFDProfile. + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +# Source: metallb/charts/crds/templates/crds.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + name: bgpadvertisements.metallb.io +spec: + group: metallb.io + names: + kind: BGPAdvertisement + listKind: BGPAdvertisementList + plural: bgpadvertisements + singular: bgpadvertisement + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.ipAddressPools + name: IPAddressPools + type: string + - jsonPath: .spec.ipAddressPoolSelectors + name: IPAddressPool Selectors + type: string + - jsonPath: .spec.peers + name: Peers + type: string + - jsonPath: .spec.nodeSelectors + name: Node Selectors + priority: 10 + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: |- + BGPAdvertisement allows to advertise the IPs coming + from the selected IPAddressPools via BGP, setting the parameters of the + BGP Advertisement. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. 
+ Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: BGPAdvertisementSpec defines the desired state of BGPAdvertisement. + properties: + aggregationLength: + default: 32 + description: The aggregation-length advertisement option lets you “roll up” the /32s into a larger prefix. Defaults to 32. Works for IPv4 addresses. + format: int32 + minimum: 1 + type: integer + aggregationLengthV6: + default: 128 + description: The aggregation-length advertisement option lets you “roll up” the /128s into a larger prefix. Defaults to 128. Works for IPv6 addresses. + format: int32 + type: integer + communities: + description: |- + The BGP communities to be associated with the announcement. Each item can be a standard community of the + form 1234:1234, a large community of the form large:1234:1234:1234 or the name of an alias defined in the + Community CRD. + items: + type: string + type: array + ipAddressPoolSelectors: + description: |- + A selector for the IPAddressPools which would get advertised via this advertisement. + If no IPAddressPool is selected by this or by the list, the advertisement is applied to all the IPAddressPools. + items: + description: |- + A label selector is a label query over a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector matches all objects. A null + label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. 
+ type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + type: array + ipAddressPools: + description: The list of IPAddressPools to advertise via this advertisement, selected by name. + items: + type: string + type: array + localPref: + description: |- + The BGP LOCAL_PREF attribute which is used by BGP best path algorithm, + Path with higher localpref is preferred over one with lower localpref. + format: int32 + type: integer + nodeSelectors: + description: NodeSelectors allows to limit the nodes to announce as next hops for the LoadBalancer IP. When empty, all the nodes having are announced as next hops. + items: + description: |- + A label selector is a label query over a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector matches all objects. A null + label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. 
+ items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + type: array + peers: + description: |- + Peers limits the bgppeer to advertise the ips of the selected pools to. + When empty, the loadbalancer IP is announced to all the BGPPeers configured. + items: + type: string + type: array + type: object + status: + description: BGPAdvertisementStatus defines the observed state of BGPAdvertisement. 
+ type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +# Source: metallb/charts/crds/templates/crds.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + name: bgppeers.metallb.io +spec: + conversion: + strategy: Webhook + webhook: + clientConfig: + service: + name: metallb-webhook-service + namespace: metallb-system + path: /convert + conversionReviewVersions: + - v1beta1 + - v1beta2 + group: metallb.io + names: + kind: BGPPeer + listKind: BGPPeerList + plural: bgppeers + singular: bgppeer + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.peerAddress + name: Address + type: string + - jsonPath: .spec.peerASN + name: ASN + type: string + - jsonPath: .spec.bfdProfile + name: BFD Profile + type: string + - jsonPath: .spec.ebgpMultiHop + name: Multi Hops + type: string + deprecated: true + deprecationWarning: v1beta1 is deprecated, please use v1beta2 + name: v1beta1 + schema: + openAPIV3Schema: + description: BGPPeer is the Schema for the peers API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: BGPPeerSpec defines the desired state of Peer. 
+ properties: + bfdProfile: + type: string + ebgpMultiHop: + description: EBGP peer is multi-hops away + type: boolean + holdTime: + description: Requested BGP hold time, per RFC4271. + type: string + keepaliveTime: + description: Requested BGP keepalive time, per RFC4271. + type: string + myASN: + description: AS number to use for the local end of the session. + format: int32 + maximum: 4294967295 + minimum: 0 + type: integer + nodeSelectors: + description: |- + Only connect to this peer on nodes that match one of these + selectors. + items: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + minItems: 1 + type: array + required: + - key + - operator + - values + type: object + type: array + matchLabels: + additionalProperties: + type: string + type: object + type: object + type: array + password: + description: Authentication password for routers enforcing TCP MD5 authenticated sessions + type: string + peerASN: + description: AS number to expect from the remote end of the session. + format: int32 + maximum: 4294967295 + minimum: 0 + type: integer + peerAddress: + description: Address to dial when establishing the session. + type: string + peerPort: + description: Port to dial when establishing the session. + maximum: 16384 + minimum: 0 + type: integer + routerID: + description: BGP router ID to advertise to the peer + type: string + sourceAddress: + description: Source address to use when establishing the session. + type: string + required: + - myASN + - peerASN + - peerAddress + type: object + status: + description: BGPPeerStatus defines the observed state of Peer. 
+ type: object + type: object + served: true + storage: false + subresources: + status: {} + - additionalPrinterColumns: + - jsonPath: .spec.peerAddress + name: Address + type: string + - jsonPath: .spec.peerASN + name: ASN + type: string + - jsonPath: .spec.bfdProfile + name: BFD Profile + type: string + - jsonPath: .spec.ebgpMultiHop + name: Multi Hops + type: string + name: v1beta2 + schema: + openAPIV3Schema: + description: BGPPeer is the Schema for the peers API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: BGPPeerSpec defines the desired state of Peer. + properties: + bfdProfile: + description: The name of the BFD Profile to be used for the BFD session associated to the BGP session. If not set, the BFD session won't be set up. + type: string + connectTime: + description: Requested BGP connect time, controls how long BGP waits between connection attempts to a neighbor. 
+ type: string + x-kubernetes-validations: + - message: connect time should be between 1 seconds to 65535 + rule: duration(self).getSeconds() >= 1 && duration(self).getSeconds() <= 65535 + - message: connect time should contain a whole number of seconds + rule: duration(self).getMilliseconds() % 1000 == 0 + disableMP: + default: false + description: |- + To set if we want to disable MP BGP that will separate IPv4 and IPv6 route exchanges into distinct BGP sessions. + Deprecated: DisableMP is deprecated in favor of dualStackAddressFamily. + type: boolean + dualStackAddressFamily: + default: false + description: |- + To set if we want to enable the neighbor not only for the ipfamily related to its session, + but also the other one. This allows to advertise/receive IPv4 prefixes over IPv6 sessions and vice versa. + type: boolean + dynamicASN: + description: |- + DynamicASN detects the AS number to use for the remote end of the session + without explicitly setting it via the ASN field. Limited to: + internal - if the neighbor's ASN is different than MyASN connection is denied. + external - if the neighbor's ASN is the same as MyASN the connection is denied. + ASN and DynamicASN are mutually exclusive and one of them must be specified. + enum: + - internal + - external + type: string + ebgpMultiHop: + description: To set if the BGPPeer is multi-hops away. Needed for FRR mode only. + type: boolean + enableGracefulRestart: + description: |- + EnableGracefulRestart allows BGP peer to continue to forward data packets + along known routes while the routing protocol information is being + restored. This field is immutable because it requires restart of the BGP + session. Supported for FRR mode only. + type: boolean + x-kubernetes-validations: + - message: EnableGracefulRestart cannot be changed after creation + rule: self == oldSelf + holdTime: + description: Requested BGP hold time, per RFC4271. 
+ type: string + interface: + description: |- + Interface is the node interface over which the unnumbered BGP peering will + be established. No API validation takes place as that string value + represents an interface name on the host and if user provides an invalid + value, only the actual BGP session will not be established. + Address and Interface are mutually exclusive and one of them must be specified. + type: string + keepaliveTime: + description: Requested BGP keepalive time, per RFC4271. + type: string + myASN: + description: AS number to use for the local end of the session. + format: int32 + maximum: 4294967295 + minimum: 0 + type: integer + nodeSelectors: + description: |- + Only connect to this peer on nodes that match one of these + selectors. + items: + description: |- + A label selector is a label query over a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector matches all objects. A null + label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + type: array + password: + description: Authentication password for routers enforcing TCP MD5 authenticated sessions + type: string + passwordSecret: + description: |- + passwordSecret is name of the authentication secret for BGP Peer. + the secret must be of type "kubernetes.io/basic-auth", and created in the + same namespace as the MetalLB deployment. The password is stored in the + secret as the key "password". + properties: + name: + description: name is unique within a namespace to reference a secret resource. + type: string + namespace: + description: namespace defines the space within which the secret name must be unique. + type: string + type: object + x-kubernetes-map-type: atomic + peerASN: + description: |- + AS number to expect from the remote end of the session. + ASN and DynamicASN are mutually exclusive and one of them must be specified. + format: int32 + maximum: 4294967295 + minimum: 0 + type: integer + peerAddress: + description: Address to dial when establishing the session. + type: string + peerPort: + default: 179 + description: Port to dial when establishing the session. + maximum: 16384 + minimum: 1 + type: integer + routerID: + description: BGP router ID to advertise to the peer + type: string + sourceAddress: + description: Source address to use when establishing the session. 
+ type: string + vrf: + description: |- + To set if we want to peer with the BGPPeer using an interface belonging to + a host vrf + type: string + required: + - myASN + type: object + status: + description: BGPPeerStatus defines the observed state of Peer. + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +# Source: metallb/charts/crds/templates/crds.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + name: communities.metallb.io +spec: + group: metallb.io + names: + kind: Community + listKind: CommunityList + plural: communities + singular: community + scope: Namespaced + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: |- + Community is a collection of aliases for communities. + Users can define named aliases to be used in the BGPPeer CRD. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: CommunitySpec defines the desired state of Community. + properties: + communities: + items: + properties: + name: + description: The name of the alias for the community. + type: string + value: + description: |- + The BGP community value corresponding to the given name. 
Can be a standard community of the form 1234:1234 + or a large community of the form large:1234:1234:1234. + type: string + type: object + type: array + type: object + status: + description: CommunityStatus defines the observed state of Community. + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +# Source: metallb/charts/crds/templates/crds.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + name: configurationstates.metallb.io +spec: + group: metallb.io + names: + kind: ConfigurationState + listKind: ConfigurationStateList + plural: configurationstates + singular: configurationstate + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.result + name: Result + type: string + - jsonPath: .status.errorSummary + name: ErrorSummary + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1beta1 + schema: + openAPIV3Schema: + description: |- + ConfigurationState is a status-only CRD that reports configuration validation results from MetalLB components. + Labels: + - metallb.io/component-type: "controller" or "speaker" + - metallb.io/node-name: node name (only for speaker) + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + status: + description: ConfigurationStateStatus defines the observed state of ConfigurationState. + properties: + conditions: + description: Conditions contains the status conditions from the reconcilers running in this component. + items: + description: Condition contains details for one aspect of the current state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. 
+ enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + errorSummary: + description: |- + ErrorSummary contains the aggregated error messages from reconciliation failures. + This field is empty when Result is "Valid". + type: string + result: + description: Result indicates the configuration validation result. + enum: + - Valid + - Invalid + - Unknown + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +# Source: metallb/charts/crds/templates/crds.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + name: ipaddresspools.metallb.io +spec: + group: metallb.io + names: + kind: IPAddressPool + listKind: IPAddressPoolList + plural: ipaddresspools + singular: ipaddresspool + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.autoAssign + name: Auto Assign + type: boolean + - jsonPath: .spec.avoidBuggyIPs + name: Avoid Buggy IPs + type: boolean + - jsonPath: .spec.addresses + name: Addresses + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: |- + IPAddressPool represents a pool of IP addresses that can be allocated + to LoadBalancer services. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: IPAddressPoolSpec defines the desired state of IPAddressPool. + properties: + addresses: + description: |- + A list of IP address ranges over which MetalLB has authority. + You can list multiple ranges in a single pool, they will all share the + same settings. Each range can be either a CIDR prefix, or an explicit + start-end range of IPs. + items: + type: string + type: array + autoAssign: + default: true + description: |- + AutoAssign flag used to prevent MetallB from automatic allocation + for a pool. + type: boolean + avoidBuggyIPs: + default: false + description: |- + AvoidBuggyIPs prevents addresses ending with .0 and .255 + to be used by a pool. + type: boolean + serviceAllocation: + description: |- + AllocateTo makes ip pool allocation to specific namespace and/or service. + The controller will use the pool with lowest value of priority in case of + multiple matches. A pool with no priority set will be used only if the + pools with priority can't be used. If multiple matching IPAddressPools are + available it will check for the availability of IPs sorting the matching + IPAddressPools by priority, starting from the highest to the lowest. If + multiple IPAddressPools have the same priority, choice will be random. + properties: + namespaceSelectors: + description: |- + NamespaceSelectors list of label selectors to select namespace(s) for ip pool, + an alternative to using namespace list. 
+ items: + description: |- + A label selector is a label query over a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector matches all objects. A null + label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + type: array + namespaces: + description: Namespaces list of namespace(s) on which ip pool can be attached. + items: + type: string + type: array + priority: + description: Priority priority given for ip pool while ip allocation on a service. 
+ type: integer + serviceSelectors: + description: |- + ServiceSelectors list of label selector to select service(s) for which ip pool + can be used for ip allocation. + items: + description: |- + A label selector is a label query over a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector matches all objects. A null + label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + type: array + type: object + required: + - addresses + type: object + status: + description: IPAddressPoolStatus defines the observed state of IPAddressPool. 
+ properties: + assignedIPv4: + description: AssignedIPv4 is the number of assigned IPv4 addresses. + format: int64 + type: integer + assignedIPv6: + description: AssignedIPv6 is the number of assigned IPv6 addresses. + format: int64 + type: integer + availableIPv4: + description: AvailableIPv4 is the number of available IPv4 addresses. + format: int64 + type: integer + availableIPv6: + description: AvailableIPv6 is the number of available IPv6 addresses. + format: int64 + type: integer + required: + - assignedIPv4 + - assignedIPv6 + - availableIPv4 + - availableIPv6 + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} +--- +# Source: metallb/charts/crds/templates/crds.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + name: l2advertisements.metallb.io +spec: + group: metallb.io + names: + kind: L2Advertisement + listKind: L2AdvertisementList + plural: l2advertisements + singular: l2advertisement + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.ipAddressPools + name: IPAddressPools + type: string + - jsonPath: .spec.ipAddressPoolSelectors + name: IPAddressPool Selectors + type: string + - jsonPath: .spec.interfaces + name: Interfaces + type: string + - jsonPath: .spec.nodeSelectors + name: Node Selectors + priority: 10 + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: |- + L2Advertisement allows to advertise the LoadBalancer IPs provided + by the selected pools via L2. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: L2AdvertisementSpec defines the desired state of L2Advertisement. + properties: + interfaces: + description: |- + A list of interfaces to announce from. The LB IP will be announced only from these interfaces. + If the field is not set, we advertise from all the interfaces on the host. + items: + type: string + type: array + ipAddressPoolSelectors: + description: |- + A selector for the IPAddressPools which would get advertised via this advertisement. + If no IPAddressPool is selected by this or by the list, the advertisement is applied to all the IPAddressPools. + items: + description: |- + A label selector is a label query over a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector matches all objects. A null + label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. 
If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + type: array + ipAddressPools: + description: The list of IPAddressPools to advertise via this advertisement, selected by name. + items: + type: string + type: array + nodeSelectors: + description: NodeSelectors allows to limit the nodes to announce as next hops for the LoadBalancer IP. When empty, all the nodes having are announced as next hops. + items: + description: |- + A label selector is a label query over a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector matches all objects. A null + label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. 
If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + type: array + type: object + status: + description: L2AdvertisementStatus defines the observed state of L2Advertisement. + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +# Source: metallb/charts/crds/templates/crds.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + name: servicebgpstatuses.metallb.io +spec: + group: metallb.io + names: + kind: ServiceBGPStatus + listKind: ServiceBGPStatusList + plural: servicebgpstatuses + singular: servicebgpstatus + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.node + name: Node + type: string + - jsonPath: .status.serviceName + name: Service Name + type: string + - jsonPath: .status.serviceNamespace + name: Service Namespace + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: ServiceBGPStatus exposes the BGP peers a service is configured to be advertised to, per relevant node. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. 
+ Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ServiceBGPStatusSpec defines the desired state of ServiceBGPStatus. + type: object + status: + description: MetalLBServiceBGPStatus defines the observed state of ServiceBGPStatus. + properties: + node: + description: Node indicates the node announcing the service. + type: string + x-kubernetes-validations: + - message: Value is immutable + rule: self == oldSelf + peers: + description: |- + Peers indicate the BGP peers for which the service is configured to be advertised to. + The service being actually advertised to a given peer depends on the session state and is not indicated here. + items: + type: string + type: array + serviceName: + description: ServiceName indicates the service this status represents. + type: string + x-kubernetes-validations: + - message: Value is immutable + rule: self == oldSelf + serviceNamespace: + description: ServiceNamespace indicates the namespace of the service. 
+ type: string + x-kubernetes-validations: + - message: Value is immutable + rule: self == oldSelf + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +# Source: metallb/charts/crds/templates/crds.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + name: servicel2statuses.metallb.io +spec: + group: metallb.io + names: + kind: ServiceL2Status + listKind: ServiceL2StatusList + plural: servicel2statuses + singular: servicel2status + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.node + name: Allocated Node + type: string + - jsonPath: .status.serviceName + name: Service Name + type: string + - jsonPath: .status.serviceNamespace + name: Service Namespace + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: ServiceL2Status reveals the actual traffic status of loadbalancer services in layer2 mode. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ServiceL2StatusSpec defines the desired state of ServiceL2Status. + type: object + status: + description: MetalLBServiceL2Status defines the observed state of ServiceL2Status. 
+ properties: + interfaces: + description: Interfaces indicates the interfaces that receive the directed traffic + items: + description: InterfaceInfo defines interface info of layer2 announcement. + properties: + name: + description: Name the name of network interface card + type: string + type: object + type: array + node: + description: Node indicates the node that receives the directed traffic + type: string + x-kubernetes-validations: + - message: Value is immutable + rule: self == oldSelf + serviceName: + description: ServiceName indicates the service this status represents + type: string + x-kubernetes-validations: + - message: Value is immutable + rule: self == oldSelf + serviceNamespace: + description: ServiceNamespace indicates the namespace of the service + type: string + x-kubernetes-validations: + - message: Value is immutable + rule: self == oldSelf + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +# Source: metallb/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: metallb:controller + labels: + helm.sh/chart: metallb-0.15.3 + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/version: "v0.15.3" + app.kubernetes.io/managed-by: Helm +rules: +- apiGroups: [""] + resources: ["services", "namespaces"] + verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: ["nodes"] + verbs: ["list"] +- apiGroups: [""] + resources: ["services/status"] + verbs: ["update"] +- apiGroups: [""] + resources: ["events"] + verbs: ["create", "patch"] +- apiGroups: ["admissionregistration.k8s.io"] + resources: ["validatingwebhookconfigurations"] + resourceNames: ["metallb-webhook-configuration"] + verbs: ["create", "delete", "get", "list", "patch", "update", "watch"] +- apiGroups: ["admissionregistration.k8s.io"] + resources: ["validatingwebhookconfigurations"] + verbs: ["list", "watch"] +- apiGroups: ["apiextensions.k8s.io"] + resources: 
["customresourcedefinitions"] + resourceNames: ["bfdprofiles.metallb.io","bgpadvertisements.metallb.io", + "bgppeers.metallb.io","ipaddresspools.metallb.io","l2advertisements.metallb.io","communities.metallb.io","configurationstates.metallb.io"] + verbs: ["create", "delete", "get", "list", "patch", "update", "watch"] +- apiGroups: ["apiextensions.k8s.io"] + resources: ["customresourcedefinitions"] + verbs: ["list", "watch"] +- apiGroups: ["metallb.io"] + resources: ["configurationstates"] + verbs: ["create", "delete", "get", "list", "patch", "update", "watch"] +- apiGroups: ["metallb.io"] + resources: ["configurationstates/status"] + verbs: ["get", "patch", "update"] +--- +# Source: metallb/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: metallb:speaker + labels: + helm.sh/chart: metallb-0.15.3 + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/version: "v0.15.3" + app.kubernetes.io/managed-by: Helm +rules: +- apiGroups: [""] + resources: ["services", "endpoints", "nodes", "namespaces"] + verbs: ["get", "list", "watch"] +- apiGroups: ["discovery.k8s.io"] + resources: ["endpointslices"] + verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: ["events"] + verbs: ["create", "patch"] +- apiGroups: ["metallb.io"] + resources: ["servicel2statuses","servicel2statuses/status","configurationstates","configurationstates/status"] + verbs: ["*"] +--- +# Source: metallb/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: metallb:controller + labels: + helm.sh/chart: metallb-0.15.3 + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/version: "v0.15.3" + app.kubernetes.io/managed-by: Helm +subjects: +- kind: ServiceAccount + name: metallb-controller + namespace: metallb-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: metallb:controller +--- +# Source: 
metallb/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: metallb:speaker + labels: + helm.sh/chart: metallb-0.15.3 + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/version: "v0.15.3" + app.kubernetes.io/managed-by: Helm +subjects: +- kind: ServiceAccount + name: metallb-speaker + namespace: metallb-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: metallb:speaker +--- +# Source: metallb/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: metallb-pod-lister + namespace: "metallb-system" + labels: + helm.sh/chart: metallb-0.15.3 + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/version: "v0.15.3" + app.kubernetes.io/managed-by: Helm +rules: +- apiGroups: [""] + resources: ["pods"] + verbs: ["list", "get"] +- apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "list", "watch"] +- apiGroups: ["metallb.io"] + resources: ["bfdprofiles"] + verbs: ["get", "list", "watch"] +- apiGroups: ["metallb.io"] + resources: ["bgppeers"] + verbs: ["get", "list", "watch"] +- apiGroups: ["metallb.io"] + resources: ["l2advertisements"] + verbs: ["get", "list", "watch"] +- apiGroups: ["metallb.io"] + resources: ["bgpadvertisements"] + verbs: ["get", "list", "watch"] +- apiGroups: ["metallb.io"] + resources: ["ipaddresspools"] + verbs: ["get", "list", "watch"] +- apiGroups: ["metallb.io"] + resources: ["communities"] + verbs: ["get", "list", "watch"] +- apiGroups: ["metallb.io"] + resources: ["servicebgpstatuses","servicebgpstatuses/status"] + verbs: ["*"] +--- +# Source: metallb/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: metallb-controller + namespace: "metallb-system" + labels: + helm.sh/chart: metallb-0.15.3 + app.kubernetes.io/name: 
metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/version: "v0.15.3" + app.kubernetes.io/managed-by: Helm +rules: +- apiGroups: [""] + resources: ["secrets"] + verbs: ["create", "get", "list", "watch"] +- apiGroups: [""] + resources: ["secrets"] + resourceNames: ["metallb-memberlist"] + verbs: ["list"] +- apiGroups: ["apps"] + resources: ["deployments"] + resourceNames: ["metallb-controller"] + verbs: ["get"] +- apiGroups: [""] + resources: ["secrets"] + verbs: ["create", "delete", "get", "list", "patch", "update", "watch"] +- apiGroups: ["metallb.io"] + resources: ["ipaddresspools"] + verbs: ["get", "list", "watch"] +- apiGroups: ["metallb.io"] + resources: ["ipaddresspools/status"] + verbs: ["update"] +- apiGroups: ["metallb.io"] + resources: ["bgppeers"] + verbs: ["get", "list"] +- apiGroups: ["metallb.io"] + resources: ["bgpadvertisements"] + verbs: ["get", "list"] +- apiGroups: ["metallb.io"] + resources: ["l2advertisements"] + verbs: ["get", "list"] +- apiGroups: ["metallb.io"] + resources: ["communities"] + verbs: ["get", "list","watch"] +- apiGroups: ["metallb.io"] + resources: ["bfdprofiles"] + verbs: ["get", "list","watch"] +--- +# Source: metallb/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: metallb-pod-lister + namespace: "metallb-system" + labels: + helm.sh/chart: metallb-0.15.3 + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/version: "v0.15.3" + app.kubernetes.io/managed-by: Helm +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: metallb-pod-lister +subjects: +- kind: ServiceAccount + name: metallb-speaker +--- +# Source: metallb/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: metallb-controller + namespace: "metallb-system" + labels: + helm.sh/chart: metallb-0.15.3 + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/version: 
"v0.15.3" + app.kubernetes.io/managed-by: Helm +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: metallb-controller +subjects: +- kind: ServiceAccount + name: metallb-controller +--- +# Source: metallb/templates/webhooks.yaml +apiVersion: v1 +kind: Service +metadata: + name: metallb-webhook-service + namespace: "metallb-system" + labels: + helm.sh/chart: metallb-0.15.3 + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/version: "v0.15.3" + app.kubernetes.io/managed-by: Helm +spec: + ports: + - port: 443 + targetPort: 9443 + selector: + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/component: controller +--- +# Source: metallb/templates/speaker.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: metallb-speaker + namespace: "metallb-system" + labels: + helm.sh/chart: metallb-0.15.3 + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/version: "v0.15.3" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: speaker +spec: + updateStrategy: + type: RollingUpdate + selector: + matchLabels: + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/component: speaker + template: + metadata: + labels: + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/component: speaker + spec: + serviceAccountName: metallb-speaker + terminationGracePeriodSeconds: 0 + hostNetwork: true + volumes: + - name: memberlist + secret: + secretName: metallb-memberlist + defaultMode: 420 + - name: metallb-excludel2 + configMap: + defaultMode: 256 + name: metallb-excludel2 + - name: frr-sockets + emptyDir: {} + - name: frr-startup + configMap: + name: metallb-frr-startup + - name: frr-conf + emptyDir: {} + - name: reloader + emptyDir: {} + - name: metrics + emptyDir: {} + - name: frr-tmp + emptyDir: {} + - name: frr-lib + emptyDir: {} + - name: frr-log + 
emptyDir: {} + initContainers: + # Copies the initial config files with the right permissions to the shared volume. + - name: cp-frr-files + image: quay.io/frrouting/frr:10.4.1 + securityContext: + runAsUser: 100 + runAsGroup: 101 + command: ["/bin/sh", "-c", "cp -rLf /tmp/frr/* /etc/frr/"] + volumeMounts: + - name: frr-startup + mountPath: /tmp/frr + - name: frr-conf + mountPath: /etc/frr + # Copies the reloader to the shared volume between the speaker and reloader. + - name: cp-reloader + image: quay.io/metallb/speaker:v0.15.3 + command: ["/cp-tool","/frr-reloader.sh","/etc/frr_reloader/frr-reloader.sh"] + volumeMounts: + - name: reloader + mountPath: /etc/frr_reloader + # Copies the metrics exporter + - name: cp-metrics + image: quay.io/metallb/speaker:v0.15.3 + command: ["/cp-tool","/frr-metrics","/etc/frr_metrics/frr-metrics"] + volumeMounts: + - name: metrics + mountPath: /etc/frr_metrics + shareProcessNamespace: true + containers: + - name: speaker + image: quay.io/metallb/speaker:v0.15.3 + args: + - --port=7472 + - --log-level=info + env: + - name: METALLB_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: METALLB_HOST + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: METALLB_ML_BIND_ADDR + valueFrom: + fieldRef: + fieldPath: status.podIP + + - name: METALLB_ML_LABELS + value: "app.kubernetes.io/name=metallb,app.kubernetes.io/component=speaker" + - name: METALLB_ML_BIND_PORT + value: "7946" + - name: METALLB_ML_SECRET_KEY_PATH + value: "/etc/ml_secret_key" + - name: FRR_CONFIG_FILE + value: /etc/frr_reloader/frr.conf + - name: FRR_RELOADER_PID_FILE + value: /etc/frr_reloader/reloader.pid + - name: METALLB_BGP_TYPE + value: frr + - name: METALLB_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + ports: + - name: monitoring + containerPort: 7472 + - name: memberlist-tcp + containerPort: 7946 + protocol: TCP + - name: memberlist-udp + containerPort: 7946 + protocol: UDP + livenessProbe: + httpGet: + path: /metrics + 
port: monitoring + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 1 + successThreshold: 1 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /metrics + port: monitoring + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 1 + successThreshold: 1 + failureThreshold: 3 + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + add: + - NET_RAW + volumeMounts: + - name: memberlist + mountPath: /etc/ml_secret_key + - name: reloader + mountPath: /etc/frr_reloader + - name: metallb-excludel2 + mountPath: /etc/metallb + - name: frr + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + add: + - NET_ADMIN + - NET_RAW + - SYS_ADMIN + - NET_BIND_SERVICE + image: quay.io/frrouting/frr:10.4.1 + env: + - name: TINI_SUBREAPER + value: "true" + volumeMounts: + - name: frr-sockets + mountPath: /var/run/frr + - name: frr-conf + mountPath: /etc/frr + - name: frr-tmp + mountPath: /var/tmp/frr + - name: frr-lib + mountPath: /var/lib/frr + # The command is FRR's default entrypoint & waiting for the log file to appear and tailing it. + # If the log file isn't created in 60 seconds the tail fails and the container is restarted. + # This workaround is needed to have the frr logs as part of kubectl logs -c frr < speaker_pod_name >. 
+        command:
+        - /bin/sh
+        - -c
+        - |
+          /sbin/tini -- /usr/lib/frr/docker-start &
+          attempts=0
+          until [[ -f /etc/frr/frr.log || $attempts -eq 60 ]]; do
+            sleep 1
+            attempts=$(( $attempts + 1 ))
+          done
+          tail -f /etc/frr/frr.log
+        livenessProbe:
+          httpGet:
+            # FIX: probe path must be absolute; "livez" (no leading slash) does not
+            # form a valid probe URL — the startupProbe below already uses "/livez".
+            path: /livez
+            port: 7473
+          initialDelaySeconds: 10
+          periodSeconds: 10
+          timeoutSeconds: 1
+          successThreshold: 1
+          failureThreshold: 3
+        startupProbe:
+          httpGet:
+            path: /livez
+            port: 7473
+          failureThreshold: 30
+          periodSeconds: 5
+      # Reloader sidecar: re-applies FRR config when the speaker rewrites it.
+      - name: reloader
+        image: quay.io/frrouting/frr:10.4.1
+        securityContext:
+          readOnlyRootFilesystem: true
+          allowPrivilegeEscalation: false
+        command: ["/etc/frr_reloader/frr-reloader.sh"]
+        volumeMounts:
+        - name: frr-sockets
+          mountPath: /var/run/frr
+        - name: frr-conf
+          mountPath: /etc/frr
+        - name: reloader
+          mountPath: /etc/frr_reloader
+        - name: frr-log
+          mountPath: /var/log/frr
+      # Metrics exporter sidecar: serves FRR metrics (and /livez) on 7473.
+      - name: frr-metrics
+        image: quay.io/frrouting/frr:10.4.1
+        securityContext:
+          readOnlyRootFilesystem: true
+          allowPrivilegeEscalation: false
+        command: ["/etc/frr_metrics/frr-metrics"]
+        args:
+        - --metrics-port=7473
+        env:
+        - name: VTYSH_HISTFILE
+          value: /dev/null
+        ports:
+        - containerPort: 7473
+          name: frrmetrics
+        volumeMounts:
+        - name: frr-sockets
+          mountPath: /var/run/frr
+        - name: frr-conf
+          mountPath: /etc/frr
+        - name: metrics
+          mountPath: /etc/frr_metrics
+      nodeSelector:
+        "kubernetes.io/os": linux
+      tolerations:
+      - key: node-role.kubernetes.io/master
+        effect: NoSchedule
+        operator: Exists
+      - key: node-role.kubernetes.io/control-plane
+        effect: NoSchedule
+        operator: Exists
+---
+# Source: metallb/templates/controller.yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: metallb-controller
+  namespace: "metallb-system"
+  labels:
+    helm.sh/chart: metallb-0.15.3
+    app.kubernetes.io/name: metallb
+    app.kubernetes.io/instance: metallb
+    app.kubernetes.io/version: "v0.15.3"
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/component: controller
+spec:
+  strategy:
+    type: 
RollingUpdate + selector: + matchLabels: + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/component: controller + template: + metadata: + labels: + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/component: controller + spec: + serviceAccountName: metallb-controller + terminationGracePeriodSeconds: 0 + securityContext: + fsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + containers: + - name: controller + image: quay.io/metallb/controller:v0.15.3 + args: + - --port=7472 + - --log-level=info + - --webhook-mode=enabled + - --tls-min-version=VersionTLS12 + env: + - name: METALLB_ML_SECRET_NAME + value: metallb-memberlist + - name: METALLB_DEPLOYMENT + value: metallb-controller + - name: METALLB_BGP_TYPE + value: frr + ports: + - name: monitoring + containerPort: 7472 + - containerPort: 9443 + name: webhook-server + protocol: TCP + volumeMounts: + - mountPath: /tmp/k8s-webhook-server/serving-certs + name: cert + readOnly: true + livenessProbe: + httpGet: + path: /metrics + port: monitoring + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 1 + successThreshold: 1 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /metrics + port: monitoring + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 1 + successThreshold: 1 + failureThreshold: 3 + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + nodeSelector: + "kubernetes.io/os": linux + volumes: + - name: cert + secret: + defaultMode: 420 + secretName: metallb-webhook-cert +--- +# Source: metallb/templates/webhooks.yaml +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + name: metallb-webhook-configuration + labels: + helm.sh/chart: metallb-0.15.3 + app.kubernetes.io/name: metallb + app.kubernetes.io/instance: metallb + app.kubernetes.io/version: "v0.15.3" + app.kubernetes.io/managed-by: Helm 
+webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: metallb-webhook-service + namespace: metallb-system + path: /validate-metallb-io-v1beta2-bgppeer + failurePolicy: Fail + name: bgppeervalidationwebhook.metallb.io + rules: + - apiGroups: + - metallb.io + apiVersions: + - v1beta2 + operations: + - CREATE + - UPDATE + resources: + - bgppeers + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: metallb-webhook-service + namespace: metallb-system + path: /validate-metallb-io-v1beta1-ipaddresspool + failurePolicy: Fail + name: ipaddresspoolvalidationwebhook.metallb.io + rules: + - apiGroups: + - metallb.io + apiVersions: + - v1beta1 + operations: + - CREATE + - UPDATE + resources: + - ipaddresspools + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: metallb-webhook-service + namespace: metallb-system + path: /validate-metallb-io-v1beta1-bgpadvertisement + failurePolicy: Fail + name: bgpadvertisementvalidationwebhook.metallb.io + rules: + - apiGroups: + - metallb.io + apiVersions: + - v1beta1 + operations: + - CREATE + - UPDATE + resources: + - bgpadvertisements + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: metallb-webhook-service + namespace: metallb-system + path: /validate-metallb-io-v1beta1-community + failurePolicy: Fail + name: communityvalidationwebhook.metallb.io + rules: + - apiGroups: + - metallb.io + apiVersions: + - v1beta1 + operations: + - CREATE + - UPDATE + resources: + - communities + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: metallb-webhook-service + namespace: metallb-system + path: /validate-metallb-io-v1beta1-bfdprofile + failurePolicy: Fail + name: bfdprofilevalidationwebhook.metallb.io + rules: + - apiGroups: + - metallb.io + apiVersions: + - v1beta1 + operations: + - CREATE + - DELETE + resources: + - bfdprofiles + sideEffects: None +- 
admissionReviewVersions: + - v1 + clientConfig: + service: + name: metallb-webhook-service + namespace: metallb-system + path: /validate-metallb-io-v1beta1-l2advertisement + failurePolicy: Fail + name: l2advertisementvalidationwebhook.metallb.io + rules: + - apiGroups: + - metallb.io + apiVersions: + - v1beta1 + operations: + - CREATE + - UPDATE + resources: + - l2advertisements + sideEffects: None diff --git a/infrastructure/metallb/namespace.yaml b/infrastructure/metallb/namespace.yaml new file mode 100644 index 0000000..02b2add --- /dev/null +++ b/infrastructure/metallb/namespace.yaml @@ -0,0 +1,5 @@ +# infrastructure/metallb/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: metallb-system diff --git a/infrastructure/metallb/patches/node-placement.yaml b/infrastructure/metallb/patches/node-placement.yaml new file mode 100644 index 0000000..e32337e --- /dev/null +++ b/infrastructure/metallb/patches/node-placement.yaml @@ -0,0 +1,30 @@ +# infrastructure/metallb/patches/node-placement.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: metallb-controller + namespace: metallb-system +spec: + template: + spec: + containers: + - name: controller + args: + - --port=7472 + - --log-level=info + - --webhook-mode=enabled + - --tls-min-version=VersionTLS12 + - --lb-class=metallb + nodeSelector: + hardware: rpi5 +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: metallb-speaker + namespace: metallb-system +spec: + template: + spec: + nodeSelector: + hardware: rpi5 diff --git a/infrastructure/traefik/clusterrole.yaml b/infrastructure/traefik/clusterrole.yaml index 52ed126..353eaff 100644 --- a/infrastructure/traefik/clusterrole.yaml +++ b/infrastructure/traefik/clusterrole.yaml @@ -71,9 +71,10 @@ rules: - tlsoptions - tlsstores - serverstransports + - serverstransporttcps - traefikservices + - middlewaretcps verbs: - get - list - watch - diff --git a/infrastructure/traefik/kustomization.yaml b/infrastructure/traefik/kustomization.yaml 
index 1dce445..4e36574 100644 --- a/infrastructure/traefik/kustomization.yaml +++ b/infrastructure/traefik/kustomization.yaml @@ -10,3 +10,4 @@ resources: - clusterrole.yaml - clusterrolebinding.yaml - service.yaml + - traefik-service-lb.yaml diff --git a/infrastructure/traefik/traefik-service-lb.yaml b/infrastructure/traefik/traefik-service-lb.yaml new file mode 100644 index 0000000..e4929f1 --- /dev/null +++ b/infrastructure/traefik/traefik-service-lb.yaml @@ -0,0 +1,24 @@ +# infrastructure/traefik/traefik-service-lb.yaml +apiVersion: v1 +kind: Service +metadata: + name: traefik + namespace: kube-system + annotations: + metallb.universe.tf/address-pool: communication-pool +spec: + type: LoadBalancer + loadBalancerClass: metallb + loadBalancerIP: 192.168.22.9 + ports: + - name: web + port: 80 + targetPort: web + protocol: TCP + - name: websecure + port: 443 + targetPort: websecure + protocol: TCP + selector: + app.kubernetes.io/instance: traefik-kube-system + app.kubernetes.io/name: traefik diff --git a/services/communication/atlasbot-configmap.yaml b/services/communication/atlasbot-configmap.yaml new file mode 100644 index 0000000..92f77ba --- /dev/null +++ b/services/communication/atlasbot-configmap.yaml @@ -0,0 +1,131 @@ +# services/communication/atlasbot-configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: atlasbot +data: + bot.py: | + import json, os, time, collections + from urllib import request, parse, error + + BASE = os.environ.get("MATRIX_BASE", "http://othrys-synapse-matrix-synapse:8008") + USER = os.environ["BOT_USER"] + PASSWORD = os.environ["BOT_PASS"] + ROOM_ALIAS = "#othrys:live.bstein.dev" + OLLAMA_URL = os.environ.get("OLLAMA_URL", "https://chat.ai.bstein.dev/") + MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5-coder:7b-instruct-q4_0") + API_KEY = os.environ.get("CHAT_API_KEY", "") + + def req(method: str, path: str, token: str | None = None, body=None, timeout=60): + url = BASE + path + data = None + headers = {} + if body is not 
None: + data = json.dumps(body).encode() + headers["Content-Type"] = "application/json" + if token: + headers["Authorization"] = f"Bearer {token}" + r = request.Request(url, data=data, headers=headers, method=method) + with request.urlopen(r, timeout=timeout) as resp: + raw = resp.read() + return json.loads(raw.decode()) if raw else {} + + def login() -> str: + payload = { + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": USER}, + "password": PASSWORD, + } + res = req("POST", "/_matrix/client/v3/login", body=payload) + return res["access_token"] + + def resolve_alias(token: str, alias: str) -> str: + enc = parse.quote(alias) + res = req("GET", f"/_matrix/client/v3/directory/room/{enc}", token) + return res["room_id"] + + def join_room(token: str, room: str): + req("POST", f"/_matrix/client/v3/rooms/{parse.quote(room)}/join", token, body={}) + + def send_msg(token: str, room: str, text: str): + path = f"/_matrix/client/v3/rooms/{parse.quote(room)}/send/m.room.message" + req("POST", path, token, body={"msgtype": "m.text", "body": text}) + + history = collections.defaultdict(list) # room_id -> list of str (short transcript) + greeted = set() + + def ollama_reply(room_id: str, prompt: str) -> str: + try: + # Keep short context as plain text transcript + transcript = "\n".join(history[room_id][-12:] + [f"User: {prompt}"]) + payload = {"model": MODEL, "message": transcript} + headers = {"Content-Type": "application/json"} + if API_KEY: + headers["x-api-key"] = API_KEY + r = request.Request(OLLAMA_URL, data=json.dumps(payload).encode(), headers=headers) + with request.urlopen(r, timeout=15) as resp: + data = json.loads(resp.read().decode()) + reply = data.get("message") or data.get("response") or data.get("reply") or "I'm here to help." + history[room_id].append(f"Atlas: {reply}") + return reply + except Exception: + return "Hi! I'm Atlas." 
+ + def sync_loop(token: str, room_id: str): + since = None + while True: + params = {"timeout": 30000} + if since: + params["since"] = since + query = parse.urlencode(params) + try: + res = req("GET", f"/_matrix/client/v3/sync?{query}", token, timeout=35) + except Exception: + time.sleep(5) + continue + since = res.get("next_batch", since) + + # invites + for rid, data in res.get("rooms", {}).get("invite", {}).items(): + try: + join_room(token, rid) + send_msg(token, rid, "Atlas online.") + except Exception: + pass + + # messages + for rid, data in res.get("rooms", {}).get("join", {}).items(): + if rid not in greeted and room_id and rid == room_id: + greeted.add(rid) + send_msg(token, rid, "Atlas online.") + timeline = data.get("timeline", {}).get("events", []) + for ev in timeline: + if ev.get("type") != "m.room.message": + continue + content = ev.get("content", {}) + body = content.get("body", "") + if not body.strip(): + continue + sender = ev.get("sender", "") + if sender == f"@{USER}:live.bstein.dev": + continue + # Only respond if bot is mentioned or in a DM + joined_count = data.get("summary", {}).get("m.joined_member_count") + is_dm = joined_count is not None and joined_count <= 2 + mentioned = f"@{USER}" in body or "atlas" in body.lower() + history[rid].append(f"{sender}: {body}") + if is_dm or mentioned: + reply = ollama_reply(rid, body) + send_msg(token, rid, reply) + + def main(): + token = login() + try: + room_id = resolve_alias(token, ROOM_ALIAS) + join_room(token, room_id) + except Exception: + room_id = None + sync_loop(token, room_id) + + if __name__ == "__main__": + main() diff --git a/services/communication/atlasbot-credentials.yaml b/services/communication/atlasbot-credentials.yaml new file mode 100644 index 0000000..6676d4e --- /dev/null +++ b/services/communication/atlasbot-credentials.yaml @@ -0,0 +1,9 @@ +# services/communication/atlasbot-credentials.yaml +apiVersion: v1 +kind: Secret +metadata: + name: atlasbot-credentials +type: Opaque 
+stringData:
+  # SECURITY(review): plaintext credentials committed to the repository. Anyone
+  # with read access to this repo (and its full git history) can recover these.
+  # Rotate both values and move them to encrypted secret management
+  # (SealedSecrets / SOPS / ExternalSecrets) — TODO confirm rotation.
+  bot-password: "x8eU9xwsjJ2S7Xv1G4mQ"
+  seeder-password: "Qv5sjyH8nD6pPz7Lk3R0"
diff --git a/services/communication/atlasbot-deployment.yaml b/services/communication/atlasbot-deployment.yaml
new file mode 100644
index 0000000..bd39ae7
--- /dev/null
+++ b/services/communication/atlasbot-deployment.yaml
@@ -0,0 +1,61 @@
+# services/communication/atlasbot-deployment.yaml
+# Matrix bot: runs bot.py (mounted from the "atlasbot" ConfigMap) on a
+# stock python:3.11-slim image; credentials come from Secrets, not the image.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: atlasbot
+  namespace: communication
+  labels:
+    app: atlasbot
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: atlasbot
+  template:
+    metadata:
+      labels:
+        app: atlasbot
+    spec:
+      nodeSelector:
+        hardware: rpi5
+      containers:
+      - name: atlasbot
+        image: python:3.11-slim
+        command: ["/bin/sh","-c"]
+        args:
+        - |
+          python /app/bot.py
+        env:
+        - name: MATRIX_BASE
+          value: http://othrys-synapse-matrix-synapse:8008
+        - name: BOT_USER
+          value: atlasbot
+        - name: BOT_PASS
+          valueFrom:
+            secretKeyRef:
+              name: atlasbot-credentials
+              key: bot-password
+        - name: CHAT_API_KEY
+          valueFrom:
+            secretKeyRef:
+              name: chat-ai-keys
+              key: matrix
+        - name: OLLAMA_URL
+          value: https://chat.ai.bstein.dev/
+        - name: OLLAMA_MODEL
+          value: qwen2.5-coder:7b-instruct-q4_0
+        resources:
+          requests:
+            cpu: 100m
+            memory: 256Mi
+          limits:
+            cpu: 500m
+            memory: 512Mi
+        volumeMounts:
+        - name: code
+          mountPath: /app/bot.py
+          subPath: bot.py
+      volumes:
+      - name: code
+        configMap:
+          name: atlasbot
diff --git a/services/communication/chat-ai-keys.yaml b/services/communication/chat-ai-keys.yaml
new file mode 100644
index 0000000..ac6c4e8
--- /dev/null
+++ b/services/communication/chat-ai-keys.yaml
@@ -0,0 +1,9 @@
+# services/communication/chat-ai-keys.yaml
+apiVersion: v1
+kind: Secret
+metadata:
+  name: chat-ai-keys
+  namespace: communication
+type: Opaque
+stringData:
+  # SECURITY(review): API key committed in plaintext — rotate it and move to
+  # encrypted secret management; git history retains the current value.
+  matrix: "3d9b1e5e80f146f2b3f6a9fbe01b7b77"
diff --git a/services/communication/coturn.yaml b/services/communication/coturn.yaml
new file mode 100644
index 0000000..73a4d14
--- /dev/null
+++ 
b/services/communication/coturn.yaml @@ -0,0 +1,323 @@ +# services/communication/coturn.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: coturn + labels: + app: coturn +spec: + replicas: 1 + selector: + matchLabels: + app: coturn + template: + metadata: + labels: + app: coturn + spec: + nodeSelector: + hardware: rpi5 + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 50 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5","rpi4"] + containers: + - name: coturn + image: ghcr.io/coturn/coturn:4.6.2 + command: + - /bin/sh + - -c + - | + exec /usr/bin/turnserver \ + --no-cli \ + --fingerprint \ + --lt-cred-mech \ + --user=livekit:"${TURN_STATIC_AUTH_SECRET}" \ + --realm=live.bstein.dev \ + --listening-port=3478 \ + --tls-listening-port=5349 \ + --min-port=50000 \ + --max-port=50050 \ + --cert=/etc/coturn/tls/tls.crt \ + --pkey=/etc/coturn/tls/tls.key \ + --log-file=stdout \ + --no-software-attribute + env: + - name: TURN_STATIC_AUTH_SECRET + valueFrom: + secretKeyRef: + name: turn-shared-secret + key: TURN_STATIC_AUTH_SECRET + ports: + - name: turn-udp + containerPort: 3478 + protocol: UDP + - name: turn-tcp + containerPort: 3478 + protocol: TCP + - name: turn-tls + containerPort: 5349 + protocol: TCP + volumeMounts: + - name: tls + mountPath: /etc/coturn/tls + readOnly: true + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: "2" + memory: 512Mi + volumes: + - name: tls + secret: + secretName: turn-live-tls +--- +apiVersion: v1 +kind: Service +metadata: + name: coturn + annotations: + metallb.universe.tf/address-pool: communication-pool +spec: + type: LoadBalancer + loadBalancerClass: metallb + loadBalancerIP: 192.168.22.5 + selector: + app: coturn + ports: + - name: turn-udp + port: 3478 + targetPort: 3478 + protocol: UDP + - name: turn-tcp + port: 3478 + targetPort: 3478 + protocol: TCP + - name: turn-tls + port: 5349 + targetPort: 5349 + protocol: TCP + # Expose 
relay range for UDP media + - name: relay-50000 + port: 50000 + targetPort: 50000 + protocol: UDP + - name: relay-50001 + port: 50001 + targetPort: 50001 + protocol: UDP + - name: relay-50002 + port: 50002 + targetPort: 50002 + protocol: UDP + - name: relay-50003 + port: 50003 + targetPort: 50003 + protocol: UDP + - name: relay-50004 + port: 50004 + targetPort: 50004 + protocol: UDP + - name: relay-50005 + port: 50005 + targetPort: 50005 + protocol: UDP + - name: relay-50006 + port: 50006 + targetPort: 50006 + protocol: UDP + - name: relay-50007 + port: 50007 + targetPort: 50007 + protocol: UDP + - name: relay-50008 + port: 50008 + targetPort: 50008 + protocol: UDP + - name: relay-50009 + port: 50009 + targetPort: 50009 + protocol: UDP + - name: relay-50010 + port: 50010 + targetPort: 50010 + protocol: UDP + - name: relay-50011 + port: 50011 + targetPort: 50011 + protocol: UDP + - name: relay-50012 + port: 50012 + targetPort: 50012 + protocol: UDP + - name: relay-50013 + port: 50013 + targetPort: 50013 + protocol: UDP + - name: relay-50014 + port: 50014 + targetPort: 50014 + protocol: UDP + - name: relay-50015 + port: 50015 + targetPort: 50015 + protocol: UDP + - name: relay-50016 + port: 50016 + targetPort: 50016 + protocol: UDP + - name: relay-50017 + port: 50017 + targetPort: 50017 + protocol: UDP + - name: relay-50018 + port: 50018 + targetPort: 50018 + protocol: UDP + - name: relay-50019 + port: 50019 + targetPort: 50019 + protocol: UDP + - name: relay-50020 + port: 50020 + targetPort: 50020 + protocol: UDP + - name: relay-50021 + port: 50021 + targetPort: 50021 + protocol: UDP + - name: relay-50022 + port: 50022 + targetPort: 50022 + protocol: UDP + - name: relay-50023 + port: 50023 + targetPort: 50023 + protocol: UDP + - name: relay-50024 + port: 50024 + targetPort: 50024 + protocol: UDP + - name: relay-50025 + port: 50025 + targetPort: 50025 + protocol: UDP + - name: relay-50026 + port: 50026 + targetPort: 50026 + protocol: UDP + - name: relay-50027 + port: 
50027 + targetPort: 50027 + protocol: UDP + - name: relay-50028 + port: 50028 + targetPort: 50028 + protocol: UDP + - name: relay-50029 + port: 50029 + targetPort: 50029 + protocol: UDP + - name: relay-50030 + port: 50030 + targetPort: 50030 + protocol: UDP + - name: relay-50031 + port: 50031 + targetPort: 50031 + protocol: UDP + - name: relay-50032 + port: 50032 + targetPort: 50032 + protocol: UDP + - name: relay-50033 + port: 50033 + targetPort: 50033 + protocol: UDP + - name: relay-50034 + port: 50034 + targetPort: 50034 + protocol: UDP + - name: relay-50035 + port: 50035 + targetPort: 50035 + protocol: UDP + - name: relay-50036 + port: 50036 + targetPort: 50036 + protocol: UDP + - name: relay-50037 + port: 50037 + targetPort: 50037 + protocol: UDP + - name: relay-50038 + port: 50038 + targetPort: 50038 + protocol: UDP + - name: relay-50039 + port: 50039 + targetPort: 50039 + protocol: UDP + - name: relay-50040 + port: 50040 + targetPort: 50040 + protocol: UDP + - name: relay-50041 + port: 50041 + targetPort: 50041 + protocol: UDP + - name: relay-50042 + port: 50042 + targetPort: 50042 + protocol: UDP + - name: relay-50043 + port: 50043 + targetPort: 50043 + protocol: UDP + - name: relay-50044 + port: 50044 + targetPort: 50044 + protocol: UDP + - name: relay-50045 + port: 50045 + targetPort: 50045 + protocol: UDP + - name: relay-50046 + port: 50046 + targetPort: 50046 + protocol: UDP + - name: relay-50047 + port: 50047 + targetPort: 50047 + protocol: UDP + - name: relay-50048 + port: 50048 + targetPort: 50048 + protocol: UDP + - name: relay-50049 + port: 50049 + targetPort: 50049 + protocol: UDP + - name: relay-50050 + port: 50050 + targetPort: 50050 + protocol: UDP +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: turn-live-cert +spec: + secretName: turn-live-tls + issuerRef: + name: letsencrypt + kind: ClusterIssuer + dnsNames: + - turn.live.bstein.dev diff --git a/services/communication/element-call-config.yaml 
b/services/communication/element-call-config.yaml new file mode 100644 index 0000000..c86bbb6 --- /dev/null +++ b/services/communication/element-call-config.yaml @@ -0,0 +1,25 @@ +# services/communication/element-call-config.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: element-call-config + namespace: communication +data: + config.json: | + { + "default_server_config": { + "m.homeserver": { + "base_url": "https://matrix.live.bstein.dev", + "server_name": "live.bstein.dev" + }, + "m.identity_server": { + "base_url": "https://vector.im" + } + }, + "livekit": { + "livekit_service_url": "https://kit.live.bstein.dev/livekit/jwt" + }, + "branding": { + "app_name": "Othrys Call" + } + } diff --git a/services/communication/element-call-deployment.yaml b/services/communication/element-call-deployment.yaml new file mode 100644 index 0000000..f5752ac --- /dev/null +++ b/services/communication/element-call-deployment.yaml @@ -0,0 +1,78 @@ +# services/communication/element-call-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: element-call + namespace: communication + labels: + app: element-call +spec: + replicas: 1 + selector: + matchLabels: + app: element-call + template: + metadata: + labels: + app: element-call + spec: + nodeSelector: + hardware: rpi5 + containers: + - name: element-call + image: ghcr.io/element-hq/element-call:latest + ports: + - containerPort: 8080 + name: http + volumeMounts: + - name: config + mountPath: /app/config.json + subPath: config.json + volumes: + - name: config + configMap: + name: element-call-config + items: + - key: config.json + path: config.json + optional: false +--- +apiVersion: v1 +kind: Service +metadata: + name: element-call + namespace: communication +spec: + selector: + app: element-call + ports: + - name: http + port: 80 + targetPort: 8080 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: element-call + namespace: communication + annotations: + kubernetes.io/ingress.class: 
traefik + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" + cert-manager.io/cluster-issuer: letsencrypt +spec: + tls: + - hosts: + - call.live.bstein.dev + secretName: call-live-tls + rules: + - host: call.live.bstein.dev + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: element-call + port: + number: 80 diff --git a/services/communication/element-rendered.yaml b/services/communication/element-rendered.yaml new file mode 100644 index 0000000..c0b03c1 --- /dev/null +++ b/services/communication/element-rendered.yaml @@ -0,0 +1,223 @@ +--- +# Source: element-web/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: othrys-element-element-web + labels: + helm.sh/chart: element-web-1.4.26 + app.kubernetes.io/name: element-web + app.kubernetes.io/instance: othrys-element + app.kubernetes.io/version: "1.12.6" + app.kubernetes.io/managed-by: Helm +--- +# Source: element-web/templates/configuration-nginx.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: othrys-element-element-web-nginx + labels: + helm.sh/chart: element-web-1.4.26 + app.kubernetes.io/name: element-web + app.kubernetes.io/instance: othrys-element + app.kubernetes.io/version: "1.12.6" + app.kubernetes.io/managed-by: Helm +data: + default.conf: | + server { + listen 8080; + listen [::]:8080; + server_name localhost; + + root /usr/share/nginx/html; + index index.html; + + add_header X-Frame-Options SAMEORIGIN; + add_header X-Content-Type-Options nosniff; + add_header X-XSS-Protection "1; mode=block"; + add_header Content-Security-Policy "frame-ancestors 'self'"; + + # Set no-cache for the index.html only so that browsers always check for a new copy of Element Web. 
+ location = /index.html { + add_header Cache-Control "no-cache"; + } + + # redirect server error pages to the static page /50x.html + # + error_page 500 502 503 504 /50x.html; + } +--- +# Source: element-web/templates/configuration.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: othrys-element-element-web + labels: + helm.sh/chart: element-web-1.4.26 + app.kubernetes.io/name: element-web + app.kubernetes.io/instance: othrys-element + app.kubernetes.io/version: "1.12.6" + app.kubernetes.io/managed-by: Helm +data: + config.json: | + {"brand":"Othrys","default_server_config":{"m.homeserver":{"base_url":"https://matrix.live.bstein.dev","server_name":"live.bstein.dev"},"m.identity_server":{"base_url":"https://vector.im"}},"default_theme":"dark","disable_custom_urls":true,"disable_login_language_selector":true,"disable_guests":false,"show_labs_settings":true,"features":{"feature_group_calls":true,"feature_video_rooms":true,"feature_element_call_video_rooms":true},"room_directory":{"servers":["live.bstein.dev"]},"jitsi":{},"element_call":{"url":"https://call.live.bstein.dev","participant_limit":16,"brand":"Othrys Call"}} +--- +# Source: element-web/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: othrys-element-element-web + labels: + helm.sh/chart: element-web-1.4.26 + app.kubernetes.io/name: element-web + app.kubernetes.io/instance: othrys-element + app.kubernetes.io/version: "1.12.6" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + app.kubernetes.io/name: element-web + app.kubernetes.io/instance: othrys-element +--- +# Source: element-web/templates/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: othrys-element-element-web + labels: + helm.sh/chart: element-web-1.4.26 + app.kubernetes.io/name: element-web + app.kubernetes.io/instance: othrys-element + app.kubernetes.io/version: "1.12.6" + 
app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: element-web + app.kubernetes.io/instance: othrys-element + template: + metadata: + annotations: + checksum/config: manual-rtc-enable-1 + checksum/config-nginx: 085061d0925f4840c3770233509dc0b00fe8fa1a5fef8bf282a514fd101c76fa + labels: + app.kubernetes.io/name: element-web + app.kubernetes.io/instance: othrys-element + spec: + serviceAccountName: othrys-element-element-web + securityContext: + {} + containers: + - name: element-web + securityContext: + {} + image: "ghcr.io/element-hq/element-web:v1.12.6" + imagePullPolicy: IfNotPresent + env: + - name: ELEMENT_WEB_PORT + value: '8080' + ports: + - name: http + containerPort: 8080 + protocol: TCP + livenessProbe: + httpGet: + path: / + port: http + readinessProbe: + httpGet: + path: / + port: http + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 100m + memory: 256Mi + volumeMounts: + - mountPath: /app/config.json + name: config + subPath: config.json + - mountPath: /etc/nginx/conf.d/config.json + name: config-nginx + subPath: config.json + volumes: + - name: config + configMap: + name: othrys-element-element-web + - name: config-nginx + configMap: + name: othrys-element-element-web-nginx + nodeSelector: + hardware: rpi5 + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - preference: + matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + - rpi4 + weight: 50 +--- +# Source: element-web/templates/ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: othrys-element-element-web + labels: + helm.sh/chart: element-web-1.4.26 + app.kubernetes.io/name: element-web + app.kubernetes.io/instance: othrys-element + app.kubernetes.io/version: "1.12.6" + app.kubernetes.io/managed-by: Helm + annotations: + cert-manager.io/cluster-issuer: letsencrypt + traefik.ingress.kubernetes.io/router.entrypoints: websecure +spec: + 
ingressClassName: traefik + tls: + - hosts: + - "live.bstein.dev" + secretName: live-othrys-tls + rules: + - host: "live.bstein.dev" + http: + paths: + - path: / + backend: + service: + name: othrys-element-element-web + port: + number: 80 + pathType: Prefix +--- +# Source: element-web/templates/tests/test-connection.yaml +apiVersion: v1 +kind: Pod +metadata: + name: "othrys-element-element-web-test-connection" + labels: + helm.sh/chart: element-web-1.4.26 + app.kubernetes.io/name: element-web + app.kubernetes.io/instance: othrys-element + app.kubernetes.io/version: "1.12.6" + app.kubernetes.io/managed-by: Helm + annotations: + "helm.sh/hook": test-success +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['othrys-element-element-web:80'] + restartPolicy: Never diff --git a/services/communication/guest-name-job.yaml b/services/communication/guest-name-job.yaml new file mode 100644 index 0000000..fcd44e7 --- /dev/null +++ b/services/communication/guest-name-job.yaml @@ -0,0 +1,89 @@ +# services/communication/guest-name-job.yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: guest-name-randomizer + namespace: communication +spec: + schedule: "*/1 * * * *" + jobTemplate: + spec: + template: + spec: + restartPolicy: OnFailure + containers: + - name: rename + image: python:3.11-slim + env: + - name: SYNAPSE_BASE + value: http://othrys-synapse-matrix-synapse:8008 + - name: SEEDER_USER + value: othrys-seeder + - name: SEEDER_PASS + valueFrom: + secretKeyRef: + name: atlasbot-credentials + key: seeder-password + command: + - /bin/sh + - -c + - | + set -euo pipefail + pip install --no-cache-dir requests >/dev/null + python - <<'PY' + import os, random, requests, urllib.parse + + ADJ = ["brisk","calm","eager","gentle","merry","nifty","rapid","sunny","witty","zesty"] + NOUN = ["otter","falcon","comet","ember","grove","harbor","meadow","raven","river","summit"] + + BASE = os.environ["SYNAPSE_BASE"] + OTHRYS = 
"!orejZnVfvbAmwQDYba:live.bstein.dev" + + def login(user, password): + r = requests.post(f"{BASE}/_matrix/client/v3/login", json={ + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": user}, + "password": password, + }) + r.raise_for_status() + return r.json()["access_token"] + + def list_guests(token): + headers = {"Authorization": f"Bearer {token}"} + users = [] + from_token = None + while True: + url = f"{BASE}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100" + if from_token: + url += f"&from={from_token}" + res = requests.get(url, headers=headers) + res.raise_for_status() + data = res.json() + for u in data.get("users", []): + disp = u.get("displayname", "") + if u.get("is_guest") and (not disp or disp.isdigit()): + users.append(u["name"]) + from_token = data.get("next_token") + if not from_token: + break + return users + + def set_displayname(token, user_id, name): + headers = {"Authorization": f"Bearer {token}"} + payload = {"displayname": name} + # Update global profile + r = requests.put(f"{BASE}/_matrix/client/v3/profile/{urllib.parse.quote(user_id)}/displayname", headers=headers, json=payload) + r.raise_for_status() + # Update Othrys member event so clients see the change quickly + state_url = f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(OTHRYS)}/state/m.room.member/{urllib.parse.quote(user_id)}" + r2 = requests.get(state_url, headers=headers) + content = r2.json() if r2.status_code == 200 else {"membership": "join"} + content["displayname"] = name + requests.put(state_url, headers=headers, json=content) + + token = login(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"]) + guests = list_guests(token) + for g in guests: + new = f"{random.choice(ADJ)}-{random.choice(NOUN)}" + set_displayname(token, g, new) + PY diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml new file mode 100644 index 0000000..9cd7f38 --- /dev/null +++ 
b/services/communication/kustomization.yaml @@ -0,0 +1,24 @@ +# services/communication/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: communication +resources: + - namespace.yaml + - synapse-rendered.yaml + - element-rendered.yaml + - livekit-config.yaml + - livekit.yaml + - coturn.yaml + - livekit-token-deployment.yaml + - livekit-ingress.yaml + - livekit-middlewares.yaml + - element-call-config.yaml + - element-call-deployment.yaml + - pin-othrys-job.yaml + - guest-name-job.yaml + - chat-ai-keys.yaml + - atlasbot-credentials.yaml + - atlasbot-configmap.yaml + - atlasbot-deployment.yaml + - seed-othrys-room.yaml + - wellknown.yaml diff --git a/services/communication/livekit-config.yaml b/services/communication/livekit-config.yaml new file mode 100644 index 0000000..e9c13bb --- /dev/null +++ b/services/communication/livekit-config.yaml @@ -0,0 +1,21 @@ +# services/communication/livekit-config.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: livekit-config +data: + livekit.yaml: | + port: 7880 + rtc: + udp_port: 7882 + tcp_port: 7881 + use_external_ip: true + turn_servers: + - host: turn.live.bstein.dev + port: 5349 + protocol: tls + - host: turn.live.bstein.dev + port: 3478 + protocol: udp + room: + auto_create: true diff --git a/services/communication/livekit-ingress.yaml b/services/communication/livekit-ingress.yaml new file mode 100644 index 0000000..796eb3d --- /dev/null +++ b/services/communication/livekit-ingress.yaml @@ -0,0 +1,28 @@ +# services/communication/livekit-ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: livekit-ingress + namespace: communication + annotations: + kubernetes.io/ingress.class: traefik + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" + traefik.ingress.kubernetes.io/router.middlewares: communication-livekit-sfu-strip@kubernetescrd + cert-manager.io/cluster-issuer: letsencrypt +spec: 
+ tls: + - hosts: + - kit.live.bstein.dev + secretName: kit-live-tls + rules: + - host: kit.live.bstein.dev + http: + paths: + - path: /livekit/sfu + pathType: Prefix + backend: + service: + name: livekit + port: + number: 7880 diff --git a/services/communication/livekit-middlewares.yaml b/services/communication/livekit-middlewares.yaml new file mode 100644 index 0000000..49a3e8f --- /dev/null +++ b/services/communication/livekit-middlewares.yaml @@ -0,0 +1,48 @@ +# services/communication/livekit-middlewares.yaml +apiVersion: traefik.io/v1alpha1 +kind: Middleware +metadata: + name: livekit-sfu-strip + namespace: communication +spec: + stripPrefix: + prefixes: + - /livekit/sfu +--- +apiVersion: traefik.io/v1alpha1 +kind: Middleware +metadata: + name: livekit-jwt-strip + namespace: communication +spec: + stripPrefix: + prefixes: + - /livekit/jwt +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: livekit-jwt-ingress + namespace: communication + annotations: + kubernetes.io/ingress.class: traefik + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" + traefik.ingress.kubernetes.io/router.middlewares: communication-livekit-jwt-strip@kubernetescrd + cert-manager.io/cluster-issuer: letsencrypt +spec: + tls: + - hosts: + - kit.live.bstein.dev + secretName: kit-live-tls + rules: + - host: kit.live.bstein.dev + http: + paths: + - path: /livekit/jwt + pathType: Prefix + backend: + service: + name: livekit-token-service + port: + number: 8080 diff --git a/services/communication/livekit-token-deployment.yaml b/services/communication/livekit-token-deployment.yaml new file mode 100644 index 0000000..f9d1a87 --- /dev/null +++ b/services/communication/livekit-token-deployment.yaml @@ -0,0 +1,69 @@ +# services/communication/livekit-token-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: livekit-token-service + labels: + app: livekit-token-service +spec: + replicas: 1 + selector: 
+ matchLabels: + app: livekit-token-service + template: + metadata: + labels: + app: livekit-token-service + spec: + nodeSelector: + hardware: rpi5 + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 50 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5","rpi4"] + hostAliases: + - ip: 10.43.60.6 + hostnames: + - live.bstein.dev + containers: + - name: token-service + image: ghcr.io/element-hq/lk-jwt-service:0.3.0 + env: + - name: LIVEKIT_URL + value: wss://kit.live.bstein.dev/livekit/sfu + - name: LIVEKIT_KEY + value: primary + - name: LIVEKIT_SECRET + valueFrom: + secretKeyRef: + name: livekit-api + key: primary + - name: LIVEKIT_FULL_ACCESS_HOMESERVERS + value: live.bstein.dev + ports: + - containerPort: 8080 + name: http + resources: + requests: + cpu: 50m + memory: 128Mi + limits: + cpu: 300m + memory: 256Mi +--- +apiVersion: v1 +kind: Service +metadata: + name: livekit-token-service +spec: + selector: + app: livekit-token-service + ports: + - name: http + port: 8080 + targetPort: 8080 diff --git a/services/communication/livekit.yaml b/services/communication/livekit.yaml new file mode 100644 index 0000000..1f3d7e9 --- /dev/null +++ b/services/communication/livekit.yaml @@ -0,0 +1,120 @@ +# services/communication/livekit.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: livekit + labels: + app: livekit +spec: + replicas: 1 + selector: + matchLabels: + app: livekit + template: + metadata: + labels: + app: livekit + spec: + enableServiceLinks: false + nodeSelector: + hardware: rpi5 + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 50 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5","rpi4"] + containers: + - name: livekit + image: livekit/livekit-server:v1.9.0 + command: + - /bin/sh + - -c + - | + set -euo pipefail + umask 077 + printf "%s: %s\n" "${LIVEKIT_API_KEY_ID}" "${LIVEKIT_API_SECRET}" > 
/var/run/livekit/keys + chmod 600 /var/run/livekit/keys + exec /livekit-server --config /etc/livekit/livekit.yaml --key-file /var/run/livekit/keys + env: + - name: LIVEKIT_API_KEY_ID + value: primary + - name: LIVEKIT_API_SECRET + valueFrom: + secretKeyRef: + name: livekit-api + key: primary + - name: LIVEKIT_RTC__TURN_SERVERS_0__USERNAME + value: livekit + - name: LIVEKIT_RTC__TURN_SERVERS_0__CREDENTIAL + valueFrom: + secretKeyRef: + name: turn-shared-secret + key: TURN_STATIC_AUTH_SECRET + - name: LIVEKIT_RTC__TURN_SERVERS_1__USERNAME + value: livekit + - name: LIVEKIT_RTC__TURN_SERVERS_1__CREDENTIAL + valueFrom: + secretKeyRef: + name: turn-shared-secret + key: TURN_STATIC_AUTH_SECRET + ports: + - containerPort: 7880 + name: http + protocol: TCP + - containerPort: 7881 + name: tcp-media + protocol: TCP + - containerPort: 7882 + name: udp-media + protocol: UDP + volumeMounts: + - name: config + mountPath: /etc/livekit + - name: runtime-keys + mountPath: /var/run/livekit + resources: + requests: + cpu: 500m + memory: 512Mi + limits: + cpu: "2" + memory: 1Gi + volumes: + - name: config + configMap: + name: livekit-config + items: + - key: livekit.yaml + path: livekit.yaml + - name: runtime-keys + emptyDir: {} +--- +apiVersion: v1 +kind: Service +metadata: + name: livekit + annotations: + metallb.universe.tf/address-pool: communication-pool +spec: + type: LoadBalancer + loadBalancerClass: metallb + loadBalancerIP: 192.168.22.6 + selector: + app: livekit + ports: + - name: http + port: 7880 + targetPort: 7880 + protocol: TCP + - name: tcp-media + port: 7881 + targetPort: 7881 + protocol: TCP + - name: udp-media + port: 7882 + targetPort: 7882 + protocol: UDP diff --git a/services/communication/namespace.yaml b/services/communication/namespace.yaml new file mode 100644 index 0000000..d566429 --- /dev/null +++ b/services/communication/namespace.yaml @@ -0,0 +1,5 @@ +# services/communication/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: 
communication diff --git a/services/communication/pin-othrys-job.yaml b/services/communication/pin-othrys-job.yaml new file mode 100644 index 0000000..a3178e2 --- /dev/null +++ b/services/communication/pin-othrys-job.yaml @@ -0,0 +1,68 @@ +# services/communication/pin-othrys-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: pin-othrys-invite + namespace: communication +spec: + ttlSecondsAfterFinished: 3600 + template: + spec: + restartPolicy: OnFailure + containers: + - name: pin + image: python:3.11-slim + env: + - name: SYNAPSE_BASE + value: http://othrys-synapse-matrix-synapse:8008 + - name: SEEDER_USER + value: othrys-seeder + - name: SEEDER_PASS + valueFrom: + secretKeyRef: + name: atlasbot-credentials + key: seeder-password + command: + - /bin/sh + - -c + - | + set -euo pipefail + pip install --no-cache-dir requests >/dev/null + python - <<'PY' + import requests, urllib.parse, os + + BASE = os.environ["SYNAPSE_BASE"] + def login(user, password): + r = requests.post(f"{BASE}/_matrix/client/v3/login", json={ + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": user}, + "password": password, + }) + r.raise_for_status() + return r.json()["access_token"] + + def resolve(alias, token): + enc = urllib.parse.quote(alias) + r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers={"Authorization": f"Bearer {token}"}) + r.raise_for_status() + return r.json()["room_id"] + + def send(room_id, token, body): + r = requests.post(f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message", + headers={"Authorization": f"Bearer {token}"}, + json={"msgtype": "m.text", "body": body}) + r.raise_for_status() + return r.json()["event_id"] + + def pin(room_id, token, event_id): + r = requests.put(f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.pinned_events", + headers={"Authorization": f"Bearer {token}"}, + json={"pinned": [event_id]}) + r.raise_for_status() + + token = 
login(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"]) + room_id = resolve("#othrys:live.bstein.dev", token) + msg = "Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join and choose 'Continue' -> 'Join as guest'." + eid = send(room_id, token, msg) + pin(room_id, token, eid) + PY diff --git a/services/communication/seed-othrys-room.yaml b/services/communication/seed-othrys-room.yaml new file mode 100644 index 0000000..a3d4a1d --- /dev/null +++ b/services/communication/seed-othrys-room.yaml @@ -0,0 +1,135 @@ +# services/communication/seed-othrys-room.yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: seed-othrys-room + namespace: communication +spec: + schedule: "*/10 * * * *" + concurrencyPolicy: Forbid + jobTemplate: + spec: + template: + spec: + restartPolicy: OnFailure + containers: + - name: seed + image: python:3.11-slim + env: + - name: SYNAPSE_BASE + value: http://othrys-synapse-matrix-synapse:8008 + - name: REG_SECRET + valueFrom: + secretKeyRef: + name: othrys-synapse-matrix-synapse + key: config.yaml + - name: SEEDER_USER + value: othrys-seeder + - name: SEEDER_PASS + valueFrom: + secretKeyRef: + name: atlasbot-credentials + key: seeder-password + - name: BOT_USER + value: atlasbot + - name: BOT_PASS + valueFrom: + secretKeyRef: + name: atlasbot-credentials + key: bot-password + command: + - /bin/sh + - -c + - | + set -euo pipefail + pip install --no-cache-dir requests pyyaml matrix-synapse >/dev/null + python - <<'PY' + import os, subprocess, requests, yaml + + BASE = os.environ["SYNAPSE_BASE"] + CONFIG = "/config/config.yaml" + + def register(user, password, admin=False): + args = ["register_new_matrix_user", "-c", CONFIG, "-u", user, "-p", password] + if admin: + args.append("-a") + args.append(BASE) + res = subprocess.run(args, capture_output=True, text=True) + if res.returncode not in (0, 1): # 1 = already exists + raise SystemExit(f"register {user} failed: {res.returncode} {res.stderr}") + + def 
login(user, password): + r = requests.post(f"{BASE}/_matrix/client/v3/login", json={ + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": user}, + "password": password, + }) + if r.status_code != 200: + raise SystemExit(f"login failed: {r.status_code} {r.text}") + return r.json()["access_token"] + + def ensure_room(token): + headers = {"Authorization": f"Bearer {token}"} + alias = "#othrys:live.bstein.dev" + alias_enc = "%23othrys%3Alive.bstein.dev" + exists = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{alias_enc}", headers=headers) + if exists.status_code == 200: + room_id = exists.json()["room_id"] + else: + create = requests.post(f"{BASE}/_matrix/client/v3/createRoom", headers=headers, json={ + "preset": "public_chat", + "name": "Othrys", + "room_alias_name": "othrys", + "initial_state": [], + "power_level_content_override": {"events_default": 0, "users_default": 0, "state_default": 50}, + }) + if create.status_code not in (200, 409): + raise SystemExit(f"create room failed: {create.status_code} {create.text}") + exists = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{alias_enc}", headers=headers) + room_id = exists.json()["room_id"] + state_events = [ + ("m.room.join_rules", {"join_rule": "public"}), + ("m.room.guest_access", {"guest_access": "can_join"}), + ("m.room.history_visibility", {"history_visibility": "shared"}), + ("m.room.canonical_alias", {"alias": alias}), + ] + for ev_type, content in state_events: + requests.put(f"{BASE}/_matrix/client/v3/rooms/{room_id}/state/{ev_type}", headers=headers, json=content) + requests.put(f"{BASE}/_matrix/client/v3/directory/list/room/{room_id}", headers=headers, json={"visibility": "public"}) + return room_id + + def join_user(token, room_id, user_id): + headers = {"Authorization": f"Bearer {token}"} + requests.post(f"{BASE}/_synapse/admin/v1/join/{room_id}", headers=headers, json={"user_id": user_id}) + + def join_all_locals(token, room_id): + headers = {"Authorization": 
f"Bearer {token}"} + users = [] + from_token = None + while True: + url = f"{BASE}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100" + if from_token: + url += f"&from={from_token}" + res = requests.get(url, headers=headers).json() + users.extend([u["name"] for u in res.get("users", [])]) + from_token = res.get("next_token") + if not from_token: + break + for uid in users: + join_user(token, room_id, uid) + + register(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"], admin=True) + register(os.environ["BOT_USER"], os.environ["BOT_PASS"], admin=False) + token = login(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"]) + room_id = ensure_room(token) + join_user(token, room_id, f"@{os.environ['BOT_USER']}:live.bstein.dev") + join_all_locals(token, room_id) + PY + volumeMounts: + - name: synapse-config + mountPath: /config + readOnly: true + volumes: + - name: synapse-config + secret: + secretName: othrys-synapse-matrix-synapse diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml new file mode 100644 index 0000000..db24fd4 --- /dev/null +++ b/services/communication/synapse-rendered.yaml @@ -0,0 +1,1155 @@ +--- +# Source: matrix-synapse/charts/redis/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +automountServiceAccountToken: true +metadata: + name: othrys-synapse-redis + namespace: "communication" + labels: + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: redis + helm.sh/chart: redis-17.17.1 +--- +# Source: matrix-synapse/templates/secrets.yaml +apiVersion: v1 +kind: Secret +metadata: + name: othrys-synapse-matrix-synapse + labels: + helm.sh/chart: matrix-synapse-3.12.17 + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/version: "1.144.0" + app.kubernetes.io/managed-by: Helm +stringData: + config.yaml: | + ## Registration ## + + registration_shared_secret: 
"PlxXRFAiRfLDp8RbAS6aHN7b" + + ## API Configuration ## + + ## Database configuration ## + + database: + name: "psycopg2" + args: + user: "synapse" + password: "@@POSTGRES_PASSWORD@@" + database: "synapse" + host: "postgres-service.postgres.svc.cluster.local" + port: 5432 + sslmode: "prefer" + cp_min: 5 + cp_max: 10 + + + ## Redis configuration ## + + redis: + enabled: true + host: "othrys-synapse-redis-master" + port: 6379 + password: "@@REDIS_PASSWORD@@" +--- +# Source: matrix-synapse/charts/redis/templates/configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: othrys-synapse-redis-configuration + namespace: "communication" + labels: + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: redis + helm.sh/chart: redis-17.17.1 +data: + redis.conf: |- + # User-supplied common configuration: + # Enable AOF https://redis.io/topics/persistence#append-only-file + appendonly yes + # Disable RDB persistence, AOF persistence already enabled. 
+ save "" + # End of common configuration + master.conf: |- + dir /data + # User-supplied master configuration: + rename-command FLUSHDB "" + rename-command FLUSHALL "" + # End of master configuration + replica.conf: |- + dir /data + # User-supplied replica configuration: + rename-command FLUSHDB "" + rename-command FLUSHALL "" + # End of replica configuration +--- +# Source: matrix-synapse/charts/redis/templates/health-configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: othrys-synapse-redis-health + namespace: "communication" + labels: + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: redis + helm.sh/chart: redis-17.17.1 +data: + ping_readiness_local.sh: |- + #!/bin/bash + + [[ -f $REDIS_PASSWORD_FILE ]] && export REDIS_PASSWORD="$(< "${REDIS_PASSWORD_FILE}")" + [[ -n "$REDIS_PASSWORD" ]] && export REDISCLI_AUTH="$REDIS_PASSWORD" + response=$( + timeout -s 15 $1 \ + redis-cli \ + -h localhost \ + -p $REDIS_PORT \ + ping + ) + if [ "$?" -eq "124" ]; then + echo "Timed out" + exit 1 + fi + if [ "$response" != "PONG" ]; then + echo "$response" + exit 1 + fi + ping_liveness_local.sh: |- + #!/bin/bash + + [[ -f $REDIS_PASSWORD_FILE ]] && export REDIS_PASSWORD="$(< "${REDIS_PASSWORD_FILE}")" + [[ -n "$REDIS_PASSWORD" ]] && export REDISCLI_AUTH="$REDIS_PASSWORD" + response=$( + timeout -s 15 $1 \ + redis-cli \ + -h localhost \ + -p $REDIS_PORT \ + ping + ) + if [ "$?" 
-eq "124" ]; then + echo "Timed out" + exit 1 + fi + responseFirstWord=$(echo $response | head -n1 | awk '{print $1;}') + if [ "$response" != "PONG" ] && [ "$responseFirstWord" != "LOADING" ] && [ "$responseFirstWord" != "MASTERDOWN" ]; then + echo "$response" + exit 1 + fi + ping_readiness_master.sh: |- + #!/bin/bash + + [[ -f $REDIS_MASTER_PASSWORD_FILE ]] && export REDIS_MASTER_PASSWORD="$(< "${REDIS_MASTER_PASSWORD_FILE}")" + [[ -n "$REDIS_MASTER_PASSWORD" ]] && export REDISCLI_AUTH="$REDIS_MASTER_PASSWORD" + response=$( + timeout -s 15 $1 \ + redis-cli \ + -h $REDIS_MASTER_HOST \ + -p $REDIS_MASTER_PORT_NUMBER \ + ping + ) + if [ "$?" -eq "124" ]; then + echo "Timed out" + exit 1 + fi + if [ "$response" != "PONG" ]; then + echo "$response" + exit 1 + fi + ping_liveness_master.sh: |- + #!/bin/bash + + [[ -f $REDIS_MASTER_PASSWORD_FILE ]] && export REDIS_MASTER_PASSWORD="$(< "${REDIS_MASTER_PASSWORD_FILE}")" + [[ -n "$REDIS_MASTER_PASSWORD" ]] && export REDISCLI_AUTH="$REDIS_MASTER_PASSWORD" + response=$( + timeout -s 15 $1 \ + redis-cli \ + -h $REDIS_MASTER_HOST \ + -p $REDIS_MASTER_PORT_NUMBER \ + ping + ) + if [ "$?" -eq "124" ]; then + echo "Timed out" + exit 1 + fi + responseFirstWord=$(echo $response | head -n1 | awk '{print $1;}') + if [ "$response" != "PONG" ] && [ "$responseFirstWord" != "LOADING" ]; then + echo "$response" + exit 1 + fi + ping_readiness_local_and_master.sh: |- + script_dir="$(dirname "$0")" + exit_status=0 + "$script_dir/ping_readiness_local.sh" $1 || exit_status=$? + "$script_dir/ping_readiness_master.sh" $1 || exit_status=$? + exit $exit_status + ping_liveness_local_and_master.sh: |- + script_dir="$(dirname "$0")" + exit_status=0 + "$script_dir/ping_liveness_local.sh" $1 || exit_status=$? + "$script_dir/ping_liveness_master.sh" $1 || exit_status=$? 
+ exit $exit_status +--- +# Source: matrix-synapse/charts/redis/templates/scripts-configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: othrys-synapse-redis-scripts + namespace: "communication" + labels: + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: redis + helm.sh/chart: redis-17.17.1 +data: + start-master.sh: | + #!/bin/bash + + [[ -f $REDIS_PASSWORD_FILE ]] && export REDIS_PASSWORD="$(< "${REDIS_PASSWORD_FILE}")" + if [[ -f /opt/bitnami/redis/mounted-etc/master.conf ]];then + cp /opt/bitnami/redis/mounted-etc/master.conf /opt/bitnami/redis/etc/master.conf + fi + if [[ -f /opt/bitnami/redis/mounted-etc/redis.conf ]];then + cp /opt/bitnami/redis/mounted-etc/redis.conf /opt/bitnami/redis/etc/redis.conf + fi + ARGS=("--port" "${REDIS_PORT}") + ARGS+=("--requirepass" "${REDIS_PASSWORD}") + ARGS+=("--masterauth" "${REDIS_PASSWORD}") + ARGS+=("--include" "/opt/bitnami/redis/etc/redis.conf") + ARGS+=("--include" "/opt/bitnami/redis/etc/master.conf") + exec redis-server "${ARGS[@]}" +--- +# Source: matrix-synapse/templates/configuration.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: othrys-synapse-matrix-synapse + labels: + helm.sh/chart: matrix-synapse-3.12.17 + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/version: "1.144.0" + app.kubernetes.io/managed-by: Helm +data: + log.yaml: | + version: 1 + formatters: + precise: + format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s- %(message)s' + filters: + context: + (): synapse.util.logcontext.LoggingContextFilter + request: "" + handlers: + console: + class: logging.StreamHandler + formatter: precise + filters: [context] + level: INFO + loggers: + synapse: + level: INFO + root: + level: INFO + handlers: [console] + homeserver.yaml: | + # NOTE: + # Secrets are stored in separate configs to better fit K8s concepts + + ## Server ## + + server_name: 
"live.bstein.dev" + public_baseurl: "https://matrix.live.bstein.dev" + pid_file: /homeserver.pid + web_client: False + soft_file_limit: 0 + log_config: "/synapse/config/log.yaml" + report_stats: false + + instance_map: + main: + host: othrys-synapse-replication + port: 9093 + + ## Ports ## + + listeners: + - port: 8008 + tls: false + bind_addresses: ["::"] + type: http + x_forwarded: true + + resources: + - names: + - client + - federation + compress: false + + - port: 9090 + tls: false + bind_addresses: ["::"] + type: http + + resources: + - names: [metrics] + compress: false + + - port: 9093 + tls: false + bind_addresses: ["::"] + type: http + + resources: + - names: [replication] + compress: false + + ## Files ## + + media_store_path: "/synapse/data/media" + uploads_path: "/synapse/data/uploads" + + ## Registration ## + + enable_registration: false + + ## Metrics ### + + enable_metrics: true + + ## Signing Keys ## + + signing_key_path: "/synapse/keys/signing.key" + + # The trusted servers to download signing keys from. 
+ trusted_key_servers: + - server_name: matrix.org + + ## Workers ## + + ## Extra config ## + + allow_guest_access: true + allow_public_rooms_without_auth: true + auto_join_rooms: + - "#othrys:live.bstein.dev" + autocreate_auto_join_rooms: true + default_room_version: "11" + experimental_features: + msc3266_enabled: true + msc4143_enabled: true + msc4222_enabled: true + max_event_delay_duration: 24h + password_config: + enabled: true + turn_uris: + - "turn:turn.live.bstein.dev:3478?transport=udp" + - "turn:turn.live.bstein.dev:3478?transport=tcp" + - "turns:turn.live.bstein.dev:5349?transport=tcp" + turn_shared_secret: "@@TURN_SECRET@@" + turn_allow_guests: true + turn_user_lifetime: 86400000 + rc_login: + address: + burst_count: 20 + per_second: 5 + account: + burst_count: 20 + per_second: 5 + failed_attempts: + burst_count: 20 + per_second: 5 + rc_message: + per_second: 0.5 + burst_count: 30 + rc_delayed_event_mgmt: + per_second: 1 + burst_count: 20 + room_list_publication_rules: + - action: allow + well_known_client: + "m.homeserver": + "base_url": "https://matrix.live.bstein.dev" + "org.matrix.msc4143.rtc_foci": + - type: "livekit" + livekit_service_url: "https://kit.live.bstein.dev/livekit/jwt" + oidc_enabled: true + oidc_providers: + - allow_existing_users: true + authorization_endpoint: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/auth + client_auth_method: client_secret_post + client_id: synapse + client_secret: "@@OIDC_CLIENT_SECRET@@" + idp_id: keycloak + idp_name: Keycloak + issuer: https://sso.bstein.dev/realms/atlas + scopes: + - openid + - profile + - email + token_endpoint: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/token + user_mapping_provider: + config: + display_name_template: '{{ user.name }}' + localpart_template: '{{ user.preferred_username }}' + userinfo_endpoint: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/userinfo +--- +# Source: matrix-synapse/templates/pvc.yaml +kind: PersistentVolumeClaim 
+apiVersion: v1 +metadata: + name: othrys-synapse-matrix-synapse + labels: + helm.sh/chart: matrix-synapse-3.12.17 + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/version: "1.144.0" + app.kubernetes.io/managed-by: Helm +spec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: "50Gi" + storageClassName: "asteria" +--- +# Source: matrix-synapse/charts/redis/templates/headless-svc.yaml +apiVersion: v1 +kind: Service +metadata: + name: othrys-synapse-redis-headless + namespace: "communication" + labels: + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: redis + helm.sh/chart: redis-17.17.1 + annotations: + +spec: + type: ClusterIP + clusterIP: None + ports: + - name: tcp-redis + port: 6379 + targetPort: redis + selector: + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/name: redis +--- +# Source: matrix-synapse/charts/redis/templates/master/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: othrys-synapse-redis-master + namespace: "communication" + labels: + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: redis + helm.sh/chart: redis-17.17.1 + app.kubernetes.io/component: master +spec: + type: ClusterIP + internalTrafficPolicy: Cluster + sessionAffinity: None + ports: + - name: tcp-redis + port: 6379 + targetPort: redis + nodePort: null + selector: + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/name: redis + app.kubernetes.io/component: master +--- +# Source: matrix-synapse/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: othrys-synapse-matrix-synapse + labels: + helm.sh/chart: matrix-synapse-3.12.17 + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/version: "1.144.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 8008 + 
targetPort: http + protocol: TCP + name: http + selector: + app.kubernetes.io/component: synapse + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse +--- +# Source: matrix-synapse/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: othrys-synapse-replication + labels: + helm.sh/chart: matrix-synapse-3.12.17 + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/version: "1.144.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 9093 + targetPort: replication + protocol: TCP + name: replication + selector: + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/component: synapse +--- +# Source: matrix-synapse/charts/redis/templates/master/application.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: othrys-synapse-redis-master + namespace: "communication" + labels: + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: redis + helm.sh/chart: redis-17.17.1 + app.kubernetes.io/component: master +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/name: redis + app.kubernetes.io/component: master + strategy: + type: RollingUpdate + template: + metadata: + labels: + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: redis + helm.sh/chart: redis-17.17.1 + app.kubernetes.io/component: master + annotations: + checksum/configmap: 86bcc953bb473748a3d3dc60b7c11f34e60c93519234d4c37f42e22ada559d47 + checksum/health: aff24913d801436ea469d8d374b2ddb3ec4c43ee7ab24663d5f8ff1a1b6991a9 + checksum/scripts: 560c33ff34d845009b51830c332aa05fa211444d1877d3526d3599be7543aaa5 + checksum/secret: 44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a + spec: + + securityContext: + fsGroup: 1001 + serviceAccountName: 
othrys-synapse-redis + automountServiceAccountToken: true + affinity: + podAffinity: + + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchLabels: + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/name: redis + app.kubernetes.io/component: master + topologyKey: kubernetes.io/hostname + weight: 1 + nodeAffinity: + + enableServiceLinks: true + terminationGracePeriodSeconds: 30 + containers: + - name: redis + image: docker.io/bitnamilegacy/redis:7.0.12-debian-11-r34 + imagePullPolicy: "IfNotPresent" + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + runAsGroup: 0 + runAsNonRoot: true + runAsUser: 1001 + seccompProfile: + type: RuntimeDefault + command: + - /bin/bash + args: + - -c + - /opt/bitnami/scripts/start-scripts/start-master.sh + env: + - name: BITNAMI_DEBUG + value: "false" + - name: REDIS_REPLICATION_MODE + value: master + - name: ALLOW_EMPTY_PASSWORD + value: "no" + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: synapse-redis + key: redis-password + - name: REDIS_TLS_ENABLED + value: "no" + - name: REDIS_PORT + value: "6379" + ports: + - name: redis + containerPort: 6379 + livenessProbe: + initialDelaySeconds: 20 + periodSeconds: 5 + # One second longer than command timeout should prevent generation of zombie processes. 
+ timeoutSeconds: 6 + successThreshold: 1 + failureThreshold: 5 + exec: + command: + - sh + - -c + - /health/ping_liveness_local.sh 5 + readinessProbe: + initialDelaySeconds: 20 + periodSeconds: 5 + timeoutSeconds: 2 + successThreshold: 1 + failureThreshold: 5 + exec: + command: + - sh + - -c + - /health/ping_readiness_local.sh 1 + resources: + limits: {} + requests: {} + volumeMounts: + - name: start-scripts + mountPath: /opt/bitnami/scripts/start-scripts + - name: health + mountPath: /health + - name: redis-data + mountPath: /data + - name: config + mountPath: /opt/bitnami/redis/mounted-etc + - name: redis-tmp-conf + mountPath: /opt/bitnami/redis/etc/ + - name: tmp + mountPath: /tmp + volumes: + - name: start-scripts + configMap: + name: othrys-synapse-redis-scripts + defaultMode: 0755 + - name: health + configMap: + name: othrys-synapse-redis-health + defaultMode: 0755 + - name: config + configMap: + name: othrys-synapse-redis-configuration + - name: redis-tmp-conf + emptyDir: {} + - name: tmp + emptyDir: {} + - name: redis-data + emptyDir: {} +--- +# Source: matrix-synapse/templates/deployment.yaml +# Server: live.bstein.dev +apiVersion: apps/v1 +kind: Deployment +metadata: + name: othrys-synapse-matrix-synapse + labels: + helm.sh/chart: matrix-synapse-3.12.17 + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/version: "1.144.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: synapse +spec: + replicas: 1 + strategy: + type: RollingUpdate + selector: + matchLabels: + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/component: synapse + template: + metadata: + annotations: + checksum/config: manual-rtc-enable-1 + checksum/secrets: ec9f3b254a562a0f0709461eb74a8cc91b8c1a2fb06be2594a131776c2541773 + labels: + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/component: synapse + spec: 
+ serviceAccountName: default + + securityContext: + fsGroup: 666 + runAsGroup: 666 + runAsUser: 666 + containers: + - name: synapse + command: + - sh + - -c + - | + export POSTGRES_PASSWORD=$(echo "${POSTGRES_PASSWORD:-}" | sed 's/\//\\\//g' | sed 's/\&/\\\&/g') && \ + export REDIS_PASSWORD=$(echo "${REDIS_PASSWORD:-}" | sed 's/\//\\\//g' | sed 's/\&/\\\&/g') && \ + export OIDC_CLIENT_SECRET_ESCAPED=$(echo "${OIDC_CLIENT_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ + export TURN_SECRET_ESCAPED=$(echo "${TURN_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ + cat /synapse/secrets/*.yaml | \ + sed -e "s/@@POSTGRES_PASSWORD@@/${POSTGRES_PASSWORD:-}/" \ + -e "s/@@REDIS_PASSWORD@@/${REDIS_PASSWORD:-}/" \ + > /synapse/config/conf.d/secrets.yaml + + cp /synapse/config/homeserver.yaml /synapse/runtime-config/homeserver.yaml && \ + if [ -n "${OIDC_CLIENT_SECRET_ESCAPED}" ]; then \ + sed -i "s/@@OIDC_CLIENT_SECRET@@/${OIDC_CLIENT_SECRET_ESCAPED}/g" /synapse/runtime-config/homeserver.yaml; \ + fi; \ + if [ -n "${TURN_SECRET_ESCAPED}" ]; then \ + sed -i "s/@@TURN_SECRET@@/${TURN_SECRET_ESCAPED}/g" /synapse/runtime-config/homeserver.yaml; \ + fi + exec python -B -m synapse.app.homeserver \ + -c /synapse/runtime-config/homeserver.yaml \ + -c /synapse/config/conf.d/ + env: + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: synapse-db + key: POSTGRES_PASSWORD + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: synapse-redis + key: redis-password + - name: OIDC_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: synapse-oidc + key: client-secret + - name: TURN_SECRET + valueFrom: + secretKeyRef: + name: turn-shared-secret + key: TURN_STATIC_AUTH_SECRET + image: "ghcr.io/element-hq/synapse:v1.144.0" + imagePullPolicy: IfNotPresent + securityContext: + {} + ports: + - name: http + containerPort: 8008 + protocol: TCP + - name: replication + containerPort: 9093 + protocol: TCP + - name: metrics + containerPort: 9090 + protocol: TCP + livenessProbe: + httpGet: + path: /health 
+ port: http + readinessProbe: + httpGet: + path: /health + port: http + startupProbe: + failureThreshold: 12 + httpGet: + path: /health + port: http + volumeMounts: + - name: config + mountPath: /synapse/config + - name: runtime-config + mountPath: /synapse/runtime-config + - name: tmpconf + mountPath: /synapse/config/conf.d + - name: secrets + mountPath: /synapse/secrets + - name: signingkey + mountPath: /synapse/keys + - name: media + mountPath: /synapse/data + - name: tmpdir + mountPath: /tmp + resources: + limits: + cpu: "2" + memory: 3Gi + requests: + cpu: 500m + memory: 1Gi + volumes: + - name: config + configMap: + name: othrys-synapse-matrix-synapse + - name: secrets + secret: + secretName: othrys-synapse-matrix-synapse + - name: signingkey + secret: + secretName: "othrys-synapse-signingkey" + items: + - key: "signing.key" + path: signing.key + - name: tmpconf + emptyDir: {} + - name: tmpdir + emptyDir: {} + - name: runtime-config + emptyDir: {} + - name: media + persistentVolumeClaim: + claimName: othrys-synapse-matrix-synapse + nodeSelector: + hardware: rpi5 + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - preference: + matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + - rpi4 + weight: 50 +--- +# Source: matrix-synapse/templates/ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: othrys-synapse-matrix-synapse + labels: + helm.sh/chart: matrix-synapse-3.12.17 + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/version: "1.144.0" + app.kubernetes.io/managed-by: Helm + annotations: + cert-manager.io/cluster-issuer: letsencrypt + traefik.ingress.kubernetes.io/router.entrypoints: websecure +spec: + ingressClassName: traefik + tls: + - hosts: + - "matrix.live.bstein.dev" + - "live.bstein.dev" + secretName: matrix-live-tls + rules: + - host: "live.bstein.dev" + http: + paths: + - path: /_matrix + backend: + service: + name: 
othrys-synapse-matrix-synapse + port: + number: 8008 + pathType: Prefix + - path: /.well-known/matrix + backend: + service: + name: othrys-synapse-matrix-synapse + port: + number: 8008 + pathType: Prefix + - host: "matrix.live.bstein.dev" + http: + paths: + - path: /_matrix + backend: + service: + name: othrys-synapse-matrix-synapse + port: + number: 8008 + pathType: Prefix + - path: /_synapse + backend: + service: + name: othrys-synapse-matrix-synapse + port: + number: 8008 + pathType: Prefix + - host: "bstein.dev" + http: + paths: + - path: /.well-known/matrix + backend: + service: + name: othrys-synapse-matrix-synapse + port: + number: 8008 + pathType: Prefix +--- +# Source: matrix-synapse/templates/signing-key-job.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: othrys-synapse-signingkey-job + labels: + helm.sh/chart: matrix-synapse-3.12.17 + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/version: "1.144.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: signingkey-job + annotations: + helm.sh/hook: pre-install + helm.sh/hook-delete-policy: hook-succeeded +--- +# Source: matrix-synapse/templates/signing-key-job.yaml +# Create secret if signing key job is enabled, or if we're running in ArgoCD and we don't have an existing secret +apiVersion: v1 +kind: Secret +metadata: + annotations: + helm.sh/hook: pre-install + helm.sh/hook-delete-policy: never + helm.sh/resource-policy: keep + # If for some reason we didn't detect ArgoCD, but are running in it, we want to make sure we don't delete the secret + argocd.argoproj.io/hook: Skip + name: othrys-synapse-signingkey + labels: + helm.sh/chart: matrix-synapse-3.12.17 + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/version: "1.144.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: signingkey-job +--- +# Source: matrix-synapse/templates/signing-key-job.yaml 
+apiVersion: v1 +kind: ConfigMap +metadata: + name: othrys-synapse-matrix-synapse-scripts + labels: + helm.sh/chart: matrix-synapse-3.12.17 + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/version: "1.144.0" + app.kubernetes.io/managed-by: Helm + annotations: + helm.sh/hook: pre-install + helm.sh/hook-delete-policy: hook-succeeded +data: + signing-key.sh: | + #!/bin/sh + + set -eu + + check_key() { + set +e + + echo "Checking for existing signing key..." + key="$(kubectl get secret "$SECRET_NAME" -o jsonpath="{.data['signing\.key']}" 2> /dev/null)" + [ $? -ne 0 ] && return 1 + [ -z "$key" ] && return 2 + return 0 + } + + create_key() { + echo "Waiting for new signing key to be generated..." + begin=$(date +%s) + end=$((begin + 300)) # 5 minutes + while true; do + [ -f /synapse/keys/signing.key ] && return 0 + [ "$(date +%s)" -gt $end ] && return 1 + sleep 5 + done + } + + store_key() { + echo "Storing signing key in Kubernetes secret..." + kubectl patch secret "$SECRET_NAME" -p "{\"data\":{\"signing.key\":\"$(base64 /synapse/keys/signing.key | tr -d '\n')\"}}" + } + + if check_key; then + echo "Key already in place, exiting." + exit + fi + + if ! create_key; then + echo "Timed out waiting for a signing key to appear." 
+ exit 1 + fi + + store_key +--- +# Source: matrix-synapse/templates/signing-key-job.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: othrys-synapse-signingkey-job + labels: + helm.sh/chart: matrix-synapse-3.12.17 + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/version: "1.144.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: signingkey-job + annotations: + helm.sh/hook: pre-install + helm.sh/hook-delete-policy: hook-succeeded +rules: + - apiGroups: + - "" + resources: + - secrets + resourceNames: + - othrys-synapse-signingkey + verbs: + - get + - update + - patch +--- +# Source: matrix-synapse/templates/signing-key-job.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: othrys-synapse-signingkey-job + labels: + helm.sh/chart: matrix-synapse-3.12.17 + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/version: "1.144.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: signingkey-job + annotations: + helm.sh/hook: pre-install + helm.sh/hook-delete-policy: hook-succeeded +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: othrys-synapse-signingkey-job +subjects: + - kind: ServiceAccount + name: othrys-synapse-signingkey-job + namespace: communication +--- +# Source: matrix-synapse/templates/tests/test-connection.yaml +apiVersion: v1 +kind: Pod +metadata: + name: "othrys-synapse-matrix-synapse-test-connection" + labels: + helm.sh/chart: matrix-synapse-3.12.17 + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/version: "1.144.0" + app.kubernetes.io/managed-by: Helm + annotations: + "helm.sh/hook": test-success +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['othrys-synapse-matrix-synapse:8008/_matrix/client/versions'] + restartPolicy: Never +--- +# Source: 
matrix-synapse/templates/signing-key-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: othrys-synapse-signingkey-job + labels: + helm.sh/chart: matrix-synapse-3.12.17 + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/version: "1.144.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: signingkey-job + annotations: + helm.sh/hook: pre-install + helm.sh/hook-delete-policy: hook-succeeded +spec: + ttlSecondsAfterFinished: 0 + template: + metadata: + labels: + helm.sh/chart: matrix-synapse-3.12.17 + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/version: "1.144.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: signingkey-job + spec: + containers: + - command: + - sh + - -c + - | + echo "Generating signing key..." + if which generate_signing_key.py >/dev/null; then + generate_signing_key.py -o /synapse/keys/signing.key + else + generate_signing_key -o /synapse/keys/signing.key + fi + image: "matrixdotorg/synapse:latest" + imagePullPolicy: IfNotPresent + name: signing-key-generate + resources: + {} + securityContext: + {} + volumeMounts: + - mountPath: /synapse/keys + name: matrix-synapse-keys + - command: + - sh + - -c + - | + printf "Checking rights to update secret... 
" + kubectl auth can-i update secret/${SECRET_NAME} + /scripts/signing-key.sh + env: + - name: SECRET_NAME + value: othrys-synapse-signingkey + image: "bitnami/kubectl:latest" + imagePullPolicy: IfNotPresent + name: signing-key-upload + resources: + {} + securityContext: + {} + volumeMounts: + - mountPath: /scripts + name: scripts + readOnly: true + - mountPath: /synapse/keys + name: matrix-synapse-keys + readOnly: true + securityContext: + {} + restartPolicy: Never + serviceAccount: othrys-synapse-signingkey-job + volumes: + - name: scripts + configMap: + name: othrys-synapse-matrix-synapse-scripts + defaultMode: 0755 + - name: matrix-synapse-keys + emptyDir: {} + parallelism: 1 + completions: 1 + backoffLimit: 1 diff --git a/services/communication/values-element.yaml b/services/communication/values-element.yaml new file mode 100644 index 0000000..9ab91de --- /dev/null +++ b/services/communication/values-element.yaml @@ -0,0 +1,59 @@ +# services/communication/values-element.yaml +replicaCount: 1 + +defaultServer: + url: https://matrix.live.bstein.dev + name: live.bstein.dev + +config: + default_theme: dark + brand: Othrys + disable_custom_urls: true + disable_login_language_selector: true + disable_guests: false + show_labs_settings: true + features: + feature_group_calls: true + feature_video_rooms: true + feature_element_call_video_rooms: true + room_directory: + servers: + - live.bstein.dev + jitsi: {} + element_call: + url: https://call.live.bstein.dev + participant_limit: 16 + brand: Othrys Call + +ingress: + enabled: true + className: traefik + annotations: + cert-manager.io/cluster-issuer: letsencrypt + traefik.ingress.kubernetes.io/router.entrypoints: websecure + hosts: + - live.bstein.dev + tls: + - secretName: live-othrys-tls + hosts: [live.bstein.dev] + +resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + +nodeSelector: + hardware: rpi5 + +affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: 
+ - weight: 50 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5","rpi4"] diff --git a/services/communication/values-synapse.yaml b/services/communication/values-synapse.yaml new file mode 100644 index 0000000..7df16b6 --- /dev/null +++ b/services/communication/values-synapse.yaml @@ -0,0 +1,132 @@ +# services/communication/values-synapse.yaml +serverName: live.bstein.dev +publicServerName: matrix.live.bstein.dev + +config: + publicBaseurl: https://matrix.live.bstein.dev + +externalPostgresql: + host: postgres-service.postgres.svc.cluster.local + port: 5432 + username: synapse + existingSecret: synapse-db + existingSecretPasswordKey: POSTGRES_PASSWORD + database: synapse + +redis: + enabled: true + auth: + enabled: true + existingSecret: synapse-redis + existingSecretPasswordKey: redis-password + +postgresql: + enabled: false + +persistence: + enabled: true + storageClass: asteria + accessMode: ReadWriteOnce + size: 50Gi + +synapse: + podSecurityContext: + fsGroup: 666 + runAsUser: 666 + runAsGroup: 666 + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: "2" + memory: 3Gi + nodeSelector: + hardware: rpi5 + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 50 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5","rpi4"] + +ingress: + enabled: true + className: traefik + annotations: + cert-manager.io/cluster-issuer: letsencrypt + traefik.ingress.kubernetes.io/router.entrypoints: websecure + csHosts: + - matrix.live.bstein.dev + hosts: + - matrix.live.bstein.dev + wkHosts: + - live.bstein.dev + - bstein.dev + tls: + - secretName: matrix-live-tls + hosts: + - matrix.live.bstein.dev + - live.bstein.dev + +extraConfig: + allow_guest_access: true + allow_public_rooms_without_auth: true + auto_join_rooms: + - "#othrys:live.bstein.dev" + autocreate_auto_join_rooms: true + default_room_version: "11" + experimental_features: + msc3266_enabled: true + 
msc4143_enabled: true + msc4222_enabled: true + max_event_delay_duration: 24h + password_config: + enabled: true + oidc_enabled: true + oidc_providers: + - idp_id: keycloak + idp_name: Keycloak + issuer: https://sso.bstein.dev/realms/atlas + client_id: synapse + client_secret: "@@OIDC_CLIENT_SECRET@@" + client_auth_method: client_secret_post + scopes: ["openid", "profile", "email"] + authorization_endpoint: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/auth + token_endpoint: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/token + userinfo_endpoint: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/userinfo + user_mapping_provider: + config: + localpart_template: "{{ user.preferred_username }}" + display_name_template: "{{ user.name }}" + allow_existing_users: true + rc_message: + per_second: 0.5 + burst_count: 30 + rc_delayed_event_mgmt: + per_second: 1 + burst_count: 20 + rc_login: + address: + burst_count: 20 + per_second: 5 + account: + burst_count: 20 + per_second: 5 + failed_attempts: + burst_count: 20 + per_second: 5 + room_list_publication_rules: + - action: allow + well_known_client: + "m.homeserver": + "base_url": "https://matrix.live.bstein.dev" + "org.matrix.msc4143.rtc_foci": + - type: "livekit" + livekit_service_url: "https://kit.live.bstein.dev/livekit/jwt" + +worker: + enabled: false diff --git a/services/communication/wellknown.yaml b/services/communication/wellknown.yaml new file mode 100644 index 0000000..655746a --- /dev/null +++ b/services/communication/wellknown.yaml @@ -0,0 +1,109 @@ +# services/communication/wellknown.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: matrix-wellknown + namespace: communication +data: + client.json: | + { + "m.homeserver": { + "base_url": "https://matrix.live.bstein.dev" + }, + "org.matrix.msc4143.rtc_foci": [ + { + "type": "livekit", + "livekit_service_url": "https://kit.live.bstein.dev/livekit/jwt" + } + ] + } + server.json: | + { + "m.server": "live.bstein.dev:443" 
+ } +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: matrix-wellknown + namespace: communication + labels: + app: matrix-wellknown +spec: + replicas: 1 + selector: + matchLabels: + app: matrix-wellknown + template: + metadata: + labels: + app: matrix-wellknown + spec: + containers: + - name: nginx + image: nginx:1.27-alpine + ports: + - containerPort: 80 + volumeMounts: + - name: wellknown + mountPath: /usr/share/nginx/html/.well-known/matrix/client + subPath: client.json + - name: wellknown + mountPath: /usr/share/nginx/html/.well-known/matrix/server + subPath: server.json + volumes: + - name: wellknown + configMap: + name: matrix-wellknown + items: + - key: client.json + path: client.json + - key: server.json + path: server.json +--- +apiVersion: v1 +kind: Service +metadata: + name: matrix-wellknown + namespace: communication +spec: + selector: + app: matrix-wellknown + ports: + - name: http + port: 80 + targetPort: 80 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: matrix-wellknown + namespace: communication + annotations: + kubernetes.io/ingress.class: traefik + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" + cert-manager.io/cluster-issuer: letsencrypt +spec: + tls: + - hosts: + - live.bstein.dev + secretName: live-othrys-tls + rules: + - host: live.bstein.dev + http: + paths: + - path: /.well-known/matrix/client + pathType: Prefix + backend: + service: + name: matrix-wellknown + port: + number: 80 + - path: /.well-known/matrix/server + pathType: Prefix + backend: + service: + name: matrix-wellknown + port: + number: 80 diff --git a/services/mailu/front-lb.yaml b/services/mailu/front-lb.yaml new file mode 100644 index 0000000..ada16b0 --- /dev/null +++ b/services/mailu/front-lb.yaml @@ -0,0 +1,42 @@ +# services/mailu/front-lb.yaml +apiVersion: v1 +kind: Service +metadata: + name: mailu-front-lb + namespace: mailu-mailserver + annotations: + 
metallb.universe.tf/address-pool: communication-pool +spec: + type: LoadBalancer + loadBalancerClass: metallb + loadBalancerIP: 192.168.22.4 + externalTrafficPolicy: Cluster + selector: + app.kubernetes.io/component: front + app.kubernetes.io/instance: mailu + app.kubernetes.io/name: mailu + ports: + - name: smtp + port: 25 + targetPort: 25 + protocol: TCP + - name: smtps + port: 465 + targetPort: 465 + protocol: TCP + - name: submission + port: 587 + targetPort: 587 + protocol: TCP + - name: imaps + port: 993 + targetPort: 993 + protocol: TCP + - name: pop3s + port: 995 + targetPort: 995 + protocol: TCP + - name: sieve + port: 4190 + targetPort: 4190 + protocol: TCP diff --git a/services/mailu/kustomization.yaml b/services/mailu/kustomization.yaml index 2df7440..a23e0b1 100644 --- a/services/mailu/kustomization.yaml +++ b/services/mailu/kustomization.yaml @@ -13,6 +13,7 @@ resources: - mailu-sync-job.yaml - mailu-sync-cronjob.yaml - mailu-sync-listener.yaml + - front-lb.yaml configMapGenerator: - name: mailu-sync-script -- 2.47.2 From 71c58ee08185fe538061fb8eba0afff9409f9ee8 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 12:11:54 -0300 Subject: [PATCH 115/684] communication: disable livekit room auto-create --- services/communication/livekit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/communication/livekit-config.yaml b/services/communication/livekit-config.yaml index e9c13bb..364132a 100644 --- a/services/communication/livekit-config.yaml +++ b/services/communication/livekit-config.yaml @@ -18,4 +18,4 @@ data: port: 3478 protocol: udp room: - auto_create: true + auto_create: false -- 2.47.2 From 6759817518c68bb5f0298e7170ede2f736b689aa Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 12:15:18 -0300 Subject: [PATCH 116/684] communication: stage guest-helper for prune --- services/communication/guest-helper.yaml | 239 ++++++++++++++++++ services/communication/kustomization.yaml | 2 + 
.../synapse-federation-service.yaml | 15 ++ 3 files changed, 256 insertions(+) create mode 100644 services/communication/guest-helper.yaml create mode 100644 services/communication/synapse-federation-service.yaml diff --git a/services/communication/guest-helper.yaml b/services/communication/guest-helper.yaml new file mode 100644 index 0000000..a390b75 --- /dev/null +++ b/services/communication/guest-helper.yaml @@ -0,0 +1,239 @@ +# services/communication/guest-helper.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: guest-helper +data: + app.py: | + import os, uuid, string, random + import requests + from fastapi import FastAPI, HTTPException, Header + from pydantic import BaseModel + import uvicorn + + BASE = os.environ.get("SYNAPSE_BASE", "http://othrys-synapse-matrix-synapse:8008") + SEED_USER = os.environ["SEEDER_USER"] + SEED_PASS = os.environ["SEEDER_PASS"] + SERVER_NAME = os.environ.get("SERVER_NAME", "live.bstein.dev") + ELEMENT_URL = os.environ.get("ELEMENT_URL", "https://live.bstein.dev") + ADMIN_TOKEN = os.environ.get("ADMIN_TOKEN") + + app = FastAPI(title="Guest Helper", version="0.1.0") + + + class InviteRequest(BaseModel): + room: str # room_id or alias + display_name: str | None = None + + + def login(user, password): + res = requests.post( + f"{BASE}/_matrix/client/v3/login", + json={ + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": user}, + "password": password, + }, + timeout=10, + ) + if res.status_code != 200: + raise HTTPException(status_code=500, detail="seeder login failed") + return res.json()["access_token"] + + + def resolve_room(token, room): + headers = {"Authorization": f"Bearer {token}"} + if room.startswith("!"): + return room + if room.startswith("#"): + alias_enc = requests.utils.requote_uri(room) + r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{alias_enc}", headers=headers, timeout=10) + if r.status_code != 200: + raise HTTPException(status_code=400, detail="room alias not found") + 
return r.json()["room_id"] + raise HTTPException(status_code=400, detail="room must be room_id or alias") + + + def random_pwd(): + alphabet = string.ascii_letters + string.digits + return "".join(random.choice(alphabet) for _ in range(20)) + + + def create_guest(token, display): + uid = f"@guest-{uuid.uuid4().hex[:8]}:{SERVER_NAME}" + pwd = random_pwd() + headers = {"Authorization": f"Bearer {token}"} + body = { + "password": pwd, + "displayname": display or "Guest", + "admin": False, + "deactivated": False, + } + r = requests.put( + f"{BASE}/_synapse/admin/v2/users/{requests.utils.requote_uri(uid)}", + headers=headers, + json=body, + timeout=10, + ) + if r.status_code not in (200, 201): + raise HTTPException(status_code=500, detail=f"user create failed: {r.text}") + return uid, pwd + + + def join_room_as(token, room_id, user_id): + headers = {"Authorization": f"Bearer {token}"} + r = requests.post( + f"{BASE}/_synapse/admin/v1/join/{requests.utils.requote_uri(room_id)}", + headers=headers, + json={"user_id": user_id}, + timeout=10, + ) + if r.status_code not in (200, 202): + raise HTTPException(status_code=500, detail=f"join failed: {r.text}") + + + def login_token(user, password): + r = requests.post( + f"{BASE}/_matrix/client/v3/login", + json={ + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": user}, + "password": password, + }, + timeout=10, + ) + if r.status_code != 200: + raise HTTPException(status_code=500, detail="guest login failed") + data = r.json() + return data["access_token"] + + + @app.post("/invite") + def invite(req: InviteRequest, x_admin_token: str | None = Header(default=None)): + if ADMIN_TOKEN and x_admin_token != ADMIN_TOKEN: + raise HTTPException(status_code=401, detail="unauthorized") + admin_token = login(SEED_USER, SEED_PASS) + room_id = resolve_room(admin_token, req.room) + guest_id, pwd = create_guest(admin_token, req.display_name) + join_room_as(admin_token, room_id, guest_id) + guest_token = 
login_token(guest_id, pwd) + join_url = f"{ELEMENT_URL}/#/room/{room_id}?access_token={guest_token}&user_id={guest_id}" + return { + "user_id": guest_id, + "password": pwd, + "room_id": room_id, + "access_token": guest_token, + "join_url": join_url, + } + + + def main(): + uvicorn.run(app, host="0.0.0.0", port=8081) + + + if __name__ == "__main__": + main() +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: guest-helper + labels: + app: guest-helper +spec: + replicas: 1 + selector: + matchLabels: + app: guest-helper + template: + metadata: + labels: + app: guest-helper + spec: + nodeSelector: + hardware: rpi5 + containers: + - name: api + image: python:3.11-slim + command: + - /bin/sh + - -c + - | + pip install --no-cache-dir fastapi uvicorn requests && \ + python /app/app.py + env: + - name: SYNAPSE_BASE + value: http://othrys-synapse-matrix-synapse:8008 + - name: SEEDER_USER + value: othrys-seeder + - name: SEEDER_PASS + valueFrom: + secretKeyRef: + name: atlasbot-credentials + key: seeder-password + - name: SERVER_NAME + value: live.bstein.dev + - name: ELEMENT_URL + value: https://live.bstein.dev + - name: ADMIN_TOKEN + valueFrom: + secretKeyRef: + name: guest-helper-admin + key: ADMIN_TOKEN + optional: true + ports: + - name: http + containerPort: 8081 + resources: + requests: + cpu: 50m + memory: 128Mi + limits: + cpu: 300m + memory: 256Mi + volumeMounts: + - name: code + mountPath: /app/app.py + subPath: app.py + volumes: + - name: code + configMap: + name: guest-helper +--- +apiVersion: v1 +kind: Service +metadata: + name: guest-helper +spec: + selector: + app: guest-helper + ports: + - name: http + port: 8081 + targetPort: 8081 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: guest-helper + annotations: + kubernetes.io/ingress.class: traefik + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" + cert-manager.io/cluster-issuer: letsencrypt +spec: + tls: + - 
hosts: + - live.bstein.dev + secretName: live-othrys-tls + rules: + - host: live.bstein.dev + http: + paths: + - path: /guest-helper + pathType: Prefix + backend: + service: + name: guest-helper + port: + number: 8081 diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index 9cd7f38..3c5f115 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -22,3 +22,5 @@ resources: - atlasbot-deployment.yaml - seed-othrys-room.yaml - wellknown.yaml + - synapse-federation-service.yaml + - guest-helper.yaml diff --git a/services/communication/synapse-federation-service.yaml b/services/communication/synapse-federation-service.yaml new file mode 100644 index 0000000..5417c9e --- /dev/null +++ b/services/communication/synapse-federation-service.yaml @@ -0,0 +1,15 @@ +# services/communication/synapse-federation-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: synapse-federation +spec: + clusterIP: 10.43.60.60 + selector: + app.kubernetes.io/name: matrix-synapse + app.kubernetes.io/instance: othrys-synapse + ports: + - name: federation + port: 8448 + targetPort: 8008 + protocol: TCP -- 2.47.2 From 5ed650d19c04b644a847607050bca682159e6290 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 12:16:59 -0300 Subject: [PATCH 117/684] communication: prune guest-helper and synapse-federation --- services/communication/guest-helper.yaml | 239 ------------------ services/communication/kustomization.yaml | 2 - .../synapse-federation-service.yaml | 15 -- 3 files changed, 256 deletions(-) delete mode 100644 services/communication/guest-helper.yaml delete mode 100644 services/communication/synapse-federation-service.yaml diff --git a/services/communication/guest-helper.yaml b/services/communication/guest-helper.yaml deleted file mode 100644 index a390b75..0000000 --- a/services/communication/guest-helper.yaml +++ /dev/null @@ -1,239 +0,0 @@ -# 
services/communication/guest-helper.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: guest-helper -data: - app.py: | - import os, uuid, string, random - import requests - from fastapi import FastAPI, HTTPException, Header - from pydantic import BaseModel - import uvicorn - - BASE = os.environ.get("SYNAPSE_BASE", "http://othrys-synapse-matrix-synapse:8008") - SEED_USER = os.environ["SEEDER_USER"] - SEED_PASS = os.environ["SEEDER_PASS"] - SERVER_NAME = os.environ.get("SERVER_NAME", "live.bstein.dev") - ELEMENT_URL = os.environ.get("ELEMENT_URL", "https://live.bstein.dev") - ADMIN_TOKEN = os.environ.get("ADMIN_TOKEN") - - app = FastAPI(title="Guest Helper", version="0.1.0") - - - class InviteRequest(BaseModel): - room: str # room_id or alias - display_name: str | None = None - - - def login(user, password): - res = requests.post( - f"{BASE}/_matrix/client/v3/login", - json={ - "type": "m.login.password", - "identifier": {"type": "m.id.user", "user": user}, - "password": password, - }, - timeout=10, - ) - if res.status_code != 200: - raise HTTPException(status_code=500, detail="seeder login failed") - return res.json()["access_token"] - - - def resolve_room(token, room): - headers = {"Authorization": f"Bearer {token}"} - if room.startswith("!"): - return room - if room.startswith("#"): - alias_enc = requests.utils.requote_uri(room) - r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{alias_enc}", headers=headers, timeout=10) - if r.status_code != 200: - raise HTTPException(status_code=400, detail="room alias not found") - return r.json()["room_id"] - raise HTTPException(status_code=400, detail="room must be room_id or alias") - - - def random_pwd(): - alphabet = string.ascii_letters + string.digits - return "".join(random.choice(alphabet) for _ in range(20)) - - - def create_guest(token, display): - uid = f"@guest-{uuid.uuid4().hex[:8]}:{SERVER_NAME}" - pwd = random_pwd() - headers = {"Authorization": f"Bearer {token}"} - body = { - "password": pwd, - 
"displayname": display or "Guest", - "admin": False, - "deactivated": False, - } - r = requests.put( - f"{BASE}/_synapse/admin/v2/users/{requests.utils.requote_uri(uid)}", - headers=headers, - json=body, - timeout=10, - ) - if r.status_code not in (200, 201): - raise HTTPException(status_code=500, detail=f"user create failed: {r.text}") - return uid, pwd - - - def join_room_as(token, room_id, user_id): - headers = {"Authorization": f"Bearer {token}"} - r = requests.post( - f"{BASE}/_synapse/admin/v1/join/{requests.utils.requote_uri(room_id)}", - headers=headers, - json={"user_id": user_id}, - timeout=10, - ) - if r.status_code not in (200, 202): - raise HTTPException(status_code=500, detail=f"join failed: {r.text}") - - - def login_token(user, password): - r = requests.post( - f"{BASE}/_matrix/client/v3/login", - json={ - "type": "m.login.password", - "identifier": {"type": "m.id.user", "user": user}, - "password": password, - }, - timeout=10, - ) - if r.status_code != 200: - raise HTTPException(status_code=500, detail="guest login failed") - data = r.json() - return data["access_token"] - - - @app.post("/invite") - def invite(req: InviteRequest, x_admin_token: str | None = Header(default=None)): - if ADMIN_TOKEN and x_admin_token != ADMIN_TOKEN: - raise HTTPException(status_code=401, detail="unauthorized") - admin_token = login(SEED_USER, SEED_PASS) - room_id = resolve_room(admin_token, req.room) - guest_id, pwd = create_guest(admin_token, req.display_name) - join_room_as(admin_token, room_id, guest_id) - guest_token = login_token(guest_id, pwd) - join_url = f"{ELEMENT_URL}/#/room/{room_id}?access_token={guest_token}&user_id={guest_id}" - return { - "user_id": guest_id, - "password": pwd, - "room_id": room_id, - "access_token": guest_token, - "join_url": join_url, - } - - - def main(): - uvicorn.run(app, host="0.0.0.0", port=8081) - - - if __name__ == "__main__": - main() ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: guest-helper - labels: - app: 
guest-helper -spec: - replicas: 1 - selector: - matchLabels: - app: guest-helper - template: - metadata: - labels: - app: guest-helper - spec: - nodeSelector: - hardware: rpi5 - containers: - - name: api - image: python:3.11-slim - command: - - /bin/sh - - -c - - | - pip install --no-cache-dir fastapi uvicorn requests && \ - python /app/app.py - env: - - name: SYNAPSE_BASE - value: http://othrys-synapse-matrix-synapse:8008 - - name: SEEDER_USER - value: othrys-seeder - - name: SEEDER_PASS - valueFrom: - secretKeyRef: - name: atlasbot-credentials - key: seeder-password - - name: SERVER_NAME - value: live.bstein.dev - - name: ELEMENT_URL - value: https://live.bstein.dev - - name: ADMIN_TOKEN - valueFrom: - secretKeyRef: - name: guest-helper-admin - key: ADMIN_TOKEN - optional: true - ports: - - name: http - containerPort: 8081 - resources: - requests: - cpu: 50m - memory: 128Mi - limits: - cpu: 300m - memory: 256Mi - volumeMounts: - - name: code - mountPath: /app/app.py - subPath: app.py - volumes: - - name: code - configMap: - name: guest-helper ---- -apiVersion: v1 -kind: Service -metadata: - name: guest-helper -spec: - selector: - app: guest-helper - ports: - - name: http - port: 8081 - targetPort: 8081 ---- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: guest-helper - annotations: - kubernetes.io/ingress.class: traefik - traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.tls: "true" - cert-manager.io/cluster-issuer: letsencrypt -spec: - tls: - - hosts: - - live.bstein.dev - secretName: live-othrys-tls - rules: - - host: live.bstein.dev - http: - paths: - - path: /guest-helper - pathType: Prefix - backend: - service: - name: guest-helper - port: - number: 8081 diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index 3c5f115..9cd7f38 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -22,5 +22,3 
@@ resources: - atlasbot-deployment.yaml - seed-othrys-room.yaml - wellknown.yaml - - synapse-federation-service.yaml - - guest-helper.yaml diff --git a/services/communication/synapse-federation-service.yaml b/services/communication/synapse-federation-service.yaml deleted file mode 100644 index 5417c9e..0000000 --- a/services/communication/synapse-federation-service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -# services/communication/synapse-federation-service.yaml -apiVersion: v1 -kind: Service -metadata: - name: synapse-federation -spec: - clusterIP: 10.43.60.60 - selector: - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - ports: - - name: federation - port: 8448 - targetPort: 8008 - protocol: TCP -- 2.47.2 From a815322f6eacce64e2a71e38d7df819488cd9b7c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 13:25:45 -0300 Subject: [PATCH 118/684] comms: move LiveKit media to UDP 443 --- services/communication/coturn.yaml | 9 +++++++++ services/communication/livekit-config.yaml | 4 ++-- services/communication/livekit.yaml | 13 +++---------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/services/communication/coturn.yaml b/services/communication/coturn.yaml index 73a4d14..9b06a38 100644 --- a/services/communication/coturn.yaml +++ b/services/communication/coturn.yaml @@ -37,6 +37,9 @@ spec: --no-cli \ --fingerprint \ --lt-cred-mech \ + --listening-ip=0.0.0.0 \ + --relay-ip="${POD_IP}" \ + --external-ip="${TURN_PUBLIC_IP}/${POD_IP}" \ --user=livekit:"${TURN_STATIC_AUTH_SECRET}" \ --realm=live.bstein.dev \ --listening-port=3478 \ @@ -48,6 +51,12 @@ spec: --log-file=stdout \ --no-software-attribute env: + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: TURN_PUBLIC_IP + value: "38.28.125.112" - name: TURN_STATIC_AUTH_SECRET valueFrom: secretKeyRef: diff --git a/services/communication/livekit-config.yaml b/services/communication/livekit-config.yaml index 364132a..83be194 100644 --- 
a/services/communication/livekit-config.yaml +++ b/services/communication/livekit-config.yaml @@ -7,8 +7,8 @@ data: livekit.yaml: | port: 7880 rtc: - udp_port: 7882 - tcp_port: 7881 + udp_port: 443 + tcp_port: 0 use_external_ip: true turn_servers: - host: turn.live.bstein.dev diff --git a/services/communication/livekit.yaml b/services/communication/livekit.yaml index 1f3d7e9..4ec5b2d 100644 --- a/services/communication/livekit.yaml +++ b/services/communication/livekit.yaml @@ -65,10 +65,7 @@ spec: - containerPort: 7880 name: http protocol: TCP - - containerPort: 7881 - name: tcp-media - protocol: TCP - - containerPort: 7882 + - containerPort: 443 name: udp-media protocol: UDP volumeMounts: @@ -110,11 +107,7 @@ spec: port: 7880 targetPort: 7880 protocol: TCP - - name: tcp-media - port: 7881 - targetPort: 7881 - protocol: TCP - name: udp-media - port: 7882 - targetPort: 7882 + port: 443 + targetPort: 443 protocol: UDP -- 2.47.2 From ee6bcec3c5156f027ed7d7f40f208d1f1d216f40 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 13:43:24 -0300 Subject: [PATCH 119/684] chat.ai: gate root with API key --- .../chat-ai-gateway-configmap.yaml | 78 +++++++++++++++++++ .../chat-ai-gateway-deployment.yaml | 69 ++++++++++++++++ .../chat-ai-gateway-service.yaml | 13 ++++ services/bstein-dev-home/ingress.yaml | 8 +- services/bstein-dev-home/kustomization.yaml | 3 + .../communication/atlasbot-deployment.yaml | 2 +- services/communication/chat-ai-keys.yaml | 9 --- services/communication/kustomization.yaml | 1 - 8 files changed, 165 insertions(+), 18 deletions(-) create mode 100644 services/bstein-dev-home/chat-ai-gateway-configmap.yaml create mode 100644 services/bstein-dev-home/chat-ai-gateway-deployment.yaml create mode 100644 services/bstein-dev-home/chat-ai-gateway-service.yaml delete mode 100644 services/communication/chat-ai-keys.yaml diff --git a/services/bstein-dev-home/chat-ai-gateway-configmap.yaml b/services/bstein-dev-home/chat-ai-gateway-configmap.yaml new 
file mode 100644 index 0000000..17ed95b --- /dev/null +++ b/services/bstein-dev-home/chat-ai-gateway-configmap.yaml @@ -0,0 +1,78 @@ +# services/bstein-dev-home/chat-ai-gateway-configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: chat-ai-gateway + namespace: bstein-dev-home +data: + gateway.py: | + import json + import os + from http.server import BaseHTTPRequestHandler, HTTPServer + from urllib import request, error + + UPSTREAM = os.environ.get("UPSTREAM_URL", "http://bstein-dev-home-backend/api/chat") + KEY_MATRIX = os.environ.get("CHAT_KEY_MATRIX", "") + KEY_HOMEPAGE = os.environ.get("CHAT_KEY_HOMEPAGE", "") + + ALLOWED = {k for k in (KEY_MATRIX, KEY_HOMEPAGE) if k} + + class Handler(BaseHTTPRequestHandler): + def _send_json(self, code: int, payload: dict): + body = json.dumps(payload).encode() + self.send_response(code) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def do_GET(self): # noqa: N802 + if self.path in ("/healthz", "/"): + return self._send_json(200, {"ok": True}) + return self._send_json(404, {"error": "not_found"}) + + def do_POST(self): # noqa: N802 + if self.path != "/": + return self._send_json(404, {"error": "not_found"}) + + key = self.headers.get("x-api-key", "") + if not key or key not in ALLOWED: + return self._send_json(401, {"error": "unauthorized"}) + + length = int(self.headers.get("content-length", "0") or "0") + raw = self.rfile.read(length) if length else b"{}" + + try: + upstream_req = request.Request( + UPSTREAM, + data=raw, + headers={"Content-Type": "application/json"}, + method="POST", + ) + with request.urlopen(upstream_req, timeout=90) as resp: + data = resp.read() + self.send_response(resp.status) + for k, v in resp.headers.items(): + if k.lower() in ("content-length", "connection", "server", "date"): + continue + self.send_header(k, v) + self.send_header("Content-Length", str(len(data))) + 
self.end_headers() + self.wfile.write(data) + except error.HTTPError as e: + data = e.read() if hasattr(e, "read") else b"" + self.send_response(e.code) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(data))) + self.end_headers() + self.wfile.write(data) + except Exception: + return self._send_json(502, {"error": "bad_gateway"}) + + def main(): + port = int(os.environ.get("PORT", "8080")) + httpd = HTTPServer(("0.0.0.0", port), Handler) + httpd.serve_forever() + + if __name__ == "__main__": + main() diff --git a/services/bstein-dev-home/chat-ai-gateway-deployment.yaml b/services/bstein-dev-home/chat-ai-gateway-deployment.yaml new file mode 100644 index 0000000..7ac6504 --- /dev/null +++ b/services/bstein-dev-home/chat-ai-gateway-deployment.yaml @@ -0,0 +1,69 @@ +# services/bstein-dev-home/chat-ai-gateway-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chat-ai-gateway + namespace: bstein-dev-home +spec: + replicas: 1 + revisionHistoryLimit: 2 + selector: + matchLabels: + app: chat-ai-gateway + template: + metadata: + labels: + app: chat-ai-gateway + spec: + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" + containers: + - name: gateway + image: python:3.11-slim + command: ["/bin/sh","-c"] + args: + - python /app/gateway.py + env: + - name: UPSTREAM_URL + value: http://bstein-dev-home-backend/api/chat + - name: CHAT_KEY_MATRIX + valueFrom: + secretKeyRef: + name: chat-ai-keys-runtime + key: matrix + - name: CHAT_KEY_HOMEPAGE + valueFrom: + secretKeyRef: + name: chat-ai-keys-runtime + key: homepage + ports: + - name: http + containerPort: 8080 + readinessProbe: + httpGet: + path: /healthz + port: http + initialDelaySeconds: 2 + periodSeconds: 5 + livenessProbe: + httpGet: + path: /healthz + port: http + initialDelaySeconds: 10 + periodSeconds: 10 + resources: + requests: + cpu: 20m + memory: 64Mi + limits: + cpu: 200m + memory: 256Mi + volumeMounts: + - 
name: code + mountPath: /app/gateway.py + subPath: gateway.py + volumes: + - name: code + configMap: + name: chat-ai-gateway diff --git a/services/bstein-dev-home/chat-ai-gateway-service.yaml b/services/bstein-dev-home/chat-ai-gateway-service.yaml new file mode 100644 index 0000000..8a71d20 --- /dev/null +++ b/services/bstein-dev-home/chat-ai-gateway-service.yaml @@ -0,0 +1,13 @@ +# services/bstein-dev-home/chat-ai-gateway-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: chat-ai-gateway + namespace: bstein-dev-home +spec: + selector: + app: chat-ai-gateway + ports: + - name: http + port: 80 + targetPort: 8080 diff --git a/services/bstein-dev-home/ingress.yaml b/services/bstein-dev-home/ingress.yaml index 872a0df..1537c94 100644 --- a/services/bstein-dev-home/ingress.yaml +++ b/services/bstein-dev-home/ingress.yaml @@ -32,15 +32,9 @@ spec: - host: chat.ai.bstein.dev http: paths: - - path: /api - pathType: Prefix - backend: - service: - name: bstein-dev-home-backend - port: { number: 80 } - path: / pathType: Prefix backend: service: - name: bstein-dev-home-frontend + name: chat-ai-gateway port: { number: 80 } diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index e15af3e..99b9443 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -6,6 +6,9 @@ resources: - namespace.yaml - image.yaml - rbac.yaml + - chat-ai-gateway-configmap.yaml + - chat-ai-gateway-deployment.yaml + - chat-ai-gateway-service.yaml - frontend-deployment.yaml - frontend-service.yaml - backend-deployment.yaml diff --git a/services/communication/atlasbot-deployment.yaml b/services/communication/atlasbot-deployment.yaml index bd39ae7..55f1d2d 100644 --- a/services/communication/atlasbot-deployment.yaml +++ b/services/communication/atlasbot-deployment.yaml @@ -38,7 +38,7 @@ spec: - name: CHAT_API_KEY valueFrom: secretKeyRef: - name: chat-ai-keys + name: chat-ai-keys-runtime key: 
matrix - name: OLLAMA_URL value: https://chat.ai.bstein.dev/ diff --git a/services/communication/chat-ai-keys.yaml b/services/communication/chat-ai-keys.yaml deleted file mode 100644 index ac6c4e8..0000000 --- a/services/communication/chat-ai-keys.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# services/communication/chat-ai-keys.yaml -apiVersion: v1 -kind: Secret -metadata: - name: chat-ai-keys - namespace: communication -type: Opaque -stringData: - matrix: "3d9b1e5e80f146f2b3f6a9fbe01b7b77" diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index 9cd7f38..b579d4d 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -16,7 +16,6 @@ resources: - element-call-deployment.yaml - pin-othrys-job.yaml - guest-name-job.yaml - - chat-ai-keys.yaml - atlasbot-credentials.yaml - atlasbot-configmap.yaml - atlasbot-deployment.yaml -- 2.47.2 From aca05266fc111033322ca003b5874987e474c34e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 13:49:49 -0300 Subject: [PATCH 120/684] comms: avoid Synapse PVC rollout deadlock --- services/communication/kustomization.yaml | 3 +++ .../synapse-deployment-strategy-patch.yaml | 11 +++++++++++ 2 files changed, 14 insertions(+) create mode 100644 services/communication/synapse-deployment-strategy-patch.yaml diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index b579d4d..dc631de 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -21,3 +21,6 @@ resources: - atlasbot-deployment.yaml - seed-othrys-room.yaml - wellknown.yaml + +patchesStrategicMerge: + - synapse-deployment-strategy-patch.yaml diff --git a/services/communication/synapse-deployment-strategy-patch.yaml b/services/communication/synapse-deployment-strategy-patch.yaml new file mode 100644 index 0000000..0a795c6 --- /dev/null +++ 
b/services/communication/synapse-deployment-strategy-patch.yaml @@ -0,0 +1,11 @@ +# services/communication/synapse-deployment-strategy-patch.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: othrys-synapse-matrix-synapse +spec: + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 -- 2.47.2 From d8d741bbd976c9b8f025ba186e28ac020a75244e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 15:15:54 -0300 Subject: [PATCH 121/684] communication: remove plaintext secrets --- .../communication/atlasbot-credentials.yaml | 9 - .../communication/atlasbot-deployment.yaml | 2 +- services/communication/guest-name-job.yaml | 2 +- services/communication/kustomization.yaml | 1 - services/communication/pin-othrys-job.yaml | 2 +- services/communication/seed-othrys-room.yaml | 41 ++-- services/communication/synapse-rendered.yaml | 2 - services/jitsi/deployment.yaml | 231 ------------------ services/jitsi/ingress.yaml | 41 ---- services/jitsi/jvb-configmap.yaml | 22 -- services/jitsi/kustomization.yaml | 17 -- services/jitsi/launcher-configmap.yaml | 123 ---------- services/jitsi/launcher-deployment.yaml | 53 ---- services/jitsi/launcher-ingress.yaml | 24 -- services/jitsi/launcher-service.yaml | 12 - services/jitsi/namespace.yaml | 5 - services/jitsi/pvc.yaml | 42 ---- services/jitsi/secret.yaml | 11 - services/jitsi/secretproviderclass.yaml | 21 -- services/jitsi/service.yaml | 36 --- services/jitsi/serviceaccount.yaml | 6 - 21 files changed, 22 insertions(+), 681 deletions(-) delete mode 100644 services/communication/atlasbot-credentials.yaml delete mode 100644 services/jitsi/deployment.yaml delete mode 100644 services/jitsi/ingress.yaml delete mode 100644 services/jitsi/jvb-configmap.yaml delete mode 100644 services/jitsi/kustomization.yaml delete mode 100644 services/jitsi/launcher-configmap.yaml delete mode 100644 services/jitsi/launcher-deployment.yaml delete mode 100644 services/jitsi/launcher-ingress.yaml delete mode 
100644 services/jitsi/launcher-service.yaml delete mode 100644 services/jitsi/namespace.yaml delete mode 100644 services/jitsi/pvc.yaml delete mode 100644 services/jitsi/secret.yaml delete mode 100644 services/jitsi/secretproviderclass.yaml delete mode 100644 services/jitsi/service.yaml delete mode 100644 services/jitsi/serviceaccount.yaml diff --git a/services/communication/atlasbot-credentials.yaml b/services/communication/atlasbot-credentials.yaml deleted file mode 100644 index 6676d4e..0000000 --- a/services/communication/atlasbot-credentials.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# services/communication/atlasbot-credentials.yaml -apiVersion: v1 -kind: Secret -metadata: - name: atlasbot-credentials -type: Opaque -stringData: - bot-password: "x8eU9xwsjJ2S7Xv1G4mQ" - seeder-password: "Qv5sjyH8nD6pPz7Lk3R0" diff --git a/services/communication/atlasbot-deployment.yaml b/services/communication/atlasbot-deployment.yaml index 55f1d2d..90d245a 100644 --- a/services/communication/atlasbot-deployment.yaml +++ b/services/communication/atlasbot-deployment.yaml @@ -33,7 +33,7 @@ spec: - name: BOT_PASS valueFrom: secretKeyRef: - name: atlasbot-credentials + name: atlasbot-credentials-runtime key: bot-password - name: CHAT_API_KEY valueFrom: diff --git a/services/communication/guest-name-job.yaml b/services/communication/guest-name-job.yaml index fcd44e7..9ff2318 100644 --- a/services/communication/guest-name-job.yaml +++ b/services/communication/guest-name-job.yaml @@ -22,7 +22,7 @@ spec: - name: SEEDER_PASS valueFrom: secretKeyRef: - name: atlasbot-credentials + name: atlasbot-credentials-runtime key: seeder-password command: - /bin/sh diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index dc631de..5f0f361 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -16,7 +16,6 @@ resources: - element-call-deployment.yaml - pin-othrys-job.yaml - guest-name-job.yaml - - 
atlasbot-credentials.yaml - atlasbot-configmap.yaml - atlasbot-deployment.yaml - seed-othrys-room.yaml diff --git a/services/communication/pin-othrys-job.yaml b/services/communication/pin-othrys-job.yaml index a3178e2..07bd9b6 100644 --- a/services/communication/pin-othrys-job.yaml +++ b/services/communication/pin-othrys-job.yaml @@ -20,7 +20,7 @@ spec: - name: SEEDER_PASS valueFrom: secretKeyRef: - name: atlasbot-credentials + name: atlasbot-credentials-runtime key: seeder-password command: - /bin/sh diff --git a/services/communication/seed-othrys-room.yaml b/services/communication/seed-othrys-room.yaml index a3d4a1d..ccc2f24 100644 --- a/services/communication/seed-othrys-room.yaml +++ b/services/communication/seed-othrys-room.yaml @@ -18,45 +18,30 @@ spec: env: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 - - name: REG_SECRET - valueFrom: - secretKeyRef: - name: othrys-synapse-matrix-synapse - key: config.yaml - name: SEEDER_USER value: othrys-seeder - name: SEEDER_PASS valueFrom: secretKeyRef: - name: atlasbot-credentials + name: atlasbot-credentials-runtime key: seeder-password - name: BOT_USER value: atlasbot - name: BOT_PASS valueFrom: secretKeyRef: - name: atlasbot-credentials + name: atlasbot-credentials-runtime key: bot-password command: - /bin/sh - -c - | set -euo pipefail - pip install --no-cache-dir requests pyyaml matrix-synapse >/dev/null + pip install --no-cache-dir requests pyyaml >/dev/null python - <<'PY' - import os, subprocess, requests, yaml + import os, requests, urllib.parse BASE = os.environ["SYNAPSE_BASE"] - CONFIG = "/config/config.yaml" - - def register(user, password, admin=False): - args = ["register_new_matrix_user", "-c", CONFIG, "-u", user, "-p", password] - if admin: - args.append("-a") - args.append(BASE) - res = subprocess.run(args, capture_output=True, text=True) - if res.returncode not in (0, 1): # 1 = already exists - raise SystemExit(f"register {user} failed: {res.returncode} {res.stderr}") def 
login(user, password): r = requests.post(f"{BASE}/_matrix/client/v3/login", json={ @@ -68,6 +53,18 @@ spec: raise SystemExit(f"login failed: {r.status_code} {r.text}") return r.json()["access_token"] + def ensure_user(token, localpart, password, admin): + headers = {"Authorization": f"Bearer {token}"} + user_id = f"@{localpart}:live.bstein.dev" + url = f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}" + res = requests.get(url, headers=headers) + if res.status_code == 200: + return + payload = {"password": password, "admin": admin, "deactivated": False} + create = requests.put(url, headers=headers, json=payload) + if create.status_code not in (200, 201): + raise SystemExit(f"create user {user_id} failed: {create.status_code} {create.text}") + def ensure_room(token): headers = {"Authorization": f"Bearer {token}"} alias = "#othrys:live.bstein.dev" @@ -100,7 +97,7 @@ spec: def join_user(token, room_id, user_id): headers = {"Authorization": f"Bearer {token}"} - requests.post(f"{BASE}/_synapse/admin/v1/join/{room_id}", headers=headers, json={"user_id": user_id}) + requests.post(f"{BASE}/_synapse/admin/v1/join/{urllib.parse.quote(room_id)}", headers=headers, json={"user_id": user_id}) def join_all_locals(token, room_id): headers = {"Authorization": f"Bearer {token}"} @@ -118,9 +115,9 @@ spec: for uid in users: join_user(token, room_id, uid) - register(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"], admin=True) - register(os.environ["BOT_USER"], os.environ["BOT_PASS"], admin=False) token = login(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"]) + ensure_user(token, os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"], admin=True) + ensure_user(token, os.environ["BOT_USER"], os.environ["BOT_PASS"], admin=False) room_id = ensure_room(token) join_user(token, room_id, f"@{os.environ['BOT_USER']}:live.bstein.dev") join_all_locals(token, room_id) diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml index 
db24fd4..3d77aec 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/communication/synapse-rendered.yaml @@ -27,8 +27,6 @@ stringData: config.yaml: | ## Registration ## - registration_shared_secret: "PlxXRFAiRfLDp8RbAS6aHN7b" - ## API Configuration ## ## Database configuration ## diff --git a/services/jitsi/deployment.yaml b/services/jitsi/deployment.yaml deleted file mode 100644 index 43d552c..0000000 --- a/services/jitsi/deployment.yaml +++ /dev/null @@ -1,231 +0,0 @@ -# services/jitsi/deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: jitsi-prosody - namespace: jitsi -spec: - replicas: 1 - selector: - matchLabels: { app: jitsi-prosody } - template: - metadata: - labels: { app: jitsi-prosody } - spec: - serviceAccountName: jitsi - nodeSelector: - kubernetes.io/hostname: titan-22 - kubernetes.io/arch: amd64 - containers: - - name: prosody - image: jitsi/prosody:stable - ports: - - { name: c2s, containerPort: 5222, protocol: TCP } - - { name: http, containerPort: 5280, protocol: TCP } - - { name: comp, containerPort: 5347, protocol: TCP } - env: - - { name: XMPP_DOMAIN, value: "meet.jitsi" } - - { name: XMPP_AUTH_DOMAIN, value: "auth.meet.jitsi" } - - { name: XMPP_MUC_DOMAIN, value: "muc.meet.jitsi" } - - { name: XMPP_INTERNAL_MUC_DOMAIN, value: "internal-muc.meet.jitsi" } - - { name: ENABLE_AUTH, value: "1" } - - { name: ENABLE_GUESTS, value: "1" } - - { name: AUTH_TYPE, value: "jwt" } - - { name: XMPP_GUEST_DOMAIN, value: "guest.meet.jitsi" } - - { name: JWT_ACCEPTED_ISSUERS, value: "https://sso.bstein.dev/realms/atlas" } - - { name: JWT_ACCEPTED_AUDIENCES, value: "jitsi" } - - { name: JWT_APP_ID, value: "jitsi" } - - name: JWT_APP_SECRET - valueFrom: { secretKeyRef: { name: jitsi-jwt, key: app_secret } } - - { name: JICOFO_AUTH_USER, value: "focus" } - - { name: JVB_AUTH_USER, value: "jvb" } - - name: JICOFO_AUTH_PASSWORD - valueFrom: { secretKeyRef: { name: jitsi-internal-secrets, key: JICOFO_AUTH_PASSWORD } } - - 
name: JICOFO_COMPONENT_SECRET - valueFrom: { secretKeyRef: { name: jitsi-internal-secrets, key: JICOFO_COMPONENT_SECRET } } - - name: JVB_AUTH_PASSWORD - valueFrom: { secretKeyRef: { name: jitsi-internal-secrets, key: JVB_AUTH_PASSWORD } } - volumeMounts: - - { name: cfg, mountPath: /config } - - { name: jwt, mountPath: /var/lib/jitsi-jwt, readOnly: true } - volumes: - - name: cfg - persistentVolumeClaim: { claimName: jitsi-prosody-config } - - name: jwt - csi: - driver: secrets-store.csi.k8s.io - readOnly: true - volumeAttributes: - secretProviderClass: jitsi-jwt - ---- - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: jitsi-jicofo - namespace: jitsi -spec: - replicas: 1 - selector: - matchLabels: { app: jitsi-jicofo } - template: - metadata: - labels: { app: jitsi-jicofo } - spec: - serviceAccountName: jitsi - nodeSelector: - kubernetes.io/hostname: titan-22 - kubernetes.io/arch: amd64 - containers: - - name: jicofo - image: jitsi/jicofo:stable - env: - - { name: XMPP_DOMAIN, value: "meet.jitsi" } - - { name: XMPP_AUTH_DOMAIN, value: "auth.meet.jitsi" } - - { name: XMPP_MUC_DOMAIN, value: "muc.meet.jitsi" } - - { name: XMPP_INTERNAL_MUC_DOMAIN, value: "internal-muc.meet.jitsi" } - - { name: XMPP_GUEST_DOMAIN, value: "guest.meet.jitsi" } - - { name: ENABLE_AUTH, value: "1" } - - { name: ENABLE_GUESTS, value: "1" } - - { name: AUTH_TYPE, value: "jwt" } - - { name: XMPP_SERVER, value: "jitsi-prosody.jitsi.svc.cluster.local" } - - { name: JICOFO_AUTH_USER, value: "focus" } - - name: JICOFO_AUTH_PASSWORD - valueFrom: { secretKeyRef: { name: jitsi-internal-secrets, key: JICOFO_AUTH_PASSWORD } } - - name: JICOFO_COMPONENT_SECRET - valueFrom: { secretKeyRef: { name: jitsi-internal-secrets, key: JICOFO_COMPONENT_SECRET } } - - { name: JVB_BREWERY_MUC, value: "jvbbrewery" } - volumeMounts: - - { name: cfg, mountPath: /config } - volumes: - - name: cfg - persistentVolumeClaim: { claimName: jitsi-jicofo-config } - ---- - -apiVersion: apps/v1 -kind: Deployment 
-metadata: - name: jitsi-jvb - namespace: jitsi -spec: - replicas: 1 - strategy: - type: Recreate - selector: - matchLabels: { app: jitsi-jvb } - template: - metadata: - labels: { app: jitsi-jvb } - spec: - serviceAccountName: jitsi - initContainers: - - name: jvb-custom-config - image: busybox:1.36 - command: - - /bin/sh - - -c - - | - set -euo pipefail - cp /custom-config/custom-jvb.conf /config/custom-jvb.conf - cp /custom-config/sip-communicator.properties /config/sip-communicator.properties - volumeMounts: - - { name: cfg, mountPath: /config } - - { name: jvb-custom, mountPath: /custom-config } - nodeSelector: - kubernetes.io/hostname: titan-22 - kubernetes.io/arch: amd64 - containers: - - name: jvb - image: jitsi/jvb:stable - ports: - - { name: colibri-ws, containerPort: 9090, protocol: TCP } # WebSocket control channel - - { name: rtp-udp, containerPort: 10000, hostPort: 10000, protocol: UDP } # media - - { name: rtp-tcp, containerPort: 4443, hostPort: 4443, protocol: TCP } - env: - - { name: XMPP_DOMAIN, value: "meet.jitsi" } - - { name: XMPP_AUTH_DOMAIN, value: "auth.meet.jitsi" } - - { name: XMPP_MUC_DOMAIN, value: "muc.meet.jitsi" } - - { name: XMPP_INTERNAL_MUC_DOMAIN, value: "internal-muc.meet.jitsi" } - - { name: XMPP_GUEST_DOMAIN, value: "guest.meet.jitsi" } - - { name: XMPP_SERVER, value: "jitsi-prosody.jitsi.svc.cluster.local" } - - { name: JVB_AUTH_USER, value: "jvb" } - - name: JVB_AUTH_PASSWORD - valueFrom: { secretKeyRef: { name: jitsi-internal-secrets, key: JVB_AUTH_PASSWORD } } - - { name: JVB_BREWERY_MUC, value: "jvbbrewery" } - - { name: JVB_PORT, value: "10000" } # matches hostPort above - - { name: ENABLE_COLIBRI_WEBSOCKET, value: "1" } # enables /colibri-ws - # - { name: JVB_STUN_SERVERS, value: "stun.l.google.com:19302,stun1.l.google.com:19302,meet-jit-si-turnrelay.jitsi.net:443" } - - { name: JVB_ENABLE_APIS, value: "rest,colibri" } - - { name: JVB_WS_DOMAIN, value: "meet.bstein.dev" } - - { name: JVB_WS_TLS, value: "true" } # 
advertise wss:// for bridge channel - - { name: JVB_ADVERTISE_IPS, value: "38.28.125.112,192.168.22.22" } - - { name: JVB_TCP_HARVESTER_DISABLED, value: "false" } - - { name: JVB_TCP_PORT, value: "4443" } - - { name: AUTH_TYPE, value: "jwt" } - - name: JVB_OPTS - value: "-Dorg.jitsi.videobridge.DISABLE_TCP_HARVESTER=false -Dorg.ice4j.ice.harvest.DISABLE_TCP_HARVESTER=false -Dorg.jitsi.videobridge.TCP_HARVESTER_PORT=4443 -Dorg.jitsi.videobridge.TCP_HARVESTER_MAPPED_PORT=4443" - volumeMounts: - - { name: cfg, mountPath: /config } - volumes: - - name: cfg - persistentVolumeClaim: { claimName: jitsi-jvb-config } - - name: jvb-custom - configMap: - name: jitsi-jvb-custom-config - ---- - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: jitsi-web - namespace: jitsi -spec: - replicas: 1 - selector: - matchLabels: { app: jitsi-web } - template: - metadata: - labels: { app: jitsi-web } - spec: - serviceAccountName: jitsi - nodeSelector: - kubernetes.io/hostname: titan-22 - kubernetes.io/arch: amd64 - containers: - - name: web - image: jitsi/web:stable - ports: - - { name: http, containerPort: 80, protocol: TCP } - env: - - { name: PUBLIC_URL, value: "https://meet.bstein.dev" } - - { name: XMPP_DOMAIN, value: "meet.jitsi" } - - { name: XMPP_AUTH_DOMAIN, value: "auth.meet.jitsi" } - - { name: XMPP_MUC_DOMAIN, value: "muc.meet.jitsi" } - - { name: XMPP_INTERNAL_MUC_DOMAIN, value: "internal-muc.meet.jitsi" } - - { name: XMPP_GUEST_DOMAIN, value: "guest.meet.jitsi" } - - { name: ENABLE_AUTH, value: "1" } - - { name: ENABLE_GUESTS, value: "1" } - - { name: AUTH_TYPE, value: "jwt" } - - { name: JWT_APP_ID, value: "jitsi" } - - { name: JWT_ACCEPTED_ISSUERS, value: "https://sso.bstein.dev/realms/atlas" } - - { name: JWT_ACCEPTED_AUDIENCES, value: "jitsi" } - - name: JWT_APP_SECRET - valueFrom: { secretKeyRef: { name: jitsi-jwt, key: app_secret } } - - { name: XMPP_BOSH_URL_BASE, value: "https://meet.bstein.dev" } - - { name: ENABLE_XMPP_WEBSOCKET, value: "1" } - - { name: 
ENABLE_COLIBRI_WEBSOCKET, value: "1" } - volumeMounts: - - { name: cfg, mountPath: /config } - - { name: jwt, mountPath: /var/lib/jitsi-jwt, readOnly: true } - volumes: - - name: cfg - persistentVolumeClaim: { claimName: jitsi-web-config } - - name: jwt - csi: - driver: secrets-store.csi.k8s.io - readOnly: true - volumeAttributes: - secretProviderClass: jitsi-jwt diff --git a/services/jitsi/ingress.yaml b/services/jitsi/ingress.yaml deleted file mode 100644 index 3336c37..0000000 --- a/services/jitsi/ingress.yaml +++ /dev/null @@ -1,41 +0,0 @@ -# services/jitsi/ingress.yaml -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: jitsi - namespace: jitsi - annotations: - cert-manager.io/cluster-issuer: letsencrypt -spec: - ingressClassName: traefik - tls: - - hosts: [ "meet.bstein.dev" ] - secretName: jitsi-meet-tls - rules: - - host: meet.bstein.dev - http: - paths: - - path: /colibri-ws - pathType: Prefix - backend: - service: - name: jitsi-jvb - port: { number: 9090 } - - path: /xmpp-websocket - pathType: Prefix - backend: - service: - name: jitsi-prosody - port: { number: 5280 } - - path: /http-bind - pathType: Prefix - backend: - service: - name: jitsi-prosody - port: { number: 5280 } - - path: / - pathType: Prefix - backend: - service: - name: jitsi-web - port: { number: 80 } diff --git a/services/jitsi/jvb-configmap.yaml b/services/jitsi/jvb-configmap.yaml deleted file mode 100644 index 59cb165..0000000 --- a/services/jitsi/jvb-configmap.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# services/jitsi/jvb-configmap.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: jitsi-jvb-custom-config - namespace: jitsi -data: - custom-jvb.conf: | - videobridge { - ice { - tcp { - enabled = true - port = 4443 - mapped-port = 4443 - } - } - } - sip-communicator.properties: | - org.jitsi.videobridge.DISABLE_TCP_HARVESTER=false - org.ice4j.ice.harvest.DISABLE_TCP_HARVESTER=false - org.jitsi.videobridge.TCP_HARVESTER_PORT=4443 - 
org.jitsi.videobridge.TCP_HARVESTER_MAPPED_PORT=4443 diff --git a/services/jitsi/kustomization.yaml b/services/jitsi/kustomization.yaml deleted file mode 100644 index 805a967..0000000 --- a/services/jitsi/kustomization.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# services/jitsi/kustomization.yaml -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -resources: - - namespace.yaml - - serviceaccount.yaml - - secretproviderclass.yaml - - deployment.yaml - - launcher-configmap.yaml - - launcher-deployment.yaml - - launcher-service.yaml - - launcher-ingress.yaml - - service.yaml - - pvc.yaml - - ingress.yaml - - secret.yaml - - jvb-configmap.yaml diff --git a/services/jitsi/launcher-configmap.yaml b/services/jitsi/launcher-configmap.yaml deleted file mode 100644 index 5877219..0000000 --- a/services/jitsi/launcher-configmap.yaml +++ /dev/null @@ -1,123 +0,0 @@ -# services/jitsi/launcher-configmap.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: jitsi-launcher - namespace: jitsi -data: - app.py: | - import base64 - import hashlib - import hmac - import json - import os - import time - from fastapi import FastAPI, HTTPException, Request - from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse - - ISSUER = os.getenv("JWT_ISSUER", "https://sso.bstein.dev/realms/atlas") - AUDIENCE = os.getenv("JWT_AUDIENCE", "jitsi") - APP_ID = os.getenv("JWT_APP_ID", "jitsi") - PUBLIC_URL = os.getenv("PUBLIC_URL", "https://meet.bstein.dev") - SECRET_FILE = os.getenv("JWT_SECRET_FILE", "/var/lib/jitsi-jwt/jwt") - ALLOWED_GROUPS = {g for g in os.getenv("ALLOWED_GROUPS", "").split(",") if g} - TOKEN_TTL = int(os.getenv("JWT_TTL_SECONDS", "600")) - - app = FastAPI() - - - def _b64url(data: bytes) -> bytes: - return base64.urlsafe_b64encode(data).rstrip(b"=") - - - def _read_secret() -> bytes: - raw = open(SECRET_FILE, "rb").read().strip() - try: - return bytes.fromhex(raw.decode()) - except ValueError: - return raw - - - def _sign(room: str, user: str, groups: 
list[str]) -> str: - now = int(time.time()) - header = {"alg": "HS256", "typ": "JWT"} - payload = { - "iss": ISSUER, - "aud": AUDIENCE, - "sub": "meet.jitsi", - "room": room, - "exp": now + TOKEN_TTL, - "nbf": now - 10, - "context": { - "user": { - "name": user, - "email": user, - "affiliation": "owner", - "groups": groups, - } - }, - "app_id": APP_ID, - } - secret = _read_secret() - signing_input = b".".join( - [ - _b64url(json.dumps(header, separators=(",", ":")).encode()), - _b64url(json.dumps(payload, separators=(",", ":")).encode()), - ] - ) - sig = _b64url(hmac.new(secret, signing_input, hashlib.sha256).digest()) - return b".".join([signing_input, sig]).decode() - - - def _render_form(message: str = "") -> HTMLResponse: - body = f""" - - -

Start a Jitsi room

- {'

'+message+'

' if message else ''} -
- - - -
- - - """ - return HTMLResponse(body) - - - def _extract_groups(request: Request) -> set[str]: - raw = request.headers.get("x-auth-request-groups", "") - # Traefik forwardAuth returns comma-separated groups - return {g.strip() for g in raw.split(",") if g.strip()} - - - @app.get("/launch") - async def launch(request: Request, room: str | None = None): - user = request.headers.get("x-auth-request-email") or request.headers.get( - "x-auth-request-user", "" - ) - groups = _extract_groups(request) - if ALLOWED_GROUPS and not (groups & ALLOWED_GROUPS): - raise HTTPException(status_code=403, detail="forbidden") - if not room: - return _render_form() - room = room.strip() - if not room or "/" in room or ".." in room: - raise HTTPException(status_code=400, detail="invalid room") - token = _sign(room, user or "moderator", sorted(groups)) - join_url = f"{PUBLIC_URL}/{room}#config.jwt={token}" - accept = request.headers.get("accept", "") - if "text/html" in accept: - return RedirectResponse(join_url, status_code=302) - return JSONResponse({"room": room, "join_url": join_url, "token": token}) - - - @app.get("/") - async def root(): - return RedirectResponse("/launch") - - - @app.get("/health") - async def health(): - return {"status": "ok"} diff --git a/services/jitsi/launcher-deployment.yaml b/services/jitsi/launcher-deployment.yaml deleted file mode 100644 index b957e24..0000000 --- a/services/jitsi/launcher-deployment.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# services/jitsi/launcher-deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: jitsi-launcher - namespace: jitsi -spec: - replicas: 1 - selector: - matchLabels: { app: jitsi-launcher } - template: - metadata: - labels: { app: jitsi-launcher } - spec: - serviceAccountName: jitsi - nodeSelector: - kubernetes.io/hostname: titan-22 - kubernetes.io/arch: amd64 - containers: - - name: launcher - image: docker.io/tiangolo/uvicorn-gunicorn-fastapi:python3.11-slim - imagePullPolicy: IfNotPresent - env: - - { 
name: JWT_SECRET_FILE, value: "/var/lib/jitsi-jwt/jwt" } - - { name: JWT_ISSUER, value: "https://sso.bstein.dev/realms/atlas" } - - { name: JWT_AUDIENCE, value: "jitsi" } - - { name: JWT_APP_ID, value: "jitsi" } - - { name: PUBLIC_URL, value: "https://meet.bstein.dev" } - # Allow any authenticated user to mint; tighten later by setting comma list - - { name: ALLOWED_GROUPS, value: "" } - - { name: JWT_TTL_SECONDS, value: "600" } - ports: - - { name: http, containerPort: 80 } - volumeMounts: - - { name: app, mountPath: /app/main.py, subPath: app.py } - - { name: jwt, mountPath: /var/lib/jitsi-jwt, readOnly: true } - readinessProbe: - httpGet: - path: /health - port: 80 - initialDelaySeconds: 5 - periodSeconds: 10 - volumes: - - name: app - configMap: - name: jitsi-launcher - defaultMode: 0444 - - name: jwt - csi: - driver: secrets-store.csi.k8s.io - readOnly: true - volumeAttributes: - secretProviderClass: jitsi-jwt diff --git a/services/jitsi/launcher-ingress.yaml b/services/jitsi/launcher-ingress.yaml deleted file mode 100644 index b0b58e2..0000000 --- a/services/jitsi/launcher-ingress.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# services/jitsi/launcher-ingress.yaml -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: jitsi-launcher - namespace: jitsi - annotations: - cert-manager.io/cluster-issuer: letsencrypt - traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-forward-auth@kubernetescrd,sso-oauth2-proxy-errors@kubernetescrd -spec: - ingressClassName: traefik - tls: - - hosts: [ "meet.bstein.dev" ] - secretName: jitsi-meet-tls - rules: - - host: meet.bstein.dev - http: - paths: - - path: /launch - pathType: Prefix - backend: - service: - name: jitsi-launcher - port: { number: 80 } diff --git a/services/jitsi/launcher-service.yaml b/services/jitsi/launcher-service.yaml deleted file mode 100644 index 3ed7f5a..0000000 --- a/services/jitsi/launcher-service.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# services/jitsi/launcher-service.yaml 
-apiVersion: v1 -kind: Service -metadata: - name: jitsi-launcher - namespace: jitsi -spec: - selector: { app: jitsi-launcher } - ports: - - name: http - port: 80 - targetPort: 80 diff --git a/services/jitsi/namespace.yaml b/services/jitsi/namespace.yaml deleted file mode 100644 index 6ba93f2..0000000 --- a/services/jitsi/namespace.yaml +++ /dev/null @@ -1,5 +0,0 @@ -# services/jitsi/namespace.yaml -apiVersion: v1 -kind: Namespace -metadata: - name: jitsi diff --git a/services/jitsi/pvc.yaml b/services/jitsi/pvc.yaml deleted file mode 100644 index 3a2c14e..0000000 --- a/services/jitsi/pvc.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# services/jitsi/pvc.yaml -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: jitsi-web-config - namespace: jitsi -spec: - accessModes: ["ReadWriteOnce"] - resources: { requests: { storage: 10Gi } } - ---- - -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: jitsi-prosody-config - namespace: jitsi -spec: - accessModes: ["ReadWriteOnce"] - resources: { requests: { storage: 10Gi } } - ---- - -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: jitsi-jicofo-config - namespace: jitsi -spec: - accessModes: ["ReadWriteOnce"] - resources: { requests: { storage: 10Gi } } - ---- - -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: jitsi-jvb-config - namespace: jitsi -spec: - accessModes: ["ReadWriteOnce"] - resources: { requests: { storage: 10Gi } } diff --git a/services/jitsi/secret.yaml b/services/jitsi/secret.yaml deleted file mode 100644 index f851bac..0000000 --- a/services/jitsi/secret.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# services/jitsi/secret.yaml -apiVersion: v1 -kind: Secret -metadata: - name: jitsi-internal-secrets - namespace: jitsi -type: Opaque -data: - JICOFO_COMPONENT_SECRET: bEg5Y09hZFJBem5PUFliQlp4RHkwRTRP - JICOFO_AUTH_PASSWORD: VVkyUmczaVRDWUZ0MzdQdmN3UDN1SFc5 - JVB_AUTH_PASSWORD: d0M5aWJ4dWlPTnhFak9lRHJqSHdYa0g5 \ No newline at end of file diff --git 
a/services/jitsi/secretproviderclass.yaml b/services/jitsi/secretproviderclass.yaml deleted file mode 100644 index 365af60..0000000 --- a/services/jitsi/secretproviderclass.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# services/jitsi/secretproviderclass.yaml -apiVersion: secrets-store.csi.x-k8s.io/v1 -kind: SecretProviderClass -metadata: - name: jitsi-jwt - namespace: jitsi -spec: - provider: vault - parameters: - vaultAddress: "http://vault.vault.svc.cluster.local:8200" - roleName: jitsi-jwt - objects: | - - objectName: "jwt" - secretPath: "kv/data/jitsi/jwt-hs256" - secretKey: "app_secret" - secretObjects: - - secretName: jitsi-jwt - type: Opaque - data: - - objectName: "jwt" - key: app_secret diff --git a/services/jitsi/service.yaml b/services/jitsi/service.yaml deleted file mode 100644 index 7b44b5c..0000000 --- a/services/jitsi/service.yaml +++ /dev/null @@ -1,36 +0,0 @@ -# services/jitsi/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: jitsi-prosody - namespace: jitsi -spec: - selector: { app: jitsi-prosody } - ports: - - { name: c2s, port: 5222, targetPort: 5222, protocol: TCP } - - { name: http, port: 5280, targetPort: 5280, protocol: TCP } - - { name: comp, port: 5347, targetPort: 5347, protocol: TCP } - ---- - -apiVersion: v1 -kind: Service -metadata: - name: jitsi-jvb - namespace: jitsi -spec: - selector: { app: jitsi-jvb } - ports: - - { name: colibri-ws, port: 9090, targetPort: 9090, protocol: TCP } - ---- - -apiVersion: v1 -kind: Service -metadata: - name: jitsi-web - namespace: jitsi -spec: - selector: { app: jitsi-web } - ports: - - { name: http, port: 80, targetPort: 80, protocol: TCP } diff --git a/services/jitsi/serviceaccount.yaml b/services/jitsi/serviceaccount.yaml deleted file mode 100644 index ce1a1c9..0000000 --- a/services/jitsi/serviceaccount.yaml +++ /dev/null @@ -1,6 +0,0 @@ -# services/jitsi/serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: jitsi - namespace: jitsi -- 2.47.2 From 
6203faae3fe23114fa4a3c26b7f017dc3cc6b9da Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 15:23:17 -0300 Subject: [PATCH 122/684] communication: make pin job mutable --- services/communication/pin-othrys-job.yaml | 153 +++++++++++++-------- 1 file changed, 97 insertions(+), 56 deletions(-) diff --git a/services/communication/pin-othrys-job.yaml b/services/communication/pin-othrys-job.yaml index 07bd9b6..8d4d843 100644 --- a/services/communication/pin-othrys-job.yaml +++ b/services/communication/pin-othrys-job.yaml @@ -1,68 +1,109 @@ # services/communication/pin-othrys-job.yaml apiVersion: batch/v1 -kind: Job +kind: CronJob metadata: name: pin-othrys-invite namespace: communication spec: - ttlSecondsAfterFinished: 3600 - template: + schedule: "*/30 * * * *" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 1 + jobTemplate: spec: - restartPolicy: OnFailure - containers: - - name: pin - image: python:3.11-slim - env: - - name: SYNAPSE_BASE - value: http://othrys-synapse-matrix-synapse:8008 - - name: SEEDER_USER - value: othrys-seeder - - name: SEEDER_PASS - valueFrom: - secretKeyRef: - name: atlasbot-credentials-runtime - key: seeder-password - command: - - /bin/sh - - -c - - | - set -euo pipefail - pip install --no-cache-dir requests >/dev/null - python - <<'PY' - import requests, urllib.parse, os + template: + spec: + restartPolicy: OnFailure + containers: + - name: pin + image: python:3.11-slim + env: + - name: SYNAPSE_BASE + value: http://othrys-synapse-matrix-synapse:8008 + - name: SEEDER_USER + value: othrys-seeder + - name: SEEDER_PASS + valueFrom: + secretKeyRef: + name: atlasbot-credentials-runtime + key: seeder-password + command: + - /bin/sh + - -c + - | + set -euo pipefail + pip install --no-cache-dir requests >/dev/null + python - <<'PY' + import os, requests, urllib.parse - BASE = os.environ["SYNAPSE_BASE"] - def login(user, password): - r = requests.post(f"{BASE}/_matrix/client/v3/login", json={ - 
"type": "m.login.password", - "identifier": {"type": "m.id.user", "user": user}, - "password": password, - }) - r.raise_for_status() - return r.json()["access_token"] + BASE = os.environ["SYNAPSE_BASE"] + ROOM_ALIAS = "#othrys:live.bstein.dev" + MESSAGE = ( + "Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join " + "and choose 'Continue' -> 'Join as guest'." + ) - def resolve(alias, token): - enc = urllib.parse.quote(alias) - r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers={"Authorization": f"Bearer {token}"}) - r.raise_for_status() - return r.json()["room_id"] + def auth(token): return {"Authorization": f"Bearer {token}"} - def send(room_id, token, body): - r = requests.post(f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message", - headers={"Authorization": f"Bearer {token}"}, - json={"msgtype": "m.text", "body": body}) - r.raise_for_status() - return r.json()["event_id"] + def login(user, password): + r = requests.post(f"{BASE}/_matrix/client/v3/login", json={ + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": user}, + "password": password, + }) + r.raise_for_status() + return r.json()["access_token"] - def pin(room_id, token, event_id): - r = requests.put(f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.pinned_events", - headers={"Authorization": f"Bearer {token}"}, - json={"pinned": [event_id]}) - r.raise_for_status() + def resolve(alias, token): + enc = urllib.parse.quote(alias) + r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token)) + r.raise_for_status() + return r.json()["room_id"] - token = login(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"]) - room_id = resolve("#othrys:live.bstein.dev", token) - msg = "Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join and choose 'Continue' -> 'Join as guest'." 
- eid = send(room_id, token, msg) - pin(room_id, token, eid) - PY + def get_pinned(room_id, token): + r = requests.get( + f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.pinned_events", + headers=auth(token), + ) + if r.status_code == 404: + return [] + r.raise_for_status() + return r.json().get("pinned", []) + + def get_event(room_id, event_id, token): + r = requests.get( + f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/event/{urllib.parse.quote(event_id)}", + headers=auth(token), + ) + if r.status_code == 404: + return None + r.raise_for_status() + return r.json() + + def send(room_id, token, body): + r = requests.post( + f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message", + headers=auth(token), + json={"msgtype": "m.text", "body": body}, + ) + r.raise_for_status() + return r.json()["event_id"] + + def pin(room_id, token, event_id): + r = requests.put( + f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.pinned_events", + headers=auth(token), + json={"pinned": [event_id]}, + ) + r.raise_for_status() + + token = login(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"]) + room_id = resolve(ROOM_ALIAS, token) + for event_id in get_pinned(room_id, token): + ev = get_event(room_id, event_id, token) + if ev and ev.get("content", {}).get("body") == MESSAGE: + raise SystemExit(0) + + eid = send(room_id, token, MESSAGE) + pin(room_id, token, eid) + PY -- 2.47.2 From 716059d9ac2e9dedc2ee4d5c03246e0d206b326c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 15:37:54 -0300 Subject: [PATCH 123/684] communication: add matrix-authentication-service --- services/communication/kustomization.yaml | 2 + services/communication/mas-configmap.yaml | 56 +++++++++++ services/communication/mas-deployment.yaml | 108 +++++++++++++++++++++ 3 files changed, 166 insertions(+) create mode 100644 services/communication/mas-configmap.yaml create mode 100644 
services/communication/mas-deployment.yaml diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index 5f0f361..6b4f4a0 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -5,6 +5,8 @@ namespace: communication resources: - namespace.yaml - synapse-rendered.yaml + - mas-configmap.yaml + - mas-deployment.yaml - element-rendered.yaml - livekit-config.yaml - livekit.yaml diff --git a/services/communication/mas-configmap.yaml b/services/communication/mas-configmap.yaml new file mode 100644 index 0000000..cc859ba --- /dev/null +++ b/services/communication/mas-configmap.yaml @@ -0,0 +1,56 @@ +# services/communication/mas-configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: matrix-authentication-service-config + namespace: communication +data: + config.yaml: | + http: + public_base: "https://matrix.live.bstein.dev/" + + database: + uri: "postgresql://mas:@@MAS_DB_PASSWORD@@@postgres-service.postgres.svc.cluster.local:5432/mas?sslmode=prefer" + + secrets: + encryption_file: /etc/mas/secrets/encryption + keys_dir: /etc/mas/keys + + passwords: + enabled: true + + matrix: + kind: synapse + homeserver: live.bstein.dev + endpoint: "http://othrys-synapse-matrix-synapse:8008/" + secret_file: /etc/mas/secrets/matrix_shared_secret + + upstream_oauth2: + providers: + - id: 01KDTTKYCYTAAAQKMAKZZ5CPW3 + synapse_idp_id: oidc-keycloak + issuer: "https://sso.bstein.dev/realms/atlas" + human_name: "Keycloak" + brand_name: "keycloak" + client_id: "othrys-mas" + client_secret_file: /etc/mas/secrets/keycloak_client_secret + token_endpoint_auth_method: client_secret_post + scope: "openid profile email" + claims_imports: + localpart: + action: require + template: "{{ user.preferred_username }}" + on_conflict: add + displayname: + action: force + template: "{{ user.name }}" + email: + action: force + template: "{{ user.email }}" + + policy: + data: + client_registration: + 
allow_insecure_uris: true + allow_host_mismatch: true + allow_missing_client_uri: true diff --git a/services/communication/mas-deployment.yaml b/services/communication/mas-deployment.yaml new file mode 100644 index 0000000..711640c --- /dev/null +++ b/services/communication/mas-deployment.yaml @@ -0,0 +1,108 @@ +# services/communication/mas-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: matrix-authentication-service + namespace: communication + labels: + app: matrix-authentication-service +spec: + replicas: 1 + selector: + matchLabels: + app: matrix-authentication-service + template: + metadata: + labels: + app: matrix-authentication-service + spec: + enableServiceLinks: false + nodeSelector: + hardware: rpi5 + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 50 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5","rpi4"] + containers: + - name: mas + image: ghcr.io/element-hq/matrix-authentication-service:0.20.0 + command: ["/bin/sh","-c"] + args: + - | + set -euo pipefail + umask 077 + + DB_PASS_ESCAPED="$(printf '%s' "${MAS_DB_PASSWORD}" | sed 's/[\\/&]/\\&/g')" + sed "s/@@MAS_DB_PASSWORD@@/${DB_PASS_ESCAPED}/g" /etc/mas/config.yaml > /var/run/mas-config.yaml + + exec mas-cli server --config /var/run/mas-config.yaml + env: + - name: MAS_DB_PASSWORD + valueFrom: + secretKeyRef: + name: mas-db + key: password + ports: + - name: http + containerPort: 8080 + protocol: TCP + volumeMounts: + - name: config + mountPath: /etc/mas/config.yaml + subPath: config.yaml + readOnly: true + - name: secrets + mountPath: /etc/mas/secrets + readOnly: true + - name: keys + mountPath: /etc/mas/keys + readOnly: true + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: "2" + memory: 1Gi + volumes: + - name: config + configMap: + name: matrix-authentication-service-config + items: + - key: config.yaml + path: config.yaml + - name: secrets + secret: + secretName: 
mas-secrets-runtime + items: + - key: encryption + path: encryption + - key: matrix_shared_secret + path: matrix_shared_secret + - key: keycloak_client_secret + path: keycloak_client_secret + - name: keys + secret: + secretName: mas-secrets-runtime + items: + - key: rsa_key + path: rsa_key +--- +apiVersion: v1 +kind: Service +metadata: + name: matrix-authentication-service + namespace: communication +spec: + selector: + app: matrix-authentication-service + ports: + - name: http + port: 8080 + targetPort: http + protocol: TCP -- 2.47.2 From d9c003ce5a0adf9bcf950483d059d856c6c73e78 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 15:41:15 -0300 Subject: [PATCH 124/684] communication: fix MAS container entrypoint --- services/communication/mas-deployment.yaml | 30 ++++++++++++++-------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/services/communication/mas-deployment.yaml b/services/communication/mas-deployment.yaml index 711640c..1091278 100644 --- a/services/communication/mas-deployment.yaml +++ b/services/communication/mas-deployment.yaml @@ -28,33 +28,41 @@ spec: - key: hardware operator: In values: ["rpi5","rpi4"] - containers: - - name: mas - image: ghcr.io/element-hq/matrix-authentication-service:0.20.0 + initContainers: + - name: render-config + image: alpine:3.20 command: ["/bin/sh","-c"] args: - | set -euo pipefail umask 077 - DB_PASS_ESCAPED="$(printf '%s' "${MAS_DB_PASSWORD}" | sed 's/[\\/&]/\\&/g')" - sed "s/@@MAS_DB_PASSWORD@@/${DB_PASS_ESCAPED}/g" /etc/mas/config.yaml > /var/run/mas-config.yaml - - exec mas-cli server --config /var/run/mas-config.yaml + sed "s/@@MAS_DB_PASSWORD@@/${DB_PASS_ESCAPED}/g" /etc/mas/config.yaml > /rendered/config.yaml env: - name: MAS_DB_PASSWORD valueFrom: secretKeyRef: name: mas-db key: password + volumeMounts: + - name: config + mountPath: /etc/mas/config.yaml + subPath: config.yaml + readOnly: true + - name: rendered + mountPath: /rendered + readOnly: false + containers: + - name: mas + 
image: ghcr.io/element-hq/matrix-authentication-service:0.20.0 + args: ["server","--config","/rendered/config.yaml"] ports: - name: http containerPort: 8080 protocol: TCP volumeMounts: - - name: config - mountPath: /etc/mas/config.yaml - subPath: config.yaml + - name: rendered + mountPath: /rendered readOnly: true - name: secrets mountPath: /etc/mas/secrets @@ -76,6 +84,8 @@ spec: items: - key: config.yaml path: config.yaml + - name: rendered + emptyDir: {} - name: secrets secret: secretName: mas-secrets-runtime -- 2.47.2 From 45f62bc3316eb037195bf6b6aee504ac2c728d4b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 15:44:17 -0300 Subject: [PATCH 125/684] communication: fix MAS config permissions --- services/communication/mas-deployment.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/communication/mas-deployment.yaml b/services/communication/mas-deployment.yaml index 1091278..58df3c0 100644 --- a/services/communication/mas-deployment.yaml +++ b/services/communication/mas-deployment.yaml @@ -38,6 +38,7 @@ spec: umask 077 DB_PASS_ESCAPED="$(printf '%s' "${MAS_DB_PASSWORD}" | sed 's/[\\/&]/\\&/g')" sed "s/@@MAS_DB_PASSWORD@@/${DB_PASS_ESCAPED}/g" /etc/mas/config.yaml > /rendered/config.yaml + chmod 0644 /rendered/config.yaml env: - name: MAS_DB_PASSWORD valueFrom: -- 2.47.2 From 940e0cc613177733b4a18d40c52509465853b117 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 15:49:21 -0300 Subject: [PATCH 126/684] communication: wire MAS secrets via init render --- services/communication/mas-configmap.yaml | 8 +++++--- services/communication/mas-deployment.yaml | 19 ++++++++++++++++++- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/services/communication/mas-configmap.yaml b/services/communication/mas-configmap.yaml index cc859ba..25d4eb3 100644 --- a/services/communication/mas-configmap.yaml +++ b/services/communication/mas-configmap.yaml @@ -14,7 +14,9 @@ data: secrets: encryption_file: 
/etc/mas/secrets/encryption - keys_dir: /etc/mas/keys + keys: + - kid: "othrys-rsa-1" + key_file: /etc/mas/keys/rsa_key passwords: enabled: true @@ -23,7 +25,7 @@ data: kind: synapse homeserver: live.bstein.dev endpoint: "http://othrys-synapse-matrix-synapse:8008/" - secret_file: /etc/mas/secrets/matrix_shared_secret + secret: "@@MATRIX_SHARED_SECRET@@" upstream_oauth2: providers: @@ -33,7 +35,7 @@ data: human_name: "Keycloak" brand_name: "keycloak" client_id: "othrys-mas" - client_secret_file: /etc/mas/secrets/keycloak_client_secret + client_secret: "@@KEYCLOAK_CLIENT_SECRET@@" token_endpoint_auth_method: client_secret_post scope: "openid profile email" claims_imports: diff --git a/services/communication/mas-deployment.yaml b/services/communication/mas-deployment.yaml index 58df3c0..8f5bf0e 100644 --- a/services/communication/mas-deployment.yaml +++ b/services/communication/mas-deployment.yaml @@ -37,7 +37,14 @@ spec: set -euo pipefail umask 077 DB_PASS_ESCAPED="$(printf '%s' "${MAS_DB_PASSWORD}" | sed 's/[\\/&]/\\&/g')" - sed "s/@@MAS_DB_PASSWORD@@/${DB_PASS_ESCAPED}/g" /etc/mas/config.yaml > /rendered/config.yaml + MATRIX_SECRET_ESCAPED="$(printf '%s' "${MATRIX_SHARED_SECRET}" | sed 's/[\\/&]/\\&/g')" + KC_SECRET_ESCAPED="$(printf '%s' "${KEYCLOAK_CLIENT_SECRET}" | sed 's/[\\/&]/\\&/g')" + + sed \ + -e "s/@@MAS_DB_PASSWORD@@/${DB_PASS_ESCAPED}/g" \ + -e "s/@@MATRIX_SHARED_SECRET@@/${MATRIX_SECRET_ESCAPED}/g" \ + -e "s/@@KEYCLOAK_CLIENT_SECRET@@/${KC_SECRET_ESCAPED}/g" \ + /etc/mas/config.yaml > /rendered/config.yaml chmod 0644 /rendered/config.yaml env: - name: MAS_DB_PASSWORD @@ -45,6 +52,16 @@ spec: secretKeyRef: name: mas-db key: password + - name: MATRIX_SHARED_SECRET + valueFrom: + secretKeyRef: + name: mas-secrets-runtime + key: matrix_shared_secret + - name: KEYCLOAK_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: mas-secrets-runtime + key: keycloak_client_secret volumeMounts: - name: config mountPath: /etc/mas/config.yaml -- 2.47.2 From 
cb82a44e2ea7d3d2178840deaf93b19b790d7e2a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 15:53:35 -0300 Subject: [PATCH 127/684] communication: enable MAS delegated auth --- services/communication/kustomization.yaml | 1 + services/communication/mas-ingress.yaml | 68 ++++++++++++++++++++ services/communication/synapse-rendered.yaml | 14 ++++ services/communication/wellknown.yaml | 4 ++ 4 files changed, 87 insertions(+) create mode 100644 services/communication/mas-ingress.yaml diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index 6b4f4a0..39d5890 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -7,6 +7,7 @@ resources: - synapse-rendered.yaml - mas-configmap.yaml - mas-deployment.yaml + - mas-ingress.yaml - element-rendered.yaml - livekit-config.yaml - livekit.yaml diff --git a/services/communication/mas-ingress.yaml b/services/communication/mas-ingress.yaml new file mode 100644 index 0000000..6e4ad54 --- /dev/null +++ b/services/communication/mas-ingress.yaml @@ -0,0 +1,68 @@ +# services/communication/mas-ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: matrix-authentication-service + namespace: communication + annotations: + kubernetes.io/ingress.class: traefik + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" + cert-manager.io/cluster-issuer: letsencrypt +spec: + tls: + - hosts: + - matrix.live.bstein.dev + secretName: matrix-live-tls + rules: + - host: matrix.live.bstein.dev + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: matrix-authentication-service + port: + number: 8080 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: matrix-authentication-service-compat + namespace: communication + annotations: + kubernetes.io/ingress.class: traefik + traefik.ingress.kubernetes.io/router.entrypoints: 
websecure + traefik.ingress.kubernetes.io/router.tls: "true" + cert-manager.io/cluster-issuer: letsencrypt +spec: + tls: + - hosts: + - matrix.live.bstein.dev + secretName: matrix-live-tls + rules: + - host: matrix.live.bstein.dev + http: + paths: + - path: /_matrix/client/v3/login + pathType: Exact + backend: + service: + name: matrix-authentication-service + port: + number: 8080 + - path: /_matrix/client/v3/logout + pathType: Exact + backend: + service: + name: matrix-authentication-service + port: + number: 8080 + - path: /_matrix/client/v3/refresh + pathType: Exact + backend: + service: + name: matrix-authentication-service + port: + number: 8080 diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml index 3d77aec..c824b5d 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/communication/synapse-rendered.yaml @@ -394,6 +394,11 @@ data: display_name_template: '{{ user.name }}' localpart_template: '{{ user.preferred_username }}' userinfo_endpoint: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/userinfo + + matrix_authentication_service: + enabled: true + endpoint: http://matrix-authentication-service:8080/ + secret: "@@MAS_SHARED_SECRET@@" --- # Source: matrix-synapse/templates/pvc.yaml kind: PersistentVolumeClaim @@ -711,6 +716,7 @@ spec: export REDIS_PASSWORD=$(echo "${REDIS_PASSWORD:-}" | sed 's/\//\\\//g' | sed 's/\&/\\\&/g') && \ export OIDC_CLIENT_SECRET_ESCAPED=$(echo "${OIDC_CLIENT_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ export TURN_SECRET_ESCAPED=$(echo "${TURN_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ + export MAS_SHARED_SECRET_ESCAPED=$(echo "${MAS_SHARED_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ cat /synapse/secrets/*.yaml | \ sed -e "s/@@POSTGRES_PASSWORD@@/${POSTGRES_PASSWORD:-}/" \ -e "s/@@REDIS_PASSWORD@@/${REDIS_PASSWORD:-}/" \ @@ -722,6 +728,9 @@ spec: fi; \ if [ -n "${TURN_SECRET_ESCAPED}" ]; then \ sed -i "s/@@TURN_SECRET@@/${TURN_SECRET_ESCAPED}/g" 
/synapse/runtime-config/homeserver.yaml; \ + fi; \ + if [ -n "${MAS_SHARED_SECRET_ESCAPED}" ]; then \ + sed -i "s/@@MAS_SHARED_SECRET@@/${MAS_SHARED_SECRET_ESCAPED}/g" /synapse/runtime-config/homeserver.yaml; \ fi exec python -B -m synapse.app.homeserver \ -c /synapse/runtime-config/homeserver.yaml \ @@ -747,6 +756,11 @@ spec: secretKeyRef: name: turn-shared-secret key: TURN_STATIC_AUTH_SECRET + - name: MAS_SHARED_SECRET + valueFrom: + secretKeyRef: + name: mas-secrets-runtime + key: matrix_shared_secret image: "ghcr.io/element-hq/synapse:v1.144.0" imagePullPolicy: IfNotPresent securityContext: diff --git a/services/communication/wellknown.yaml b/services/communication/wellknown.yaml index 655746a..8627e0e 100644 --- a/services/communication/wellknown.yaml +++ b/services/communication/wellknown.yaml @@ -10,6 +10,10 @@ data: "m.homeserver": { "base_url": "https://matrix.live.bstein.dev" }, + "org.matrix.msc2965.authentication": { + "issuer": "https://matrix.live.bstein.dev/", + "account": "https://matrix.live.bstein.dev/account/" + }, "org.matrix.msc4143.rtc_foci": [ { "type": "livekit", -- 2.47.2 From 683f495bd8d69a02df808206188d155de37ec43b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 15:57:33 -0300 Subject: [PATCH 128/684] communication: make MAS listen on IPv4 --- services/communication/mas-configmap.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/services/communication/mas-configmap.yaml b/services/communication/mas-configmap.yaml index 25d4eb3..d2553a3 100644 --- a/services/communication/mas-configmap.yaml +++ b/services/communication/mas-configmap.yaml @@ -8,6 +8,23 @@ data: config.yaml: | http: public_base: "https://matrix.live.bstein.dev/" + listeners: + - name: web + resources: + - name: discovery + - name: human + - name: oauth + - name: compat + - name: graphql + - name: assets + binds: + - address: "0.0.0.0:8080" + - name: internal + resources: + - name: health + binds: + - host: localhost + port: 8081 
database: uri: "postgresql://mas:@@MAS_DB_PASSWORD@@@postgres-service.postgres.svc.cluster.local:5432/mas?sslmode=prefer" -- 2.47.2 From 20df5cfb6eb849c0eb650192c4340f770c476949 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 15:59:46 -0300 Subject: [PATCH 129/684] communication: restart MAS on config change --- services/communication/mas-deployment.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/communication/mas-deployment.yaml b/services/communication/mas-deployment.yaml index 8f5bf0e..326ea71 100644 --- a/services/communication/mas-deployment.yaml +++ b/services/communication/mas-deployment.yaml @@ -13,6 +13,8 @@ spec: app: matrix-authentication-service template: metadata: + annotations: + checksum/config: v4-bind-1 labels: app: matrix-authentication-service spec: -- 2.47.2 From 07ae28e1b15ba805bb79ca60714b52ec7df71e26 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 16:05:32 -0300 Subject: [PATCH 130/684] communication: fix Synapse delegated auth --- services/communication/synapse-rendered.yaml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml index c824b5d..14d0024 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/communication/synapse-rendered.yaml @@ -321,6 +321,7 @@ data: ## Signing Keys ## signing_key_path: "/synapse/keys/signing.key" + macaroon_secret_key: "@@MACAROON_SECRET_KEY@@" # The trusted servers to download signing keys from. 
trusted_key_servers: @@ -342,7 +343,7 @@ data: msc4222_enabled: true max_event_delay_duration: 24h password_config: - enabled: true + enabled: false turn_uris: - "turn:turn.live.bstein.dev:3478?transport=udp" - "turn:turn.live.bstein.dev:3478?transport=tcp" @@ -371,6 +372,9 @@ data: well_known_client: "m.homeserver": "base_url": "https://matrix.live.bstein.dev" + "org.matrix.msc2965.authentication": + "issuer": "https://matrix.live.bstein.dev/" + "account": "https://matrix.live.bstein.dev/account/" "org.matrix.msc4143.rtc_foci": - type: "livekit" livekit_service_url: "https://kit.live.bstein.dev/livekit/jwt" @@ -717,6 +721,7 @@ spec: export OIDC_CLIENT_SECRET_ESCAPED=$(echo "${OIDC_CLIENT_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ export TURN_SECRET_ESCAPED=$(echo "${TURN_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ export MAS_SHARED_SECRET_ESCAPED=$(echo "${MAS_SHARED_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ + export MACAROON_SECRET_KEY_ESCAPED=$(echo "${MACAROON_SECRET_KEY:-}" | sed 's/[\\/&]/\\&/g') && \ cat /synapse/secrets/*.yaml | \ sed -e "s/@@POSTGRES_PASSWORD@@/${POSTGRES_PASSWORD:-}/" \ -e "s/@@REDIS_PASSWORD@@/${REDIS_PASSWORD:-}/" \ @@ -731,6 +736,9 @@ spec: fi; \ if [ -n "${MAS_SHARED_SECRET_ESCAPED}" ]; then \ sed -i "s/@@MAS_SHARED_SECRET@@/${MAS_SHARED_SECRET_ESCAPED}/g" /synapse/runtime-config/homeserver.yaml; \ + fi; \ + if [ -n "${MACAROON_SECRET_KEY_ESCAPED}" ]; then \ + sed -i "s/@@MACAROON_SECRET_KEY@@/${MACAROON_SECRET_KEY_ESCAPED}/g" /synapse/runtime-config/homeserver.yaml; \ fi exec python -B -m synapse.app.homeserver \ -c /synapse/runtime-config/homeserver.yaml \ @@ -761,6 +769,11 @@ spec: secretKeyRef: name: mas-secrets-runtime key: matrix_shared_secret + - name: MACAROON_SECRET_KEY + valueFrom: + secretKeyRef: + name: synapse-macaroon + key: macaroon_secret_key image: "ghcr.io/element-hq/synapse:v1.144.0" imagePullPolicy: IfNotPresent securityContext: -- 2.47.2 From 385df610bef9e8b4e4c0a6c17cfa56de4cd027e7 Mon Sep 17 00:00:00 2001 From: Brad 
Stein Date: Wed, 31 Dec 2025 16:11:33 -0300 Subject: [PATCH 131/684] communication: disable Synapse OIDC under MAS --- services/communication/synapse-rendered.yaml | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml index 14d0024..8365333 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/communication/synapse-rendered.yaml @@ -378,26 +378,6 @@ data: "org.matrix.msc4143.rtc_foci": - type: "livekit" livekit_service_url: "https://kit.live.bstein.dev/livekit/jwt" - oidc_enabled: true - oidc_providers: - - allow_existing_users: true - authorization_endpoint: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/auth - client_auth_method: client_secret_post - client_id: synapse - client_secret: "@@OIDC_CLIENT_SECRET@@" - idp_id: keycloak - idp_name: Keycloak - issuer: https://sso.bstein.dev/realms/atlas - scopes: - - openid - - profile - - email - token_endpoint: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/token - user_mapping_provider: - config: - display_name_template: '{{ user.name }}' - localpart_template: '{{ user.preferred_username }}' - userinfo_endpoint: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/userinfo matrix_authentication_service: enabled: true -- 2.47.2 From 01dcb769666e2835eec9d092f945b9af27eb8048 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 16:18:24 -0300 Subject: [PATCH 132/684] communication: fix Matrix well-known auth JSON --- services/communication/wellknown.yaml | 44 ++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/services/communication/wellknown.yaml b/services/communication/wellknown.yaml index 8627e0e..56e0c74 100644 --- a/services/communication/wellknown.yaml +++ b/services/communication/wellknown.yaml @@ -26,6 +26,30 @@ data: "m.server": "live.bstein.dev:443" } --- +apiVersion: v1 +kind: ConfigMap +metadata: + 
name: matrix-wellknown-nginx + namespace: communication +data: + default.conf: | + server { + listen 80; + server_name _; + + root /usr/share/nginx/html; + + location = /.well-known/matrix/client { + default_type application/json; + try_files $uri =404; + } + + location = /.well-known/matrix/server { + default_type application/json; + try_files $uri =404; + } + } +--- apiVersion: apps/v1 kind: Deployment metadata: @@ -50,20 +74,26 @@ spec: - containerPort: 80 volumeMounts: - name: wellknown - mountPath: /usr/share/nginx/html/.well-known/matrix/client - subPath: client.json - - name: wellknown - mountPath: /usr/share/nginx/html/.well-known/matrix/server - subPath: server.json + mountPath: /usr/share/nginx/html/.well-known/matrix + readOnly: true + - name: nginx-config + mountPath: /etc/nginx/conf.d/default.conf + subPath: default.conf volumes: - name: wellknown configMap: name: matrix-wellknown items: - key: client.json - path: client.json + path: client - key: server.json - path: server.json + path: server + - name: nginx-config + configMap: + name: matrix-wellknown-nginx + items: + - key: default.conf + path: default.conf --- apiVersion: v1 kind: Service -- 2.47.2 From 650d21087632e930c932795c41a0814d856ad147 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 16:27:09 -0300 Subject: [PATCH 133/684] communication: move LiveKit media to 7882/7881 --- services/communication/livekit-config.yaml | 4 ++-- services/communication/livekit.yaml | 17 ++++++++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/services/communication/livekit-config.yaml b/services/communication/livekit-config.yaml index 83be194..364132a 100644 --- a/services/communication/livekit-config.yaml +++ b/services/communication/livekit-config.yaml @@ -7,8 +7,8 @@ data: livekit.yaml: | port: 7880 rtc: - udp_port: 443 - tcp_port: 0 + udp_port: 7882 + tcp_port: 7881 use_external_ip: true turn_servers: - host: turn.live.bstein.dev diff --git 
a/services/communication/livekit.yaml b/services/communication/livekit.yaml index 4ec5b2d..6df4acd 100644 --- a/services/communication/livekit.yaml +++ b/services/communication/livekit.yaml @@ -65,8 +65,11 @@ spec: - containerPort: 7880 name: http protocol: TCP - - containerPort: 443 - name: udp-media + - containerPort: 7881 + name: rtc-tcp + protocol: TCP + - containerPort: 7882 + name: rtc-udp protocol: UDP volumeMounts: - name: config @@ -107,7 +110,11 @@ spec: port: 7880 targetPort: 7880 protocol: TCP - - name: udp-media - port: 443 - targetPort: 443 + - name: rtc-tcp + port: 7881 + targetPort: 7881 + protocol: TCP + - name: rtc-udp + port: 7882 + targetPort: 7882 protocol: UDP -- 2.47.2 From 2fdcfbfbafc9a06f08c04850c06d3988a88dda34 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 16:38:09 -0300 Subject: [PATCH 134/684] communication: add Synapse msc3861 admin token --- services/communication/synapse-rendered.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml index 8365333..b061997 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/communication/synapse-rendered.yaml @@ -341,6 +341,10 @@ data: msc3266_enabled: true msc4143_enabled: true msc4222_enabled: true + msc3861: + enabled: true + issuer: "https://matrix.live.bstein.dev/" + admin_token: "@@MAS_SHARED_SECRET@@" max_event_delay_duration: 24h password_config: enabled: false @@ -677,7 +681,7 @@ spec: template: metadata: annotations: - checksum/config: manual-rtc-enable-1 + checksum/config: manual-rtc-enable-2 checksum/secrets: ec9f3b254a562a0f0709461eb74a8cc91b8c1a2fb06be2594a131776c2541773 labels: app.kubernetes.io/name: matrix-synapse -- 2.47.2 From f869d0ffb955c5248d13300968c158a2884ead5b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 16:44:44 -0300 Subject: [PATCH 135/684] communication: configure Synapse msc3861 client creds --- 
services/communication/synapse-rendered.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml index b061997..6241761 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/communication/synapse-rendered.yaml @@ -344,6 +344,9 @@ data: msc3861: enabled: true issuer: "https://matrix.live.bstein.dev/" + client_id: synapse + client_secret: "@@MAS_SHARED_SECRET@@" + client_auth_method: client_secret_post admin_token: "@@MAS_SHARED_SECRET@@" max_event_delay_duration: 24h password_config: @@ -681,7 +684,7 @@ spec: template: metadata: annotations: - checksum/config: manual-rtc-enable-2 + checksum/config: manual-rtc-enable-3 checksum/secrets: ec9f3b254a562a0f0709461eb74a8cc91b8c1a2fb06be2594a131776c2541773 labels: app.kubernetes.io/name: matrix-synapse -- 2.47.2 From 214a228bf51306e81237cfd7939c3ab2914880aa Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 16:54:58 -0300 Subject: [PATCH 136/684] communication: drop msc3861 config for MAS --- services/communication/synapse-rendered.yaml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml index 6241761..ec6d473 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/communication/synapse-rendered.yaml @@ -341,13 +341,6 @@ data: msc3266_enabled: true msc4143_enabled: true msc4222_enabled: true - msc3861: - enabled: true - issuer: "https://matrix.live.bstein.dev/" - client_id: synapse - client_secret: "@@MAS_SHARED_SECRET@@" - client_auth_method: client_secret_post - admin_token: "@@MAS_SHARED_SECRET@@" max_event_delay_duration: 24h password_config: enabled: false @@ -684,7 +677,7 @@ spec: template: metadata: annotations: - checksum/config: manual-rtc-enable-3 + checksum/config: manual-rtc-enable-4 checksum/secrets: 
ec9f3b254a562a0f0709461eb74a8cc91b8c1a2fb06be2594a131776c2541773 labels: app.kubernetes.io/name: matrix-synapse -- 2.47.2 From 8b37ba32136372e98c2ea57adb0225e63f0ba2e5 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 17:04:11 -0300 Subject: [PATCH 137/684] communication: bump MAS to v1.8.0 --- services/communication/mas-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/communication/mas-deployment.yaml b/services/communication/mas-deployment.yaml index 326ea71..b327f6c 100644 --- a/services/communication/mas-deployment.yaml +++ b/services/communication/mas-deployment.yaml @@ -74,7 +74,7 @@ spec: readOnly: false containers: - name: mas - image: ghcr.io/element-hq/matrix-authentication-service:0.20.0 + image: ghcr.io/element-hq/matrix-authentication-service:v1.8.0 args: ["server","--config","/rendered/config.yaml"] ports: - name: http -- 2.47.2 From a5112d5f88b097b3e1bc5639d580d0f61b5652ee Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 17:10:45 -0300 Subject: [PATCH 138/684] communication: fix MAS image tag --- services/communication/mas-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/communication/mas-deployment.yaml b/services/communication/mas-deployment.yaml index b327f6c..9643175 100644 --- a/services/communication/mas-deployment.yaml +++ b/services/communication/mas-deployment.yaml @@ -74,7 +74,7 @@ spec: readOnly: false containers: - name: mas - image: ghcr.io/element-hq/matrix-authentication-service:v1.8.0 + image: ghcr.io/element-hq/matrix-authentication-service:1.8.0 args: ["server","--config","/rendered/config.yaml"] ports: - name: http -- 2.47.2 From be2c2ba33e64f86dd9a42d29919135c119ccfedb Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 17:21:40 -0300 Subject: [PATCH 139/684] communication: route Matrix SSO redirects to MAS --- services/communication/mas-ingress.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/services/communication/mas-ingress.yaml b/services/communication/mas-ingress.yaml index 6e4ad54..b6e4bda 100644 --- a/services/communication/mas-ingress.yaml +++ b/services/communication/mas-ingress.yaml @@ -46,7 +46,7 @@ spec: http: paths: - path: /_matrix/client/v3/login - pathType: Exact + pathType: Prefix backend: service: name: matrix-authentication-service -- 2.47.2 From bfd1c5dd499baa93277642e77172755e6302338e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 17:26:37 -0300 Subject: [PATCH 140/684] communication: switch atlasbot to MAS login --- services/communication/atlasbot-configmap.yaml | 7 ++++--- services/communication/atlasbot-deployment.yaml | 8 +++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/services/communication/atlasbot-configmap.yaml b/services/communication/atlasbot-configmap.yaml index 92f77ba..a15352d 100644 --- a/services/communication/atlasbot-configmap.yaml +++ b/services/communication/atlasbot-configmap.yaml @@ -9,6 +9,7 @@ data: from urllib import request, parse, error BASE = os.environ.get("MATRIX_BASE", "http://othrys-synapse-matrix-synapse:8008") + AUTH_BASE = os.environ.get("AUTH_BASE", "http://matrix-authentication-service:8080") USER = os.environ["BOT_USER"] PASSWORD = os.environ["BOT_PASS"] ROOM_ALIAS = "#othrys:live.bstein.dev" @@ -16,8 +17,8 @@ data: MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5-coder:7b-instruct-q4_0") API_KEY = os.environ.get("CHAT_API_KEY", "") - def req(method: str, path: str, token: str | None = None, body=None, timeout=60): - url = BASE + path + def req(method: str, path: str, token: str | None = None, body=None, timeout=60, base: str | None = None): + url = (base or BASE) + path data = None headers = {} if body is not None: @@ -36,7 +37,7 @@ data: "identifier": {"type": "m.id.user", "user": USER}, "password": PASSWORD, } - res = req("POST", "/_matrix/client/v3/login", body=payload) + res = req("POST", "/_matrix/client/v3/login", body=payload, 
base=AUTH_BASE) return res["access_token"] def resolve_alias(token: str, alias: str) -> str: diff --git a/services/communication/atlasbot-deployment.yaml b/services/communication/atlasbot-deployment.yaml index 90d245a..5834435 100644 --- a/services/communication/atlasbot-deployment.yaml +++ b/services/communication/atlasbot-deployment.yaml @@ -28,13 +28,15 @@ spec: env: - name: MATRIX_BASE value: http://othrys-synapse-matrix-synapse:8008 + - name: AUTH_BASE + value: http://matrix-authentication-service:8080 - name: BOT_USER - value: atlasbot + value: atlas - name: BOT_PASS valueFrom: secretKeyRef: - name: atlasbot-credentials-runtime - key: bot-password + name: atlasbot-mas-credentials-runtime + key: password - name: CHAT_API_KEY valueFrom: secretKeyRef: -- 2.47.2 From 26d82b3f851ea04c33f2bffb6269cdc73b40b9e1 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 17:28:44 -0300 Subject: [PATCH 141/684] communication: suspend flaky bootstrap cronjobs --- services/communication/guest-name-job.yaml | 1 + services/communication/pin-othrys-job.yaml | 1 + services/communication/seed-othrys-room.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/services/communication/guest-name-job.yaml b/services/communication/guest-name-job.yaml index 9ff2318..a576e18 100644 --- a/services/communication/guest-name-job.yaml +++ b/services/communication/guest-name-job.yaml @@ -6,6 +6,7 @@ metadata: namespace: communication spec: schedule: "*/1 * * * *" + suspend: true jobTemplate: spec: template: diff --git a/services/communication/pin-othrys-job.yaml b/services/communication/pin-othrys-job.yaml index 8d4d843..a86270c 100644 --- a/services/communication/pin-othrys-job.yaml +++ b/services/communication/pin-othrys-job.yaml @@ -6,6 +6,7 @@ metadata: namespace: communication spec: schedule: "*/30 * * * *" + suspend: true concurrencyPolicy: Forbid successfulJobsHistoryLimit: 1 failedJobsHistoryLimit: 1 diff --git a/services/communication/seed-othrys-room.yaml 
b/services/communication/seed-othrys-room.yaml index ccc2f24..6b115d3 100644 --- a/services/communication/seed-othrys-room.yaml +++ b/services/communication/seed-othrys-room.yaml @@ -6,6 +6,7 @@ metadata: namespace: communication spec: schedule: "*/10 * * * *" + suspend: true concurrencyPolicy: Forbid jobTemplate: spec: -- 2.47.2 From 73f577a49a0b20716d1951e0d39d2e4b7521bf31 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 17:33:20 -0300 Subject: [PATCH 142/684] communication: make suspended cronjobs fail-fast --- services/communication/guest-name-job.yaml | 3 ++- services/communication/pin-othrys-job.yaml | 3 ++- services/communication/seed-othrys-room.yaml | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/services/communication/guest-name-job.yaml b/services/communication/guest-name-job.yaml index a576e18..f6f1469 100644 --- a/services/communication/guest-name-job.yaml +++ b/services/communication/guest-name-job.yaml @@ -9,9 +9,10 @@ spec: suspend: true jobTemplate: spec: + backoffLimit: 0 template: spec: - restartPolicy: OnFailure + restartPolicy: Never containers: - name: rename image: python:3.11-slim diff --git a/services/communication/pin-othrys-job.yaml b/services/communication/pin-othrys-job.yaml index a86270c..a03e43d 100644 --- a/services/communication/pin-othrys-job.yaml +++ b/services/communication/pin-othrys-job.yaml @@ -12,9 +12,10 @@ spec: failedJobsHistoryLimit: 1 jobTemplate: spec: + backoffLimit: 0 template: spec: - restartPolicy: OnFailure + restartPolicy: Never containers: - name: pin image: python:3.11-slim diff --git a/services/communication/seed-othrys-room.yaml b/services/communication/seed-othrys-room.yaml index 6b115d3..09dc1e8 100644 --- a/services/communication/seed-othrys-room.yaml +++ b/services/communication/seed-othrys-room.yaml @@ -10,9 +10,10 @@ spec: concurrencyPolicy: Forbid jobTemplate: spec: + backoffLimit: 0 template: spec: - restartPolicy: OnFailure + restartPolicy: Never containers: - 
name: seed image: python:3.11-slim -- 2.47.2 From 9658e48a2d9e4defb940932955663f4376077a65 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 18:00:57 -0300 Subject: [PATCH 143/684] communication: add MAS syn2mas check job --- services/communication/kustomization.yaml | 1 + .../communication/mas-syn2mas-check-job.yaml | 118 ++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 services/communication/mas-syn2mas-check-job.yaml diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index 39d5890..266f19c 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -8,6 +8,7 @@ resources: - mas-configmap.yaml - mas-deployment.yaml - mas-ingress.yaml + - mas-syn2mas-check-job.yaml - element-rendered.yaml - livekit-config.yaml - livekit.yaml diff --git a/services/communication/mas-syn2mas-check-job.yaml b/services/communication/mas-syn2mas-check-job.yaml new file mode 100644 index 0000000..829a979 --- /dev/null +++ b/services/communication/mas-syn2mas-check-job.yaml @@ -0,0 +1,118 @@ +# services/communication/mas-syn2mas-check-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: mas-syn2mas-check + namespace: communication +spec: + backoffLimit: 0 + template: + metadata: + labels: + app: mas-syn2mas-check + spec: + enableServiceLinks: false + restartPolicy: Never + nodeSelector: + hardware: rpi5 + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 50 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5","rpi4"] + initContainers: + - name: render-mas-config + image: alpine:3.20 + command: ["/bin/sh","-c"] + args: + - | + set -euo pipefail + umask 077 + DB_PASS_ESCAPED="$(printf '%s' "${MAS_DB_PASSWORD}" | sed 's/[\\/&]/\\&/g')" + MATRIX_SECRET_ESCAPED="$(printf '%s' "${MATRIX_SHARED_SECRET}" | sed 's/[\\/&]/\\&/g')" + KC_SECRET_ESCAPED="$(printf '%s' 
"${KEYCLOAK_CLIENT_SECRET}" | sed 's/[\\/&]/\\&/g')" + + sed \ + -e "s/@@MAS_DB_PASSWORD@@/${DB_PASS_ESCAPED}/g" \ + -e "s/@@MATRIX_SHARED_SECRET@@/${MATRIX_SECRET_ESCAPED}/g" \ + -e "s/@@KEYCLOAK_CLIENT_SECRET@@/${KC_SECRET_ESCAPED}/g" \ + /etc/mas/config.yaml > /rendered/config.yaml + chmod 0644 /rendered/config.yaml + env: + - name: MAS_DB_PASSWORD + valueFrom: + secretKeyRef: + name: mas-db + key: password + - name: MATRIX_SHARED_SECRET + valueFrom: + secretKeyRef: + name: mas-secrets-runtime + key: matrix_shared_secret + - name: KEYCLOAK_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: mas-secrets-runtime + key: keycloak_client_secret + volumeMounts: + - name: mas-config + mountPath: /etc/mas/config.yaml + subPath: config.yaml + readOnly: true + - name: rendered + mountPath: /rendered + readOnly: false + containers: + - name: syn2mas-check + image: ghcr.io/element-hq/matrix-authentication-service:1.8.0 + args: + - syn2mas + - check + - --config + - /rendered/config.yaml + - --synapse-config + - /synapse-config/homeserver.yaml + - --synapse-database-uri + - postgresql: + env: + - name: PGHOST + value: postgres-service.postgres.svc.cluster.local + - name: PGPORT + value: "5432" + - name: PGDATABASE + value: synapse + - name: PGUSER + value: synapse + - name: PGPASSWORD + valueFrom: + secretKeyRef: + name: synapse-db + key: POSTGRES_PASSWORD + - name: PGSSLMODE + value: prefer + volumeMounts: + - name: rendered + mountPath: /rendered + readOnly: true + - name: synapse-config + mountPath: /synapse-config + readOnly: true + volumes: + - name: mas-config + configMap: + name: matrix-authentication-service-config + items: + - key: config.yaml + path: config.yaml + - name: rendered + emptyDir: {} + - name: synapse-config + configMap: + name: othrys-synapse-matrix-synapse + items: + - key: homeserver.yaml + path: homeserver.yaml -- 2.47.2 From 805a7215bce41290b41db37b64c01867762b57a2 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 18:06:32 -0300 
Subject: [PATCH 144/684] communication: fix syn2mas check db URI arg --- services/communication/mas-syn2mas-check-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/communication/mas-syn2mas-check-job.yaml b/services/communication/mas-syn2mas-check-job.yaml index 829a979..b5b71d5 100644 --- a/services/communication/mas-syn2mas-check-job.yaml +++ b/services/communication/mas-syn2mas-check-job.yaml @@ -77,7 +77,7 @@ spec: - --synapse-config - /synapse-config/homeserver.yaml - --synapse-database-uri - - postgresql: + - "postgresql:" env: - name: PGHOST value: postgres-service.postgres.svc.cluster.local -- 2.47.2 From 35770a8b9084339082d275a4c6cba4731d88dd5f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 18:08:30 -0300 Subject: [PATCH 145/684] communication: syn2mas check include synapse secret --- services/communication/mas-syn2mas-check-job.yaml | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/services/communication/mas-syn2mas-check-job.yaml b/services/communication/mas-syn2mas-check-job.yaml index b5b71d5..724b625 100644 --- a/services/communication/mas-syn2mas-check-job.yaml +++ b/services/communication/mas-syn2mas-check-job.yaml @@ -2,14 +2,14 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-syn2mas-check + name: mas-syn2mas-check-v2 namespace: communication spec: backoffLimit: 0 template: metadata: labels: - app: mas-syn2mas-check + app: mas-syn2mas-check-v2 spec: enableServiceLinks: false restartPolicy: Never @@ -76,6 +76,8 @@ spec: - /rendered/config.yaml - --synapse-config - /synapse-config/homeserver.yaml + - --synapse-config + - /synapse-secret/config.yaml - --synapse-database-uri - "postgresql:" env: @@ -101,6 +103,9 @@ spec: - name: synapse-config mountPath: /synapse-config readOnly: true + - name: synapse-secret + mountPath: /synapse-secret + readOnly: true volumes: - name: mas-config configMap: @@ -116,3 +121,9 @@ spec: items: - key: homeserver.yaml path: 
homeserver.yaml + - name: synapse-secret + secret: + secretName: othrys-synapse-matrix-synapse + items: + - key: config.yaml + path: config.yaml -- 2.47.2 From 06a1cde738cb368ce69dfdff5be2fd95a1a69327 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 18:12:45 -0300 Subject: [PATCH 146/684] communication: scale down MAS and Synapse for syn2mas --- services/communication/mas-deployment.yaml | 2 +- services/communication/synapse-rendered.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/communication/mas-deployment.yaml b/services/communication/mas-deployment.yaml index 9643175..92364ac 100644 --- a/services/communication/mas-deployment.yaml +++ b/services/communication/mas-deployment.yaml @@ -7,7 +7,7 @@ metadata: labels: app: matrix-authentication-service spec: - replicas: 1 + replicas: 0 selector: matchLabels: app: matrix-authentication-service diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml index ec6d473..0f21264 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/communication/synapse-rendered.yaml @@ -666,7 +666,7 @@ metadata: app.kubernetes.io/managed-by: Helm app.kubernetes.io/component: synapse spec: - replicas: 1 + replicas: 0 strategy: type: RollingUpdate selector: -- 2.47.2 From af03ac6dbc6200987815068fc86932afc86d31da Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 18:14:44 -0300 Subject: [PATCH 147/684] communication: add MAS syn2mas migrate job --- services/communication/kustomization.yaml | 1 + .../mas-syn2mas-migrate-job.yaml | 129 ++++++++++++++++++ 2 files changed, 130 insertions(+) create mode 100644 services/communication/mas-syn2mas-migrate-job.yaml diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index 266f19c..1d83466 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -9,6 +9,7 @@ resources: - 
mas-deployment.yaml - mas-ingress.yaml - mas-syn2mas-check-job.yaml + - mas-syn2mas-migrate-job.yaml - element-rendered.yaml - livekit-config.yaml - livekit.yaml diff --git a/services/communication/mas-syn2mas-migrate-job.yaml b/services/communication/mas-syn2mas-migrate-job.yaml new file mode 100644 index 0000000..70da52e --- /dev/null +++ b/services/communication/mas-syn2mas-migrate-job.yaml @@ -0,0 +1,129 @@ +# services/communication/mas-syn2mas-migrate-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: mas-syn2mas-migrate + namespace: communication +spec: + backoffLimit: 0 + template: + metadata: + labels: + app: mas-syn2mas-migrate + spec: + enableServiceLinks: false + restartPolicy: Never + nodeSelector: + hardware: rpi5 + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 50 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5","rpi4"] + initContainers: + - name: render-mas-config + image: alpine:3.20 + command: ["/bin/sh","-c"] + args: + - | + set -euo pipefail + umask 077 + DB_PASS_ESCAPED="$(printf '%s' "${MAS_DB_PASSWORD}" | sed 's/[\\/&]/\\&/g')" + MATRIX_SECRET_ESCAPED="$(printf '%s' "${MATRIX_SHARED_SECRET}" | sed 's/[\\/&]/\\&/g')" + KC_SECRET_ESCAPED="$(printf '%s' "${KEYCLOAK_CLIENT_SECRET}" | sed 's/[\\/&]/\\&/g')" + + sed \ + -e "s/@@MAS_DB_PASSWORD@@/${DB_PASS_ESCAPED}/g" \ + -e "s/@@MATRIX_SHARED_SECRET@@/${MATRIX_SECRET_ESCAPED}/g" \ + -e "s/@@KEYCLOAK_CLIENT_SECRET@@/${KC_SECRET_ESCAPED}/g" \ + /etc/mas/config.yaml > /rendered/config.yaml + chmod 0644 /rendered/config.yaml + env: + - name: MAS_DB_PASSWORD + valueFrom: + secretKeyRef: + name: mas-db + key: password + - name: MATRIX_SHARED_SECRET + valueFrom: + secretKeyRef: + name: mas-secrets-runtime + key: matrix_shared_secret + - name: KEYCLOAK_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: mas-secrets-runtime + key: keycloak_client_secret + volumeMounts: + - name: mas-config + mountPath: /etc/mas/config.yaml 
+ subPath: config.yaml + readOnly: true + - name: rendered + mountPath: /rendered + readOnly: false + containers: + - name: syn2mas-migrate + image: ghcr.io/element-hq/matrix-authentication-service:1.8.0 + args: + - syn2mas + - migrate + - --config + - /rendered/config.yaml + - --synapse-config + - /synapse-config/homeserver.yaml + - --synapse-config + - /synapse-secret/config.yaml + - --synapse-database-uri + - "postgresql:" + env: + - name: PGHOST + value: postgres-service.postgres.svc.cluster.local + - name: PGPORT + value: "5432" + - name: PGDATABASE + value: synapse + - name: PGUSER + value: synapse + - name: PGPASSWORD + valueFrom: + secretKeyRef: + name: synapse-db + key: POSTGRES_PASSWORD + - name: PGSSLMODE + value: prefer + volumeMounts: + - name: rendered + mountPath: /rendered + readOnly: true + - name: synapse-config + mountPath: /synapse-config + readOnly: true + - name: synapse-secret + mountPath: /synapse-secret + readOnly: true + volumes: + - name: mas-config + configMap: + name: matrix-authentication-service-config + items: + - key: config.yaml + path: config.yaml + - name: rendered + emptyDir: {} + - name: synapse-config + configMap: + name: othrys-synapse-matrix-synapse + items: + - key: homeserver.yaml + path: homeserver.yaml + - name: synapse-secret + secret: + secretName: othrys-synapse-matrix-synapse + items: + - key: config.yaml + path: config.yaml -- 2.47.2 From 101fcc18a346ac04e3182e09c0a9108517ef7ab4 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 18:16:53 -0300 Subject: [PATCH 148/684] communication: syn2mas migrate mount MAS secrets --- .../mas-syn2mas-migrate-job.yaml | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/services/communication/mas-syn2mas-migrate-job.yaml b/services/communication/mas-syn2mas-migrate-job.yaml index 70da52e..0f2f18e 100644 --- a/services/communication/mas-syn2mas-migrate-job.yaml +++ b/services/communication/mas-syn2mas-migrate-job.yaml @@ -2,14 +2,14 @@ 
apiVersion: batch/v1 kind: Job metadata: - name: mas-syn2mas-migrate + name: mas-syn2mas-migrate-v2 namespace: communication spec: backoffLimit: 0 template: metadata: labels: - app: mas-syn2mas-migrate + app: mas-syn2mas-migrate-v2 spec: enableServiceLinks: false restartPolicy: Never @@ -100,6 +100,12 @@ spec: - name: rendered mountPath: /rendered readOnly: true + - name: mas-secrets + mountPath: /etc/mas/secrets + readOnly: true + - name: mas-keys + mountPath: /etc/mas/keys + readOnly: true - name: synapse-config mountPath: /synapse-config readOnly: true @@ -115,6 +121,22 @@ spec: path: config.yaml - name: rendered emptyDir: {} + - name: mas-secrets + secret: + secretName: mas-secrets-runtime + items: + - key: encryption + path: encryption + - key: matrix_shared_secret + path: matrix_shared_secret + - key: keycloak_client_secret + path: keycloak_client_secret + - name: mas-keys + secret: + secretName: mas-secrets-runtime + items: + - key: rsa_key + path: rsa_key - name: synapse-config configMap: name: othrys-synapse-matrix-synapse -- 2.47.2 From bbd3815f25700cfcea289a84703ee981353d8626 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 18:22:22 -0300 Subject: [PATCH 149/684] communication: rerun syn2mas migrate job --- services/communication/mas-syn2mas-migrate-job.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/communication/mas-syn2mas-migrate-job.yaml b/services/communication/mas-syn2mas-migrate-job.yaml index 0f2f18e..15b7dcd 100644 --- a/services/communication/mas-syn2mas-migrate-job.yaml +++ b/services/communication/mas-syn2mas-migrate-job.yaml @@ -2,14 +2,14 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-syn2mas-migrate-v2 + name: mas-syn2mas-migrate-v3 namespace: communication spec: backoffLimit: 0 template: metadata: labels: - app: mas-syn2mas-migrate-v2 + app: mas-syn2mas-migrate-v3 spec: enableServiceLinks: false restartPolicy: Never -- 2.47.2 From c4931c381c78163db7eed738bf97f30d6426c3c3 Mon 
Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 18:27:04 -0300 Subject: [PATCH 150/684] communication: prep syn2mas migrate (bcrypt, disable guests) --- services/communication/mas-configmap.yaml | 3 +++ services/communication/mas-syn2mas-migrate-job.yaml | 4 ++-- services/communication/synapse-rendered.yaml | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/services/communication/mas-configmap.yaml b/services/communication/mas-configmap.yaml index d2553a3..fab43c2 100644 --- a/services/communication/mas-configmap.yaml +++ b/services/communication/mas-configmap.yaml @@ -37,6 +37,9 @@ data: passwords: enabled: true + schemes: + - version: 1 + algorithm: bcrypt matrix: kind: synapse diff --git a/services/communication/mas-syn2mas-migrate-job.yaml b/services/communication/mas-syn2mas-migrate-job.yaml index 15b7dcd..bc458a5 100644 --- a/services/communication/mas-syn2mas-migrate-job.yaml +++ b/services/communication/mas-syn2mas-migrate-job.yaml @@ -2,14 +2,14 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-syn2mas-migrate-v3 + name: mas-syn2mas-migrate-v4 namespace: communication spec: backoffLimit: 0 template: metadata: labels: - app: mas-syn2mas-migrate-v3 + app: mas-syn2mas-migrate-v4 spec: enableServiceLinks: false restartPolicy: Never diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml index 0f21264..0b6b706 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/communication/synapse-rendered.yaml @@ -331,7 +331,7 @@ data: ## Extra config ## - allow_guest_access: true + allow_guest_access: false allow_public_rooms_without_auth: true auto_join_rooms: - "#othrys:live.bstein.dev" -- 2.47.2 From 6c1ff72af6dce9e550b1d84cd05dedf9d45d039a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 18:29:25 -0300 Subject: [PATCH 151/684] communication: scale MAS/Synapse back up --- services/communication/mas-deployment.yaml | 2 +- 
services/communication/synapse-rendered.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/services/communication/mas-deployment.yaml b/services/communication/mas-deployment.yaml index 92364ac..9643175 100644 --- a/services/communication/mas-deployment.yaml +++ b/services/communication/mas-deployment.yaml @@ -7,7 +7,7 @@ metadata: labels: app: matrix-authentication-service spec: - replicas: 0 + replicas: 1 selector: matchLabels: app: matrix-authentication-service diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml index 0b6b706..ec6d473 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/communication/synapse-rendered.yaml @@ -331,7 +331,7 @@ data: ## Extra config ## - allow_guest_access: false + allow_guest_access: true allow_public_rooms_without_auth: true auto_join_rooms: - "#othrys:live.bstein.dev" @@ -666,7 +666,7 @@ metadata: app.kubernetes.io/managed-by: Helm app.kubernetes.io/component: synapse spec: - replicas: 0 + replicas: 1 strategy: type: RollingUpdate selector: -- 2.47.2 From a260d55826d91cdbc8c410137c8d6ed8513f13fe Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 18:32:26 -0300 Subject: [PATCH 152/684] communication: remove one-shot syn2mas jobs --- services/communication/kustomization.yaml | 2 - .../communication/mas-syn2mas-check-job.yaml | 129 --------------- .../mas-syn2mas-migrate-job.yaml | 151 ------------------ 3 files changed, 282 deletions(-) delete mode 100644 services/communication/mas-syn2mas-check-job.yaml delete mode 100644 services/communication/mas-syn2mas-migrate-job.yaml diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index 1d83466..39d5890 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -8,8 +8,6 @@ resources: - mas-configmap.yaml - mas-deployment.yaml - mas-ingress.yaml - - mas-syn2mas-check-job.yaml - - 
mas-syn2mas-migrate-job.yaml - element-rendered.yaml - livekit-config.yaml - livekit.yaml diff --git a/services/communication/mas-syn2mas-check-job.yaml b/services/communication/mas-syn2mas-check-job.yaml deleted file mode 100644 index 724b625..0000000 --- a/services/communication/mas-syn2mas-check-job.yaml +++ /dev/null @@ -1,129 +0,0 @@ -# services/communication/mas-syn2mas-check-job.yaml -apiVersion: batch/v1 -kind: Job -metadata: - name: mas-syn2mas-check-v2 - namespace: communication -spec: - backoffLimit: 0 - template: - metadata: - labels: - app: mas-syn2mas-check-v2 - spec: - enableServiceLinks: false - restartPolicy: Never - nodeSelector: - hardware: rpi5 - affinity: - nodeAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 50 - preference: - matchExpressions: - - key: hardware - operator: In - values: ["rpi5","rpi4"] - initContainers: - - name: render-mas-config - image: alpine:3.20 - command: ["/bin/sh","-c"] - args: - - | - set -euo pipefail - umask 077 - DB_PASS_ESCAPED="$(printf '%s' "${MAS_DB_PASSWORD}" | sed 's/[\\/&]/\\&/g')" - MATRIX_SECRET_ESCAPED="$(printf '%s' "${MATRIX_SHARED_SECRET}" | sed 's/[\\/&]/\\&/g')" - KC_SECRET_ESCAPED="$(printf '%s' "${KEYCLOAK_CLIENT_SECRET}" | sed 's/[\\/&]/\\&/g')" - - sed \ - -e "s/@@MAS_DB_PASSWORD@@/${DB_PASS_ESCAPED}/g" \ - -e "s/@@MATRIX_SHARED_SECRET@@/${MATRIX_SECRET_ESCAPED}/g" \ - -e "s/@@KEYCLOAK_CLIENT_SECRET@@/${KC_SECRET_ESCAPED}/g" \ - /etc/mas/config.yaml > /rendered/config.yaml - chmod 0644 /rendered/config.yaml - env: - - name: MAS_DB_PASSWORD - valueFrom: - secretKeyRef: - name: mas-db - key: password - - name: MATRIX_SHARED_SECRET - valueFrom: - secretKeyRef: - name: mas-secrets-runtime - key: matrix_shared_secret - - name: KEYCLOAK_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: mas-secrets-runtime - key: keycloak_client_secret - volumeMounts: - - name: mas-config - mountPath: /etc/mas/config.yaml - subPath: config.yaml - readOnly: true - - name: rendered - mountPath: 
/rendered - readOnly: false - containers: - - name: syn2mas-check - image: ghcr.io/element-hq/matrix-authentication-service:1.8.0 - args: - - syn2mas - - check - - --config - - /rendered/config.yaml - - --synapse-config - - /synapse-config/homeserver.yaml - - --synapse-config - - /synapse-secret/config.yaml - - --synapse-database-uri - - "postgresql:" - env: - - name: PGHOST - value: postgres-service.postgres.svc.cluster.local - - name: PGPORT - value: "5432" - - name: PGDATABASE - value: synapse - - name: PGUSER - value: synapse - - name: PGPASSWORD - valueFrom: - secretKeyRef: - name: synapse-db - key: POSTGRES_PASSWORD - - name: PGSSLMODE - value: prefer - volumeMounts: - - name: rendered - mountPath: /rendered - readOnly: true - - name: synapse-config - mountPath: /synapse-config - readOnly: true - - name: synapse-secret - mountPath: /synapse-secret - readOnly: true - volumes: - - name: mas-config - configMap: - name: matrix-authentication-service-config - items: - - key: config.yaml - path: config.yaml - - name: rendered - emptyDir: {} - - name: synapse-config - configMap: - name: othrys-synapse-matrix-synapse - items: - - key: homeserver.yaml - path: homeserver.yaml - - name: synapse-secret - secret: - secretName: othrys-synapse-matrix-synapse - items: - - key: config.yaml - path: config.yaml diff --git a/services/communication/mas-syn2mas-migrate-job.yaml b/services/communication/mas-syn2mas-migrate-job.yaml deleted file mode 100644 index bc458a5..0000000 --- a/services/communication/mas-syn2mas-migrate-job.yaml +++ /dev/null @@ -1,151 +0,0 @@ -# services/communication/mas-syn2mas-migrate-job.yaml -apiVersion: batch/v1 -kind: Job -metadata: - name: mas-syn2mas-migrate-v4 - namespace: communication -spec: - backoffLimit: 0 - template: - metadata: - labels: - app: mas-syn2mas-migrate-v4 - spec: - enableServiceLinks: false - restartPolicy: Never - nodeSelector: - hardware: rpi5 - affinity: - nodeAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - 
weight: 50 - preference: - matchExpressions: - - key: hardware - operator: In - values: ["rpi5","rpi4"] - initContainers: - - name: render-mas-config - image: alpine:3.20 - command: ["/bin/sh","-c"] - args: - - | - set -euo pipefail - umask 077 - DB_PASS_ESCAPED="$(printf '%s' "${MAS_DB_PASSWORD}" | sed 's/[\\/&]/\\&/g')" - MATRIX_SECRET_ESCAPED="$(printf '%s' "${MATRIX_SHARED_SECRET}" | sed 's/[\\/&]/\\&/g')" - KC_SECRET_ESCAPED="$(printf '%s' "${KEYCLOAK_CLIENT_SECRET}" | sed 's/[\\/&]/\\&/g')" - - sed \ - -e "s/@@MAS_DB_PASSWORD@@/${DB_PASS_ESCAPED}/g" \ - -e "s/@@MATRIX_SHARED_SECRET@@/${MATRIX_SECRET_ESCAPED}/g" \ - -e "s/@@KEYCLOAK_CLIENT_SECRET@@/${KC_SECRET_ESCAPED}/g" \ - /etc/mas/config.yaml > /rendered/config.yaml - chmod 0644 /rendered/config.yaml - env: - - name: MAS_DB_PASSWORD - valueFrom: - secretKeyRef: - name: mas-db - key: password - - name: MATRIX_SHARED_SECRET - valueFrom: - secretKeyRef: - name: mas-secrets-runtime - key: matrix_shared_secret - - name: KEYCLOAK_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: mas-secrets-runtime - key: keycloak_client_secret - volumeMounts: - - name: mas-config - mountPath: /etc/mas/config.yaml - subPath: config.yaml - readOnly: true - - name: rendered - mountPath: /rendered - readOnly: false - containers: - - name: syn2mas-migrate - image: ghcr.io/element-hq/matrix-authentication-service:1.8.0 - args: - - syn2mas - - migrate - - --config - - /rendered/config.yaml - - --synapse-config - - /synapse-config/homeserver.yaml - - --synapse-config - - /synapse-secret/config.yaml - - --synapse-database-uri - - "postgresql:" - env: - - name: PGHOST - value: postgres-service.postgres.svc.cluster.local - - name: PGPORT - value: "5432" - - name: PGDATABASE - value: synapse - - name: PGUSER - value: synapse - - name: PGPASSWORD - valueFrom: - secretKeyRef: - name: synapse-db - key: POSTGRES_PASSWORD - - name: PGSSLMODE - value: prefer - volumeMounts: - - name: rendered - mountPath: /rendered - readOnly: true - - name: 
mas-secrets - mountPath: /etc/mas/secrets - readOnly: true - - name: mas-keys - mountPath: /etc/mas/keys - readOnly: true - - name: synapse-config - mountPath: /synapse-config - readOnly: true - - name: synapse-secret - mountPath: /synapse-secret - readOnly: true - volumes: - - name: mas-config - configMap: - name: matrix-authentication-service-config - items: - - key: config.yaml - path: config.yaml - - name: rendered - emptyDir: {} - - name: mas-secrets - secret: - secretName: mas-secrets-runtime - items: - - key: encryption - path: encryption - - key: matrix_shared_secret - path: matrix_shared_secret - - key: keycloak_client_secret - path: keycloak_client_secret - - name: mas-keys - secret: - secretName: mas-secrets-runtime - items: - - key: rsa_key - path: rsa_key - - name: synapse-config - configMap: - name: othrys-synapse-matrix-synapse - items: - - key: homeserver.yaml - path: homeserver.yaml - - name: synapse-secret - secret: - secretName: othrys-synapse-matrix-synapse - items: - - key: config.yaml - path: config.yaml -- 2.47.2 From b7b1ffde6cd4ab31995e5e7b99ddd7c82638f17d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 18:44:54 -0300 Subject: [PATCH 153/684] communication: fix LiveKit udp_port mux syntax --- services/communication/livekit-config.yaml | 2 +- services/communication/livekit.yaml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/services/communication/livekit-config.yaml b/services/communication/livekit-config.yaml index 364132a..68c33ec 100644 --- a/services/communication/livekit-config.yaml +++ b/services/communication/livekit-config.yaml @@ -7,7 +7,7 @@ data: livekit.yaml: | port: 7880 rtc: - udp_port: 7882 + udp_port: "7882-7882" tcp_port: 7881 use_external_ip: true turn_servers: diff --git a/services/communication/livekit.yaml b/services/communication/livekit.yaml index 6df4acd..eea9d94 100644 --- a/services/communication/livekit.yaml +++ b/services/communication/livekit.yaml @@ -12,6 +12,8 @@ spec: app: 
livekit template: metadata: + annotations: + checksum/config: livekit-config-v2 labels: app: livekit spec: -- 2.47.2 From db01ab02ef383f15739f49b8a28d8d2d28333fdb Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 18:48:18 -0300 Subject: [PATCH 154/684] communication: fix LiveKit udp_port range and expose 7883 --- services/communication/livekit-config.yaml | 2 +- services/communication/livekit.yaml | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/services/communication/livekit-config.yaml b/services/communication/livekit-config.yaml index 68c33ec..7e3d981 100644 --- a/services/communication/livekit-config.yaml +++ b/services/communication/livekit-config.yaml @@ -7,7 +7,7 @@ data: livekit.yaml: | port: 7880 rtc: - udp_port: "7882-7882" + udp_port: "7882-7883" tcp_port: 7881 use_external_ip: true turn_servers: diff --git a/services/communication/livekit.yaml b/services/communication/livekit.yaml index eea9d94..a8e782c 100644 --- a/services/communication/livekit.yaml +++ b/services/communication/livekit.yaml @@ -13,7 +13,7 @@ spec: template: metadata: annotations: - checksum/config: livekit-config-v2 + checksum/config: livekit-config-v3 labels: app: livekit spec: @@ -73,6 +73,9 @@ spec: - containerPort: 7882 name: rtc-udp protocol: UDP + - containerPort: 7883 + name: rtc-udp2 + protocol: UDP volumeMounts: - name: config mountPath: /etc/livekit @@ -116,7 +119,11 @@ spec: port: 7881 targetPort: 7881 protocol: TCP - - name: rtc-udp + - name: rtc-udp-7882 port: 7882 targetPort: 7882 protocol: UDP + - name: rtc-udp-7883 + port: 7883 + targetPort: 7883 + protocol: UDP -- 2.47.2 From 9c6001126146b9a5c14ef2befcd18c1572e17e5b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 19:13:08 -0300 Subject: [PATCH 155/684] communication: serve matrix well-known with trailing slash --- services/communication/wellknown.yaml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git 
a/services/communication/wellknown.yaml b/services/communication/wellknown.yaml index 56e0c74..7ed7610 100644 --- a/services/communication/wellknown.yaml +++ b/services/communication/wellknown.yaml @@ -39,14 +39,17 @@ data: root /usr/share/nginx/html; - location = /.well-known/matrix/client { + # Some clients request a trailing slash; serve both. + location ~ ^/\\.well-known/matrix/client/?$ { default_type application/json; - try_files $uri =404; + add_header Access-Control-Allow-Origin "*" always; + try_files /.well-known/matrix/client =404; } - location = /.well-known/matrix/server { + location ~ ^/\\.well-known/matrix/server/?$ { default_type application/json; - try_files $uri =404; + add_header Access-Control-Allow-Origin "*" always; + try_files /.well-known/matrix/server =404; } } --- -- 2.47.2 From f1ca9d919de2d64220fed205bcc1f819822e1ec3 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 19:15:01 -0300 Subject: [PATCH 156/684] communication: fix well-known nginx regex escaping --- services/communication/wellknown.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/communication/wellknown.yaml b/services/communication/wellknown.yaml index 7ed7610..803a75f 100644 --- a/services/communication/wellknown.yaml +++ b/services/communication/wellknown.yaml @@ -40,13 +40,13 @@ data: root /usr/share/nginx/html; # Some clients request a trailing slash; serve both. 
- location ~ ^/\\.well-known/matrix/client/?$ { + location ~ ^/\.well-known/matrix/client/?$ { default_type application/json; add_header Access-Control-Allow-Origin "*" always; try_files /.well-known/matrix/client =404; } - location ~ ^/\\.well-known/matrix/server/?$ { + location ~ ^/\.well-known/matrix/server/?$ { default_type application/json; add_header Access-Control-Allow-Origin "*" always; try_files /.well-known/matrix/server =404; -- 2.47.2 From af05370ad7dabd64b3982776529ab5e003fd8376 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 19:17:31 -0300 Subject: [PATCH 157/684] communication: fix well-known trailing slash and reload config --- services/communication/wellknown.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/communication/wellknown.yaml b/services/communication/wellknown.yaml index 803a75f..d0bf785 100644 --- a/services/communication/wellknown.yaml +++ b/services/communication/wellknown.yaml @@ -80,8 +80,8 @@ spec: mountPath: /usr/share/nginx/html/.well-known/matrix readOnly: true - name: nginx-config - mountPath: /etc/nginx/conf.d/default.conf - subPath: default.conf + mountPath: /etc/nginx/conf.d + readOnly: true volumes: - name: wellknown configMap: -- 2.47.2 From 50c23b592a71087b51166d9373926aa8fce0def8 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 19:19:44 -0300 Subject: [PATCH 158/684] communication: serve matrix well-known on matrix.live --- services/communication/wellknown.yaml | 33 +++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/services/communication/wellknown.yaml b/services/communication/wellknown.yaml index d0bf785..d09ce27 100644 --- a/services/communication/wellknown.yaml +++ b/services/communication/wellknown.yaml @@ -144,3 +144,36 @@ spec: name: matrix-wellknown port: number: 80 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: matrix-wellknown-matrix-live + namespace: communication + annotations: + 
kubernetes.io/ingress.class: traefik + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" +spec: + tls: + - hosts: + - matrix.live.bstein.dev + secretName: matrix-live-tls + rules: + - host: matrix.live.bstein.dev + http: + paths: + - path: /.well-known/matrix/client + pathType: Prefix + backend: + service: + name: matrix-wellknown + port: + number: 80 + - path: /.well-known/matrix/server + pathType: Prefix + backend: + service: + name: matrix-wellknown + port: + number: 80 -- 2.47.2 From c32d734a69b069f8c5153b2d1c94da1d7d8ff427 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 19:59:26 -0300 Subject: [PATCH 159/684] communication: set LB traffic policy local --- services/communication/coturn.yaml | 1 + services/communication/livekit.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/services/communication/coturn.yaml b/services/communication/coturn.yaml index 9b06a38..9051082 100644 --- a/services/communication/coturn.yaml +++ b/services/communication/coturn.yaml @@ -98,6 +98,7 @@ spec: type: LoadBalancer loadBalancerClass: metallb loadBalancerIP: 192.168.22.5 + externalTrafficPolicy: Local selector: app: coturn ports: diff --git a/services/communication/livekit.yaml b/services/communication/livekit.yaml index a8e782c..df4ecb1 100644 --- a/services/communication/livekit.yaml +++ b/services/communication/livekit.yaml @@ -108,6 +108,7 @@ spec: type: LoadBalancer loadBalancerClass: metallb loadBalancerIP: 192.168.22.6 + externalTrafficPolicy: Local selector: app: livekit ports: -- 2.47.2 From 04f46ed491ad647e46dbf90b5ea365d4c1402195 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 20:55:46 -0300 Subject: [PATCH 160/684] communication: use Cluster LB traffic policy --- services/communication/coturn.yaml | 2 +- services/communication/livekit.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/communication/coturn.yaml 
b/services/communication/coturn.yaml index 9051082..ede5bb4 100644 --- a/services/communication/coturn.yaml +++ b/services/communication/coturn.yaml @@ -98,7 +98,7 @@ spec: type: LoadBalancer loadBalancerClass: metallb loadBalancerIP: 192.168.22.5 - externalTrafficPolicy: Local + externalTrafficPolicy: Cluster selector: app: coturn ports: diff --git a/services/communication/livekit.yaml b/services/communication/livekit.yaml index df4ecb1..ad7f441 100644 --- a/services/communication/livekit.yaml +++ b/services/communication/livekit.yaml @@ -108,7 +108,7 @@ spec: type: LoadBalancer loadBalancerClass: metallb loadBalancerIP: 192.168.22.6 - externalTrafficPolicy: Local + externalTrafficPolicy: Cluster selector: app: livekit ports: -- 2.47.2 From b4ac308af88b0eb416baeb580a4abc59ceb18886 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 21:00:18 -0300 Subject: [PATCH 161/684] metallb: schedule speaker on rpi4+rpi5 --- .../metallb/patches/node-placement.yaml | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/infrastructure/metallb/patches/node-placement.yaml b/infrastructure/metallb/patches/node-placement.yaml index e32337e..f520c37 100644 --- a/infrastructure/metallb/patches/node-placement.yaml +++ b/infrastructure/metallb/patches/node-placement.yaml @@ -15,8 +15,16 @@ spec: - --webhook-mode=enabled - --tls-min-version=VersionTLS12 - --lb-class=metallb - nodeSelector: - hardware: rpi5 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi4 + - rpi5 --- apiVersion: apps/v1 kind: DaemonSet @@ -26,5 +34,13 @@ metadata: spec: template: spec: - nodeSelector: - hardware: rpi5 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi4 + - rpi5 -- 2.47.2 From 8e702f14db894ffd7930a682438cdf8c190d448c 
Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 21:45:12 -0300 Subject: [PATCH 162/684] metallb: run speaker on all nodes --- .../metallb/patches/node-placement.yaml | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/infrastructure/metallb/patches/node-placement.yaml b/infrastructure/metallb/patches/node-placement.yaml index f520c37..c42ae99 100644 --- a/infrastructure/metallb/patches/node-placement.yaml +++ b/infrastructure/metallb/patches/node-placement.yaml @@ -25,22 +25,3 @@ spec: values: - rpi4 - rpi5 ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: metallb-speaker - namespace: metallb-system -spec: - template: - spec: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: hardware - operator: In - values: - - rpi4 - - rpi5 -- 2.47.2 From 3a473ff48281fbca22e72a94b7a20d2a7bc2e20d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 22:00:09 -0300 Subject: [PATCH 163/684] metallb: enable speaker debug logs --- infrastructure/metallb/kustomization.yaml | 1 + .../metallb/patches/speaker-loglevel.yaml | 14 ++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 infrastructure/metallb/patches/speaker-loglevel.yaml diff --git a/infrastructure/metallb/kustomization.yaml b/infrastructure/metallb/kustomization.yaml index f6df7e6..1a1452c 100644 --- a/infrastructure/metallb/kustomization.yaml +++ b/infrastructure/metallb/kustomization.yaml @@ -7,3 +7,4 @@ resources: - ippool.yaml patchesStrategicMerge: - patches/node-placement.yaml + - patches/speaker-loglevel.yaml diff --git a/infrastructure/metallb/patches/speaker-loglevel.yaml b/infrastructure/metallb/patches/speaker-loglevel.yaml new file mode 100644 index 0000000..26f21f9 --- /dev/null +++ b/infrastructure/metallb/patches/speaker-loglevel.yaml @@ -0,0 +1,14 @@ +# infrastructure/metallb/patches/speaker-loglevel.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: 
metallb-speaker + namespace: metallb-system +spec: + template: + spec: + containers: + - name: speaker + args: + - --port=7472 + - --log-level=debug -- 2.47.2 From b0bd7c97a5fe0bf5eb2496727064aa882959bb7b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 22:15:08 -0300 Subject: [PATCH 164/684] metallb: set speaker lb-class --- infrastructure/metallb/patches/speaker-loglevel.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/infrastructure/metallb/patches/speaker-loglevel.yaml b/infrastructure/metallb/patches/speaker-loglevel.yaml index 26f21f9..7365192 100644 --- a/infrastructure/metallb/patches/speaker-loglevel.yaml +++ b/infrastructure/metallb/patches/speaker-loglevel.yaml @@ -12,3 +12,4 @@ spec: args: - --port=7472 - --log-level=debug + - --lb-class=metallb -- 2.47.2 From 3948602c577aeefaf1dbade08586535d2de75c4a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 31 Dec 2025 22:35:16 -0300 Subject: [PATCH 165/684] metallb: restore speaker log level info --- infrastructure/metallb/patches/speaker-loglevel.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/metallb/patches/speaker-loglevel.yaml b/infrastructure/metallb/patches/speaker-loglevel.yaml index 7365192..61b8942 100644 --- a/infrastructure/metallb/patches/speaker-loglevel.yaml +++ b/infrastructure/metallb/patches/speaker-loglevel.yaml @@ -11,5 +11,5 @@ spec: - name: speaker args: - --port=7472 - - --log-level=debug + - --log-level=info - --lb-class=metallb -- 2.47.2 From 10f7f3a8c666484c5c3e2cb9b4cbf347ed151d38 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 03:54:19 -0300 Subject: [PATCH 166/684] communication: advertise TURN over tcp --- services/communication/livekit-config.yaml | 3 +++ services/communication/livekit.yaml | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/services/communication/livekit-config.yaml b/services/communication/livekit-config.yaml index 7e3d981..107271c 100644 --- 
a/services/communication/livekit-config.yaml +++ b/services/communication/livekit-config.yaml @@ -14,6 +14,9 @@ data: - host: turn.live.bstein.dev port: 5349 protocol: tls + - host: turn.live.bstein.dev + port: 3478 + protocol: tcp - host: turn.live.bstein.dev port: 3478 protocol: udp diff --git a/services/communication/livekit.yaml b/services/communication/livekit.yaml index ad7f441..c821cb4 100644 --- a/services/communication/livekit.yaml +++ b/services/communication/livekit.yaml @@ -13,7 +13,7 @@ spec: template: metadata: annotations: - checksum/config: livekit-config-v3 + checksum/config: livekit-config-v4 labels: app: livekit spec: @@ -63,6 +63,13 @@ spec: secretKeyRef: name: turn-shared-secret key: TURN_STATIC_AUTH_SECRET + - name: LIVEKIT_RTC__TURN_SERVERS_2__USERNAME + value: livekit + - name: LIVEKIT_RTC__TURN_SERVERS_2__CREDENTIAL + valueFrom: + secretKeyRef: + name: turn-shared-secret + key: TURN_STATIC_AUTH_SECRET ports: - containerPort: 7880 name: http -- 2.47.2 From 5c59640bf502ddd947fd6e9e9e03576e8b995171 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 04:19:12 -0300 Subject: [PATCH 167/684] communication: set LB externalTrafficPolicy Local --- services/communication/coturn.yaml | 2 +- services/communication/livekit.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/communication/coturn.yaml b/services/communication/coturn.yaml index ede5bb4..9051082 100644 --- a/services/communication/coturn.yaml +++ b/services/communication/coturn.yaml @@ -98,7 +98,7 @@ spec: type: LoadBalancer loadBalancerClass: metallb loadBalancerIP: 192.168.22.5 - externalTrafficPolicy: Cluster + externalTrafficPolicy: Local selector: app: coturn ports: diff --git a/services/communication/livekit.yaml b/services/communication/livekit.yaml index c821cb4..83c67e0 100644 --- a/services/communication/livekit.yaml +++ b/services/communication/livekit.yaml @@ -115,7 +115,7 @@ spec: type: LoadBalancer loadBalancerClass: metallb 
loadBalancerIP: 192.168.22.6 - externalTrafficPolicy: Cluster + externalTrafficPolicy: Local selector: app: livekit ports: -- 2.47.2 From beb975182a3d7bb9aaa4691178cb3ece09df16d9 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 11:31:39 -0300 Subject: [PATCH 168/684] communication: render LiveKit TURN creds --- services/communication/livekit-config.yaml | 6 +++ services/communication/livekit.yaml | 52 ++++++++++++---------- 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/services/communication/livekit-config.yaml b/services/communication/livekit-config.yaml index 107271c..c39c783 100644 --- a/services/communication/livekit-config.yaml +++ b/services/communication/livekit-config.yaml @@ -14,11 +14,17 @@ data: - host: turn.live.bstein.dev port: 5349 protocol: tls + username: livekit + credential: "@@TURN_PASSWORD@@" - host: turn.live.bstein.dev port: 3478 protocol: tcp + username: livekit + credential: "@@TURN_PASSWORD@@" - host: turn.live.bstein.dev port: 3478 protocol: udp + username: livekit + credential: "@@TURN_PASSWORD@@" room: auto_create: false diff --git a/services/communication/livekit.yaml b/services/communication/livekit.yaml index 83c67e0..6de11e4 100644 --- a/services/communication/livekit.yaml +++ b/services/communication/livekit.yaml @@ -13,7 +13,7 @@ spec: template: metadata: annotations: - checksum/config: livekit-config-v4 + checksum/config: livekit-config-v5 labels: app: livekit spec: @@ -29,6 +29,30 @@ spec: - key: hardware operator: In values: ["rpi5","rpi4"] + initContainers: + - name: render-config + image: alpine:3.20 + command: ["/bin/sh","-c"] + args: + - | + set -euo pipefail + umask 077 + TURN_PASSWORD_ESCAPED="$(printf '%s' "${TURN_PASSWORD}" | sed 's/[\\/&]/\\&/g')" + sed "s/@@TURN_PASSWORD@@/${TURN_PASSWORD_ESCAPED}/g" /etc/livekit-template/livekit.yaml > /etc/livekit/livekit.yaml + chmod 0644 /etc/livekit/livekit.yaml + env: + - name: TURN_PASSWORD + valueFrom: + secretKeyRef: + name: turn-shared-secret 
+ key: TURN_STATIC_AUTH_SECRET + volumeMounts: + - name: config-template + mountPath: /etc/livekit-template + readOnly: true + - name: config + mountPath: /etc/livekit + readOnly: false containers: - name: livekit image: livekit/livekit-server:v1.9.0 @@ -49,27 +73,6 @@ spec: secretKeyRef: name: livekit-api key: primary - - name: LIVEKIT_RTC__TURN_SERVERS_0__USERNAME - value: livekit - - name: LIVEKIT_RTC__TURN_SERVERS_0__CREDENTIAL - valueFrom: - secretKeyRef: - name: turn-shared-secret - key: TURN_STATIC_AUTH_SECRET - - name: LIVEKIT_RTC__TURN_SERVERS_1__USERNAME - value: livekit - - name: LIVEKIT_RTC__TURN_SERVERS_1__CREDENTIAL - valueFrom: - secretKeyRef: - name: turn-shared-secret - key: TURN_STATIC_AUTH_SECRET - - name: LIVEKIT_RTC__TURN_SERVERS_2__USERNAME - value: livekit - - name: LIVEKIT_RTC__TURN_SERVERS_2__CREDENTIAL - valueFrom: - secretKeyRef: - name: turn-shared-secret - key: TURN_STATIC_AUTH_SECRET ports: - containerPort: 7880 name: http @@ -86,6 +89,7 @@ spec: volumeMounts: - name: config mountPath: /etc/livekit + readOnly: true - name: runtime-keys mountPath: /var/run/livekit resources: @@ -96,12 +100,14 @@ spec: cpu: "2" memory: 1Gi volumes: - - name: config + - name: config-template configMap: name: livekit-config items: - key: livekit.yaml path: livekit.yaml + - name: config + emptyDir: {} - name: runtime-keys emptyDir: {} --- -- 2.47.2 From 1e64075478c7a687d438117eabd298604c41e0b7 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 11:37:52 -0300 Subject: [PATCH 169/684] sso(openldap): restore in-cluster LDAP --- .../applications/jellyfin/kustomization.yaml | 1 + .../applications/kustomization.yaml | 1 + .../applications/openldap/kustomization.yaml | 19 ++++ services/openldap/configmap-bootstrap.yaml | 15 ++++ services/openldap/kustomization.yaml | 8 ++ services/openldap/service.yaml | 19 ++++ services/openldap/statefulset.yaml | 87 +++++++++++++++++++ 7 files changed, 150 insertions(+) create mode 100644 
clusters/atlas/flux-system/applications/openldap/kustomization.yaml create mode 100644 services/openldap/configmap-bootstrap.yaml create mode 100644 services/openldap/kustomization.yaml create mode 100644 services/openldap/service.yaml create mode 100644 services/openldap/statefulset.yaml diff --git a/clusters/atlas/flux-system/applications/jellyfin/kustomization.yaml b/clusters/atlas/flux-system/applications/jellyfin/kustomization.yaml index 0d314ca..dda35d7 100644 --- a/clusters/atlas/flux-system/applications/jellyfin/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/jellyfin/kustomization.yaml @@ -15,5 +15,6 @@ spec: namespace: flux-system dependsOn: - name: core + - name: openldap wait: true timeout: 5m diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index e1d1feb..6cd5281 100644 --- a/clusters/atlas/flux-system/applications/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/kustomization.yaml @@ -16,6 +16,7 @@ resources: - jellyfin/kustomization.yaml - xmr-miner/kustomization.yaml - sui-metrics/kustomization.yaml + - openldap/kustomization.yaml - keycloak/kustomization.yaml - oauth2-proxy/kustomization.yaml - mailu/kustomization.yaml diff --git a/clusters/atlas/flux-system/applications/openldap/kustomization.yaml b/clusters/atlas/flux-system/applications/openldap/kustomization.yaml new file mode 100644 index 0000000..d4657c0 --- /dev/null +++ b/clusters/atlas/flux-system/applications/openldap/kustomization.yaml @@ -0,0 +1,19 @@ +# clusters/atlas/flux-system/applications/openldap/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: openldap + namespace: flux-system +spec: + interval: 10m + prune: true + sourceRef: + kind: GitRepository + name: flux-system + namespace: flux-system + path: ./services/openldap + targetNamespace: sso + dependsOn: + - name: core + wait: true + timeout: 5m diff --git 
a/services/openldap/configmap-bootstrap.yaml b/services/openldap/configmap-bootstrap.yaml new file mode 100644 index 0000000..c3b90e6 --- /dev/null +++ b/services/openldap/configmap-bootstrap.yaml @@ -0,0 +1,15 @@ +# services/openldap/configmap-bootstrap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: openldap-bootstrap + namespace: sso +data: + 00-organizational-units.ldif: | + dn: ou=users,dc=bstein,dc=dev + objectClass: organizationalUnit + ou: users + + dn: ou=groups,dc=bstein,dc=dev + objectClass: organizationalUnit + ou: groups diff --git a/services/openldap/kustomization.yaml b/services/openldap/kustomization.yaml new file mode 100644 index 0000000..dc15e6e --- /dev/null +++ b/services/openldap/kustomization.yaml @@ -0,0 +1,8 @@ +# services/openldap/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: sso +resources: + - configmap-bootstrap.yaml + - service.yaml + - statefulset.yaml diff --git a/services/openldap/service.yaml b/services/openldap/service.yaml new file mode 100644 index 0000000..38c2176 --- /dev/null +++ b/services/openldap/service.yaml @@ -0,0 +1,19 @@ +# services/openldap/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: openldap + namespace: sso + labels: + app: openldap +spec: + clusterIP: None + selector: + app: openldap + ports: + - name: ldap + port: 389 + targetPort: ldap + - name: ldaps + port: 636 + targetPort: ldaps diff --git a/services/openldap/statefulset.yaml b/services/openldap/statefulset.yaml new file mode 100644 index 0000000..8af04e4 --- /dev/null +++ b/services/openldap/statefulset.yaml @@ -0,0 +1,87 @@ +# services/openldap/statefulset.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: openldap + namespace: sso + labels: + app: openldap +spec: + serviceName: openldap + replicas: 1 + selector: + matchLabels: + app: openldap + template: + metadata: + labels: + app: openldap + spec: + nodeSelector: + kubernetes.io/arch: arm64 + 
node-role.kubernetes.io/worker: "true" + containers: + - name: openldap + image: docker.io/osixia/openldap:1.5.0 + imagePullPolicy: IfNotPresent + ports: + - name: ldap + containerPort: 389 + - name: ldaps + containerPort: 636 + env: + - name: LDAP_ORGANISATION + value: Atlas + - name: LDAP_DOMAIN + value: bstein.dev + - name: LDAP_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: openldap-admin + key: LDAP_ADMIN_PASSWORD + - name: LDAP_CONFIG_PASSWORD + valueFrom: + secretKeyRef: + name: openldap-admin + key: LDAP_CONFIG_PASSWORD + readinessProbe: + tcpSocket: + port: ldap + initialDelaySeconds: 10 + periodSeconds: 10 + livenessProbe: + tcpSocket: + port: ldap + initialDelaySeconds: 30 + periodSeconds: 20 + volumeMounts: + - name: ldap-data + mountPath: /var/lib/ldap + - name: slapd-config + mountPath: /etc/ldap/slapd.d + - name: bootstrap-ldif + mountPath: /container/service/slapd/assets/config/bootstrap/ldif/custom + readOnly: true + volumes: + - name: bootstrap-ldif + configMap: + name: openldap-bootstrap + volumeClaimTemplates: + - metadata: + name: ldap-data + spec: + accessModes: + - ReadWriteOnce + storageClassName: astreae + resources: + requests: + storage: 1Gi + - metadata: + name: slapd-config + spec: + accessModes: + - ReadWriteOnce + storageClassName: astreae + resources: + requests: + storage: 1Gi -- 2.47.2 From 8c799faa6183535bc612edc0c39a7c15509846bf Mon Sep 17 00:00:00 2001 From: flux-bot Date: Thu, 1 Jan 2026 14:38:43 +0000 Subject: [PATCH 170/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index c93c5d6..22ecc8f 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: 
registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-26 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-27 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 9c6889440c2ad492db2d59a0b90b9bb60c03beaa Mon Sep 17 00:00:00 2001 From: flux-bot Date: Thu, 1 Jan 2026 14:39:55 +0000 Subject: [PATCH 171/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 36962d1..840ef5c 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-26 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-27 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From a4bcaf89126a981f32c2dcaf79b3539634cd8bd7 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 11:47:47 -0300 Subject: [PATCH 172/684] sso(openldap): fix bootstrap ldif mount --- services/openldap/statefulset.yaml | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/services/openldap/statefulset.yaml b/services/openldap/statefulset.yaml index 8af04e4..9bdfefb 100644 --- a/services/openldap/statefulset.yaml +++ b/services/openldap/statefulset.yaml @@ -20,6 +20,25 @@ spec: nodeSelector: kubernetes.io/arch: arm64 node-role.kubernetes.io/worker: "true" + initContainers: + - name: copy-bootstrap-ldif + image: docker.io/library/alpine:3.20 + securityContext: + runAsUser: 0 + 
runAsGroup: 0 + command: + - /bin/sh + - -c + - | + set -euxo pipefail + cp -a /bootstrap-src/. /bootstrap-dst/ + chmod -R 0644 /bootstrap-dst || true + volumeMounts: + - name: bootstrap-src + mountPath: /bootstrap-src + readOnly: true + - name: bootstrap-ldif + mountPath: /bootstrap-dst containers: - name: openldap image: docker.io/osixia/openldap:1.5.0 @@ -61,11 +80,12 @@ spec: mountPath: /etc/ldap/slapd.d - name: bootstrap-ldif mountPath: /container/service/slapd/assets/config/bootstrap/ldif/custom - readOnly: true volumes: - - name: bootstrap-ldif + - name: bootstrap-src configMap: name: openldap-bootstrap + - name: bootstrap-ldif + emptyDir: {} volumeClaimTemplates: - metadata: name: ldap-data -- 2.47.2 From 671b28b8f42a761c49d96a1f3a861d471a36a97b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 12:02:21 -0300 Subject: [PATCH 173/684] sso(openldap): remove bootstrap ldif --- services/openldap/configmap-bootstrap.yaml | 15 ------------ services/openldap/kustomization.yaml | 1 - services/openldap/statefulset.yaml | 27 ---------------------- 3 files changed, 43 deletions(-) delete mode 100644 services/openldap/configmap-bootstrap.yaml diff --git a/services/openldap/configmap-bootstrap.yaml b/services/openldap/configmap-bootstrap.yaml deleted file mode 100644 index c3b90e6..0000000 --- a/services/openldap/configmap-bootstrap.yaml +++ /dev/null @@ -1,15 +0,0 @@ -# services/openldap/configmap-bootstrap.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: openldap-bootstrap - namespace: sso -data: - 00-organizational-units.ldif: | - dn: ou=users,dc=bstein,dc=dev - objectClass: organizationalUnit - ou: users - - dn: ou=groups,dc=bstein,dc=dev - objectClass: organizationalUnit - ou: groups diff --git a/services/openldap/kustomization.yaml b/services/openldap/kustomization.yaml index dc15e6e..798f7e8 100644 --- a/services/openldap/kustomization.yaml +++ b/services/openldap/kustomization.yaml @@ -3,6 +3,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1 
kind: Kustomization namespace: sso resources: - - configmap-bootstrap.yaml - service.yaml - statefulset.yaml diff --git a/services/openldap/statefulset.yaml b/services/openldap/statefulset.yaml index 9bdfefb..ee8c792 100644 --- a/services/openldap/statefulset.yaml +++ b/services/openldap/statefulset.yaml @@ -20,25 +20,6 @@ spec: nodeSelector: kubernetes.io/arch: arm64 node-role.kubernetes.io/worker: "true" - initContainers: - - name: copy-bootstrap-ldif - image: docker.io/library/alpine:3.20 - securityContext: - runAsUser: 0 - runAsGroup: 0 - command: - - /bin/sh - - -c - - | - set -euxo pipefail - cp -a /bootstrap-src/. /bootstrap-dst/ - chmod -R 0644 /bootstrap-dst || true - volumeMounts: - - name: bootstrap-src - mountPath: /bootstrap-src - readOnly: true - - name: bootstrap-ldif - mountPath: /bootstrap-dst containers: - name: openldap image: docker.io/osixia/openldap:1.5.0 @@ -78,14 +59,6 @@ spec: mountPath: /var/lib/ldap - name: slapd-config mountPath: /etc/ldap/slapd.d - - name: bootstrap-ldif - mountPath: /container/service/slapd/assets/config/bootstrap/ldif/custom - volumes: - - name: bootstrap-src - configMap: - name: openldap-bootstrap - - name: bootstrap-ldif - emptyDir: {} volumeClaimTemplates: - metadata: name: ldap-data -- 2.47.2 From 47ac4a858001bb615fe34980b5f6983c7bcf2cd1 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Thu, 1 Jan 2026 15:09:46 +0000 Subject: [PATCH 174/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 22ecc8f..0823316 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-27 # {"$imagepolicy": 
"bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-28 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 48d4e9c3636c9e850769f9bccd10115157777a3b Mon Sep 17 00:00:00 2001 From: flux-bot Date: Thu, 1 Jan 2026 15:10:58 +0000 Subject: [PATCH 175/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 840ef5c..1e5e4bd 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-27 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-28 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 7955d9133c6006d2b8cf407a6ca1dbf62dcd0c14 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 12:22:22 -0300 Subject: [PATCH 176/684] jellyfin: fix LDAP auth provider id --- services/jellyfin/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/jellyfin/deployment.yaml b/services/jellyfin/deployment.yaml index faec4f7..88fa9dd 100644 --- a/services/jellyfin/deployment.yaml +++ b/services/jellyfin/deployment.yaml @@ -61,7 +61,7 @@ spec: apk add --no-cache sqlite db="/config/data/jellyfin.db" if [ -f "$db" ]; then - sqlite3 "$db" "UPDATE Users SET AuthenticationProviderId='958aad66-3784-4d2a-b89a-a7b6fab6e25c', Password=NULL, EnableLocalPassword=0 WHERE AuthenticationProviderId!='958aad66-3784-4d2a-b89a-a7b6fab6e25c';" + sqlite3 "$db" "UPDATE Users 
SET AuthenticationProviderId='Jellyfin.Plugin.LDAP_Auth.LdapAuthenticationProviderPlugin', Password=NULL, EnableLocalPassword=0 WHERE AuthenticationProviderId!='Jellyfin.Plugin.LDAP_Auth.LdapAuthenticationProviderPlugin';" else echo "db not found at $db, skipping" fi -- 2.47.2 From 1f554e583a432e3eb6be3f99b8eb6ce1bb32bf7a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 12:32:57 -0300 Subject: [PATCH 177/684] keycloak: read POSTGRES_* db secret keys --- services/keycloak/deployment.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index 9336bd9..09599ba 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -75,17 +75,17 @@ spec: valueFrom: secretKeyRef: name: keycloak-db - key: database + key: POSTGRES_DATABASE - name: KC_DB_USERNAME valueFrom: secretKeyRef: name: keycloak-db - key: username + key: POSTGRES_USER - name: KC_DB_PASSWORD valueFrom: secretKeyRef: name: keycloak-db - key: password + key: POSTGRES_PASSWORD - name: KC_DB_SCHEMA value: public - name: KC_HOSTNAME -- 2.47.2 From 0f1f34c52a4e8e6defcc086d219ef1ce0d7afbb2 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 12:50:26 -0300 Subject: [PATCH 178/684] communication(atlasbot): reduce spam and use atlasbot user --- services/communication/atlasbot-configmap.yaml | 12 ++++++------ services/communication/atlasbot-deployment.yaml | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/services/communication/atlasbot-configmap.yaml b/services/communication/atlasbot-configmap.yaml index a15352d..b897683 100644 --- a/services/communication/atlasbot-configmap.yaml +++ b/services/communication/atlasbot-configmap.yaml @@ -53,7 +53,6 @@ data: req("POST", path, token, body={"msgtype": "m.text", "body": text}) history = collections.defaultdict(list) # room_id -> list of str (short transcript) - greeted = set() def ollama_reply(room_id: 
str, prompt: str) -> str: try: @@ -74,6 +73,11 @@ data: def sync_loop(token: str, room_id: str): since = None + try: + res = req("GET", "/_matrix/client/v3/sync?timeout=0", token, timeout=10) + since = res.get("next_batch") + except Exception: + pass while True: params = {"timeout": 30000} if since: @@ -90,15 +94,11 @@ data: for rid, data in res.get("rooms", {}).get("invite", {}).items(): try: join_room(token, rid) - send_msg(token, rid, "Atlas online.") except Exception: pass # messages for rid, data in res.get("rooms", {}).get("join", {}).items(): - if rid not in greeted and room_id and rid == room_id: - greeted.add(rid) - send_msg(token, rid, "Atlas online.") timeline = data.get("timeline", {}).get("events", []) for ev in timeline: if ev.get("type") != "m.room.message": @@ -113,7 +113,7 @@ data: # Only respond if bot is mentioned or in a DM joined_count = data.get("summary", {}).get("m.joined_member_count") is_dm = joined_count is not None and joined_count <= 2 - mentioned = f"@{USER}" in body or "atlas" in body.lower() + mentioned = f"@{USER}" in body history[rid].append(f"{sender}: {body}") if is_dm or mentioned: reply = ollama_reply(rid, body) diff --git a/services/communication/atlasbot-deployment.yaml b/services/communication/atlasbot-deployment.yaml index 5834435..a0b3f05 100644 --- a/services/communication/atlasbot-deployment.yaml +++ b/services/communication/atlasbot-deployment.yaml @@ -29,14 +29,14 @@ spec: - name: MATRIX_BASE value: http://othrys-synapse-matrix-synapse:8008 - name: AUTH_BASE - value: http://matrix-authentication-service:8080 + value: http://othrys-synapse-matrix-synapse:8008 - name: BOT_USER - value: atlas + value: atlasbot - name: BOT_PASS valueFrom: secretKeyRef: - name: atlasbot-mas-credentials-runtime - key: password + name: atlasbot-credentials-runtime + key: bot-password - name: CHAT_API_KEY valueFrom: secretKeyRef: -- 2.47.2 From 554061711c7d68045c332ae53d2410d19704b941 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 
2026 12:57:00 -0300 Subject: [PATCH 179/684] communication: use MAS for internal password logins --- services/communication/atlasbot-deployment.yaml | 2 +- services/communication/guest-name-job.yaml | 5 ++++- services/communication/pin-othrys-job.yaml | 5 ++++- services/communication/seed-othrys-room.yaml | 5 ++++- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/services/communication/atlasbot-deployment.yaml b/services/communication/atlasbot-deployment.yaml index a0b3f05..0450f3e 100644 --- a/services/communication/atlasbot-deployment.yaml +++ b/services/communication/atlasbot-deployment.yaml @@ -29,7 +29,7 @@ spec: - name: MATRIX_BASE value: http://othrys-synapse-matrix-synapse:8008 - name: AUTH_BASE - value: http://othrys-synapse-matrix-synapse:8008 + value: http://matrix-authentication-service:8080 - name: BOT_USER value: atlasbot - name: BOT_PASS diff --git a/services/communication/guest-name-job.yaml b/services/communication/guest-name-job.yaml index f6f1469..8d8149e 100644 --- a/services/communication/guest-name-job.yaml +++ b/services/communication/guest-name-job.yaml @@ -19,6 +19,8 @@ spec: env: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 + - name: AUTH_BASE + value: http://matrix-authentication-service:8080 - name: SEEDER_USER value: othrys-seeder - name: SEEDER_PASS @@ -39,10 +41,11 @@ spec: NOUN = ["otter","falcon","comet","ember","grove","harbor","meadow","raven","river","summit"] BASE = os.environ["SYNAPSE_BASE"] + AUTH_BASE = os.environ.get("AUTH_BASE", BASE) OTHRYS = "!orejZnVfvbAmwQDYba:live.bstein.dev" def login(user, password): - r = requests.post(f"{BASE}/_matrix/client/v3/login", json={ + r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={ "type": "m.login.password", "identifier": {"type": "m.id.user", "user": user}, "password": password, diff --git a/services/communication/pin-othrys-job.yaml b/services/communication/pin-othrys-job.yaml index a03e43d..a45f37a 100644 --- 
a/services/communication/pin-othrys-job.yaml +++ b/services/communication/pin-othrys-job.yaml @@ -22,6 +22,8 @@ spec: env: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 + - name: AUTH_BASE + value: http://matrix-authentication-service:8080 - name: SEEDER_USER value: othrys-seeder - name: SEEDER_PASS @@ -39,6 +41,7 @@ spec: import os, requests, urllib.parse BASE = os.environ["SYNAPSE_BASE"] + AUTH_BASE = os.environ.get("AUTH_BASE", BASE) ROOM_ALIAS = "#othrys:live.bstein.dev" MESSAGE = ( "Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join " @@ -48,7 +51,7 @@ spec: def auth(token): return {"Authorization": f"Bearer {token}"} def login(user, password): - r = requests.post(f"{BASE}/_matrix/client/v3/login", json={ + r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={ "type": "m.login.password", "identifier": {"type": "m.id.user", "user": user}, "password": password, diff --git a/services/communication/seed-othrys-room.yaml b/services/communication/seed-othrys-room.yaml index 09dc1e8..06be4fb 100644 --- a/services/communication/seed-othrys-room.yaml +++ b/services/communication/seed-othrys-room.yaml @@ -20,6 +20,8 @@ spec: env: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 + - name: AUTH_BASE + value: http://matrix-authentication-service:8080 - name: SEEDER_USER value: othrys-seeder - name: SEEDER_PASS @@ -44,9 +46,10 @@ spec: import os, requests, urllib.parse BASE = os.environ["SYNAPSE_BASE"] + AUTH_BASE = os.environ.get("AUTH_BASE", BASE) def login(user, password): - r = requests.post(f"{BASE}/_matrix/client/v3/login", json={ + r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={ "type": "m.login.password", "identifier": {"type": "m.id.user", "user": user}, "password": password, -- 2.47.2 From 32e98a7836546f9f4d238384ceed7d5b5175c091 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 12:58:55 -0300 Subject: [PATCH 180/684] communication: create 
comms namespace --- services/communication/kustomization.yaml | 1 + services/communication/namespace-comms.yaml | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 services/communication/namespace-comms.yaml diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index 39d5890..f301396 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -4,6 +4,7 @@ kind: Kustomization namespace: communication resources: - namespace.yaml + - namespace-comms.yaml - synapse-rendered.yaml - mas-configmap.yaml - mas-deployment.yaml diff --git a/services/communication/namespace-comms.yaml b/services/communication/namespace-comms.yaml new file mode 100644 index 0000000..d037ea6 --- /dev/null +++ b/services/communication/namespace-comms.yaml @@ -0,0 +1,5 @@ +# services/communication/namespace-comms.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: comms -- 2.47.2 From e503c40417b0fa7131bf09fdbe9e6bfe5cf320bf Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 13:00:56 -0300 Subject: [PATCH 181/684] communication: stop staging comms namespace (kustomize conflict) --- services/communication/kustomization.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index f301396..39d5890 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -4,7 +4,6 @@ kind: Kustomization namespace: communication resources: - namespace.yaml - - namespace-comms.yaml - synapse-rendered.yaml - mas-configmap.yaml - mas-deployment.yaml -- 2.47.2 From a48486912b42cce8a01cbc23bc9c63aafdb04cc7 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 13:03:43 -0300 Subject: [PATCH 182/684] comms: create namespace via Flux --- .../applications/comms/kustomization.yaml | 15 +++++++++++++++ .../flux-system/applications/kustomization.yaml | 1 + 
services/comms/kustomization.yaml | 5 +++++ .../namespace-comms.yaml => comms/namespace.yaml} | 2 +- 4 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 clusters/atlas/flux-system/applications/comms/kustomization.yaml create mode 100644 services/comms/kustomization.yaml rename services/{communication/namespace-comms.yaml => comms/namespace.yaml} (54%) diff --git a/clusters/atlas/flux-system/applications/comms/kustomization.yaml b/clusters/atlas/flux-system/applications/comms/kustomization.yaml new file mode 100644 index 0000000..42dc736 --- /dev/null +++ b/clusters/atlas/flux-system/applications/comms/kustomization.yaml @@ -0,0 +1,15 @@ +# clusters/atlas/flux-system/applications/comms/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: comms + namespace: flux-system +spec: + interval: 10m + prune: true + sourceRef: + kind: GitRepository + name: flux-system + path: ./services/comms + targetNamespace: comms + timeout: 2m diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index 6cd5281..37d7699 100644 --- a/clusters/atlas/flux-system/applications/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/kustomization.yaml @@ -4,6 +4,7 @@ kind: Kustomization resources: - gitea/kustomization.yaml - vault/kustomization.yaml + - comms/kustomization.yaml - communication/kustomization.yaml - crypto/kustomization.yaml - monerod/kustomization.yaml diff --git a/services/comms/kustomization.yaml b/services/comms/kustomization.yaml new file mode 100644 index 0000000..393be76 --- /dev/null +++ b/services/comms/kustomization.yaml @@ -0,0 +1,5 @@ +# services/comms/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - namespace.yaml diff --git a/services/communication/namespace-comms.yaml b/services/comms/namespace.yaml similarity index 54% rename from 
services/communication/namespace-comms.yaml rename to services/comms/namespace.yaml index d037ea6..9d44af2 100644 --- a/services/communication/namespace-comms.yaml +++ b/services/comms/namespace.yaml @@ -1,4 +1,4 @@ -# services/communication/namespace-comms.yaml +# services/comms/namespace.yaml apiVersion: v1 kind: Namespace metadata: -- 2.47.2 From 79f99899eec3e28d1c4f26e6961a2e69a6484fdb Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 13:07:11 -0300 Subject: [PATCH 183/684] communication: prune stack for comms cutover --- services/communication/kustomization.yaml | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index 39d5890..ef9cb49 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -4,25 +4,3 @@ kind: Kustomization namespace: communication resources: - namespace.yaml - - synapse-rendered.yaml - - mas-configmap.yaml - - mas-deployment.yaml - - mas-ingress.yaml - - element-rendered.yaml - - livekit-config.yaml - - livekit.yaml - - coturn.yaml - - livekit-token-deployment.yaml - - livekit-ingress.yaml - - livekit-middlewares.yaml - - element-call-config.yaml - - element-call-deployment.yaml - - pin-othrys-job.yaml - - guest-name-job.yaml - - atlasbot-configmap.yaml - - atlasbot-deployment.yaml - - seed-othrys-room.yaml - - wellknown.yaml - -patchesStrategicMerge: - - synapse-deployment-strategy-patch.yaml -- 2.47.2 From d4a830da8831331442681d35ab6ab27d70e61ac2 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Thu, 1 Jan 2026 16:08:50 +0000 Subject: [PATCH 184/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 0823316..1b43a1e 100644 --- 
a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-28 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-29 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From c6089fbf859b99f02c106565d515ead0b11a68bb Mon Sep 17 00:00:00 2001 From: flux-bot Date: Thu, 1 Jan 2026 16:10:02 +0000 Subject: [PATCH 185/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 1e5e4bd..2a03a24 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-28 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-29 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 6ddfd394cb4f0556f7903b6ab9b355af8bd712e0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 13:12:29 -0300 Subject: [PATCH 186/684] communication: deploy into comms namespace --- .../communication/kustomization.yaml | 2 +- services/communication/kustomization.yaml | 25 +++++++++++++++++-- services/communication/livekit-ingress.yaml | 2 +- .../communication/livekit-middlewares.yaml | 2 +- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git 
a/clusters/atlas/flux-system/applications/communication/kustomization.yaml b/clusters/atlas/flux-system/applications/communication/kustomization.yaml index 0d3b07a..f9f3531 100644 --- a/clusters/atlas/flux-system/applications/communication/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/communication/kustomization.yaml @@ -11,7 +11,7 @@ spec: kind: GitRepository name: flux-system path: ./services/communication - targetNamespace: communication + targetNamespace: comms timeout: 2m dependsOn: - name: traefik diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index ef9cb49..54b387f 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -1,6 +1,27 @@ # services/communication/kustomization.yaml apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization -namespace: communication +namespace: comms resources: - - namespace.yaml + - synapse-rendered.yaml + - mas-configmap.yaml + - mas-deployment.yaml + - mas-ingress.yaml + - element-rendered.yaml + - livekit-config.yaml + - livekit.yaml + - coturn.yaml + - livekit-token-deployment.yaml + - livekit-ingress.yaml + - livekit-middlewares.yaml + - element-call-config.yaml + - element-call-deployment.yaml + - pin-othrys-job.yaml + - guest-name-job.yaml + - atlasbot-configmap.yaml + - atlasbot-deployment.yaml + - seed-othrys-room.yaml + - wellknown.yaml + +patchesStrategicMerge: + - synapse-deployment-strategy-patch.yaml diff --git a/services/communication/livekit-ingress.yaml b/services/communication/livekit-ingress.yaml index 796eb3d..c6f1dae 100644 --- a/services/communication/livekit-ingress.yaml +++ b/services/communication/livekit-ingress.yaml @@ -8,7 +8,7 @@ metadata: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: 
communication-livekit-sfu-strip@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: comms-livekit-sfu-strip@kubernetescrd cert-manager.io/cluster-issuer: letsencrypt spec: tls: diff --git a/services/communication/livekit-middlewares.yaml b/services/communication/livekit-middlewares.yaml index 49a3e8f..76632fc 100644 --- a/services/communication/livekit-middlewares.yaml +++ b/services/communication/livekit-middlewares.yaml @@ -28,7 +28,7 @@ metadata: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: communication-livekit-jwt-strip@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: comms-livekit-jwt-strip@kubernetescrd cert-manager.io/cluster-issuer: letsencrypt spec: tls: -- 2.47.2 From e47e6d6e458920d9ac950630ebbd2dc1cc30fe4a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 13:25:59 -0300 Subject: [PATCH 187/684] comms(synapse): ensure signing key secret populated --- services/communication/kustomization.yaml | 1 + .../synapse-signingkey-ensure-job.yaml | 42 +++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 services/communication/synapse-signingkey-ensure-job.yaml diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index 54b387f..1b8f17a 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -4,6 +4,7 @@ kind: Kustomization namespace: comms resources: - synapse-rendered.yaml + - synapse-signingkey-ensure-job.yaml - mas-configmap.yaml - mas-deployment.yaml - mas-ingress.yaml diff --git a/services/communication/synapse-signingkey-ensure-job.yaml b/services/communication/synapse-signingkey-ensure-job.yaml new file mode 100644 index 0000000..fc5ba5b --- /dev/null +++ b/services/communication/synapse-signingkey-ensure-job.yaml @@ -0,0 +1,42 @@ +# 
services/communication/synapse-signingkey-ensure-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: othrys-synapse-signingkey-ensure + namespace: comms +spec: + backoffLimit: 2 + template: + spec: + serviceAccountName: othrys-synapse-signingkey-job + restartPolicy: OnFailure + volumes: + - name: work + emptyDir: {} + initContainers: + - name: generate + image: ghcr.io/element-hq/synapse:v1.144.0 + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + generate_signing_key -o /work/signing.key + volumeMounts: + - name: work + mountPath: /work + containers: + - name: patch + image: bitnami/kubectl:1.30.4 + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + if kubectl -n comms get secret othrys-synapse-signingkey -o jsonpath='{.data.signing\.key}' 2>/dev/null | grep -q .; then + exit 0 + fi + signing_key_b64="$(base64 /work/signing.key | tr -d '\n')" + payload="$(printf '{\"data\":{\"signing.key\":\"%s\"}}' "${signing_key_b64}")" + kubectl -n comms patch secret othrys-synapse-signingkey --type=merge -p "${payload}" >/dev/null + volumeMounts: + - name: work + mountPath: /work -- 2.47.2 From e82e66091c9347b073deb8d6aa293962caf08160 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 13:31:37 -0300 Subject: [PATCH 188/684] comms(synapse): fix signingkey job image --- services/communication/synapse-signingkey-ensure-job.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/communication/synapse-signingkey-ensure-job.yaml b/services/communication/synapse-signingkey-ensure-job.yaml index fc5ba5b..9f266bf 100644 --- a/services/communication/synapse-signingkey-ensure-job.yaml +++ b/services/communication/synapse-signingkey-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: othrys-synapse-signingkey-ensure + name: othrys-synapse-signingkey-ensure-2 namespace: comms spec: backoffLimit: 2 @@ -26,7 +26,7 @@ spec: mountPath: /work containers: - name: patch - image: 
bitnami/kubectl:1.30.4 + image: bitnami/kubectl:latest command: ["/bin/sh", "-c"] args: - | -- 2.47.2 From baed4737d9bd2d14b84995d21b875bd7019be770 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 13:37:21 -0300 Subject: [PATCH 189/684] comms(synapse): fix signingkey secret patch job --- services/communication/synapse-signingkey-ensure-job.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/communication/synapse-signingkey-ensure-job.yaml b/services/communication/synapse-signingkey-ensure-job.yaml index 9f266bf..02c31a8 100644 --- a/services/communication/synapse-signingkey-ensure-job.yaml +++ b/services/communication/synapse-signingkey-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: othrys-synapse-signingkey-ensure-2 + name: othrys-synapse-signingkey-ensure-3 namespace: comms spec: backoffLimit: 2 @@ -35,7 +35,7 @@ spec: exit 0 fi signing_key_b64="$(base64 /work/signing.key | tr -d '\n')" - payload="$(printf '{\"data\":{\"signing.key\":\"%s\"}}' "${signing_key_b64}")" + payload="$(printf '{"data":{"signing.key":"%s"}}' "${signing_key_b64}")" kubectl -n comms patch secret othrys-synapse-signingkey --type=merge -p "${payload}" >/dev/null volumeMounts: - name: work -- 2.47.2 From dcc5714a8b06ded637bdd87073e598e9fd7b2da2 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 13:47:33 -0300 Subject: [PATCH 190/684] comms(synapse): fix signing key RBAC + rerun job --- services/communication/synapse-rendered.yaml | 2 +- services/communication/synapse-signingkey-ensure-job.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml index ec6d473..462a699 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/communication/synapse-rendered.yaml @@ -1049,7 +1049,7 @@ roleRef: subjects: - kind: ServiceAccount name: othrys-synapse-signingkey-job - namespace: 
communication + namespace: comms --- # Source: matrix-synapse/templates/tests/test-connection.yaml apiVersion: v1 diff --git a/services/communication/synapse-signingkey-ensure-job.yaml b/services/communication/synapse-signingkey-ensure-job.yaml index 02c31a8..06e8fa8 100644 --- a/services/communication/synapse-signingkey-ensure-job.yaml +++ b/services/communication/synapse-signingkey-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: othrys-synapse-signingkey-ensure-3 + name: othrys-synapse-signingkey-ensure-4 namespace: comms spec: backoffLimit: 2 -- 2.47.2 From 7020d53fd8aa56047b34031c2840f55fba0bdd47 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 13:53:35 -0300 Subject: [PATCH 191/684] communication: drop old namespace manifest --- services/communication/namespace.yaml | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 services/communication/namespace.yaml diff --git a/services/communication/namespace.yaml b/services/communication/namespace.yaml deleted file mode 100644 index d566429..0000000 --- a/services/communication/namespace.yaml +++ /dev/null @@ -1,5 +0,0 @@ -# services/communication/namespace.yaml -apiVersion: v1 -kind: Namespace -metadata: - name: communication -- 2.47.2 From 6a76fc0fa3754d52ffef0d69f43af74729663545 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 14:16:08 -0300 Subject: [PATCH 192/684] gpu: enable time-slicing and refresh dashboards --- .../profiles/atlas-ha/kustomization.yaml | 1 + .../device-plugin-config/configmap.yaml | 15 +++ .../device-plugin-config/kustomization.yaml | 5 + .../device-plugin-jetson/daemonset.yaml | 6 ++ .../device-plugin-minipc/daemonset.yaml | 6 ++ .../device-plugin-tethys/daemonset.yaml | 6 ++ .../profiles/tethys-hybrid/kustomization.yaml | 1 + scripts/dashboards_render_atlas.py | 97 +++++++++++-------- services/ai-llm/deployment.yaml | 24 +++-- .../bstein-dev-home/backend-deployment.yaml | 2 +- services/jellyfin/deployment.yaml | 18 +++- 
services/monitoring/dashboards/atlas-gpu.json | 44 ++++++++- .../monitoring/dashboards/atlas-overview.json | 43 +++++++- .../monitoring/grafana-dashboard-gpu.yaml | 44 ++++++++- .../grafana-dashboard-overview.yaml | 43 +++++++- 15 files changed, 288 insertions(+), 67 deletions(-) create mode 100644 infrastructure/modules/profiles/components/device-plugin-config/configmap.yaml create mode 100644 infrastructure/modules/profiles/components/device-plugin-config/kustomization.yaml diff --git a/infrastructure/modules/profiles/atlas-ha/kustomization.yaml b/infrastructure/modules/profiles/atlas-ha/kustomization.yaml index 7e69171..0502e01 100644 --- a/infrastructure/modules/profiles/atlas-ha/kustomization.yaml +++ b/infrastructure/modules/profiles/atlas-ha/kustomization.yaml @@ -2,6 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: + - ../components/device-plugin-config - ../components/device-plugin-jetson - ../components/device-plugin-minipc - ../components/device-plugin-tethys diff --git a/infrastructure/modules/profiles/components/device-plugin-config/configmap.yaml b/infrastructure/modules/profiles/components/device-plugin-config/configmap.yaml new file mode 100644 index 0000000..73c61cf --- /dev/null +++ b/infrastructure/modules/profiles/components/device-plugin-config/configmap.yaml @@ -0,0 +1,15 @@ +# infrastructure/modules/profiles/components/device-plugin-config/configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: nvidia-device-plugin-config + namespace: kube-system +data: + config.yaml: | + version: v1 + sharing: + timeSlicing: + renameByDefault: true + resources: + - name: nvidia.com/gpu + replicas: 4 diff --git a/infrastructure/modules/profiles/components/device-plugin-config/kustomization.yaml b/infrastructure/modules/profiles/components/device-plugin-config/kustomization.yaml new file mode 100644 index 0000000..346f526 --- /dev/null +++ 
b/infrastructure/modules/profiles/components/device-plugin-config/kustomization.yaml @@ -0,0 +1,5 @@ +# infrastructure/modules/profiles/components/device-plugin-config/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - configmap.yaml diff --git a/infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml b/infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml index f4953ea..0fa8376 100644 --- a/infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml +++ b/infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml @@ -31,6 +31,7 @@ spec: args: - "--fail-on-init-error=false" - "--device-list-strategy=envvar,cdi" + - "--config-file=/config/config.yaml" securityContext: privileged: true env: @@ -41,7 +42,12 @@ spec: volumeMounts: - name: device-plugin mountPath: /var/lib/kubelet/device-plugins + - name: config + mountPath: /config volumes: - name: device-plugin hostPath: path: /var/lib/kubelet/device-plugins + - name: config + configMap: + name: nvidia-device-plugin-config diff --git a/infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml b/infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml index 76b6c06..309593a 100644 --- a/infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml +++ b/infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml @@ -32,6 +32,7 @@ spec: - "--fail-on-init-error=false" - "--device-list-strategy=envvar" - "--mig-strategy=none" + - "--config-file=/config/config.yaml" securityContext: privileged: true env: @@ -42,7 +43,12 @@ spec: volumeMounts: - name: device-plugin mountPath: /var/lib/kubelet/device-plugins + - name: config + mountPath: /config volumes: - name: device-plugin hostPath: path: /var/lib/kubelet/device-plugins + - name: config + configMap: + name: nvidia-device-plugin-config diff --git 
a/infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml b/infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml index a15930a..884befa 100644 --- a/infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml +++ b/infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml @@ -33,6 +33,7 @@ spec: - "--fail-on-init-error=false" - "--device-list-strategy=envvar" - "--mig-strategy=none" + - "--config-file=/config/config.yaml" securityContext: privileged: true env: @@ -43,7 +44,12 @@ spec: volumeMounts: - name: device-plugin mountPath: /var/lib/kubelet/device-plugins + - name: config + mountPath: /config volumes: - name: device-plugin hostPath: path: /var/lib/kubelet/device-plugins + - name: config + configMap: + name: nvidia-device-plugin-config diff --git a/infrastructure/modules/profiles/tethys-hybrid/kustomization.yaml b/infrastructure/modules/profiles/tethys-hybrid/kustomization.yaml index b55c059..ad951ec 100644 --- a/infrastructure/modules/profiles/tethys-hybrid/kustomization.yaml +++ b/infrastructure/modules/profiles/tethys-hybrid/kustomization.yaml @@ -2,4 +2,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: + - ../components/device-plugin-config - ../components/device-plugin-tethys diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 7ad117b..7994cf7 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -171,9 +171,8 @@ def node_io_expr(scope=""): def namespace_share_expr(resource_expr): - selected = f"( {resource_expr} ) and on(namespace) ( {NAMESPACE_TOP_FILTER} )" - total = f"clamp_min(sum( {selected} ), 1)" - return f"100 * ( {selected} ) / {total}" + total = f"clamp_min(sum( {resource_expr} ), 1)" + return f"100 * ( {resource_expr} ) / {total}" def namespace_cpu_share_expr(): @@ -185,7 +184,10 @@ def namespace_ram_share_expr(): def namespace_gpu_share_expr(): - 
return namespace_share_expr(NAMESPACE_GPU_RAW) + total = f"(sum({NAMESPACE_GPU_USAGE_INSTANT}) or on() vector(0))" + share = f"100 * ({NAMESPACE_GPU_USAGE_INSTANT}) / clamp_min({total}, 1)" + idle = 'label_replace(vector(100), "namespace", "idle", "", "") and on() (' + total + " == 0)" + return f"({share}) or {idle}" PROBLEM_PODS_EXPR = ( @@ -270,46 +272,20 @@ STUCK_TABLE_EXPR = ( ")" ) +NAMESPACE_SCOPE_WORKLOAD = 'namespace!~"(^kube.*|.*-system$|^traefik$)"' +NAMESPACE_SCOPE_ALL = 'namespace=~".*"' +NAMESPACE_SCOPE_INFRA = 'namespace=~"(^kube.*|.*-system$|^traefik$)"' +NAMESPACE_SCOPE_VAR = "$namespace_scope" +NAMESPACE_SELECTOR = f'namespace!="",pod!="",container!="",{NAMESPACE_SCOPE_VAR}' +NAMESPACE_GPU_SELECTOR = f'namespace!="",pod!="",{NAMESPACE_SCOPE_VAR}' + NAMESPACE_CPU_RAW = ( - 'sum(rate(container_cpu_usage_seconds_total{namespace!="",pod!="",container!=""}[5m])) by (namespace)' -) -NAMESPACE_RAM_RAW = ( - 'sum(container_memory_working_set_bytes{namespace!="",pod!="",container!=""}) by (namespace)' + f'sum(rate(container_cpu_usage_seconds_total{{{NAMESPACE_SELECTOR}}}[5m])) by (namespace)' ) +NAMESPACE_RAM_RAW = f'sum(container_memory_working_set_bytes{{{NAMESPACE_SELECTOR}}}) by (namespace)' GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"] GPU_NODE_REGEX = "|".join(GPU_NODES) -NAMESPACE_GPU_ALLOC = ( - 'sum((kube_pod_container_resource_requests{namespace!="",resource="nvidia.com/gpu"}' - ' or kube_pod_container_resource_limits{namespace!="",resource="nvidia.com/gpu"})) by (namespace)' -) -NAMESPACE_GPU_USAGE_SHARE = ( - 'sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))' -) -NAMESPACE_GPU_USAGE_INSTANT = 'sum(DCGM_FI_DEV_GPU_UTIL{namespace!="",pod!=""}) by (namespace)' -NAMESPACE_GPU_RAW = ( - "(" - + NAMESPACE_GPU_USAGE_SHARE - + ") or on(namespace) (" - + NAMESPACE_CPU_RAW - + " * 0)" -) -NAMESPACE_GPU_WEIGHT = ( - "(" - + NAMESPACE_GPU_ALLOC - + ") or on(namespace) (" - + NAMESPACE_CPU_RAW - + " 
* 0)" -) -NAMESPACE_ACTIVITY_SCORE = ( - "( " - + NAMESPACE_CPU_RAW - + " ) + (" - + NAMESPACE_RAM_RAW - + " / 1e9) + (" - + NAMESPACE_GPU_WEIGHT - + " * 100)" -) -NAMESPACE_TOP_FILTER = "(topk(10, " + NAMESPACE_ACTIVITY_SCORE + ") >= bool 0)" +NAMESPACE_GPU_USAGE_INSTANT = f'sum(DCGM_FI_DEV_GPU_UTIL{{{NAMESPACE_GPU_SELECTOR}}}) by (namespace)' TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))" TRAEFIK_NET_INGRESS = ( 'sum(rate(container_network_receive_bytes_total{namespace="traefik",pod=~"traefik-.*"}[5m]))' @@ -588,6 +564,44 @@ def pie_panel(panel_id, title, expr, grid): } +def namespace_scope_variable(): + options = [ + { + "text": "workload namespaces only", + "value": NAMESPACE_SCOPE_WORKLOAD, + "selected": True, + }, + {"text": "all namespaces", "value": NAMESPACE_SCOPE_ALL, "selected": False}, + { + "text": "infrastructure namespaces only", + "value": NAMESPACE_SCOPE_INFRA, + "selected": False, + }, + ] + query = ( + "workload namespaces only : " + + NAMESPACE_SCOPE_WORKLOAD + + ",all namespaces : " + + NAMESPACE_SCOPE_ALL + + ",infrastructure namespaces only : " + + NAMESPACE_SCOPE_INFRA + ) + return { + "name": "namespace_scope", + "label": "Namespace filter", + "type": "custom", + "query": query, + "current": {"text": options[0]["text"], "value": options[0]["value"], "selected": True}, + "options": options, + "hide": 0, + "multi": False, + "includeAll": False, + "refresh": 1, + "sort": 0, + "skipUrlSync": False, + } + + def bargauge_panel( panel_id, title, @@ -1063,7 +1077,7 @@ def build_overview(): "schemaVersion": 39, "style": "dark", "tags": ["atlas", "overview"], - "templating": {"list": []}, + "templating": {"list": [namespace_scope_variable()]}, "time": {"from": "now-1h", "to": "now"}, "refresh": "1m", "links": [], @@ -1757,6 +1771,7 @@ def build_gpu_dashboard(): "schemaVersion": 39, "style": "dark", "tags": ["atlas", "gpu"], + "templating": {"list": [namespace_scope_variable()]}, } diff --git 
a/services/ai-llm/deployment.yaml b/services/ai-llm/deployment.yaml index fb0d0e7..b74dc0a 100644 --- a/services/ai-llm/deployment.yaml +++ b/services/ai-llm/deployment.yaml @@ -16,10 +16,20 @@ spec: app: ollama annotations: ai.bstein.dev/model: qwen2.5-coder:7b-instruct-q4_0 - ai.bstein.dev/gpu: RTX 3080 8GB (titan-24) + ai.bstein.dev/gpu: GPU pool (titan-20/21/22/24) spec: - nodeSelector: - kubernetes.io/hostname: titan-24 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - titan-20 + - titan-21 + - titan-22 + - titan-24 runtimeClassName: nvidia volumes: - name: models @@ -55,9 +65,9 @@ spec: requests: cpu: 250m memory: 1Gi - nvidia.com/gpu: 1 + nvidia.com/gpu.shared: 1 limits: - nvidia.com/gpu: 1 + nvidia.com/gpu.shared: 1 containers: - name: ollama image: ollama/ollama:latest @@ -83,8 +93,8 @@ spec: requests: cpu: "2" memory: 8Gi - nvidia.com/gpu: 1 + nvidia.com/gpu.shared: 1 limits: cpu: "4" memory: 12Gi - nvidia.com/gpu: 1 + nvidia.com/gpu.shared: 1 diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 2a03a24..21f74ba 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -39,7 +39,7 @@ spec: fieldPath: spec.nodeName - name: AI_NODE_GPU_MAP value: | - {"titan-24": "RTX 3080 8GB (local GPU)", "titan-22": "RTX 3050 8GB (local GPU)"} + {"titan-20": "Jetson Xavier (edge GPU)", "titan-21": "Jetson Xavier (edge GPU)", "titan-22": "RTX 3050 8GB (local GPU)", "titan-24": "RTX 3080 8GB (local GPU)"} ports: - name: http containerPort: 8080 diff --git a/services/jellyfin/deployment.yaml b/services/jellyfin/deployment.yaml index 88fa9dd..1177a06 100644 --- a/services/jellyfin/deployment.yaml +++ b/services/jellyfin/deployment.yaml @@ -68,8 +68,18 @@ spec: volumeMounts: - name: config mountPath: /config - 
nodeSelector: - jellyfin: "true" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - titan-20 + - titan-21 + - titan-22 + - titan-24 securityContext: runAsUser: 1000 fsGroup: 65532 @@ -96,11 +106,11 @@ spec: value: "002" resources: limits: - nvidia.com/gpu: 1 + nvidia.com/gpu.shared: 1 # cpu: "4" # memory: 8Gi requests: - nvidia.com/gpu: 1 + nvidia.com/gpu.shared: 1 cpu: "500m" memory: 1Gi volumeMounts: diff --git a/services/monitoring/dashboards/atlas-gpu.json b/services/monitoring/dashboards/atlas-gpu.json index 572c2c6..e0a631b 100644 --- a/services/monitoring/dashboards/atlas-gpu.json +++ b/services/monitoring/dashboards/atlas-gpu.json @@ -20,7 +20,7 @@ }, "targets": [ { - "expr": "100 * ( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( 
sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", + "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -71,7 +71,7 @@ }, "targets": [ { - "expr": "sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}) by (namespace)", + "expr": "sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -182,5 +182,43 @@ "tags": [ "atlas", "gpu" - ] + ], + "templating": { + "list": [ + { + "name": "namespace_scope", + "label": "Namespace filter", + "type": "custom", + "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$)\"", + "current": { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"", + "selected": true + }, + "options": [ + { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"", + "selected": true + }, + { + "text": "all namespaces", + "value": 
"namespace=~\".*\"", + "selected": false + }, + { + "text": "infrastructure namespaces only", + "value": "namespace=~\"(^kube.*|.*-system$|^traefik$)\"", + "selected": false + } + ], + "hide": 0, + "multi": false, + "includeAll": false, + "refresh": 1, + "sort": 0, + "skipUrlSync": false + } + ] + } } diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 70062e0..087b9af 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -1086,7 +1086,7 @@ }, "targets": [ { - "expr": "100 * ( ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) 
(sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", + "expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}[5m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}[5m])) by (namespace) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1137,7 +1137,7 @@ }, "targets": [ { - "expr": "100 * ( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or 
kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", + "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1188,7 +1188,7 @@ }, "targets": [ { - "expr": "100 * ( ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or 
kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", + "expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}) by (namespace) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1791,7 +1791,42 @@ "overview" ], "templating": { - "list": [] + "list": [ + { + "name": "namespace_scope", + "label": "Namespace filter", + "type": "custom", + "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$)\"", + "current": { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"", + "selected": true + }, + "options": [ + { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"", + "selected": true + }, + { + "text": "all namespaces", + "value": "namespace=~\".*\"", + "selected": false + }, + { + "text": "infrastructure namespaces only", + "value": "namespace=~\"(^kube.*|.*-system$|^traefik$)\"", + "selected": false + } + ], + "hide": 0, + "multi": false, + "includeAll": false, + "refresh": 1, + "sort": 0, + "skipUrlSync": false + } + ] }, "time": { "from": "now-1h", diff --git a/services/monitoring/grafana-dashboard-gpu.yaml b/services/monitoring/grafana-dashboard-gpu.yaml index 48725de..b90dddb 100644 --- a/services/monitoring/grafana-dashboard-gpu.yaml +++ b/services/monitoring/grafana-dashboard-gpu.yaml @@ -29,7 +29,7 @@ data: }, "targets": [ { - "expr": "100 * ( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or 
on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", + "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0)", 
"refId": "A", "legendFormat": "{{namespace}}" } @@ -80,7 +80,7 @@ data: }, "targets": [ { - "expr": "sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}) by (namespace)", + "expr": "sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -191,5 +191,43 @@ data: "tags": [ "atlas", "gpu" - ] + ], + "templating": { + "list": [ + { + "name": "namespace_scope", + "label": "Namespace filter", + "type": "custom", + "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$)\"", + "current": { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"", + "selected": true + }, + "options": [ + { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"", + "selected": true + }, + { + "text": "all namespaces", + "value": "namespace=~\".*\"", + "selected": false + }, + { + "text": "infrastructure namespaces only", + "value": "namespace=~\"(^kube.*|.*-system$|^traefik$)\"", + "selected": false + } + ], + "hide": 0, + "multi": false, + "includeAll": false, + "refresh": 1, + "sort": 0, + "skipUrlSync": false + } + ] + } } diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index cfd2cd6..b2aca02 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -1095,7 +1095,7 @@ data: }, "targets": [ { - "expr": "100 * ( ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) 
/ 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", + "expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}[5m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}[5m])) by (namespace) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1146,7 +1146,7 @@ data: }, "targets": [ { - "expr": "100 * ( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + 
((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", + "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1197,7 +1197,7 @@ data: }, "targets": [ { - "expr": "100 * ( ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + 
(sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)", + "expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}) by (namespace) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1800,7 +1800,42 @@ data: "overview" ], "templating": { - "list": [] + "list": [ + { + "name": "namespace_scope", + "label": "Namespace filter", + "type": "custom", + "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$)\"", + "current": { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"", + "selected": true + }, 
+ "options": [ + { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"", + "selected": true + }, + { + "text": "all namespaces", + "value": "namespace=~\".*\"", + "selected": false + }, + { + "text": "infrastructure namespaces only", + "value": "namespace=~\"(^kube.*|.*-system$|^traefik$)\"", + "selected": false + } + ], + "hide": 0, + "multi": false, + "includeAll": false, + "refresh": 1, + "sort": 0, + "skipUrlSync": false + } + ] }, "time": { "from": "now-1h", -- 2.47.2 From f18f1df1ce3a6cbb8f72496e31e876acb24c87e9 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 14:21:43 -0300 Subject: [PATCH 193/684] monitoring: ensure gpu idle share renders --- scripts/dashboards_render_atlas.py | 2 +- services/monitoring/dashboards/atlas-gpu.json | 2 +- services/monitoring/dashboards/atlas-overview.json | 2 +- services/monitoring/grafana-dashboard-gpu.yaml | 2 +- services/monitoring/grafana-dashboard-overview.yaml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 7994cf7..58da298 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -187,7 +187,7 @@ def namespace_gpu_share_expr(): total = f"(sum({NAMESPACE_GPU_USAGE_INSTANT}) or on() vector(0))" share = f"100 * ({NAMESPACE_GPU_USAGE_INSTANT}) / clamp_min({total}, 1)" idle = 'label_replace(vector(100), "namespace", "idle", "", "") and on() (' + total + " == 0)" - return f"({share}) or {idle}" + return f"({share}) or ({idle})" PROBLEM_PODS_EXPR = ( diff --git a/services/monitoring/dashboards/atlas-gpu.json b/services/monitoring/dashboards/atlas-gpu.json index e0a631b..303ec2e 100644 --- a/services/monitoring/dashboards/atlas-gpu.json +++ b/services/monitoring/dashboards/atlas-gpu.json @@ -20,7 +20,7 @@ }, "targets": [ { - "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / 
clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0)", + "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0))", "refId": "A", "legendFormat": "{{namespace}}" } diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 087b9af..d7042ed 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -1137,7 +1137,7 @@ }, "targets": [ { - "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0)", + "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0))", "refId": "A", "legendFormat": "{{namespace}}" 
} diff --git a/services/monitoring/grafana-dashboard-gpu.yaml b/services/monitoring/grafana-dashboard-gpu.yaml index b90dddb..680cccc 100644 --- a/services/monitoring/grafana-dashboard-gpu.yaml +++ b/services/monitoring/grafana-dashboard-gpu.yaml @@ -29,7 +29,7 @@ data: }, "targets": [ { - "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0)", + "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0))", "refId": "A", "legendFormat": "{{namespace}}" } diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index b2aca02..ed7432e 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -1146,7 +1146,7 @@ data: }, "targets": [ { - "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0)", + "expr": "(100 * 
(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0))", "refId": "A", "legendFormat": "{{namespace}}" } -- 2.47.2 From 2d8540907ad0f7bec666aa3f7c5f4dae15b13010 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 14:28:11 -0300 Subject: [PATCH 194/684] comms(atlasbot): respond to @atlas mentions and keep context --- .../communication/atlasbot-configmap.yaml | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/services/communication/atlasbot-configmap.yaml b/services/communication/atlasbot-configmap.yaml index b897683..aba33da 100644 --- a/services/communication/atlasbot-configmap.yaml +++ b/services/communication/atlasbot-configmap.yaml @@ -5,7 +5,7 @@ metadata: name: atlasbot data: bot.py: | - import json, os, time, collections + import json, os, time, collections, re from urllib import request, parse, error BASE = os.environ.get("MATRIX_BASE", "http://othrys-synapse-matrix-synapse:8008") @@ -16,6 +16,9 @@ data: OLLAMA_URL = os.environ.get("OLLAMA_URL", "https://chat.ai.bstein.dev/") MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5-coder:7b-instruct-q4_0") API_KEY = os.environ.get("CHAT_API_KEY", "") + BOT_MENTIONS = os.environ.get("BOT_MENTIONS", f"{USER},atlas") + MENTION_LOCALPARTS = [m.strip().lstrip("@") for m in BOT_MENTIONS.split(",") if m.strip()] + MENTION_RE = re.compile(r"(? 
list of str (short transcript) + history = collections.defaultdict(list) # (room_id, sender|None) -> list of str (short transcript) - def ollama_reply(room_id: str, prompt: str) -> str: + def key_for(room_id: str, sender: str, is_dm: bool): + return (room_id, None) if is_dm else (room_id, sender) + + def ollama_reply(hist_key, prompt: str) -> str: try: # Keep short context as plain text transcript - transcript = "\n".join(history[room_id][-12:] + [f"User: {prompt}"]) + transcript = "\n".join( + ["System: You are Atlas, the Titan lab assistant for Othrys. Be helpful, direct, and concise."] + + history[hist_key][-24:] + + [f"User: {prompt}"] + ) payload = {"model": MODEL, "message": transcript} headers = {"Content-Type": "application/json"} if API_KEY: @@ -66,7 +76,7 @@ data: with request.urlopen(r, timeout=15) as resp: data = json.loads(resp.read().decode()) reply = data.get("message") or data.get("response") or data.get("reply") or "I'm here to help." - history[room_id].append(f"Atlas: {reply}") + history[hist_key].append(f"Atlas: {reply}") return reply except Exception: return "Hi! I'm Atlas." 
@@ -113,10 +123,12 @@ data: # Only respond if bot is mentioned or in a DM joined_count = data.get("summary", {}).get("m.joined_member_count") is_dm = joined_count is not None and joined_count <= 2 - mentioned = f"@{USER}" in body - history[rid].append(f"{sender}: {body}") + mentioned = MENTION_RE.search(body) is not None + hist_key = key_for(rid, sender, is_dm) + history[hist_key].append(f"{sender}: {body}") + history[hist_key] = history[hist_key][-80:] if is_dm or mentioned: - reply = ollama_reply(rid, body) + reply = ollama_reply(hist_key, body) send_msg(token, rid, reply) def main(): -- 2.47.2 From 7c31d25c24458a4eb909817c1191b8d40b756dde Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 14:30:49 -0300 Subject: [PATCH 195/684] comms(atlasbot): rollout on config changes --- services/communication/atlasbot-deployment.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/services/communication/atlasbot-deployment.yaml b/services/communication/atlasbot-deployment.yaml index 0450f3e..12f3e30 100644 --- a/services/communication/atlasbot-deployment.yaml +++ b/services/communication/atlasbot-deployment.yaml @@ -3,7 +3,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: atlasbot - namespace: communication + namespace: comms labels: app: atlasbot spec: @@ -15,6 +15,8 @@ spec: metadata: labels: app: atlasbot + annotations: + checksum/atlasbot-configmap: cb29da6de63e087511e1936d41a6d12eff7f43c55cf266d1e71c3c2e14661c7b spec: nodeSelector: hardware: rpi5 -- 2.47.2 From 5093f77c0a34f04e2d9946e2b6125fed90cbadf0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 14:44:33 -0300 Subject: [PATCH 196/684] monitoring: per-panel namespace share filters --- scripts/dashboards_render_atlas.py | 130 +++++++++++---- services/ai-llm/deployment.yaml | 2 + services/communication/element-rendered.yaml | 21 --- services/monitoring/dashboards/atlas-gpu.json | 106 +++++++++++-- .../monitoring/dashboards/atlas-nodes.json | 2 +- 
.../monitoring/dashboards/atlas-overview.json | 150 ++++++++++++++++-- .../monitoring/dashboards/atlas-pods.json | 2 +- .../monitoring/grafana-dashboard-gpu.yaml | 106 +++++++++++-- .../monitoring/grafana-dashboard-nodes.yaml | 2 +- .../grafana-dashboard-overview.yaml | 150 ++++++++++++++++-- .../monitoring/grafana-dashboard-pods.yaml | 2 +- 11 files changed, 568 insertions(+), 105 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 58da298..34a108a 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -9,6 +9,7 @@ Usage: import argparse import json import textwrap +import urllib.parse from pathlib import Path # --------------------------------------------------------------------------- @@ -80,7 +81,7 @@ CONTROL_TOTAL = len(CONTROL_PLANE_NODES) WORKER_TOTAL = len(WORKER_NODES) CONTROL_SUFFIX = f"/{CONTROL_TOTAL}" WORKER_SUFFIX = f"/{WORKER_TOTAL}" -CP_ALLOWED_NS = "kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system" +CP_ALLOWED_NS = "(^kube.*|.*-system$|^traefik$|^monitoring$)" LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]" GAUGE_WIDTHS = [4, 3, 3, 4, 3, 3, 4] CONTROL_WORKLOADS_EXPR = ( @@ -170,22 +171,43 @@ def node_io_expr(scope=""): return scoped_node_expr(base, scope) +def namespace_selector(scope_var): + return f'namespace!="",pod!="",container!="",{scope_var}' + + +def namespace_gpu_selector(scope_var): + return f'namespace!="",pod!="",{scope_var}' + + +def namespace_cpu_raw(scope_var): + return f"sum(rate(container_cpu_usage_seconds_total{{{namespace_selector(scope_var)}}}[5m])) by (namespace)" + + +def namespace_ram_raw(scope_var): + return f"sum(container_memory_working_set_bytes{{{namespace_selector(scope_var)}}}) by (namespace)" + + +def namespace_gpu_usage_instant(scope_var): + return f"sum(DCGM_FI_DEV_GPU_UTIL{{{namespace_gpu_selector(scope_var)}}}) by (namespace)" + + def namespace_share_expr(resource_expr): total = f"clamp_min(sum( 
{resource_expr} ), 1)" return f"100 * ( {resource_expr} ) / {total}" -def namespace_cpu_share_expr(): - return namespace_share_expr(NAMESPACE_CPU_RAW) +def namespace_cpu_share_expr(scope_var): + return namespace_share_expr(namespace_cpu_raw(scope_var)) -def namespace_ram_share_expr(): - return namespace_share_expr(NAMESPACE_RAM_RAW) +def namespace_ram_share_expr(scope_var): + return namespace_share_expr(namespace_ram_raw(scope_var)) -def namespace_gpu_share_expr(): - total = f"(sum({NAMESPACE_GPU_USAGE_INSTANT}) or on() vector(0))" - share = f"100 * ({NAMESPACE_GPU_USAGE_INSTANT}) / clamp_min({total}, 1)" +def namespace_gpu_share_expr(scope_var): + usage = namespace_gpu_usage_instant(scope_var) + total = f"(sum({usage}) or on() vector(0))" + share = f"100 * ({usage}) / clamp_min({total}, 1)" idle = 'label_replace(vector(100), "namespace", "idle", "", "") and on() (' + total + " == 0)" return f"({share}) or ({idle})" @@ -272,20 +294,12 @@ STUCK_TABLE_EXPR = ( ")" ) -NAMESPACE_SCOPE_WORKLOAD = 'namespace!~"(^kube.*|.*-system$|^traefik$)"' +NAMESPACE_SCOPE_WORKLOAD = 'namespace!~"(^kube.*|.*-system$|^traefik$|^monitoring$)"' NAMESPACE_SCOPE_ALL = 'namespace=~".*"' -NAMESPACE_SCOPE_INFRA = 'namespace=~"(^kube.*|.*-system$|^traefik$)"' -NAMESPACE_SCOPE_VAR = "$namespace_scope" -NAMESPACE_SELECTOR = f'namespace!="",pod!="",container!="",{NAMESPACE_SCOPE_VAR}' -NAMESPACE_GPU_SELECTOR = f'namespace!="",pod!="",{NAMESPACE_SCOPE_VAR}' - -NAMESPACE_CPU_RAW = ( - f'sum(rate(container_cpu_usage_seconds_total{{{NAMESPACE_SELECTOR}}}[5m])) by (namespace)' -) -NAMESPACE_RAM_RAW = f'sum(container_memory_working_set_bytes{{{NAMESPACE_SELECTOR}}}) by (namespace)' +NAMESPACE_SCOPE_INFRA = 'namespace=~"(^kube.*|.*-system$|^traefik$|^monitoring$)"' +NAMESPACE_SCOPE_VARS = ["namespace_scope_cpu", "namespace_scope_gpu", "namespace_scope_ram"] GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"] GPU_NODE_REGEX = "|".join(GPU_NODES) -NAMESPACE_GPU_USAGE_INSTANT = 
f'sum(DCGM_FI_DEV_GPU_UTIL{{{NAMESPACE_GPU_SELECTOR}}}) by (namespace)' TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))" TRAEFIK_NET_INGRESS = ( 'sum(rate(container_network_receive_bytes_total{namespace="traefik",pod=~"traefik-.*"}[5m]))' @@ -536,9 +550,9 @@ def table_panel( return panel -def pie_panel(panel_id, title, expr, grid): +def pie_panel(panel_id, title, expr, grid, *, links=None, description=None): """Return a pie chart panel with readable namespace labels.""" - return { + panel = { "id": panel_id, "type": "piechart", "title": title, @@ -562,9 +576,14 @@ def pie_panel(panel_id, title, expr, grid): "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False}, }, } + if links: + panel["links"] = links + if description: + panel["description"] = description + return panel -def namespace_scope_variable(): +def namespace_scope_variable(var_name, label): options = [ { "text": "workload namespaces only", @@ -587,13 +606,13 @@ def namespace_scope_variable(): + NAMESPACE_SCOPE_INFRA ) return { - "name": "namespace_scope", - "label": "Namespace filter", + "name": var_name, + "label": label, "type": "custom", "query": query, "current": {"text": options[0]["text"], "value": options[0]["value"], "selected": True}, "options": options, - "hide": 0, + "hide": 2, "multi": False, "includeAll": False, "refresh": 1, @@ -602,6 +621,28 @@ def namespace_scope_variable(): } +def namespace_scope_links(var_name): + def with_value(value): + encoded = urllib.parse.quote(value, safe="") + params = [] + for other in NAMESPACE_SCOPE_VARS: + if other == var_name: + params.append(f"var-{other}={encoded}") + else: + params.append(f"var-{other}=${{{other}}}") + return "?" 
+ "&".join(params) + + return [ + {"title": "Workload namespaces only", "url": with_value(NAMESPACE_SCOPE_WORKLOAD), "targetBlank": False}, + {"title": "All namespaces", "url": with_value(NAMESPACE_SCOPE_ALL), "targetBlank": False}, + { + "title": "Infrastructure namespaces only", + "url": with_value(NAMESPACE_SCOPE_INFRA), + "targetBlank": False, + }, + ] + + def bargauge_panel( panel_id, title, @@ -890,28 +931,38 @@ def build_overview(): ) ) + cpu_scope = "$namespace_scope_cpu" + gpu_scope = "$namespace_scope_gpu" + ram_scope = "$namespace_scope_ram" + panels.append( pie_panel( 11, "Namespace CPU Share", - namespace_cpu_share_expr(), + namespace_cpu_share_expr(cpu_scope), {"h": 9, "w": 8, "x": 0, "y": 16}, + links=namespace_scope_links("namespace_scope_cpu"), + description="Use panel links to switch namespace scope.", ) ) panels.append( pie_panel( 12, "Namespace GPU Share", - namespace_gpu_share_expr(), + namespace_gpu_share_expr(gpu_scope), {"h": 9, "w": 8, "x": 8, "y": 16}, + links=namespace_scope_links("namespace_scope_gpu"), + description="Use panel links to switch namespace scope.", ) ) panels.append( pie_panel( 13, "Namespace RAM Share", - namespace_ram_share_expr(), + namespace_ram_share_expr(ram_scope), {"h": 9, "w": 8, "x": 16, "y": 16}, + links=namespace_scope_links("namespace_scope_ram"), + description="Use panel links to switch namespace scope.", ) ) @@ -1077,7 +1128,13 @@ def build_overview(): "schemaVersion": 39, "style": "dark", "tags": ["atlas", "overview"], - "templating": {"list": [namespace_scope_variable()]}, + "templating": { + "list": [ + namespace_scope_variable("namespace_scope_cpu", "CPU namespace filter"), + namespace_scope_variable("namespace_scope_gpu", "GPU namespace filter"), + namespace_scope_variable("namespace_scope_ram", "RAM namespace filter"), + ] + }, "time": {"from": "now-1h", "to": "now"}, "refresh": "1m", "links": [], @@ -1718,19 +1775,22 @@ def build_network_dashboard(): def build_gpu_dashboard(): panels = [] + gpu_scope = 
"$namespace_scope_gpu" panels.append( pie_panel( 1, "Namespace GPU Share", - namespace_gpu_share_expr(), + namespace_gpu_share_expr(gpu_scope), {"h": 8, "w": 12, "x": 0, "y": 0}, + links=namespace_scope_links("namespace_scope_gpu"), + description="Use panel links to switch namespace scope.", ) ) panels.append( timeseries_panel( 2, "GPU Util by Namespace", - NAMESPACE_GPU_USAGE_INSTANT, + namespace_gpu_usage_instant(gpu_scope), {"h": 8, "w": 12, "x": 12, "y": 0}, unit="percent", legend="{{namespace}}", @@ -1771,7 +1831,13 @@ def build_gpu_dashboard(): "schemaVersion": 39, "style": "dark", "tags": ["atlas", "gpu"], - "templating": {"list": [namespace_scope_variable()]}, + "templating": { + "list": [ + namespace_scope_variable("namespace_scope_cpu", "CPU namespace filter"), + namespace_scope_variable("namespace_scope_gpu", "GPU namespace filter"), + namespace_scope_variable("namespace_scope_ram", "RAM namespace filter"), + ] + }, } diff --git a/services/ai-llm/deployment.yaml b/services/ai-llm/deployment.yaml index b74dc0a..0bdc275 100644 --- a/services/ai-llm/deployment.yaml +++ b/services/ai-llm/deployment.yaml @@ -7,6 +7,8 @@ metadata: spec: replicas: 1 revisionHistoryLimit: 2 + strategy: + type: Recreate selector: matchLabels: app: ollama diff --git a/services/communication/element-rendered.yaml b/services/communication/element-rendered.yaml index c0b03c1..f04dda2 100644 --- a/services/communication/element-rendered.yaml +++ b/services/communication/element-rendered.yaml @@ -200,24 +200,3 @@ spec: port: number: 80 pathType: Prefix ---- -# Source: element-web/templates/tests/test-connection.yaml -apiVersion: v1 -kind: Pod -metadata: - name: "othrys-element-element-web-test-connection" - labels: - helm.sh/chart: element-web-1.4.26 - app.kubernetes.io/name: element-web - app.kubernetes.io/instance: othrys-element - app.kubernetes.io/version: "1.12.6" - app.kubernetes.io/managed-by: Helm - annotations: - "helm.sh/hook": test-success -spec: - containers: - - name: wget 
- image: busybox - command: ['wget'] - args: ['othrys-element-element-web:80'] - restartPolicy: Never diff --git a/services/monitoring/dashboards/atlas-gpu.json b/services/monitoring/dashboards/atlas-gpu.json index 303ec2e..d4ad913 100644 --- a/services/monitoring/dashboards/atlas-gpu.json +++ b/services/monitoring/dashboards/atlas-gpu.json @@ -20,7 +20,7 @@ }, "targets": [ { - "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0))", + "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) or on() vector(0)) == 0))", "refId": "A", "legendFormat": "{{namespace}}" } @@ -53,7 +53,25 @@ "fields": "", "values": false } - } + }, + "links": [ + { + "title": "Workload namespaces only", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}", + "targetBlank": false + }, + { + "title": "All namespaces", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22.%2A%22&var-namespace_scope_ram=${namespace_scope_ram}", + "targetBlank": false + }, + { + "title": "Infrastructure namespaces only", + "url": 
"?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}", + "targetBlank": false + } + ], + "description": "Use panel links to switch namespace scope." }, { "id": 2, @@ -71,7 +89,7 @@ }, "targets": [ { - "expr": "sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)", + "expr": "sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -186,19 +204,19 @@ "templating": { "list": [ { - "name": "namespace_scope", - "label": "Namespace filter", + "name": "namespace_scope_cpu", + "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$)\"", + "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", "selected": true }, { @@ -208,11 +226,79 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"(^kube.*|.*-system$|^traefik$)\"", + "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", "selected": false } ], - "hide": 0, + "hide": 2, + "multi": false, + "includeAll": false, + "refresh": 1, + "sort": 0, + "skipUrlSync": false + }, + { + "name": 
"namespace_scope_gpu", + "label": "GPU namespace filter", + "type": "custom", + "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "current": { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": true + }, + "options": [ + { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": true + }, + { + "text": "all namespaces", + "value": "namespace=~\".*\"", + "selected": false + }, + { + "text": "infrastructure namespaces only", + "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": false + } + ], + "hide": 2, + "multi": false, + "includeAll": false, + "refresh": 1, + "sort": 0, + "skipUrlSync": false + }, + { + "name": "namespace_scope_ram", + "label": "RAM namespace filter", + "type": "custom", + "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "current": { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": true + }, + "options": [ + { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": true + }, + { + "text": "all namespaces", + "value": "namespace=~\".*\"", + "selected": false + }, + { + "text": "infrastructure namespaces only", + "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": false + } + ], + "hide": 2, "multi": false, "includeAll": false, "refresh": 1, diff --git a/services/monitoring/dashboards/atlas-nodes.json b/services/monitoring/dashboards/atlas-nodes.json 
index 495c622..ff69739 100644 --- a/services/monitoring/dashboards/atlas-nodes.json +++ b/services/monitoring/dashboards/atlas-nodes.json @@ -142,7 +142,7 @@ }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"})", "refId": "A" } ], diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index d7042ed..ce1b0a3 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -76,7 +76,7 @@ }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"}) or on() vector(0)", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"}) or on() vector(0)", "refId": "A" } ], @@ -1086,7 +1086,7 @@ }, "targets": [ { - "expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}[5m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}[5m])) by (namespace) ), 1)", + "expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1119,7 +1119,25 @@ "fields": "", "values": false } - } + }, + "links": [ + { + "title": "Workload namespaces only", + "url": 
"?var-namespace_scope_cpu=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "targetBlank": false + }, + { + "title": "All namespaces", + "url": "?var-namespace_scope_cpu=namespace%3D~%22.%2A%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "targetBlank": false + }, + { + "title": "Infrastructure namespaces only", + "url": "?var-namespace_scope_cpu=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "targetBlank": false + } + ], + "description": "Use panel links to switch namespace scope." }, { "id": 12, @@ -1137,7 +1155,7 @@ }, "targets": [ { - "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0))", + "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) or on() vector(0)) == 0))", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1170,7 +1188,25 @@ "fields": "", "values": false } - } + }, + "links": [ + { + "title": "Workload namespaces only", + "url": 
"?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}", + "targetBlank": false + }, + { + "title": "All namespaces", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22.%2A%22&var-namespace_scope_ram=${namespace_scope_ram}", + "targetBlank": false + }, + { + "title": "Infrastructure namespaces only", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}", + "targetBlank": false + } + ], + "description": "Use panel links to switch namespace scope." }, { "id": 13, @@ -1188,7 +1224,7 @@ }, "targets": [ { - "expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}) by (namespace) ), 1)", + "expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1221,7 +1257,25 @@ "fields": "", "values": false } - } + }, + "links": [ + { + "title": "Workload namespaces only", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22", + "targetBlank": false + }, + { + "title": "All namespaces", + "url": 
"?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22.%2A%22", + "targetBlank": false + }, + { + "title": "Infrastructure namespaces only", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22", + "targetBlank": false + } + ], + "description": "Use panel links to switch namespace scope." }, { "id": 14, @@ -1793,19 +1847,19 @@ "templating": { "list": [ { - "name": "namespace_scope", - "label": "Namespace filter", + "name": "namespace_scope_cpu", + "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$)\"", + "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", "selected": true }, { @@ -1815,11 +1869,79 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"(^kube.*|.*-system$|^traefik$)\"", + "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", "selected": false } ], - "hide": 0, + "hide": 2, + "multi": false, + "includeAll": false, + "refresh": 1, + "sort": 0, + "skipUrlSync": false + }, + { + "name": "namespace_scope_gpu", + "label": "GPU namespace 
filter", + "type": "custom", + "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "current": { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": true + }, + "options": [ + { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": true + }, + { + "text": "all namespaces", + "value": "namespace=~\".*\"", + "selected": false + }, + { + "text": "infrastructure namespaces only", + "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": false + } + ], + "hide": 2, + "multi": false, + "includeAll": false, + "refresh": 1, + "sort": 0, + "skipUrlSync": false + }, + { + "name": "namespace_scope_ram", + "label": "RAM namespace filter", + "type": "custom", + "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "current": { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": true + }, + "options": [ + { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": true + }, + { + "text": "all namespaces", + "value": "namespace=~\".*\"", + "selected": false + }, + { + "text": "infrastructure namespaces only", + "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": false + } + ], + "hide": 2, "multi": false, "includeAll": false, "refresh": 1, diff --git a/services/monitoring/dashboards/atlas-pods.json b/services/monitoring/dashboards/atlas-pods.json index 4b2a54a..b6d0be0 100644 --- 
a/services/monitoring/dashboards/atlas-pods.json +++ b/services/monitoring/dashboards/atlas-pods.json @@ -200,7 +200,7 @@ }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"})", "refId": "A" } ], diff --git a/services/monitoring/grafana-dashboard-gpu.yaml b/services/monitoring/grafana-dashboard-gpu.yaml index 680cccc..41b4734 100644 --- a/services/monitoring/grafana-dashboard-gpu.yaml +++ b/services/monitoring/grafana-dashboard-gpu.yaml @@ -29,7 +29,7 @@ data: }, "targets": [ { - "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0))", + "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) or on() vector(0)) == 0))", "refId": "A", "legendFormat": "{{namespace}}" } @@ -62,7 +62,25 @@ data: "fields": "", "values": false } - } + }, + "links": [ + { + "title": "Workload namespaces only", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}", + 
"targetBlank": false + }, + { + "title": "All namespaces", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22.%2A%22&var-namespace_scope_ram=${namespace_scope_ram}", + "targetBlank": false + }, + { + "title": "Infrastructure namespaces only", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}", + "targetBlank": false + } + ], + "description": "Use panel links to switch namespace scope." }, { "id": 2, @@ -80,7 +98,7 @@ data: }, "targets": [ { - "expr": "sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)", + "expr": "sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -195,19 +213,19 @@ data: "templating": { "list": [ { - "name": "namespace_scope", - "label": "Namespace filter", + "name": "namespace_scope_cpu", + "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$)\"", + "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", "selected": true }, { @@ -217,11 +235,79 @@ data: }, { "text": 
"infrastructure namespaces only", - "value": "namespace=~\"(^kube.*|.*-system$|^traefik$)\"", + "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", "selected": false } ], - "hide": 0, + "hide": 2, + "multi": false, + "includeAll": false, + "refresh": 1, + "sort": 0, + "skipUrlSync": false + }, + { + "name": "namespace_scope_gpu", + "label": "GPU namespace filter", + "type": "custom", + "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "current": { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": true + }, + "options": [ + { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": true + }, + { + "text": "all namespaces", + "value": "namespace=~\".*\"", + "selected": false + }, + { + "text": "infrastructure namespaces only", + "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": false + } + ], + "hide": 2, + "multi": false, + "includeAll": false, + "refresh": 1, + "sort": 0, + "skipUrlSync": false + }, + { + "name": "namespace_scope_ram", + "label": "RAM namespace filter", + "type": "custom", + "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "current": { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": true + }, + "options": [ + { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": true + }, + { + "text": "all namespaces", + "value": "namespace=~\".*\"", + "selected": false + }, 
+ { + "text": "infrastructure namespaces only", + "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": false + } + ], + "hide": 2, "multi": false, "includeAll": false, "refresh": 1, diff --git a/services/monitoring/grafana-dashboard-nodes.yaml b/services/monitoring/grafana-dashboard-nodes.yaml index 542daca..854f68a 100644 --- a/services/monitoring/grafana-dashboard-nodes.yaml +++ b/services/monitoring/grafana-dashboard-nodes.yaml @@ -151,7 +151,7 @@ data: }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"})", "refId": "A" } ], diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index ed7432e..557d120 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -85,7 +85,7 @@ data: }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"}) or on() vector(0)", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"}) or on() vector(0)", "refId": "A" } ], @@ -1095,7 +1095,7 @@ data: }, "targets": [ { - "expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}[5m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}[5m])) by (namespace) ), 1)", + "expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ) / clamp_min(sum( 
sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1128,7 +1128,25 @@ data: "fields": "", "values": false } - } + }, + "links": [ + { + "title": "Workload namespaces only", + "url": "?var-namespace_scope_cpu=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "targetBlank": false + }, + { + "title": "All namespaces", + "url": "?var-namespace_scope_cpu=namespace%3D~%22.%2A%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "targetBlank": false + }, + { + "title": "Infrastructure namespaces only", + "url": "?var-namespace_scope_cpu=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "targetBlank": false + } + ], + "description": "Use panel links to switch namespace scope." 
}, { "id": 12, @@ -1146,7 +1164,7 @@ data: }, "targets": [ { - "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0))", + "expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) or on() vector(0)) == 0))", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1179,7 +1197,25 @@ data: "fields": "", "values": false } - } + }, + "links": [ + { + "title": "Workload namespaces only", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}", + "targetBlank": false + }, + { + "title": "All namespaces", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22.%2A%22&var-namespace_scope_ram=${namespace_scope_ram}", + "targetBlank": false + }, + { + "title": "Infrastructure namespaces only", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}", + "targetBlank": false + } + ], + "description": "Use panel links to switch namespace scope." 
}, { "id": 13, @@ -1197,7 +1233,7 @@ data: }, "targets": [ { - "expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}) by (namespace) ), 1)", + "expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1230,7 +1266,25 @@ data: "fields": "", "values": false } - } + }, + "links": [ + { + "title": "Workload namespaces only", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22", + "targetBlank": false + }, + { + "title": "All namespaces", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22.%2A%22", + "targetBlank": false + }, + { + "title": "Infrastructure namespaces only", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22", + "targetBlank": false + } + ], + "description": "Use panel links to switch namespace scope." 
}, { "id": 14, @@ -1802,19 +1856,19 @@ data: "templating": { "list": [ { - "name": "namespace_scope", - "label": "Namespace filter", + "name": "namespace_scope_cpu", + "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$)\"", + "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", "selected": true }, { @@ -1824,11 +1878,79 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"(^kube.*|.*-system$|^traefik$)\"", + "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", "selected": false } ], - "hide": 0, + "hide": 2, + "multi": false, + "includeAll": false, + "refresh": 1, + "sort": 0, + "skipUrlSync": false + }, + { + "name": "namespace_scope_gpu", + "label": "GPU namespace filter", + "type": "custom", + "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "current": { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": true + }, + "options": [ + { + "text": "workload namespaces only", + "value": 
"namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": true + }, + { + "text": "all namespaces", + "value": "namespace=~\".*\"", + "selected": false + }, + { + "text": "infrastructure namespaces only", + "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": false + } + ], + "hide": 2, + "multi": false, + "includeAll": false, + "refresh": 1, + "sort": 0, + "skipUrlSync": false + }, + { + "name": "namespace_scope_ram", + "label": "RAM namespace filter", + "type": "custom", + "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "current": { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": true + }, + "options": [ + { + "text": "workload namespaces only", + "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": true + }, + { + "text": "all namespaces", + "value": "namespace=~\".*\"", + "selected": false + }, + { + "text": "infrastructure namespaces only", + "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "selected": false + } + ], + "hide": 2, "multi": false, "includeAll": false, "refresh": 1, diff --git a/services/monitoring/grafana-dashboard-pods.yaml b/services/monitoring/grafana-dashboard-pods.yaml index b7c49d5..7d02e22 100644 --- a/services/monitoring/grafana-dashboard-pods.yaml +++ b/services/monitoring/grafana-dashboard-pods.yaml @@ -209,7 +209,7 @@ data: }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"})", "refId": "A" } ], -- 2.47.2 From 
0c1989c67832d8d77aca92a1c0214ddc877315f4 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 14:48:54 -0300 Subject: [PATCH 197/684] ai-llm: serialize rollout for RWO pvc --- services/ai-llm/deployment.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/services/ai-llm/deployment.yaml b/services/ai-llm/deployment.yaml index 0bdc275..b6e6701 100644 --- a/services/ai-llm/deployment.yaml +++ b/services/ai-llm/deployment.yaml @@ -8,7 +8,10 @@ spec: replicas: 1 revisionHistoryLimit: 2 strategy: - type: Recreate + type: RollingUpdate + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 selector: matchLabels: app: ollama -- 2.47.2 From 5da36a38c3c8507d619e81e11722f954998e3489 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 15:32:30 -0300 Subject: [PATCH 198/684] comms: fix atlas mention detection --- .../communication/atlasbot-configmap.yaml | 27 +++++++++++++++++-- .../communication/atlasbot-deployment.yaml | 2 +- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/services/communication/atlasbot-configmap.yaml b/services/communication/atlasbot-configmap.yaml index aba33da..034d55e 100644 --- a/services/communication/atlasbot-configmap.yaml +++ b/services/communication/atlasbot-configmap.yaml @@ -17,8 +17,31 @@ data: MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5-coder:7b-instruct-q4_0") API_KEY = os.environ.get("CHAT_API_KEY", "") BOT_MENTIONS = os.environ.get("BOT_MENTIONS", f"{USER},atlas") - MENTION_LOCALPARTS = [m.strip().lstrip("@") for m in BOT_MENTIONS.split(",") if m.strip()] + SERVER_NAME = os.environ.get("MATRIX_SERVER_NAME", "live.bstein.dev") + MENTION_TOKENS = [m.strip() for m in BOT_MENTIONS.split(",") if m.strip()] + MENTION_LOCALPARTS = [m.lstrip("@").split(":", 1)[0] for m in MENTION_TOKENS] MENTION_RE = re.compile(r"(? 
str: + t = token.strip() + if not t: + return "" + if t.startswith("@") and ":" in t: + return t + t = t.lstrip("@") + if ":" in t: + return f"@{t}" + return f"@{t}:{SERVER_NAME}" + + MENTION_USER_IDS = {normalize_user_id(t).lower() for t in MENTION_TOKENS if normalize_user_id(t)} + + def is_mentioned(content: dict, body: str) -> bool: + if MENTION_RE.search(body or "") is not None: + return True + mentions = content.get("m.mentions", {}) + user_ids = mentions.get("user_ids", []) + if not isinstance(user_ids, list): + return False + return any(isinstance(uid, str) and uid.lower() in MENTION_USER_IDS for uid in user_ids) def req(method: str, path: str, token: str | None = None, body=None, timeout=60, base: str | None = None): url = (base or BASE) + path @@ -123,7 +146,7 @@ data: # Only respond if bot is mentioned or in a DM joined_count = data.get("summary", {}).get("m.joined_member_count") is_dm = joined_count is not None and joined_count <= 2 - mentioned = MENTION_RE.search(body) is not None + mentioned = is_mentioned(content, body) hist_key = key_for(rid, sender, is_dm) history[hist_key].append(f"{sender}: {body}") history[hist_key] = history[hist_key][-80:] diff --git a/services/communication/atlasbot-deployment.yaml b/services/communication/atlasbot-deployment.yaml index 12f3e30..fbb9b3d 100644 --- a/services/communication/atlasbot-deployment.yaml +++ b/services/communication/atlasbot-deployment.yaml @@ -16,7 +16,7 @@ spec: labels: app: atlasbot annotations: - checksum/atlasbot-configmap: cb29da6de63e087511e1936d41a6d12eff7f43c55cf266d1e71c3c2e14661c7b + checksum/atlasbot-configmap: c57538d33dc02db7aaf7b2f4681f50620c2cbcde8ddc1c51ccb5fa693247b00a spec: nodeSelector: hardware: rpi5 -- 2.47.2 From dca01199ce45079c34261fd4a0d83cc7b37b2e01 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 16:29:11 -0300 Subject: [PATCH 199/684] comms: reset othrys room --- .../communication/atlasbot-deployment.yaml | 4 +- services/communication/guest-name-job.yaml | 
18 +- services/communication/kustomization.yaml | 1 + services/communication/pin-othrys-job.yaml | 2 +- .../communication/reset-othrys-room-job.yaml | 231 ++++++++++++++++++ services/communication/seed-othrys-room.yaml | 4 +- 6 files changed, 251 insertions(+), 9 deletions(-) create mode 100644 services/communication/reset-othrys-room-job.yaml diff --git a/services/communication/atlasbot-deployment.yaml b/services/communication/atlasbot-deployment.yaml index fbb9b3d..9778005 100644 --- a/services/communication/atlasbot-deployment.yaml +++ b/services/communication/atlasbot-deployment.yaml @@ -33,7 +33,9 @@ spec: - name: AUTH_BASE value: http://matrix-authentication-service:8080 - name: BOT_USER - value: atlasbot + value: atlas + - name: BOT_MENTIONS + value: atlas - name: BOT_PASS valueFrom: secretKeyRef: diff --git a/services/communication/guest-name-job.yaml b/services/communication/guest-name-job.yaml index 8d8149e..6bd0761 100644 --- a/services/communication/guest-name-job.yaml +++ b/services/communication/guest-name-job.yaml @@ -3,7 +3,7 @@ apiVersion: batch/v1 kind: CronJob metadata: name: guest-name-randomizer - namespace: communication + namespace: comms spec: schedule: "*/1 * * * *" suspend: true @@ -42,7 +42,7 @@ spec: BASE = os.environ["SYNAPSE_BASE"] AUTH_BASE = os.environ.get("AUTH_BASE", BASE) - OTHRYS = "!orejZnVfvbAmwQDYba:live.bstein.dev" + ROOM_ALIAS = "#othrys:live.bstein.dev" def login(user, password): r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={ @@ -53,6 +53,13 @@ spec: r.raise_for_status() return r.json()["access_token"] + def resolve_alias(token, alias): + headers = {"Authorization": f"Bearer {token}"} + enc = urllib.parse.quote(alias) + r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=headers) + r.raise_for_status() + return r.json()["room_id"] + def list_guests(token): headers = {"Authorization": f"Bearer {token}"} users = [] @@ -73,22 +80,23 @@ spec: break return users - def 
set_displayname(token, user_id, name): + def set_displayname(token, room_id, user_id, name): headers = {"Authorization": f"Bearer {token}"} payload = {"displayname": name} # Update global profile r = requests.put(f"{BASE}/_matrix/client/v3/profile/{urllib.parse.quote(user_id)}/displayname", headers=headers, json=payload) r.raise_for_status() # Update Othrys member event so clients see the change quickly - state_url = f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(OTHRYS)}/state/m.room.member/{urllib.parse.quote(user_id)}" + state_url = f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.member/{urllib.parse.quote(user_id)}" r2 = requests.get(state_url, headers=headers) content = r2.json() if r2.status_code == 200 else {"membership": "join"} content["displayname"] = name requests.put(state_url, headers=headers, json=content) token = login(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"]) + room_id = resolve_alias(token, ROOM_ALIAS) guests = list_guests(token) for g in guests: new = f"{random.choice(ADJ)}-{random.choice(NOUN)}" - set_displayname(token, g, new) + set_displayname(token, room_id, g, new) PY diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index 1b8f17a..b4d5eb4 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -17,6 +17,7 @@ resources: - livekit-middlewares.yaml - element-call-config.yaml - element-call-deployment.yaml + - reset-othrys-room-job.yaml - pin-othrys-job.yaml - guest-name-job.yaml - atlasbot-configmap.yaml diff --git a/services/communication/pin-othrys-job.yaml b/services/communication/pin-othrys-job.yaml index a45f37a..b0a4c4d 100644 --- a/services/communication/pin-othrys-job.yaml +++ b/services/communication/pin-othrys-job.yaml @@ -3,7 +3,7 @@ apiVersion: batch/v1 kind: CronJob metadata: name: pin-othrys-invite - namespace: communication + namespace: comms spec: schedule: "*/30 * * * *" suspend: true 
diff --git a/services/communication/reset-othrys-room-job.yaml b/services/communication/reset-othrys-room-job.yaml new file mode 100644 index 0000000..ddcc0a7 --- /dev/null +++ b/services/communication/reset-othrys-room-job.yaml @@ -0,0 +1,231 @@ +# services/communication/reset-othrys-room-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: othrys-room-reset-1 + namespace: comms +spec: + backoffLimit: 0 + template: + spec: + restartPolicy: Never + containers: + - name: reset + image: python:3.11-slim + env: + - name: SYNAPSE_BASE + value: http://othrys-synapse-matrix-synapse:8008 + - name: AUTH_BASE + value: http://matrix-authentication-service:8080 + - name: SERVER_NAME + value: live.bstein.dev + - name: ROOM_ALIAS + value: "#othrys:live.bstein.dev" + - name: ROOM_NAME + value: Othrys + - name: PIN_MESSAGE + value: "Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join and choose 'Continue' -> 'Join as guest'." + - name: SEEDER_USER + value: othrys-seeder + - name: SEEDER_PASS + valueFrom: + secretKeyRef: + name: atlasbot-credentials-runtime + key: seeder-password + - name: BOT_USER + value: atlas + - name: BOT_PASS + valueFrom: + secretKeyRef: + name: atlasbot-credentials-runtime + key: bot-password + command: + - /bin/sh + - -c + - | + set -euo pipefail + pip install --no-cache-dir requests >/dev/null + python - <<'PY' + import os, sys, urllib.parse, requests + + BASE = os.environ["SYNAPSE_BASE"] + AUTH_BASE = os.environ.get("AUTH_BASE", BASE) + SERVER_NAME = os.environ.get("SERVER_NAME", "live.bstein.dev") + ROOM_ALIAS = os.environ.get("ROOM_ALIAS", "#othrys:live.bstein.dev") + ROOM_NAME = os.environ.get("ROOM_NAME", "Othrys") + PIN_MESSAGE = os.environ["PIN_MESSAGE"] + SEEDER_USER = os.environ["SEEDER_USER"] + SEEDER_PASS = os.environ["SEEDER_PASS"] + BOT_USER = os.environ["BOT_USER"] + BOT_PASS = os.environ["BOT_PASS"] + + POWER_LEVELS = { + "ban": 50, + "events": { + "m.room.avatar": 50, + "m.room.canonical_alias": 
50, + "m.room.encryption": 100, + "m.room.history_visibility": 100, + "m.room.name": 50, + "m.room.power_levels": 100, + "m.room.server_acl": 100, + "m.room.tombstone": 100, + }, + "events_default": 0, + "historical": 100, + "invite": 50, + "kick": 50, + "m.call.invite": 50, + "redact": 50, + "state_default": 50, + "users": {f"@{SEEDER_USER}:{SERVER_NAME}": 100}, + "users_default": 0, + } + + def auth(token): return {"Authorization": f"Bearer {token}"} + + def login(user, password): + r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={ + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": user}, + "password": password, + }) + if r.status_code != 200: + raise SystemExit(f"login failed: {r.status_code} {r.text}") + return r.json()["access_token"] + + def resolve_alias(token, alias): + enc = urllib.parse.quote(alias) + r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token)) + if r.status_code == 404: + return None + r.raise_for_status() + return r.json()["room_id"] + + def ensure_user(token, localpart, password, admin): + user_id = f"@{localpart}:{SERVER_NAME}" + url = f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}" + payload = {"password": password, "admin": admin, "deactivated": False} + r = requests.put(url, headers=auth(token), json=payload) + if r.status_code not in (200, 201): + raise SystemExit(f"ensure user {user_id} failed: {r.status_code} {r.text}") + return user_id + + def create_room(token): + r = requests.post(f"{BASE}/_matrix/client/v3/createRoom", headers=auth(token), json={ + "preset": "public_chat", + "name": ROOM_NAME, + "room_version": "11", + }) + r.raise_for_status() + return r.json()["room_id"] + + def put_state(token, room_id, ev_type, content): + r = requests.put( + f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/{ev_type}", + headers=auth(token), + json=content, + ) + r.raise_for_status() + + def set_directory_visibility(token, room_id, 
visibility): + r = requests.put( + f"{BASE}/_matrix/client/v3/directory/list/room/{urllib.parse.quote(room_id)}", + headers=auth(token), + json={"visibility": visibility}, + ) + r.raise_for_status() + + def delete_alias(token, alias): + enc = urllib.parse.quote(alias) + r = requests.delete(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token)) + if r.status_code in (200, 202, 404): + return + r.raise_for_status() + + def put_alias(token, alias, room_id): + enc = urllib.parse.quote(alias) + r = requests.put( + f"{BASE}/_matrix/client/v3/directory/room/{enc}", + headers=auth(token), + json={"room_id": room_id}, + ) + r.raise_for_status() + + def admin_join(token, room_id, user_id): + r = requests.post( + f"{BASE}/_synapse/admin/v1/join/{urllib.parse.quote(room_id)}", + headers=auth(token), + json={"user_id": user_id}, + ) + r.raise_for_status() + + def join_all_locals(token, room_id): + users = [] + from_token = None + while True: + url = f"{BASE}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100" + if from_token: + url += f"&from={from_token}" + res = requests.get(url, headers=auth(token)) + res.raise_for_status() + data = res.json() + for u in data.get("users", []): + if u.get("is_guest"): + continue + users.append(u["name"]) + from_token = data.get("next_token") + if not from_token: + break + for uid in users: + admin_join(token, room_id, uid) + + def send_message(token, room_id, body): + r = requests.post( + f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message", + headers=auth(token), + json={"msgtype": "m.text", "body": body}, + ) + r.raise_for_status() + return r.json()["event_id"] + + token = login(SEEDER_USER, SEEDER_PASS) + + old_room_id = resolve_alias(token, ROOM_ALIAS) + if not old_room_id: + raise SystemExit(f"alias {ROOM_ALIAS} not found; refusing to proceed") + + bot_user_id = ensure_user(token, BOT_USER, BOT_PASS, admin=False) + + new_room_id = create_room(token) + + # Configure the new room. 
+ put_state(token, new_room_id, "m.room.join_rules", {"join_rule": "public"}) + put_state(token, new_room_id, "m.room.guest_access", {"guest_access": "can_join"}) + put_state(token, new_room_id, "m.room.history_visibility", {"history_visibility": "shared"}) + put_state(token, new_room_id, "m.room.power_levels", POWER_LEVELS) + + # Move the alias. + delete_alias(token, ROOM_ALIAS) + put_alias(token, ROOM_ALIAS, new_room_id) + put_state(token, new_room_id, "m.room.canonical_alias", {"alias": ROOM_ALIAS}) + + set_directory_visibility(token, new_room_id, "public") + + # Join the bot and all local non-guest users. + admin_join(token, new_room_id, bot_user_id) + join_all_locals(token, new_room_id) + + # Pin the guest invite message in the new room. + event_id = send_message(token, new_room_id, PIN_MESSAGE) + put_state(token, new_room_id, "m.room.pinned_events", {"pinned": [event_id]}) + + # De-list and tombstone the old room. + set_directory_visibility(token, old_room_id, "private") + put_state(token, old_room_id, "m.room.join_rules", {"join_rule": "invite"}) + put_state(token, old_room_id, "m.room.guest_access", {"guest_access": "forbidden"}) + put_state(token, old_room_id, "m.room.tombstone", {"body": "Othrys has been reset. 
Please join the new room.", "replacement_room": new_room_id}) + + print(f"old_room_id={old_room_id}") + print(f"new_room_id={new_room_id}") + PY diff --git a/services/communication/seed-othrys-room.yaml b/services/communication/seed-othrys-room.yaml index 06be4fb..a54f0de 100644 --- a/services/communication/seed-othrys-room.yaml +++ b/services/communication/seed-othrys-room.yaml @@ -3,7 +3,7 @@ apiVersion: batch/v1 kind: CronJob metadata: name: seed-othrys-room - namespace: communication + namespace: comms spec: schedule: "*/10 * * * *" suspend: true @@ -30,7 +30,7 @@ spec: name: atlasbot-credentials-runtime key: seeder-password - name: BOT_USER - value: atlasbot + value: atlas - name: BOT_PASS valueFrom: secretKeyRef: -- 2.47.2 From f6dba2b8c11914e6dabb44c74cf199677e7a9ea9 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 16:36:55 -0300 Subject: [PATCH 200/684] comms: reset othrys without synapse admin --- .../communication/atlasbot-deployment.yaml | 4 +- .../communication/reset-othrys-room-job.yaml | 77 ++++++++----------- services/communication/seed-othrys-room.yaml | 2 +- 3 files changed, 35 insertions(+), 48 deletions(-) diff --git a/services/communication/atlasbot-deployment.yaml b/services/communication/atlasbot-deployment.yaml index 9778005..fbb9b3d 100644 --- a/services/communication/atlasbot-deployment.yaml +++ b/services/communication/atlasbot-deployment.yaml @@ -33,9 +33,7 @@ spec: - name: AUTH_BASE value: http://matrix-authentication-service:8080 - name: BOT_USER - value: atlas - - name: BOT_MENTIONS - value: atlas + value: atlasbot - name: BOT_PASS valueFrom: secretKeyRef: diff --git a/services/communication/reset-othrys-room-job.yaml b/services/communication/reset-othrys-room-job.yaml index ddcc0a7..e282b44 100644 --- a/services/communication/reset-othrys-room-job.yaml +++ b/services/communication/reset-othrys-room-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: othrys-room-reset-1 + name: othrys-room-reset-2 
namespace: comms spec: backoffLimit: 0 @@ -33,12 +33,7 @@ spec: name: atlasbot-credentials-runtime key: seeder-password - name: BOT_USER - value: atlas - - name: BOT_PASS - valueFrom: - secretKeyRef: - name: atlasbot-credentials-runtime - key: bot-password + value: atlasbot command: - /bin/sh - -c @@ -57,7 +52,6 @@ spec: SEEDER_USER = os.environ["SEEDER_USER"] SEEDER_PASS = os.environ["SEEDER_PASS"] BOT_USER = os.environ["BOT_USER"] - BOT_PASS = os.environ["BOT_PASS"] POWER_LEVELS = { "ban": 50, @@ -102,15 +96,6 @@ spec: r.raise_for_status() return r.json()["room_id"] - def ensure_user(token, localpart, password, admin): - user_id = f"@{localpart}:{SERVER_NAME}" - url = f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}" - payload = {"password": password, "admin": admin, "deactivated": False} - r = requests.put(url, headers=auth(token), json=payload) - if r.status_code not in (200, 201): - raise SystemExit(f"ensure user {user_id} failed: {r.status_code} {r.text}") - return user_id - def create_room(token): r = requests.post(f"{BASE}/_matrix/client/v3/createRoom", headers=auth(token), json={ "preset": "public_chat", @@ -152,34 +137,32 @@ spec: ) r.raise_for_status() - def admin_join(token, room_id, user_id): + def list_joined_members(token, room_id): + r = requests.get( + f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members?membership=join", + headers=auth(token), + ) + r.raise_for_status() + members = [] + for ev in r.json().get("chunk", []): + if ev.get("type") != "m.room.member": + continue + uid = ev.get("state_key") + if not isinstance(uid, str) or not uid.startswith("@"): + continue + members.append(uid) + return members + + def invite_user(token, room_id, user_id): r = requests.post( - f"{BASE}/_synapse/admin/v1/join/{urllib.parse.quote(room_id)}", + f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/invite", headers=auth(token), json={"user_id": user_id}, ) + if r.status_code in (200, 202): + return 
r.raise_for_status() - def join_all_locals(token, room_id): - users = [] - from_token = None - while True: - url = f"{BASE}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100" - if from_token: - url += f"&from={from_token}" - res = requests.get(url, headers=auth(token)) - res.raise_for_status() - data = res.json() - for u in data.get("users", []): - if u.get("is_guest"): - continue - users.append(u["name"]) - from_token = data.get("next_token") - if not from_token: - break - for uid in users: - admin_join(token, room_id, uid) - def send_message(token, room_id, body): r = requests.post( f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message", @@ -195,8 +178,6 @@ spec: if not old_room_id: raise SystemExit(f"alias {ROOM_ALIAS} not found; refusing to proceed") - bot_user_id = ensure_user(token, BOT_USER, BOT_PASS, admin=False) - new_room_id = create_room(token) # Configure the new room. @@ -212,9 +193,16 @@ spec: set_directory_visibility(token, new_room_id, "public") - # Join the bot and all local non-guest users. - admin_join(token, new_room_id, bot_user_id) - join_all_locals(token, new_room_id) + # Invite the bot and all joined members of the old room. + bot_user_id = f"@{BOT_USER}:{SERVER_NAME}" + invite_user(token, new_room_id, bot_user_id) + for uid in list_joined_members(token, old_room_id): + if uid == f"@{SEEDER_USER}:{SERVER_NAME}": + continue + localpart = uid.split(":", 1)[0].lstrip("@") + if localpart.isdigit(): + continue + invite_user(token, new_room_id, uid) # Pin the guest invite message in the new room. event_id = send_message(token, new_room_id, PIN_MESSAGE) @@ -225,6 +213,7 @@ spec: put_state(token, old_room_id, "m.room.join_rules", {"join_rule": "invite"}) put_state(token, old_room_id, "m.room.guest_access", {"guest_access": "forbidden"}) put_state(token, old_room_id, "m.room.tombstone", {"body": "Othrys has been reset. 
Please join the new room.", "replacement_room": new_room_id}) + send_message(token, old_room_id, "Othrys was reset. Join the new room at https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join") print(f"old_room_id={old_room_id}") print(f"new_room_id={new_room_id}") diff --git a/services/communication/seed-othrys-room.yaml b/services/communication/seed-othrys-room.yaml index a54f0de..a80b388 100644 --- a/services/communication/seed-othrys-room.yaml +++ b/services/communication/seed-othrys-room.yaml @@ -30,7 +30,7 @@ spec: name: atlasbot-credentials-runtime key: seeder-password - name: BOT_USER - value: atlas + value: atlasbot - name: BOT_PASS valueFrom: secretKeyRef: -- 2.47.2 From e1e95f9bef0de15e2a20f186b3b6184c4af1841c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 16:53:53 -0300 Subject: [PATCH 201/684] monitoring: drop anonymous folder role --- services/monitoring/grafana-folders.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/services/monitoring/grafana-folders.yaml b/services/monitoring/grafana-folders.yaml index 24ce305..54b278f 100644 --- a/services/monitoring/grafana-folders.yaml +++ b/services/monitoring/grafana-folders.yaml @@ -13,8 +13,6 @@ data: - uid: overview title: Overview permissions: - - role: Anonymous - permission: View - role: Viewer permission: View - role: Editor -- 2.47.2 From 8aecb88af3f2996cdad9e026e0d3ce67116cf342 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 17:01:55 -0300 Subject: [PATCH 202/684] comms: force leave old rooms --- .../communication/bstein-force-leave-job.yaml | 177 ++++++++++++++++++ services/communication/kustomization.yaml | 1 + 2 files changed, 178 insertions(+) create mode 100644 services/communication/bstein-force-leave-job.yaml diff --git a/services/communication/bstein-force-leave-job.yaml b/services/communication/bstein-force-leave-job.yaml new file mode 100644 index 0000000..1b8476d --- /dev/null +++ b/services/communication/bstein-force-leave-job.yaml @@ 
-0,0 +1,177 @@ +# services/communication/bstein-force-leave-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: bstein-force-leave-1 + namespace: comms +spec: + backoffLimit: 0 + template: + spec: + restartPolicy: Never + containers: + - name: force-leave + image: python:3.11-slim + env: + - name: POSTGRES_HOST + value: postgres-service.postgres.svc.cluster.local + - name: POSTGRES_PORT + value: "5432" + - name: POSTGRES_DB + value: synapse + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + name: synapse-db + key: POSTGRES_USER + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: synapse-db + key: POSTGRES_PASSWORD + - name: SYNAPSE_BASE + value: http://othrys-synapse-matrix-synapse:8008 + - name: AUTH_BASE + value: http://matrix-authentication-service:8080 + - name: SERVER_NAME + value: live.bstein.dev + - name: SEEDER_USER + value: othrys-seeder + - name: SEEDER_PASS + valueFrom: + secretKeyRef: + name: atlasbot-credentials-runtime + key: seeder-password + - name: TARGET_USER_ID + value: "@bstein:live.bstein.dev" + - name: TARGET_ROOMS + value: "!OkltaJguODUnZrbcUp:live.bstein.dev,!pMKAVvSRheIOCPIjDM:live.bstein.dev" + command: + - /bin/sh + - -c + - | + set -euo pipefail + pip install --no-cache-dir requests psycopg2-binary >/dev/null + python - <<'PY' + import json, os, sys, urllib.parse + import requests + import psycopg2 + + DB = dict( + host=os.environ["POSTGRES_HOST"], + port=int(os.environ["POSTGRES_PORT"]), + dbname=os.environ["POSTGRES_DB"], + user=os.environ["POSTGRES_USER"], + password=os.environ["POSTGRES_PASSWORD"], + ) + + SYNAPSE_BASE = os.environ["SYNAPSE_BASE"] + AUTH_BASE = os.environ["AUTH_BASE"] + SERVER_NAME = os.environ.get("SERVER_NAME", "live.bstein.dev") + SEEDER_USER = os.environ["SEEDER_USER"] + SEEDER_PASS = os.environ["SEEDER_PASS"] + TARGET_USER_ID = os.environ["TARGET_USER_ID"] + TARGET_ROOMS = [r.strip() for r in os.environ["TARGET_ROOMS"].split(",") if r.strip()] + + def db_connect(): + return 
psycopg2.connect(**DB) + + def db_get_admin(conn, user_id): + with conn.cursor() as cur: + cur.execute("SELECT admin FROM users WHERE name = %s", (user_id,)) + row = cur.fetchone() + if not row: + raise RuntimeError(f"user not found in synapse db: {user_id}") + return bool(row[0]) + + def db_set_admin(conn, user_id, is_admin): + with conn.cursor() as cur: + cur.execute("UPDATE users SET admin = %s WHERE name = %s", (bool(is_admin), user_id)) + + def login(user, password): + r = requests.post( + f"{AUTH_BASE}/_matrix/client/v3/login", + json={ + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": user}, + "password": password, + }, + timeout=20, + ) + if r.status_code != 200: + raise RuntimeError(f"login failed: {r.status_code} {r.text}") + return r.json()["access_token"] + + def admin_get_room_members(token, room_id): + url = f"{SYNAPSE_BASE}/_synapse/admin/v1/rooms/{urllib.parse.quote(room_id)}/members" + r = requests.get(url, headers={"Authorization": f"Bearer {token}"}, timeout=20) + if r.status_code == 404: + return None + r.raise_for_status() + data = r.json() + return data.get("members") or data.get("memberships") or data + + def admin_kick(token, room_id, user_id): + url = f"{SYNAPSE_BASE}/_synapse/admin/v1/rooms/{urllib.parse.quote(room_id)}/kick" + r = requests.post( + url, + headers={"Authorization": f"Bearer {token}"}, + json={"user_id": user_id, "reason": "room cleanup"}, + timeout=20, + ) + if r.status_code == 404: + raise RuntimeError(f"kick endpoint not found on synapse ({url})") + if r.status_code not in (200, 202): + raise RuntimeError(f"kick failed for {room_id}: {r.status_code} {r.text}") + + seeder_user_id = f"@{SEEDER_USER}:{SERVER_NAME}" + + results = {"seeder_user_id": seeder_user_id, "target_user_id": TARGET_USER_ID, "rooms": {}} + + conn = db_connect() + conn.autocommit = False + try: + was_admin = db_get_admin(conn, seeder_user_id) + results["seeder_was_admin"] = was_admin + if not was_admin: + db_set_admin(conn, 
seeder_user_id, True) + conn.commit() + + token = login(SEEDER_USER, SEEDER_PASS) + + # Verify admin access now works. + # (If this still 403s, we fail and restore admin flag.) + for room_id in TARGET_ROOMS: + room_res = {} + results["rooms"][room_id] = room_res + try: + before = admin_get_room_members(token, room_id) + room_res["members_before"] = "unavailable" if before is None else "ok" + except Exception as e: + room_res["members_before_error"] = str(e) + + try: + admin_kick(token, room_id, TARGET_USER_ID) + room_res["kicked"] = True + except Exception as e: + room_res["kicked"] = False + room_res["kick_error"] = str(e) + + try: + after = admin_get_room_members(token, room_id) + room_res["members_after"] = "unavailable" if after is None else "ok" + except Exception as e: + room_res["members_after_error"] = str(e) + + print(json.dumps(results, indent=2, sort_keys=True)) + finally: + # Restore previous admin flag + try: + if "results" in locals(): + desired = results.get("seeder_was_admin", False) + db_set_admin(conn, seeder_user_id, desired) + conn.commit() + except Exception as e: + print(f"WARNING: failed to restore seeder admin flag: {e}", file=sys.stderr) + conn.close() + PY diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index b4d5eb4..2baa863 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -18,6 +18,7 @@ resources: - element-call-config.yaml - element-call-deployment.yaml - reset-othrys-room-job.yaml + - bstein-force-leave-job.yaml - pin-othrys-job.yaml - guest-name-job.yaml - atlasbot-configmap.yaml -- 2.47.2 From da972215d3bdb1bd93ec4a300578048b9f557aec Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 17:14:27 -0300 Subject: [PATCH 203/684] comms: force leave old rooms (v2) --- .../communication/bstein-force-leave-job.yaml | 123 ++++++++++-------- 1 file changed, 69 insertions(+), 54 deletions(-) diff --git 
a/services/communication/bstein-force-leave-job.yaml b/services/communication/bstein-force-leave-job.yaml index 1b8476d..94f1886 100644 --- a/services/communication/bstein-force-leave-job.yaml +++ b/services/communication/bstein-force-leave-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: bstein-force-leave-1 + name: bstein-force-leave-2 namespace: comms spec: backoffLimit: 0 @@ -67,7 +67,6 @@ spec: SYNAPSE_BASE = os.environ["SYNAPSE_BASE"] AUTH_BASE = os.environ["AUTH_BASE"] - SERVER_NAME = os.environ.get("SERVER_NAME", "live.bstein.dev") SEEDER_USER = os.environ["SEEDER_USER"] SEEDER_PASS = os.environ["SEEDER_PASS"] TARGET_USER_ID = os.environ["TARGET_USER_ID"] @@ -82,11 +81,12 @@ spec: row = cur.fetchone() if not row: raise RuntimeError(f"user not found in synapse db: {user_id}") - return bool(row[0]) + # Synapse stores admin as an int (0/1) + return int(row[0]) def db_set_admin(conn, user_id, is_admin): with conn.cursor() as cur: - cur.execute("UPDATE users SET admin = %s WHERE name = %s", (bool(is_admin), user_id)) + cur.execute("UPDATE users SET admin = %s WHERE name = %s", (1 if is_admin else 0, user_id)) def login(user, password): r = requests.post( @@ -102,76 +102,91 @@ spec: raise RuntimeError(f"login failed: {r.status_code} {r.text}") return r.json()["access_token"] - def admin_get_room_members(token, room_id): - url = f"{SYNAPSE_BASE}/_synapse/admin/v1/rooms/{urllib.parse.quote(room_id)}/members" - r = requests.get(url, headers={"Authorization": f"Bearer {token}"}, timeout=20) - if r.status_code == 404: - return None - r.raise_for_status() - data = r.json() - return data.get("members") or data.get("memberships") or data - - def admin_kick(token, room_id, user_id): - url = f"{SYNAPSE_BASE}/_synapse/admin/v1/rooms/{urllib.parse.quote(room_id)}/kick" - r = requests.post( - url, + def whoami(token): + r = requests.get( + f"{SYNAPSE_BASE}/_matrix/client/v3/whoami", headers={"Authorization": f"Bearer {token}"}, - json={"user_id": 
user_id, "reason": "room cleanup"}, timeout=20, ) - if r.status_code == 404: - raise RuntimeError(f"kick endpoint not found on synapse ({url})") - if r.status_code not in (200, 202): - raise RuntimeError(f"kick failed for {room_id}: {r.status_code} {r.text}") + r.raise_for_status() + return r.json()["user_id"] - seeder_user_id = f"@{SEEDER_USER}:{SERVER_NAME}" + def admin_login_as(token, user_id): + url = f"{SYNAPSE_BASE}/_synapse/admin/v1/users/{urllib.parse.quote(user_id)}/login" + r = requests.post(url, headers={"Authorization": f"Bearer {token}"}, json={}, timeout=20) + if r.status_code != 200: + raise RuntimeError(f"admin login-as failed: {r.status_code} {r.text}") + return r.json()["access_token"] - results = {"seeder_user_id": seeder_user_id, "target_user_id": TARGET_USER_ID, "rooms": {}} + def client_leave(token, room_id): + url = f"{SYNAPSE_BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/leave" + r = requests.post(url, headers={"Authorization": f"Bearer {token}"}, json={}, timeout=20) + if r.status_code in (200, 202): + return True, None + # If already left/unknown membership, treat as non-fatal. 
+ return False, f"{r.status_code} {r.text}" + + def client_forget(token, room_id): + url = f"{SYNAPSE_BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/forget" + r = requests.post(url, headers={"Authorization": f"Bearer {token}"}, json={}, timeout=20) + if r.status_code in (200, 202): + return True, None + return False, f"{r.status_code} {r.text}" + + def admin_joined_rooms(token, user_id): + url = f"{SYNAPSE_BASE}/_synapse/admin/v1/users/{urllib.parse.quote(user_id)}/joined_rooms" + r = requests.get(url, headers={"Authorization": f"Bearer {token}"}, timeout=20) + if r.status_code != 200: + raise RuntimeError(f"admin joined_rooms failed: {r.status_code} {r.text}") + return r.json().get("joined_rooms", []) + + results = {"target_user_id": TARGET_USER_ID, "rooms": {}} conn = db_connect() conn.autocommit = False try: - was_admin = db_get_admin(conn, seeder_user_id) - results["seeder_was_admin"] = was_admin - if not was_admin: + token = login(SEEDER_USER, SEEDER_PASS) + seeder_user_id = whoami(token) + results["seeder_user_id"] = seeder_user_id + + try: + seeder_admin = db_get_admin(conn, seeder_user_id) + except Exception as e: + seeder_admin = None + results["seeder_admin_db_error"] = str(e) + results["seeder_admin_db"] = seeder_admin + + # If the seeder user isn't marked admin in Synapse DB, promote it. + # This is safe and reversible, and required for Synapse admin APIs. + if seeder_admin == 0: db_set_admin(conn, seeder_user_id, True) conn.commit() + results["seeder_admin_db_promoted"] = True + else: + results["seeder_admin_db_promoted"] = False - token = login(SEEDER_USER, SEEDER_PASS) + # Use admin endpoint to mint a puppet token for @bstein so we can + # perform a normal leave+forget (instead of deleting rooms). + bstein_token = admin_login_as(token, TARGET_USER_ID) - # Verify admin access now works. - # (If this still 403s, we fail and restore admin flag.) 
for room_id in TARGET_ROOMS: room_res = {} results["rooms"][room_id] = room_res - try: - before = admin_get_room_members(token, room_id) - room_res["members_before"] = "unavailable" if before is None else "ok" - except Exception as e: - room_res["members_before_error"] = str(e) - try: - admin_kick(token, room_id, TARGET_USER_ID) - room_res["kicked"] = True - except Exception as e: - room_res["kicked"] = False - room_res["kick_error"] = str(e) + ok, err = client_leave(bstein_token, room_id) + room_res["left"] = ok + if err: + room_res["leave_error"] = err - try: - after = admin_get_room_members(token, room_id) - room_res["members_after"] = "unavailable" if after is None else "ok" - except Exception as e: - room_res["members_after_error"] = str(e) + ok2, err2 = client_forget(bstein_token, room_id) + room_res["forgot"] = ok2 + if err2: + room_res["forget_error"] = err2 + + # Verify the user is no longer joined to the rooms (best effort). + results["target_joined_rooms_after"] = admin_joined_rooms(token, TARGET_USER_ID) print(json.dumps(results, indent=2, sort_keys=True)) finally: - # Restore previous admin flag - try: - if "results" in locals(): - desired = results.get("seeder_was_admin", False) - db_set_admin(conn, seeder_user_id, desired) - conn.commit() - except Exception as e: - print(f"WARNING: failed to restore seeder admin flag: {e}", file=sys.stderr) conn.close() PY -- 2.47.2 From 4a584f538d9732d3ae72bcb2fea16e59270bbe52 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 17:16:57 -0300 Subject: [PATCH 204/684] comms: force leave old rooms (v3) --- services/communication/bstein-force-leave-job.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/communication/bstein-force-leave-job.yaml b/services/communication/bstein-force-leave-job.yaml index 94f1886..5d76b8b 100644 --- a/services/communication/bstein-force-leave-job.yaml +++ b/services/communication/bstein-force-leave-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 
kind: Job metadata: - name: bstein-force-leave-2 + name: bstein-force-leave-3 namespace: comms spec: backoffLimit: 0 @@ -104,7 +104,7 @@ spec: def whoami(token): r = requests.get( - f"{SYNAPSE_BASE}/_matrix/client/v3/whoami", + f"{SYNAPSE_BASE}/_matrix/client/v3/account/whoami", headers={"Authorization": f"Bearer {token}"}, timeout=20, ) -- 2.47.2 From b6c955e7da770ccecd0402073c30e67516902345 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 17:20:28 -0300 Subject: [PATCH 205/684] comms: delete old test rooms for bstein --- .../communication/bstein-force-leave-job.yaml | 52 ++++++------------- 1 file changed, 17 insertions(+), 35 deletions(-) diff --git a/services/communication/bstein-force-leave-job.yaml b/services/communication/bstein-force-leave-job.yaml index 5d76b8b..d7f2a26 100644 --- a/services/communication/bstein-force-leave-job.yaml +++ b/services/communication/bstein-force-leave-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: bstein-force-leave-3 + name: bstein-force-leave-4 namespace: comms spec: backoffLimit: 0 @@ -111,27 +111,17 @@ spec: r.raise_for_status() return r.json()["user_id"] - def admin_login_as(token, user_id): - url = f"{SYNAPSE_BASE}/_synapse/admin/v1/users/{urllib.parse.quote(user_id)}/login" - r = requests.post(url, headers={"Authorization": f"Bearer {token}"}, json={}, timeout=20) + def admin_delete_room(token, room_id): + url = f"{SYNAPSE_BASE}/_synapse/admin/v1/rooms/{urllib.parse.quote(room_id)}" + r = requests.delete( + url, + headers={"Authorization": f"Bearer {token}"}, + json={"purge": False, "block": False}, + timeout=60, + ) if r.status_code != 200: - raise RuntimeError(f"admin login-as failed: {r.status_code} {r.text}") - return r.json()["access_token"] - - def client_leave(token, room_id): - url = f"{SYNAPSE_BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/leave" - r = requests.post(url, headers={"Authorization": f"Bearer {token}"}, json={}, timeout=20) - if r.status_code in 
(200, 202): - return True, None - # If already left/unknown membership, treat as non-fatal. - return False, f"{r.status_code} {r.text}" - - def client_forget(token, room_id): - url = f"{SYNAPSE_BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/forget" - r = requests.post(url, headers={"Authorization": f"Bearer {token}"}, json={}, timeout=20) - if r.status_code in (200, 202): - return True, None - return False, f"{r.status_code} {r.text}" + raise RuntimeError(f"admin delete room failed: {room_id}: {r.status_code} {r.text}") + return r.json() def admin_joined_rooms(token, user_id): url = f"{SYNAPSE_BASE}/_synapse/admin/v1/users/{urllib.parse.quote(user_id)}/joined_rooms" @@ -165,23 +155,15 @@ spec: else: results["seeder_admin_db_promoted"] = False - # Use admin endpoint to mint a puppet token for @bstein so we can - # perform a normal leave+forget (instead of deleting rooms). - bstein_token = admin_login_as(token, TARGET_USER_ID) - for room_id in TARGET_ROOMS: room_res = {} results["rooms"][room_id] = room_res - - ok, err = client_leave(bstein_token, room_id) - room_res["left"] = ok - if err: - room_res["leave_error"] = err - - ok2, err2 = client_forget(bstein_token, room_id) - room_res["forgot"] = ok2 - if err2: - room_res["forget_error"] = err2 + try: + room_res["delete"] = admin_delete_room(token, room_id) + room_res["deleted"] = True + except Exception as e: + room_res["deleted"] = False + room_res["delete_error"] = str(e) # Verify the user is no longer joined to the rooms (best effort). 
results["target_joined_rooms_after"] = admin_joined_rooms(token, TARGET_USER_ID) -- 2.47.2 From 4e701c63402938576370f0205669098f206feb99 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 17:22:55 -0300 Subject: [PATCH 206/684] comms: debug bstein room cleanup --- .../communication/bstein-force-leave-job.yaml | 50 ++++++++++++------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/services/communication/bstein-force-leave-job.yaml b/services/communication/bstein-force-leave-job.yaml index d7f2a26..668e7ac 100644 --- a/services/communication/bstein-force-leave-job.yaml +++ b/services/communication/bstein-force-leave-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: bstein-force-leave-4 + name: bstein-force-leave-5 namespace: comms spec: backoffLimit: 0 @@ -135,25 +135,37 @@ spec: conn = db_connect() conn.autocommit = False try: - token = login(SEEDER_USER, SEEDER_PASS) - seeder_user_id = whoami(token) - results["seeder_user_id"] = seeder_user_id + try: + token = login(SEEDER_USER, SEEDER_PASS) + results["seeder_login"] = "ok" + except Exception as e: + results["seeder_login"] = "error" + results["seeder_login_error"] = str(e) + print(json.dumps(results, indent=2, sort_keys=True)) + raise try: - seeder_admin = db_get_admin(conn, seeder_user_id) + seeder_user_id = whoami(token) + results["seeder_user_id"] = seeder_user_id except Exception as e: - seeder_admin = None - results["seeder_admin_db_error"] = str(e) - results["seeder_admin_db"] = seeder_admin + results["seeder_user_id_error"] = str(e) + seeder_user_id = None - # If the seeder user isn't marked admin in Synapse DB, promote it. - # This is safe and reversible, and required for Synapse admin APIs. 
- if seeder_admin == 0: - db_set_admin(conn, seeder_user_id, True) - conn.commit() - results["seeder_admin_db_promoted"] = True - else: - results["seeder_admin_db_promoted"] = False + if seeder_user_id: + try: + results["seeder_admin_db"] = db_get_admin(conn, seeder_user_id) + except Exception as e: + results["seeder_admin_db_error"] = str(e) + + if results.get("seeder_admin_db") == 0: + try: + db_set_admin(conn, seeder_user_id, True) + conn.commit() + results["seeder_admin_db_promoted"] = True + except Exception as e: + results["seeder_admin_db_promote_error"] = str(e) + else: + results["seeder_admin_db_promoted"] = False for room_id in TARGET_ROOMS: room_res = {} @@ -165,8 +177,10 @@ spec: room_res["deleted"] = False room_res["delete_error"] = str(e) - # Verify the user is no longer joined to the rooms (best effort). - results["target_joined_rooms_after"] = admin_joined_rooms(token, TARGET_USER_ID) + try: + results["target_joined_rooms_after"] = admin_joined_rooms(token, TARGET_USER_ID) + except Exception as e: + results["target_joined_rooms_after_error"] = str(e) print(json.dumps(results, indent=2, sort_keys=True)) finally: -- 2.47.2 From a8149bd993ae1dde6e0877e690d97053a4810047 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 17:25:09 -0300 Subject: [PATCH 207/684] comms: restart synapse to refresh admin cache --- services/communication/synapse-rendered.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml index 462a699..d4dc341 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/communication/synapse-rendered.yaml @@ -677,7 +677,7 @@ spec: template: metadata: annotations: - checksum/config: manual-rtc-enable-4 + checksum/config: manual-rtc-enable-5 checksum/secrets: ec9f3b254a562a0f0709461eb74a8cc91b8c1a2fb06be2594a131776c2541773 labels: app.kubernetes.io/name: matrix-synapse -- 2.47.2 From 
70059dda33c6ebccf8c51ff59d2353989e04c115 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 17:27:24 -0300 Subject: [PATCH 208/684] comms: rerun bstein room cleanup after synapse restart --- services/communication/bstein-force-leave-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/communication/bstein-force-leave-job.yaml b/services/communication/bstein-force-leave-job.yaml index 668e7ac..2052e42 100644 --- a/services/communication/bstein-force-leave-job.yaml +++ b/services/communication/bstein-force-leave-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: bstein-force-leave-5 + name: bstein-force-leave-6 namespace: comms spec: backoffLimit: 0 -- 2.47.2 From 32f78c4f82b1545ba68a12f0877024a74591191b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 17:36:23 -0300 Subject: [PATCH 209/684] nextcloud: fix mail sync idempotency --- scripts/nextcloud-mail-sync.sh | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/scripts/nextcloud-mail-sync.sh b/scripts/nextcloud-mail-sync.sh index 7feeec6..b4d171f 100755 --- a/scripts/nextcloud-mail-sync.sh +++ b/scripts/nextcloud-mail-sync.sh @@ -11,9 +11,11 @@ if ! command -v jq >/dev/null 2>&1; then fi account_exists() { - # Skip if the account email is already present in the mail app. - runuser -u www-data -- php occ mail:account:list 2>/dev/null | grep -Fq " ${1}" || \ - runuser -u www-data -- php occ mail:account:list 2>/dev/null | grep -Fq "${1} " + local user_id="${1}" + local email="${2}" + + # Nextcloud Mail does not provide a list command; export is safe (does not print passwords). 
+ php occ mail:account:export "${user_id}" 2>/dev/null | grep -Fq -- "- E-Mail: ${email}" } token=$( @@ -29,6 +31,8 @@ if [[ -z "${token}" || "${token}" == "null" ]]; then exit 1 fi +cd /var/www/html + users=$(curl -s -H "Authorization: Bearer ${token}" \ "${KC_BASE}/admin/realms/${KC_REALM}/users?max=2000") @@ -37,13 +41,13 @@ echo "${users}" | jq -c '.[]' | while read -r user; do email=$(echo "${user}" | jq -r '.email // empty') app_pw=$(echo "${user}" | jq -r '.attributes.mailu_app_password[0] // empty') [[ -z "${email}" || -z "${app_pw}" ]] && continue - if account_exists "${email}"; then + if account_exists "${username}" "${email}"; then echo "Skipping ${email}, already exists" continue fi echo "Syncing ${email}" - runuser -u www-data -- php occ mail:account:create \ + php occ mail:account:create \ "${username}" "${username}" "${email}" \ mail.bstein.dev 993 ssl "${email}" "${app_pw}" \ - mail.bstein.dev 587 tls "${email}" "${app_pw}" login || true + mail.bstein.dev 587 tls "${email}" "${app_pw}" || true done -- 2.47.2 From eb3a6824e6697fd8e9cdcc4eeee427b9bad139b7 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 17:47:07 -0300 Subject: [PATCH 210/684] nextcloud: flux-manage mail sync --- .../flux-system/applications/kustomization.yaml | 1 + .../nextcloud-mail-sync/kustomization.yaml | 17 +++++++++++++++++ scripts/nextcloud-mail-sync.sh | 4 ++-- services/nextcloud-mail-sync/kustomization.yaml | 10 ++++++++++ 4 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 clusters/atlas/flux-system/applications/nextcloud-mail-sync/kustomization.yaml create mode 100644 services/nextcloud-mail-sync/kustomization.yaml diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index 37d7699..a503520 100644 --- a/clusters/atlas/flux-system/applications/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/kustomization.yaml @@ -25,3 +25,4 @@ resources: - 
ci-demo/kustomization.yaml - ci-demo/image-automation.yaml - ai-llm/kustomization.yaml + - nextcloud-mail-sync/kustomization.yaml diff --git a/clusters/atlas/flux-system/applications/nextcloud-mail-sync/kustomization.yaml b/clusters/atlas/flux-system/applications/nextcloud-mail-sync/kustomization.yaml new file mode 100644 index 0000000..1eef5c4 --- /dev/null +++ b/clusters/atlas/flux-system/applications/nextcloud-mail-sync/kustomization.yaml @@ -0,0 +1,17 @@ +# clusters/atlas/flux-system/applications/nextcloud-mail-sync/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: nextcloud-mail-sync + namespace: flux-system +spec: + interval: 10m + prune: true + sourceRef: + kind: GitRepository + name: flux-system + path: ./services/nextcloud-mail-sync + targetNamespace: nextcloud + timeout: 2m + dependsOn: + - name: keycloak diff --git a/scripts/nextcloud-mail-sync.sh b/scripts/nextcloud-mail-sync.sh index b4d171f..8cfbf64 100755 --- a/scripts/nextcloud-mail-sync.sh +++ b/scripts/nextcloud-mail-sync.sh @@ -15,7 +15,7 @@ account_exists() { local email="${2}" # Nextcloud Mail does not provide a list command; export is safe (does not print passwords). 
- php occ mail:account:export "${user_id}" 2>/dev/null | grep -Fq -- "- E-Mail: ${email}" + /usr/sbin/runuser -u www-data -- php occ mail:account:export "${user_id}" 2>/dev/null | grep -Fq -- "- E-Mail: ${email}" } token=$( @@ -46,7 +46,7 @@ echo "${users}" | jq -c '.[]' | while read -r user; do continue fi echo "Syncing ${email}" - php occ mail:account:create \ + /usr/sbin/runuser -u www-data -- php occ mail:account:create \ "${username}" "${username}" "${email}" \ mail.bstein.dev 993 ssl "${email}" "${app_pw}" \ mail.bstein.dev 587 tls "${email}" "${app_pw}" || true diff --git a/services/nextcloud-mail-sync/kustomization.yaml b/services/nextcloud-mail-sync/kustomization.yaml new file mode 100644 index 0000000..cc1fa68 --- /dev/null +++ b/services/nextcloud-mail-sync/kustomization.yaml @@ -0,0 +1,10 @@ +# services/nextcloud-mail-sync/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: nextcloud +configMapGenerator: + - name: nextcloud-mail-sync-script + files: + - sync.sh=../../scripts/nextcloud-mail-sync.sh + options: + disableNameSuffixHash: true -- 2.47.2 From ed23d831b9d7832fa686dc959248d99b13eab540 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 17:48:39 -0300 Subject: [PATCH 211/684] comms(mas): bootstrap admin client secret --- services/communication/kustomization.yaml | 2 + .../mas-admin-client-secret-ensure-job.yaml | 75 +++++++++++++++++++ .../mas-admin-client-secret.yaml | 9 +++ 3 files changed, 86 insertions(+) create mode 100644 services/communication/mas-admin-client-secret-ensure-job.yaml create mode 100644 services/communication/mas-admin-client-secret.yaml diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index 2baa863..7213794 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -6,6 +6,8 @@ resources: - synapse-rendered.yaml - synapse-signingkey-ensure-job.yaml - mas-configmap.yaml + - 
mas-admin-client-secret.yaml + - mas-admin-client-secret-ensure-job.yaml - mas-deployment.yaml - mas-ingress.yaml - element-rendered.yaml diff --git a/services/communication/mas-admin-client-secret-ensure-job.yaml b/services/communication/mas-admin-client-secret-ensure-job.yaml new file mode 100644 index 0000000..9005b78 --- /dev/null +++ b/services/communication/mas-admin-client-secret-ensure-job.yaml @@ -0,0 +1,75 @@ +# services/communication/mas-admin-client-secret-ensure-job.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: mas-admin-client-secret-writer + namespace: comms +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: mas-admin-client-secret-writer + namespace: comms +rules: + - apiGroups: [""] + resources: ["secrets"] + resourceNames: ["mas-admin-client"] + verbs: ["get", "patch", "update"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: mas-admin-client-secret-writer + namespace: comms +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: mas-admin-client-secret-writer +subjects: + - kind: ServiceAccount + name: mas-admin-client-secret-writer + namespace: comms +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: mas-admin-client-secret-ensure-1 + namespace: comms +spec: + backoffLimit: 2 + template: + spec: + serviceAccountName: mas-admin-client-secret-writer + restartPolicy: OnFailure + volumes: + - name: work + emptyDir: {} + initContainers: + - name: generate + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + umask 077 + openssl rand -hex 32 > /work/client_secret + volumeMounts: + - name: work + mountPath: /work + containers: + - name: patch + image: bitnami/kubectl:latest + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + if kubectl -n comms get secret mas-admin-client -o jsonpath='{.data.client_secret}' 2>/dev/null | grep -q .; then + exit 0 + fi + secret_b64="$(base64 /work/client_secret | tr -d '\n')" + 
payload="$(printf '{"data":{"client_secret":"%s"}}' "${secret_b64}")" + kubectl -n comms patch secret mas-admin-client --type=merge -p "${payload}" >/dev/null + volumeMounts: + - name: work + mountPath: /work + diff --git a/services/communication/mas-admin-client-secret.yaml b/services/communication/mas-admin-client-secret.yaml new file mode 100644 index 0000000..7f4ec19 --- /dev/null +++ b/services/communication/mas-admin-client-secret.yaml @@ -0,0 +1,9 @@ +# services/communication/mas-admin-client-secret.yaml +apiVersion: v1 +kind: Secret +metadata: + name: mas-admin-client + namespace: comms +type: Opaque +data: {} + -- 2.47.2 From c72e1e1f9b7441f7fcec3af011f39eadbe0f32a9 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 17:52:18 -0300 Subject: [PATCH 212/684] comms(mas): fix admin client secret job --- .../communication/mas-admin-client-secret-ensure-job.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/services/communication/mas-admin-client-secret-ensure-job.yaml b/services/communication/mas-admin-client-secret-ensure-job.yaml index 9005b78..eed2dcd 100644 --- a/services/communication/mas-admin-client-secret-ensure-job.yaml +++ b/services/communication/mas-admin-client-secret-ensure-job.yaml @@ -33,7 +33,7 @@ subjects: apiVersion: batch/v1 kind: Job metadata: - name: mas-admin-client-secret-ensure-1 + name: mas-admin-client-secret-ensure-2 namespace: comms spec: backoffLimit: 2 @@ -52,7 +52,7 @@ spec: - | set -euo pipefail umask 077 - openssl rand -hex 32 > /work/client_secret + dd if=/dev/urandom bs=32 count=1 2>/dev/null | od -An -tx1 | tr -d ' \n' > /work/client_secret volumeMounts: - name: work mountPath: /work @@ -72,4 +72,3 @@ spec: volumeMounts: - name: work mountPath: /work - -- 2.47.2 From 100a11e0de6c88fddc4e0c928534ea088c6f2dea Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 17:54:01 -0300 Subject: [PATCH 213/684] monitoring: split overview org --- .../monitoring/grafana-org-bootstrap.yaml | 
110 ++++++++++++++++++ services/monitoring/helmrelease.yaml | 13 ++- services/monitoring/kustomization.yaml | 1 + 3 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 services/monitoring/grafana-org-bootstrap.yaml diff --git a/services/monitoring/grafana-org-bootstrap.yaml b/services/monitoring/grafana-org-bootstrap.yaml new file mode 100644 index 0000000..0872f4a --- /dev/null +++ b/services/monitoring/grafana-org-bootstrap.yaml @@ -0,0 +1,110 @@ +# services/monitoring/grafana-org-bootstrap.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: grafana-org-bootstrap-1 + namespace: monitoring +spec: + backoffLimit: 2 + template: + spec: + restartPolicy: OnFailure + containers: + - name: bootstrap + image: python:3.11-alpine + env: + - name: GRAFANA_URL + value: http://grafana + - name: OVERVIEW_ORG_NAME + value: Overview + - name: GRAFANA_USER + valueFrom: + secretKeyRef: + name: grafana-admin + key: admin-user + - name: GRAFANA_PASSWORD + valueFrom: + secretKeyRef: + name: grafana-admin + key: admin-password + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + python - <<'PY' + import base64 + import json + import os + import time + import urllib.error + import urllib.request + + grafana_url = os.environ["GRAFANA_URL"].rstrip("/") + org_name = os.environ["OVERVIEW_ORG_NAME"] + user = os.environ["GRAFANA_USER"] + password = os.environ["GRAFANA_PASSWORD"] + + auth = base64.b64encode(f"{user}:{password}".encode()).decode() + base_headers = { + "Authorization": f"Basic {auth}", + "Content-Type": "application/json", + } + + def request(path, method="GET", data=None, org_id=None): + headers = dict(base_headers) + if org_id is not None: + headers["X-Grafana-Org-Id"] = str(org_id) + payload = None + if data is not None: + payload = json.dumps(data).encode() + req = urllib.request.Request( + f"{grafana_url}{path}", + data=payload, + headers=headers, + method=method, + ) + return urllib.request.urlopen(req, timeout=10) + + for _ in range(60): + 
try: + with request("/api/health") as resp: + if resp.status == 200: + break + except Exception: + time.sleep(2) + else: + raise SystemExit("Grafana API did not become ready in time") + + with request("/api/orgs") as resp: + orgs = json.load(resp) + org_id = next((org["id"] for org in orgs if org["name"] == org_name), None) + if org_id is None: + with request("/api/orgs", method="POST", data={"name": org_name}) as resp: + org_id = json.load(resp).get("orgId") + if org_id is None: + raise SystemExit(f"Unable to resolve org ID for {org_name}") + + datasource = { + "name": "VictoriaMetrics", + "type": "prometheus", + "access": "proxy", + "url": "http://victoria-metrics-single-server:8428", + "isDefault": True, + "uid": "atlas-vm", + "jsonData": {"timeInterval": "15s"}, + } + try: + with request("/api/datasources/uid/atlas-vm", org_id=org_id) as resp: + if resp.status != 200: + raise urllib.error.HTTPError(resp.url, resp.status, resp.reason, resp.headers, None) + except urllib.error.HTTPError as err: + if err.code != 404: + raise + with request("/api/datasources", method="POST", data=datasource, org_id=org_id): + pass + + with request("/api/admin/provisioning/datasources/reload", method="POST"): + pass + with request("/api/admin/provisioning/dashboards/reload", method="POST"): + pass + PY diff --git a/services/monitoring/helmrelease.yaml b/services/monitoring/helmrelease.yaml index a07d207..79f1305 100644 --- a/services/monitoring/helmrelease.yaml +++ b/services/monitoring/helmrelease.yaml @@ -251,6 +251,7 @@ spec: GF_AUTH_GENERIC_OAUTH_CLIENT_ID: "grafana" GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: "" GF_AUTH_ANONYMOUS_ENABLED: "true" + GF_AUTH_ANONYMOUS_ORG_NAME: "Overview" GF_AUTH_ANONYMOUS_ORG_ROLE: "Viewer" GF_SECURITY_ALLOW_EMBEDDING: "true" GF_AUTH_GENERIC_OAUTH_ENABLED: "true" @@ -298,12 +299,22 @@ spec: jsonData: timeInterval: "15s" uid: atlas-vm + orgId: 1 + - name: VictoriaMetrics + type: prometheus + access: proxy + url: http://victoria-metrics-single-server:8428 
+ isDefault: true + jsonData: + timeInterval: "15s" + uid: atlas-vm + orgId: 2 dashboardProviders: dashboardproviders.yaml: apiVersion: 1 providers: - name: overview - orgId: 1 + orgId: 2 folder: Overview type: file disableDeletion: false diff --git a/services/monitoring/kustomization.yaml b/services/monitoring/kustomization.yaml index a50a1c1..ad53bb5 100644 --- a/services/monitoring/kustomization.yaml +++ b/services/monitoring/kustomization.yaml @@ -14,3 +14,4 @@ resources: - dcgm-exporter.yaml - grafana-folders.yaml - helmrelease.yaml + - grafana-org-bootstrap.yaml -- 2.47.2 From 0f36576bace839ccc6d0f4fe6ff9e266764b8917 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 17:56:39 -0300 Subject: [PATCH 214/684] comms(mas): patch admin secret via stringData --- .../communication/mas-admin-client-secret-ensure-job.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/communication/mas-admin-client-secret-ensure-job.yaml b/services/communication/mas-admin-client-secret-ensure-job.yaml index eed2dcd..3863f2b 100644 --- a/services/communication/mas-admin-client-secret-ensure-job.yaml +++ b/services/communication/mas-admin-client-secret-ensure-job.yaml @@ -33,7 +33,7 @@ subjects: apiVersion: batch/v1 kind: Job metadata: - name: mas-admin-client-secret-ensure-2 + name: mas-admin-client-secret-ensure-3 namespace: comms spec: backoffLimit: 2 @@ -66,8 +66,8 @@ spec: if kubectl -n comms get secret mas-admin-client -o jsonpath='{.data.client_secret}' 2>/dev/null | grep -q .; then exit 0 fi - secret_b64="$(base64 /work/client_secret | tr -d '\n')" - payload="$(printf '{"data":{"client_secret":"%s"}}' "${secret_b64}")" + secret="$(cat /work/client_secret)" + payload="$(printf '{"stringData":{"client_secret":"%s"}}' "${secret}")" kubectl -n comms patch secret mas-admin-client --type=merge -p "${payload}" >/dev/null volumeMounts: - name: work -- 2.47.2 From 353f2e921089be162b986724d42757473647c263 Mon Sep 17 00:00:00 2001 From: Brad 
Stein Date: Thu, 1 Jan 2026 18:00:07 -0300 Subject: [PATCH 215/684] monitoring: recreate grafana rollouts --- services/monitoring/helmrelease.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/monitoring/helmrelease.yaml b/services/monitoring/helmrelease.yaml index 79f1305..68a6a47 100644 --- a/services/monitoring/helmrelease.yaml +++ b/services/monitoring/helmrelease.yaml @@ -245,6 +245,8 @@ spec: enabled: true size: 20Gi storageClassName: astreae + deploymentStrategy: + type: Recreate service: type: ClusterIP env: -- 2.47.2 From 9d979a69fe31387ef63fa2ba6c735224fe912d62 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 18:02:31 -0300 Subject: [PATCH 216/684] comms(mas): make secret ensure job portable --- .../communication/mas-admin-client-secret-ensure-job.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/communication/mas-admin-client-secret-ensure-job.yaml b/services/communication/mas-admin-client-secret-ensure-job.yaml index 3863f2b..5a9241d 100644 --- a/services/communication/mas-admin-client-secret-ensure-job.yaml +++ b/services/communication/mas-admin-client-secret-ensure-job.yaml @@ -33,7 +33,7 @@ subjects: apiVersion: batch/v1 kind: Job metadata: - name: mas-admin-client-secret-ensure-3 + name: mas-admin-client-secret-ensure-4 namespace: comms spec: backoffLimit: 2 @@ -50,7 +50,7 @@ spec: command: ["/bin/sh", "-c"] args: - | - set -euo pipefail + set -eu umask 077 dd if=/dev/urandom bs=32 count=1 2>/dev/null | od -An -tx1 | tr -d ' \n' > /work/client_secret volumeMounts: @@ -62,8 +62,8 @@ spec: command: ["/bin/sh", "-c"] args: - | - set -euo pipefail - if kubectl -n comms get secret mas-admin-client -o jsonpath='{.data.client_secret}' 2>/dev/null | grep -q .; then + set -eu + if [ -n "$(kubectl -n comms get secret mas-admin-client -o jsonpath='{.data.client_secret}' 2>/dev/null || true)" ]; then exit 0 fi secret="$(cat /work/client_secret)" -- 2.47.2 From 
ae335fcff2b31d0d455a25ba620f83b4d730c369 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 18:09:08 -0300 Subject: [PATCH 217/684] comms(mas): debug admin secret ensure job --- .../mas-admin-client-secret-ensure-job.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/services/communication/mas-admin-client-secret-ensure-job.yaml b/services/communication/mas-admin-client-secret-ensure-job.yaml index 5a9241d..da37915 100644 --- a/services/communication/mas-admin-client-secret-ensure-job.yaml +++ b/services/communication/mas-admin-client-secret-ensure-job.yaml @@ -33,14 +33,14 @@ subjects: apiVersion: batch/v1 kind: Job metadata: - name: mas-admin-client-secret-ensure-4 + name: mas-admin-client-secret-ensure-5 namespace: comms spec: - backoffLimit: 2 + backoffLimit: 0 template: spec: serviceAccountName: mas-admin-client-secret-writer - restartPolicy: OnFailure + restartPolicy: Never volumes: - name: work emptyDir: {} @@ -50,7 +50,7 @@ spec: command: ["/bin/sh", "-c"] args: - | - set -eu + set -euo pipefail umask 077 dd if=/dev/urandom bs=32 count=1 2>/dev/null | od -An -tx1 | tr -d ' \n' > /work/client_secret volumeMounts: @@ -62,12 +62,12 @@ spec: command: ["/bin/sh", "-c"] args: - | - set -eu - if [ -n "$(kubectl -n comms get secret mas-admin-client -o jsonpath='{.data.client_secret}' 2>/dev/null || true)" ]; then + set -euo pipefail + if kubectl -n comms get secret mas-admin-client -o jsonpath='{.data.client_secret}' 2>/dev/null | grep -q .; then exit 0 fi - secret="$(cat /work/client_secret)" - payload="$(printf '{"stringData":{"client_secret":"%s"}}' "${secret}")" + secret_b64="$(base64 /work/client_secret | tr -d '\n')" + payload="$(printf '{"data":{"client_secret":"%s"}}' "${secret_b64}")" kubectl -n comms patch secret mas-admin-client --type=merge -p "${payload}" >/dev/null volumeMounts: - name: work -- 2.47.2 From 0a7410302d719edf612f7b0db033901ada29569d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 
Jan 2026 18:12:21 -0300 Subject: [PATCH 218/684] comms(mas): fix admin secret job permissions --- .../communication/mas-admin-client-secret-ensure-job.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/services/communication/mas-admin-client-secret-ensure-job.yaml b/services/communication/mas-admin-client-secret-ensure-job.yaml index da37915..210e729 100644 --- a/services/communication/mas-admin-client-secret-ensure-job.yaml +++ b/services/communication/mas-admin-client-secret-ensure-job.yaml @@ -33,14 +33,14 @@ subjects: apiVersion: batch/v1 kind: Job metadata: - name: mas-admin-client-secret-ensure-5 + name: mas-admin-client-secret-ensure-6 namespace: comms spec: - backoffLimit: 0 + backoffLimit: 2 template: spec: serviceAccountName: mas-admin-client-secret-writer - restartPolicy: Never + restartPolicy: OnFailure volumes: - name: work emptyDir: {} @@ -53,6 +53,7 @@ spec: set -euo pipefail umask 077 dd if=/dev/urandom bs=32 count=1 2>/dev/null | od -An -tx1 | tr -d ' \n' > /work/client_secret + chmod 0644 /work/client_secret volumeMounts: - name: work mountPath: /work -- 2.47.2 From 324ee3464837132345d0cf0b9fd8e9f3d0c4e6b2 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 18:15:16 -0300 Subject: [PATCH 219/684] comms(mas): stop managing admin client secret data --- services/communication/mas-admin-client-secret.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/services/communication/mas-admin-client-secret.yaml b/services/communication/mas-admin-client-secret.yaml index 7f4ec19..706ec5f 100644 --- a/services/communication/mas-admin-client-secret.yaml +++ b/services/communication/mas-admin-client-secret.yaml @@ -5,5 +5,3 @@ metadata: name: mas-admin-client namespace: comms type: Opaque -data: {} - -- 2.47.2 From e1f163253b4dae900f6ad879134dccd896f023bd Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 18:19:56 -0300 Subject: [PATCH 220/684] comms(mas): create admin client runtime secret --- 
services/communication/kustomization.yaml | 1 - .../mas-admin-client-secret-ensure-job.yaml | 17 +++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index 7213794..2cf8b4f 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -6,7 +6,6 @@ resources: - synapse-rendered.yaml - synapse-signingkey-ensure-job.yaml - mas-configmap.yaml - - mas-admin-client-secret.yaml - mas-admin-client-secret-ensure-job.yaml - mas-deployment.yaml - mas-ingress.yaml diff --git a/services/communication/mas-admin-client-secret-ensure-job.yaml b/services/communication/mas-admin-client-secret-ensure-job.yaml index 210e729..ff8d282 100644 --- a/services/communication/mas-admin-client-secret-ensure-job.yaml +++ b/services/communication/mas-admin-client-secret-ensure-job.yaml @@ -13,8 +13,11 @@ metadata: rules: - apiGroups: [""] resources: ["secrets"] - resourceNames: ["mas-admin-client"] + resourceNames: ["mas-admin-client-runtime"] verbs: ["get", "patch", "update"] + - apiGroups: [""] + resources: ["secrets"] + verbs: ["create"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding @@ -33,7 +36,7 @@ subjects: apiVersion: batch/v1 kind: Job metadata: - name: mas-admin-client-secret-ensure-6 + name: mas-admin-client-secret-ensure-7 namespace: comms spec: backoffLimit: 2 @@ -64,12 +67,18 @@ spec: args: - | set -euo pipefail - if kubectl -n comms get secret mas-admin-client -o jsonpath='{.data.client_secret}' 2>/dev/null | grep -q .; then + if kubectl -n comms get secret mas-admin-client-runtime >/dev/null 2>&1; then + if kubectl -n comms get secret mas-admin-client-runtime -o jsonpath='{.data.client_secret}' 2>/dev/null | grep -q .; then + exit 0 + fi + else + kubectl -n comms create secret generic mas-admin-client-runtime \ + --from-file=client_secret=/work/client_secret >/dev/null exit 0 fi secret_b64="$(base64 
/work/client_secret | tr -d '\n')" payload="$(printf '{"data":{"client_secret":"%s"}}' "${secret_b64}")" - kubectl -n comms patch secret mas-admin-client --type=merge -p "${payload}" >/dev/null + kubectl -n comms patch secret mas-admin-client-runtime --type=merge -p "${payload}" >/dev/null volumeMounts: - name: work mountPath: /work -- 2.47.2 From b9dbeb98b0e99c8bb95276e3f2706126b36f220f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 18:20:03 -0300 Subject: [PATCH 221/684] comms(mas): drop flux-managed admin client secret --- services/communication/mas-admin-client-secret.yaml | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 services/communication/mas-admin-client-secret.yaml diff --git a/services/communication/mas-admin-client-secret.yaml b/services/communication/mas-admin-client-secret.yaml deleted file mode 100644 index 706ec5f..0000000 --- a/services/communication/mas-admin-client-secret.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# services/communication/mas-admin-client-secret.yaml -apiVersion: v1 -kind: Secret -metadata: - name: mas-admin-client - namespace: comms -type: Opaque -- 2.47.2 From 32f1532508eaedc8fad1c2c0639f4005ffc0f0f5 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 18:20:40 -0300 Subject: [PATCH 222/684] monitoring: dual-provision overview orgs --- services/monitoring/helmrelease.yaml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/services/monitoring/helmrelease.yaml b/services/monitoring/helmrelease.yaml index 68a6a47..9634405 100644 --- a/services/monitoring/helmrelease.yaml +++ b/services/monitoring/helmrelease.yaml @@ -316,13 +316,21 @@ spec: apiVersion: 1 providers: - name: overview - orgId: 2 + orgId: 1 folder: Overview type: file disableDeletion: false editable: false options: path: /var/lib/grafana/dashboards/overview + - name: overview-public + orgId: 2 + folder: Overview + type: file + disableDeletion: false + editable: false + options: + path: 
/var/lib/grafana/dashboards/overview-public - name: pods orgId: 1 folder: Atlas Internal @@ -365,6 +373,7 @@ spec: path: /var/lib/grafana/dashboards/network dashboardsConfigMaps: overview: grafana-dashboard-overview + overview-public: grafana-dashboard-overview pods: grafana-dashboard-pods nodes: grafana-dashboard-nodes storage: grafana-dashboard-storage -- 2.47.2 From 144467dfe2a4ca83e914a9a207fd227a26dad5c9 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 18:22:32 -0300 Subject: [PATCH 223/684] comms(mas): enable internal admin API --- services/communication/mas-configmap.yaml | 11 +++++++++-- services/communication/mas-deployment.yaml | 18 +++++++++++++++++- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/services/communication/mas-configmap.yaml b/services/communication/mas-configmap.yaml index fab43c2..ea5c33c 100644 --- a/services/communication/mas-configmap.yaml +++ b/services/communication/mas-configmap.yaml @@ -22,13 +22,18 @@ data: - name: internal resources: - name: health + - name: adminapi binds: - - host: localhost - port: 8081 + - address: "0.0.0.0:8081" database: uri: "postgresql://mas:@@MAS_DB_PASSWORD@@@postgres-service.postgres.svc.cluster.local:5432/mas?sslmode=prefer" + clients: + - client_id: 01KDXMVQBQ5JNY6SEJPZW6Z8BM + client_auth_method: client_secret_basic + client_secret_file: /etc/mas/admin-client/client_secret + secrets: encryption_file: /etc/mas/secrets/encryption keys: @@ -72,6 +77,8 @@ data: policy: data: + admin_clients: + - 01KDXMVQBQ5JNY6SEJPZW6Z8BM client_registration: allow_insecure_uris: true allow_host_mismatch: true diff --git a/services/communication/mas-deployment.yaml b/services/communication/mas-deployment.yaml index 9643175..7034fc7 100644 --- a/services/communication/mas-deployment.yaml +++ b/services/communication/mas-deployment.yaml @@ -14,7 +14,7 @@ spec: template: metadata: annotations: - checksum/config: v4-bind-1 + checksum/config: v5-adminapi-1 labels: app: 
matrix-authentication-service spec: @@ -80,6 +80,9 @@ spec: - name: http containerPort: 8080 protocol: TCP + - name: internal + containerPort: 8081 + protocol: TCP volumeMounts: - name: rendered mountPath: /rendered @@ -87,6 +90,9 @@ spec: - name: secrets mountPath: /etc/mas/secrets readOnly: true + - name: admin-client + mountPath: /etc/mas/admin-client + readOnly: true - name: keys mountPath: /etc/mas/keys readOnly: true @@ -122,6 +128,12 @@ spec: items: - key: rsa_key path: rsa_key + - name: admin-client + secret: + secretName: mas-admin-client-runtime + items: + - key: client_secret + path: client_secret --- apiVersion: v1 kind: Service @@ -136,3 +148,7 @@ spec: port: 8080 targetPort: http protocol: TCP + - name: internal + port: 8081 + targetPort: internal + protocol: TCP -- 2.47.2 From d43e40d515f7bf0277370921f3c8c674827c2d6e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 18:26:50 -0300 Subject: [PATCH 224/684] comms: leave stuck rooms via MAS admin --- .../communication/bstein-force-leave-job.yaml | 286 +++++++++--------- 1 file changed, 136 insertions(+), 150 deletions(-) diff --git a/services/communication/bstein-force-leave-job.yaml b/services/communication/bstein-force-leave-job.yaml index 2052e42..5763290 100644 --- a/services/communication/bstein-force-leave-job.yaml +++ b/services/communication/bstein-force-leave-job.yaml @@ -2,48 +2,40 @@ apiVersion: batch/v1 kind: Job metadata: - name: bstein-force-leave-6 + name: bstein-leave-rooms-1 namespace: comms spec: backoffLimit: 0 template: spec: restartPolicy: Never + volumes: + - name: mas-admin-client + secret: + secretName: mas-admin-client-runtime + items: + - key: client_secret + path: client_secret containers: - - name: force-leave + - name: leave image: python:3.11-slim + volumeMounts: + - name: mas-admin-client + mountPath: /etc/mas-admin-client + readOnly: true env: - - name: POSTGRES_HOST - value: postgres-service.postgres.svc.cluster.local - - name: POSTGRES_PORT - value: "5432" 
- - name: POSTGRES_DB - value: synapse - - name: POSTGRES_USER - valueFrom: - secretKeyRef: - name: synapse-db - key: POSTGRES_USER - - name: POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - name: synapse-db - key: POSTGRES_PASSWORD + - name: MAS_ADMIN_CLIENT_ID + value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM + - name: MAS_ADMIN_CLIENT_SECRET_FILE + value: /etc/mas-admin-client/client_secret + - name: MAS_TOKEN_URL + value: http://matrix-authentication-service:8080/oauth2/token + - name: MAS_ADMIN_API_BASE + value: http://matrix-authentication-service:8081/api/admin/v1 - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 - - name: AUTH_BASE - value: http://matrix-authentication-service:8080 - - name: SERVER_NAME - value: live.bstein.dev - - name: SEEDER_USER - value: othrys-seeder - - name: SEEDER_PASS - valueFrom: - secretKeyRef: - name: atlasbot-credentials-runtime - key: seeder-password - - name: TARGET_USER_ID - value: "@bstein:live.bstein.dev" + - name: TARGET_USERNAME + value: bstein - name: TARGET_ROOMS value: "!OkltaJguODUnZrbcUp:live.bstein.dev,!pMKAVvSRheIOCPIjDM:live.bstein.dev" command: @@ -51,138 +43,132 @@ spec: - -c - | set -euo pipefail - pip install --no-cache-dir requests psycopg2-binary >/dev/null python - <<'PY' - import json, os, sys, urllib.parse - import requests - import psycopg2 + import base64 + import json + import os + import urllib.error + import urllib.parse + import urllib.request - DB = dict( - host=os.environ["POSTGRES_HOST"], - port=int(os.environ["POSTGRES_PORT"]), - dbname=os.environ["POSTGRES_DB"], - user=os.environ["POSTGRES_USER"], - password=os.environ["POSTGRES_PASSWORD"], - ) - - SYNAPSE_BASE = os.environ["SYNAPSE_BASE"] - AUTH_BASE = os.environ["AUTH_BASE"] - SEEDER_USER = os.environ["SEEDER_USER"] - SEEDER_PASS = os.environ["SEEDER_PASS"] - TARGET_USER_ID = os.environ["TARGET_USER_ID"] + MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"] + MAS_ADMIN_CLIENT_SECRET_FILE = os.environ["MAS_ADMIN_CLIENT_SECRET_FILE"] 
+ MAS_TOKEN_URL = os.environ["MAS_TOKEN_URL"] + MAS_ADMIN_API_BASE = os.environ["MAS_ADMIN_API_BASE"].rstrip("/") + SYNAPSE_BASE = os.environ["SYNAPSE_BASE"].rstrip("/") + TARGET_USERNAME = os.environ["TARGET_USERNAME"] TARGET_ROOMS = [r.strip() for r in os.environ["TARGET_ROOMS"].split(",") if r.strip()] - def db_connect(): - return psycopg2.connect(**DB) + def http_json(method, url, *, headers=None, json_body=None, form=None, timeout=30): + req_headers = dict(headers or {}) + data = None - def db_get_admin(conn, user_id): - with conn.cursor() as cur: - cur.execute("SELECT admin FROM users WHERE name = %s", (user_id,)) - row = cur.fetchone() - if not row: - raise RuntimeError(f"user not found in synapse db: {user_id}") - # Synapse stores admin as an int (0/1) - return int(row[0]) + if json_body is not None and form is not None: + raise ValueError("choose json_body or form, not both") - def db_set_admin(conn, user_id, is_admin): - with conn.cursor() as cur: - cur.execute("UPDATE users SET admin = %s WHERE name = %s", (1 if is_admin else 0, user_id)) + if json_body is not None: + data = json.dumps(json_body).encode() + req_headers.setdefault("Content-Type", "application/json") - def login(user, password): - r = requests.post( - f"{AUTH_BASE}/_matrix/client/v3/login", - json={ - "type": "m.login.password", - "identifier": {"type": "m.id.user", "user": user}, - "password": password, - }, - timeout=20, - ) - if r.status_code != 200: - raise RuntimeError(f"login failed: {r.status_code} {r.text}") - return r.json()["access_token"] + if form is not None: + data = urllib.parse.urlencode(form).encode() + req_headers.setdefault("Content-Type", "application/x-www-form-urlencoded") - def whoami(token): - r = requests.get( - f"{SYNAPSE_BASE}/_matrix/client/v3/account/whoami", - headers={"Authorization": f"Bearer {token}"}, - timeout=20, - ) - r.raise_for_status() - return r.json()["user_id"] + req = urllib.request.Request(url, data=data, method=method, headers=req_headers) + 
try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + raw = resp.read() + payload = json.loads(raw.decode("utf-8")) if raw else None + return resp.status, payload + except urllib.error.HTTPError as e: + raw = e.read() + try: + payload = json.loads(raw.decode("utf-8")) if raw else None + except Exception: + payload = None + return e.code, payload - def admin_delete_room(token, room_id): - url = f"{SYNAPSE_BASE}/_synapse/admin/v1/rooms/{urllib.parse.quote(room_id)}" - r = requests.delete( - url, - headers={"Authorization": f"Bearer {token}"}, - json={"purge": False, "block": False}, - timeout=60, - ) - if r.status_code != 200: - raise RuntimeError(f"admin delete room failed: {room_id}: {r.status_code} {r.text}") - return r.json() + with open(MAS_ADMIN_CLIENT_SECRET_FILE, "r", encoding="utf-8") as f: + mas_admin_client_secret = f.read().strip() + if not mas_admin_client_secret: + raise RuntimeError("MAS admin client secret file is empty") - def admin_joined_rooms(token, user_id): - url = f"{SYNAPSE_BASE}/_synapse/admin/v1/users/{urllib.parse.quote(user_id)}/joined_rooms" - r = requests.get(url, headers={"Authorization": f"Bearer {token}"}, timeout=20) - if r.status_code != 200: - raise RuntimeError(f"admin joined_rooms failed: {r.status_code} {r.text}") - return r.json().get("joined_rooms", []) + basic = base64.b64encode(f"{MAS_ADMIN_CLIENT_ID}:{mas_admin_client_secret}".encode()).decode() + token_status, token_payload = http_json( + "POST", + MAS_TOKEN_URL, + headers={"Authorization": f"Basic {basic}"}, + form={"grant_type": "client_credentials", "scope": "urn:mas:admin"}, + timeout=30, + ) + if token_status != 200 or not token_payload or "access_token" not in token_payload: + raise RuntimeError(f"MAS admin token request failed (HTTP {token_status})") + mas_admin_token = token_payload["access_token"] - results = {"target_user_id": TARGET_USER_ID, "rooms": {}} + user_status, user_payload = http_json( + "GET", + 
f"{MAS_ADMIN_API_BASE}/users/by-username/{urllib.parse.quote(TARGET_USERNAME)}", + headers={"Authorization": f"Bearer {mas_admin_token}"}, + timeout=30, + ) + if user_status != 200 or not user_payload or "data" not in user_payload or "id" not in user_payload["data"]: + raise RuntimeError(f"MAS user lookup failed (HTTP {user_status})") + actor_user_id = user_payload["data"]["id"] + + sess_status, sess_payload = http_json( + "POST", + f"{MAS_ADMIN_API_BASE}/personal-sessions", + headers={"Authorization": f"Bearer {mas_admin_token}"}, + json_body={ + "actor_user_id": actor_user_id, + "human_name": "bstein room cleanup", + "scope": "urn:matrix:client:api:*", + "expires_in": 300, + }, + timeout=30, + ) + if sess_status != 201 or not sess_payload or "data" not in sess_payload: + raise RuntimeError(f"MAS personal session create failed (HTTP {sess_status})") + + personal_session_id = sess_payload["data"]["id"] + personal_token = (sess_payload.get("data", {}).get("attributes", {}) or {}).get("access_token") + if not personal_token: + raise RuntimeError("MAS personal session did not return an access token") + + results = {"rooms": {}, "revoke": None} + failures = [] - conn = db_connect() - conn.autocommit = False try: - try: - token = login(SEEDER_USER, SEEDER_PASS) - results["seeder_login"] = "ok" - except Exception as e: - results["seeder_login"] = "error" - results["seeder_login_error"] = str(e) - print(json.dumps(results, indent=2, sort_keys=True)) - raise - - try: - seeder_user_id = whoami(token) - results["seeder_user_id"] = seeder_user_id - except Exception as e: - results["seeder_user_id_error"] = str(e) - seeder_user_id = None - - if seeder_user_id: - try: - results["seeder_admin_db"] = db_get_admin(conn, seeder_user_id) - except Exception as e: - results["seeder_admin_db_error"] = str(e) - - if results.get("seeder_admin_db") == 0: - try: - db_set_admin(conn, seeder_user_id, True) - conn.commit() - results["seeder_admin_db_promoted"] = True - except Exception as e: 
- results["seeder_admin_db_promote_error"] = str(e) - else: - results["seeder_admin_db_promoted"] = False - for room_id in TARGET_ROOMS: - room_res = {} - results["rooms"][room_id] = room_res - try: - room_res["delete"] = admin_delete_room(token, room_id) - room_res["deleted"] = True - except Exception as e: - room_res["deleted"] = False - room_res["delete_error"] = str(e) - - try: - results["target_joined_rooms_after"] = admin_joined_rooms(token, TARGET_USER_ID) - except Exception as e: - results["target_joined_rooms_after_error"] = str(e) - - print(json.dumps(results, indent=2, sort_keys=True)) + room_q = urllib.parse.quote(room_id, safe="") + leave_status, _ = http_json( + "POST", + f"{SYNAPSE_BASE}/_matrix/client/v3/rooms/{room_q}/leave", + headers={"Authorization": f"Bearer {personal_token}"}, + json_body={}, + timeout=30, + ) + forget_status, _ = http_json( + "POST", + f"{SYNAPSE_BASE}/_matrix/client/v3/rooms/{room_q}/forget", + headers={"Authorization": f"Bearer {personal_token}"}, + json_body={}, + timeout=30, + ) + results["rooms"][room_id] = {"leave": leave_status, "forget": forget_status} + if leave_status != 200 or forget_status != 200: + failures.append(room_id) finally: - conn.close() + revoke_status, _ = http_json( + "POST", + f"{MAS_ADMIN_API_BASE}/personal-sessions/{urllib.parse.quote(personal_session_id)}/revoke", + headers={"Authorization": f"Bearer {mas_admin_token}"}, + json_body={}, + timeout=30, + ) + results["revoke"] = revoke_status + + print(json.dumps(results, indent=2, sort_keys=True)) + if failures: + raise SystemExit(f"failed to leave/forget rooms: {', '.join(failures)}") PY -- 2.47.2 From ce6537a155d2a521db93e62fc3fa0717598d74e3 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 18:44:47 -0300 Subject: [PATCH 225/684] comms(synapse): enable MSC4108 QR login --- services/communication/synapse-rendered.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/communication/synapse-rendered.yaml 
b/services/communication/synapse-rendered.yaml index d4dc341..22b68cf 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/communication/synapse-rendered.yaml @@ -339,6 +339,7 @@ data: default_room_version: "11" experimental_features: msc3266_enabled: true + msc4108_enabled: true msc4143_enabled: true msc4222_enabled: true max_event_delay_duration: 24h @@ -677,7 +678,7 @@ spec: template: metadata: annotations: - checksum/config: manual-rtc-enable-5 + checksum/config: manual-rtc-enable-6 checksum/secrets: ec9f3b254a562a0f0709461eb74a8cc91b8c1a2fb06be2594a131776c2541773 labels: app.kubernetes.io/name: matrix-synapse -- 2.47.2 From 41d81ee41a075efca49e9ac44dc93410b0577a71 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 00:43:28 +0000 Subject: [PATCH 226/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 1b43a1e..1befa9d 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-29 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-30 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 335ead9df58e8202284a1f9e21884940c32eaba3 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 00:44:39 +0000 Subject: [PATCH 227/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml 
b/services/bstein-dev-home/backend-deployment.yaml index 21f74ba..487468f 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-29 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-30 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 592539e2d3735889d398d029fe6333bf50190aa8 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 21:45:33 -0300 Subject: [PATCH 228/684] bstein-dev-home: enable Keycloak portal --- .../bstein-dev-home/backend-deployment.yaml | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 487468f..a66e02b 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -40,6 +40,29 @@ spec: - name: AI_NODE_GPU_MAP value: | {"titan-20": "Jetson Xavier (edge GPU)", "titan-21": "Jetson Xavier (edge GPU)", "titan-22": "RTX 3050 8GB (local GPU)", "titan-24": "RTX 3080 8GB (local GPU)"} + - name: KEYCLOAK_ENABLED + value: "true" + - name: KEYCLOAK_URL + value: https://sso.bstein.dev + - name: KEYCLOAK_REALM + value: atlas + - name: KEYCLOAK_CLIENT_ID + value: bstein-dev-home + - name: KEYCLOAK_ISSUER + value: https://sso.bstein.dev/realms/atlas + - name: KEYCLOAK_JWKS_URL + value: http://keycloak.sso.svc.cluster.local/realms/atlas/protocol/openid-connect/certs + - name: KEYCLOAK_ADMIN_URL + value: http://keycloak.sso.svc.cluster.local + - name: KEYCLOAK_ADMIN_REALM + value: atlas + - name: KEYCLOAK_ADMIN_CLIENT_ID + value: bstein-dev-home-admin + - name: KEYCLOAK_ADMIN_CLIENT_SECRET + valueFrom: + secretKeyRef: + 
name: bstein-dev-home-keycloak-admin + key: client_secret ports: - name: http containerPort: 8080 -- 2.47.2 From 77761c1e42de7784a8b5330078f0d4a55927d1fd Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 00:57:28 +0000 Subject: [PATCH 229/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 1befa9d..ee8d5b6 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-30 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-31 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 157c036371b522d5c2125e4bc0a0d424c48ffba6 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 00:58:40 +0000 Subject: [PATCH 230/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index a66e02b..6faae73 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-30 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-31 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - 
name: AI_CHAT_API -- 2.47.2 From d431b0411414140c00c900434b69aa3bf7b72a56 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 01:19:29 +0000 Subject: [PATCH 231/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index ee8d5b6..c66db7a 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-31 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-32 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 58d14f1cb6c1248bcf8bcfd8b3684593dee8bdec Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 01:20:41 +0000 Subject: [PATCH 232/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 6faae73..0095af9 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-31 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-32 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From a56235f391dbcb00fcc10cdca6b1369631e51fe3 Mon Sep 17 
00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 02:22:34 +0000 Subject: [PATCH 233/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index c66db7a..3e52167 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-32 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-33 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 90b071566aac9b0d45ab25efa930931cb7ef0c0c Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 02:23:46 +0000 Subject: [PATCH 234/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 0095af9..e82bc95 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-32 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-33 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From edd2189f3c0863066d0826ac8dad2a809b866bf1 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 1 Jan 2026 23:24:11 -0300 Subject: [PATCH 
235/684] nextcloud: make mail sync idempotent --- scripts/nextcloud-mail-sync.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/nextcloud-mail-sync.sh b/scripts/nextcloud-mail-sync.sh index 8cfbf64..816b56a 100755 --- a/scripts/nextcloud-mail-sync.sh +++ b/scripts/nextcloud-mail-sync.sh @@ -15,7 +15,14 @@ account_exists() { local email="${2}" # Nextcloud Mail does not provide a list command; export is safe (does not print passwords). - /usr/sbin/runuser -u www-data -- php occ mail:account:export "${user_id}" 2>/dev/null | grep -Fq -- "- E-Mail: ${email}" + local export + if ! export=$(/usr/sbin/runuser -u www-data -- php occ mail:account:export "${user_id}" 2>/dev/null); then + echo "WARN: unable to export mail accounts for ${user_id}; skipping sync for safety" >&2 + return 0 + fi + + # Output formatting varies by Nextcloud/Mail versions and locale; match by email address. + grep -Fq -- "${email}" <<<"${export}" } token=$( -- 2.47.2 From de8721cbaa920f0555012e6ac7a83e1206443d00 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 02:45:36 +0000 Subject: [PATCH 236/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 3e52167..0e8222d 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-33 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-34 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From a30df479aa0c91edaaa5ef4f60e130bc84af11ec Mon Sep 17 
00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 02:46:47 +0000 Subject: [PATCH 237/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index e82bc95..61005e7 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-33 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-34 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 1995ba7ec971ece2c572c827724e85cf3ae1e849 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 00:42:02 -0300 Subject: [PATCH 238/684] bstein-dev-home: add portal db + relax account gating --- services/bstein-dev-home/backend-deployment.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 61005e7..fb458b3 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -63,6 +63,13 @@ spec: secretKeyRef: name: bstein-dev-home-keycloak-admin key: client_secret + - name: ACCOUNT_ALLOWED_GROUPS + value: "" + - name: PORTAL_DATABASE_URL + valueFrom: + secretKeyRef: + name: atlas-portal-db + key: PORTAL_DATABASE_URL ports: - name: http containerPort: 8080 -- 2.47.2 From 04c5ee91a00e07d7a54a67d4a5dacac8497bea3a Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 03:47:40 +0000 Subject: [PATCH 239/684] chore(bstein-dev-home): automated image update --- 
services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 0e8222d..bcee056 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-34 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-35 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From ee9fa7fd36a53adc4b608d57e80379d901939cbe Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 03:48:52 +0000 Subject: [PATCH 240/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index fb458b3..4ae4ae4 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-34 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-35 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 3ab4c866ea0393a7f7b0ced7f7669a638b644b3f Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 03:58:41 +0000 Subject: [PATCH 241/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index bcee056..d94990d 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-35 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-36 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 6eb3ca1fce729a38f1f50169a87ec12db4780842 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 03:59:52 +0000 Subject: [PATCH 242/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 4ae4ae4..0af5d0c 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-35 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-36 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 7a97aa257bad9c1d37b3494d4afe44f6c2fbdf82 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 01:12:35 -0300 Subject: [PATCH 243/684] services: scaffold postgres and vaultwarden manifests --- services/postgres/kustomization.yaml | 8 +++ services/postgres/namespace.yaml | 5 ++ services/postgres/service.yaml | 15 ++++++ 
services/postgres/statefulset.yaml | 68 +++++++++++++++++++++++++ services/vaultwarden/deployment.yaml | 43 ++++++++++++++++ services/vaultwarden/ingress.yaml | 28 ++++++++++ services/vaultwarden/kustomization.yaml | 10 ++++ services/vaultwarden/namespace.yaml | 5 ++ services/vaultwarden/pvc.yaml | 12 +++++ services/vaultwarden/service.yaml | 15 ++++++ 10 files changed, 209 insertions(+) create mode 100644 services/postgres/kustomization.yaml create mode 100644 services/postgres/namespace.yaml create mode 100644 services/postgres/service.yaml create mode 100644 services/postgres/statefulset.yaml create mode 100644 services/vaultwarden/deployment.yaml create mode 100644 services/vaultwarden/ingress.yaml create mode 100644 services/vaultwarden/kustomization.yaml create mode 100644 services/vaultwarden/namespace.yaml create mode 100644 services/vaultwarden/pvc.yaml create mode 100644 services/vaultwarden/service.yaml diff --git a/services/postgres/kustomization.yaml b/services/postgres/kustomization.yaml new file mode 100644 index 0000000..1d7c8c0 --- /dev/null +++ b/services/postgres/kustomization.yaml @@ -0,0 +1,8 @@ +# services/postgres/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: postgres +resources: + - namespace.yaml + - service.yaml + - statefulset.yaml diff --git a/services/postgres/namespace.yaml b/services/postgres/namespace.yaml new file mode 100644 index 0000000..c5503ce --- /dev/null +++ b/services/postgres/namespace.yaml @@ -0,0 +1,5 @@ +# services/postgres/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: postgres diff --git a/services/postgres/service.yaml b/services/postgres/service.yaml new file mode 100644 index 0000000..52c4656 --- /dev/null +++ b/services/postgres/service.yaml @@ -0,0 +1,15 @@ +# services/postgres/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: postgres-service + namespace: postgres +spec: + clusterIP: None + ports: + - name: postgres + port: 5432 
+ protocol: TCP + targetPort: 5432 + selector: + app: postgres diff --git a/services/postgres/statefulset.yaml b/services/postgres/statefulset.yaml new file mode 100644 index 0000000..014567b --- /dev/null +++ b/services/postgres/statefulset.yaml @@ -0,0 +1,68 @@ +# services/postgres/statefulset.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: postgres + namespace: postgres + labels: + app: postgres +spec: + serviceName: postgres-service + replicas: 1 + selector: + matchLabels: + app: postgres + persistentVolumeClaimRetentionPolicy: + whenDeleted: Retain + whenScaled: Retain + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + app: postgres + spec: + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: In + values: ["true"] + - key: hardware + operator: In + values: ["rpi4", "rpi5"] + containers: + - name: postgres + image: postgres:15 + ports: + - name: postgres + containerPort: 5432 + protocol: TCP + env: + - name: PGDATA + value: /var/lib/postgresql/data/pgdata + - name: POSTGRES_USER + value: postgres + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: postgres-auth + key: POSTGRES_PASSWORD + - name: POSTGRES_DB + value: postgres + volumeMounts: + - name: postgres-data + mountPath: /var/lib/postgresql/data + volumeClaimTemplates: + - metadata: + name: postgres-data + spec: + accessModes: ["ReadWriteOnce"] + storageClassName: astreae + resources: + requests: + storage: 100Gi diff --git a/services/vaultwarden/deployment.yaml b/services/vaultwarden/deployment.yaml new file mode 100644 index 0000000..175cbca --- /dev/null +++ b/services/vaultwarden/deployment.yaml @@ -0,0 +1,43 @@ +# services/vaultwarden/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vaultwarden + namespace: vaultwarden +spec: + replicas: 1 + 
selector: + matchLabels: + app: vaultwarden + template: + metadata: + labels: + app: vaultwarden + spec: + containers: + - name: vaultwarden + image: vaultwarden/server:1.33.2 + env: + - name: SIGNUPS_ALLOWED + value: "true" + - name: DATABASE_URL + valueFrom: + secretKeyRef: + name: vaultwarden-db-url + key: DATABASE_URL + - name: ADMIN_TOKEN + valueFrom: + secretKeyRef: + name: vaultwarden-admin + key: ADMIN_TOKEN + ports: + - name: http + containerPort: 80 + protocol: TCP + volumeMounts: + - name: vaultwarden-data + mountPath: /data + volumes: + - name: vaultwarden-data + persistentVolumeClaim: + claimName: vaultwarden-data diff --git a/services/vaultwarden/ingress.yaml b/services/vaultwarden/ingress.yaml new file mode 100644 index 0000000..2eaa991 --- /dev/null +++ b/services/vaultwarden/ingress.yaml @@ -0,0 +1,28 @@ +# services/vaultwarden/ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: vaultwarden-ingress + namespace: vaultwarden + annotations: + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" + traefik.ingress.kubernetes.io/router.tls.certresolver: letsencrypt-prod + cert-manager.io/cluster-issuer: letsencrypt-prod +spec: + ingressClassName: traefik + rules: + - host: vault.bstein.dev + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: vaultwarden-service + port: + number: 80 + tls: + - hosts: + - vault.bstein.dev + secretName: vaultwarden-tls diff --git a/services/vaultwarden/kustomization.yaml b/services/vaultwarden/kustomization.yaml new file mode 100644 index 0000000..f0d02fd --- /dev/null +++ b/services/vaultwarden/kustomization.yaml @@ -0,0 +1,10 @@ +# services/vaultwarden/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: vaultwarden +resources: + - namespace.yaml + - pvc.yaml + - deployment.yaml + - service.yaml + - ingress.yaml diff --git a/services/vaultwarden/namespace.yaml 
b/services/vaultwarden/namespace.yaml new file mode 100644 index 0000000..2e97e87 --- /dev/null +++ b/services/vaultwarden/namespace.yaml @@ -0,0 +1,5 @@ +# services/vaultwarden/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: vaultwarden diff --git a/services/vaultwarden/pvc.yaml b/services/vaultwarden/pvc.yaml new file mode 100644 index 0000000..b4e0617 --- /dev/null +++ b/services/vaultwarden/pvc.yaml @@ -0,0 +1,12 @@ +# services/vaultwarden/pvc.yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: vaultwarden-data + namespace: vaultwarden +spec: + accessModes: ["ReadWriteOnce"] + storageClassName: astreae + resources: + requests: + storage: 100Gi diff --git a/services/vaultwarden/service.yaml b/services/vaultwarden/service.yaml new file mode 100644 index 0000000..7cc05a0 --- /dev/null +++ b/services/vaultwarden/service.yaml @@ -0,0 +1,15 @@ +# services/vaultwarden/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: vaultwarden-service + namespace: vaultwarden +spec: + type: ClusterIP + selector: + app: vaultwarden + ports: + - name: http + port: 80 + protocol: TCP + targetPort: http -- 2.47.2 From 616c82807eb780eddc901a54d05f7f4a417d09d2 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 04:38:44 +0000 Subject: [PATCH 244/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index d94990d..a540f20 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-36 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: 
registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-37 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 0d79c4bcdcf630872078632865eae84ec8c52a90 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 04:39:56 +0000 Subject: [PATCH 245/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 0af5d0c..b8f26a2 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-36 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-37 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 20ad6a76ca5d97c90dca5e2ed6c93eb7a0ddb06b Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 04:51:45 +0000 Subject: [PATCH 246/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index a540f20..7e018e5 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-37 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-38 # {"$imagepolicy": 
"bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 9daf8b345a624eb449a93ab4f4da7ab6e06535ac Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 04:52:56 +0000 Subject: [PATCH 247/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index b8f26a2..d803c3d 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-37 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-38 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From b95eab58762fb62eac0d02ee94c58a4dc3c21108 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 02:53:55 -0300 Subject: [PATCH 248/684] mailu: add wait-mode sync endpoint Also bump portal timeouts and relax access request rate limits. 
--- .../bstein-dev-home/backend-deployment.yaml | 10 +++ services/mailu/mailu-sync-listener.yaml | 63 ++++++++++++++++--- 2 files changed, 65 insertions(+), 8 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index d803c3d..6bbff8a 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -70,6 +70,16 @@ spec: secretKeyRef: name: atlas-portal-db key: PORTAL_DATABASE_URL + - name: HTTP_CHECK_TIMEOUT_SEC + value: "10" + - name: ACCESS_REQUEST_SUBMIT_RATE_LIMIT + value: "30" + - name: ACCESS_REQUEST_SUBMIT_RATE_WINDOW_SEC + value: "3600" + - name: ACCESS_REQUEST_STATUS_RATE_LIMIT + value: "120" + - name: ACCESS_REQUEST_STATUS_RATE_WINDOW_SEC + value: "60" ports: - name: http containerPort: 8080 diff --git a/services/mailu/mailu-sync-listener.yaml b/services/mailu/mailu-sync-listener.yaml index 04e8070..4d70716 100644 --- a/services/mailu/mailu-sync-listener.yaml +++ b/services/mailu/mailu-sync-listener.yaml @@ -120,28 +120,75 @@ data: MIN_INTERVAL_SECONDS = 10 last_run = 0.0 lock = threading.Lock() + sync_done = threading.Event() + sync_done.set() + sync_running = False - def trigger_sync(): - global last_run + def _run_sync_blocking() -> int: + global last_run, sync_running + with lock: + if sync_running: + return 0 + sync_running = True + sync_done.clear() + + try: + print("mailu-sync-listener: starting sync", flush=True) + proc = subprocess.run(["python", "/app/sync.py"], check=False) + rc = int(proc.returncode) + print(f"mailu-sync-listener: sync completed rc={rc}", flush=True) + return rc + finally: + with lock: + sync_running = False + last_run = time() + sync_done.set() + + def _trigger_sync_async() -> bool: with lock: now = time() + if sync_running: + return False if now - last_run < MIN_INTERVAL_SECONDS: - return - last_run = now - # Fire and forget; output to stdout - subprocess.Popen(["python", "/app/sync.py"], 
stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + return False + + thread = threading.Thread(target=_run_sync_blocking, daemon=True) + thread.start() + return True class Handler(http.server.BaseHTTPRequestHandler): def do_POST(self): length = int(self.headers.get("Content-Length", 0)) body = self.rfile.read(length) if length else b"" try: - json.loads(body or b"{}") + payload = json.loads(body or b"{}") except json.JSONDecodeError: self.send_response(400) self.end_headers() return - trigger_sync() + + wait = False + if isinstance(payload, dict): + wait = bool(payload.get("wait")) + + if wait: + # If a sync is already running, wait for it to complete. + with lock: + already_running = sync_running + if already_running: + sync_done.wait(timeout=120) + with lock: + still_running = sync_running + self.send_response(200 if not still_running else 503) + self.end_headers() + return + + rc = _run_sync_blocking() + self.send_response(200 if rc == 0 else 500) + self.end_headers() + return + + _trigger_sync_async() self.send_response(202) self.end_headers() -- 2.47.2 From b7e34865fe23003356445959d9a12b85941a056e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 02:57:18 -0300 Subject: [PATCH 249/684] mailu: roll listener on script changes Generate mailu-sync-listener ConfigMap from scripts/ and enable name-suffix hashing to trigger Deployment rollout. 
--- scripts/mailu_sync_listener.py | 93 +++++++++++++++++++++++ services/mailu/kustomization.yaml | 4 + services/mailu/mailu-sync-listener.yaml | 99 ------------------------- 3 files changed, 97 insertions(+), 99 deletions(-) create mode 100644 scripts/mailu_sync_listener.py diff --git a/scripts/mailu_sync_listener.py b/scripts/mailu_sync_listener.py new file mode 100644 index 0000000..27070c0 --- /dev/null +++ b/scripts/mailu_sync_listener.py @@ -0,0 +1,93 @@ +import http.server +import json +import subprocess +import threading + +from time import time + +# Simple debounce to avoid hammering on bursts +MIN_INTERVAL_SECONDS = 10 +last_run = 0.0 +lock = threading.Lock() +sync_done = threading.Event() +sync_done.set() +sync_running = False + + +def _run_sync_blocking() -> int: + global last_run, sync_running + with lock: + if sync_running: + return 0 + sync_running = True + sync_done.clear() + + try: + print("mailu-sync-listener: starting sync", flush=True) + proc = subprocess.run(["python", "/app/sync.py"], check=False) + rc = int(proc.returncode) + print(f"mailu-sync-listener: sync completed rc={rc}", flush=True) + return rc + finally: + with lock: + sync_running = False + last_run = time() + sync_done.set() + + +def _trigger_sync_async() -> bool: + with lock: + now = time() + if sync_running: + return False + if now - last_run < MIN_INTERVAL_SECONDS: + return False + + thread = threading.Thread(target=_run_sync_blocking, daemon=True) + thread.start() + return True + + +class Handler(http.server.BaseHTTPRequestHandler): + def do_POST(self): + length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(length) if length else b"" + try: + payload = json.loads(body or b"{}") + except json.JSONDecodeError: + self.send_response(400) + self.end_headers() + return + + wait = False + if isinstance(payload, dict): + wait = bool(payload.get("wait")) + + if wait: + with lock: + already_running = sync_running + if already_running: + sync_done.wait(timeout=120) 
+ with lock: + still_running = sync_running + self.send_response(200 if not still_running else 503) + self.end_headers() + return + + rc = _run_sync_blocking() + self.send_response(200 if rc == 0 else 500) + self.end_headers() + return + + _trigger_sync_async() + self.send_response(202) + self.end_headers() + + def log_message(self, fmt, *args): + # Quiet logging + return + + +if __name__ == "__main__": + server = http.server.ThreadingHTTPServer(("", 8080), Handler) + server.serve_forever() diff --git a/services/mailu/kustomization.yaml b/services/mailu/kustomization.yaml index a23e0b1..9e9359b 100644 --- a/services/mailu/kustomization.yaml +++ b/services/mailu/kustomization.yaml @@ -22,3 +22,7 @@ configMapGenerator: - sync.py=../../scripts/mailu_sync.py options: disableNameSuffixHash: true + - name: mailu-sync-listener + namespace: mailu-mailserver + files: + - listener.py=../../scripts/mailu_sync_listener.py diff --git a/services/mailu/mailu-sync-listener.yaml b/services/mailu/mailu-sync-listener.yaml index 4d70716..2127313 100644 --- a/services/mailu/mailu-sync-listener.yaml +++ b/services/mailu/mailu-sync-listener.yaml @@ -100,102 +100,3 @@ spec: configMap: name: mailu-sync-listener defaultMode: 0444 ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: mailu-sync-listener - namespace: mailu-mailserver -data: - listener.py: | - import http.server - import json - import os - import subprocess - import threading - - from time import time - - # Simple debounce to avoid hammering on bursts - MIN_INTERVAL_SECONDS = 10 - last_run = 0.0 - lock = threading.Lock() - sync_done = threading.Event() - sync_done.set() - sync_running = False - - def _run_sync_blocking() -> int: - global last_run, sync_running - with lock: - if sync_running: - return 0 - sync_running = True - sync_done.clear() - - try: - print("mailu-sync-listener: starting sync", flush=True) - proc = subprocess.run(["python", "/app/sync.py"], check=False) - rc = int(proc.returncode) - 
print(f"mailu-sync-listener: sync completed rc={rc}", flush=True) - return rc - finally: - with lock: - sync_running = False - last_run = time() - sync_done.set() - - def _trigger_sync_async() -> bool: - with lock: - now = time() - if sync_running: - return False - if now - last_run < MIN_INTERVAL_SECONDS: - return False - - thread = threading.Thread(target=_run_sync_blocking, daemon=True) - thread.start() - return True - - class Handler(http.server.BaseHTTPRequestHandler): - def do_POST(self): - length = int(self.headers.get("Content-Length", 0)) - body = self.rfile.read(length) if length else b"" - try: - payload = json.loads(body or b"{}") - except json.JSONDecodeError: - self.send_response(400) - self.end_headers() - return - - wait = False - if isinstance(payload, dict): - wait = bool(payload.get("wait")) - - if wait: - # If a sync is already running, wait for it to complete. - with lock: - already_running = sync_running - if already_running: - sync_done.wait(timeout=120) - with lock: - still_running = sync_running - self.send_response(200 if not still_running else 503) - self.end_headers() - return - - rc = _run_sync_blocking() - self.send_response(200 if rc == 0 else 500) - self.end_headers() - return - - _trigger_sync_async() - self.send_response(202) - self.end_headers() - - def log_message(self, fmt, *args): - # Quiet logging - return - - if __name__ == "__main__": - server = http.server.ThreadingHTTPServer(("", 8080), Handler) - server.serve_forever() -- 2.47.2 From 05216a972f3d7d1d37534d764dfd5824dec3eb84 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 05:58:49 +0000 Subject: [PATCH 250/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 7e018e5..c66ba0e 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml 
+++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-38 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-39 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 26f11db28590711f9bcab2ecfeb4e8778e696d5d Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 06:00:01 +0000 Subject: [PATCH 251/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 6bbff8a..54852b0 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-38 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-39 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 5f7ea4544d67ae2def325f6180ad22dd00e3a139 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 03:09:26 -0300 Subject: [PATCH 252/684] mailu: store app password as list --- scripts/mailu_sync.py | 2 +- scripts/nextcloud-mail-sync.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/mailu_sync.py b/scripts/mailu_sync.py index ee8aa18..4d08a62 100644 --- a/scripts/mailu_sync.py +++ b/scripts/mailu_sync.py @@ -181,7 +181,7 @@ def main(): if not app_pw: app_pw = random_password() - attrs["mailu_app_password"] = app_pw + 
attrs["mailu_app_password"] = [app_pw] kc_update_attributes(token, user, attrs) log(f"Set mailu_app_password for {email}") diff --git a/scripts/nextcloud-mail-sync.sh b/scripts/nextcloud-mail-sync.sh index 816b56a..4476e7f 100755 --- a/scripts/nextcloud-mail-sync.sh +++ b/scripts/nextcloud-mail-sync.sh @@ -46,7 +46,7 @@ users=$(curl -s -H "Authorization: Bearer ${token}" \ echo "${users}" | jq -c '.[]' | while read -r user; do username=$(echo "${user}" | jq -r '.username') email=$(echo "${user}" | jq -r '.email // empty') - app_pw=$(echo "${user}" | jq -r '.attributes.mailu_app_password[0] // empty') + app_pw=$(echo "${user}" | jq -r '(.attributes.mailu_app_password[0] // .attributes.mailu_app_password // empty)') [[ -z "${email}" || -z "${app_pw}" ]] && continue if account_exists "${username}" "${email}"; then echo "Skipping ${email}, already exists" -- 2.47.2 From d46d411154c196e491a0db17334fac7f25126fba Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 06:15:51 +0000 Subject: [PATCH 253/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index c66ba0e..da4d2b9 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-39 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-40 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 89228d2d5e7931ec4c0de2d1df75faf4b2761dcc Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 06:17:02 +0000 Subject: [PATCH 254/684] 
chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 54852b0..b714f69 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-39 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-40 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 7e464d3ec8a45206dc8042fddd271fa626c4b092 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 03:38:50 -0300 Subject: [PATCH 255/684] keycloak: enable reset password --- services/keycloak/kustomization.yaml | 1 + services/keycloak/realm-settings-job.yaml | 141 ++++++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 services/keycloak/realm-settings-job.yaml diff --git a/services/keycloak/kustomization.yaml b/services/keycloak/kustomization.yaml index a65715c..1e5fd38 100644 --- a/services/keycloak/kustomization.yaml +++ b/services/keycloak/kustomization.yaml @@ -6,5 +6,6 @@ resources: - namespace.yaml - pvc.yaml - deployment.yaml + - realm-settings-job.yaml - service.yaml - ingress.yaml diff --git a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml new file mode 100644 index 0000000..988a747 --- /dev/null +++ b/services/keycloak/realm-settings-job.yaml @@ -0,0 +1,141 @@ +# services/keycloak/realm-settings-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: keycloak-realm-settings-1 + namespace: sso +spec: + backoffLimit: 2 + template: + spec: + restartPolicy: OnFailure + containers: + - name: 
configure + image: python:3.11-alpine + env: + - name: KEYCLOAK_URL + value: http://keycloak.sso.svc.cluster.local:8080 + - name: KEYCLOAK_REALM + value: atlas + - name: KEYCLOAK_ADMIN_USER + valueFrom: + secretKeyRef: + name: keycloak-admin + key: username + - name: KEYCLOAK_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: keycloak-admin + key: password + - name: KEYCLOAK_SMTP_HOST + value: mailu-front.mailu-mailserver.svc.cluster.local + - name: KEYCLOAK_SMTP_PORT + value: "25" + - name: KEYCLOAK_SMTP_FROM + value: no-reply@bstein.dev + - name: KEYCLOAK_SMTP_FROM_NAME + value: Atlas SSO + - name: KEYCLOAK_SMTP_REPLY_TO + value: no-reply@bstein.dev + - name: KEYCLOAK_SMTP_REPLY_TO_NAME + value: Atlas SSO + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + python - <<'PY' + import json + import os + import time + import urllib.error + import urllib.parse + import urllib.request + + base_url = os.environ["KEYCLOAK_URL"].rstrip("/") + realm = os.environ["KEYCLOAK_REALM"] + admin_user = os.environ["KEYCLOAK_ADMIN_USER"] + admin_password = os.environ["KEYCLOAK_ADMIN_PASSWORD"] + + smtp_defaults = { + "host": os.environ["KEYCLOAK_SMTP_HOST"], + "port": os.environ["KEYCLOAK_SMTP_PORT"], + "from": os.environ["KEYCLOAK_SMTP_FROM"], + "fromDisplayName": os.environ["KEYCLOAK_SMTP_FROM_NAME"], + "replyTo": os.environ["KEYCLOAK_SMTP_REPLY_TO"], + "replyToDisplayName": os.environ["KEYCLOAK_SMTP_REPLY_TO_NAME"], + "auth": "false", + "starttls": "false", + "ssl": "false", + } + + def request(path, method="GET", data=None, headers=None): + if headers is None: + headers = {} + payload = None + if data is not None: + payload = json.dumps(data).encode() + headers = dict(headers) + headers["Content-Type"] = "application/json" + req = urllib.request.Request( + f"{base_url}{path}", + data=payload, + headers=headers, + method=method, + ) + return urllib.request.urlopen(req, timeout=10) + + for _ in range(60): + try: + with request("/health/ready") as resp: + if 
resp.status == 200: + break + except Exception: + time.sleep(2) + else: + raise SystemExit("Keycloak API did not become ready in time") + + token_data = urllib.parse.urlencode( + { + "grant_type": "password", + "client_id": "admin-cli", + "username": admin_user, + "password": admin_password, + } + ).encode() + token_req = urllib.request.Request( + f"{base_url}/realms/master/protocol/openid-connect/token", + data=token_data, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + method="POST", + ) + with urllib.request.urlopen(token_req, timeout=10) as resp: + token_body = json.loads(resp.read().decode()) + access_token = token_body["access_token"] + auth_headers = {"Authorization": f"Bearer {access_token}"} + + with request(f"/admin/realms/{realm}", headers=auth_headers) as resp: + realm_data = json.loads(resp.read().decode()) + + changed = False + if not realm_data.get("resetPasswordAllowed", False): + realm_data["resetPasswordAllowed"] = True + changed = True + + smtp = realm_data.get("smtpServer") or {} + if not smtp.get("host"): + smtp.update(smtp_defaults) + realm_data["smtpServer"] = smtp + changed = True + + if not changed: + raise SystemExit(0) + + with request( + f"/admin/realms/{realm}", + method="PUT", + data=realm_data, + headers=auth_headers, + ) as resp: + if resp.status not in (200, 204): + raise SystemExit(f"Unexpected response: {resp.status}") + PY -- 2.47.2 From 2ef3b7d45c7ec0a55b6b8d25353b7fceb066feb3 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 03:45:44 -0300 Subject: [PATCH 256/684] keycloak: pin realm job to rpi nodes --- services/keycloak/realm-settings-job.yaml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml index 988a747..cbd971e 100644 --- a/services/keycloak/realm-settings-job.yaml +++ b/services/keycloak/realm-settings-job.yaml @@ -2,12 +2,22 @@ apiVersion: batch/v1 kind: Job metadata: - 
name: keycloak-realm-settings-1 + name: keycloak-realm-settings-2 namespace: sso spec: backoffLimit: 2 template: spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: ["rpi5","rpi4"] + - key: node-role.kubernetes.io/worker + operator: Exists restartPolicy: OnFailure containers: - name: configure -- 2.47.2 From 816abca2df0c0d7418186a2c9e36505ab95ce83a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 03:49:19 -0300 Subject: [PATCH 257/684] keycloak: fix realm job service URL --- services/keycloak/realm-settings-job.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml index cbd971e..2952e1c 100644 --- a/services/keycloak/realm-settings-job.yaml +++ b/services/keycloak/realm-settings-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-realm-settings-2 + name: keycloak-realm-settings-3 namespace: sso spec: backoffLimit: 2 @@ -24,7 +24,7 @@ spec: image: python:3.11-alpine env: - name: KEYCLOAK_URL - value: http://keycloak.sso.svc.cluster.local:8080 + value: http://keycloak.sso.svc.cluster.local - name: KEYCLOAK_REALM value: atlas - name: KEYCLOAK_ADMIN_USER -- 2.47.2 From 0736c4255ef93f996f550532562cc6897b95bcee Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 06:53:54 +0000 Subject: [PATCH 258/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index da4d2b9..30a72e5 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: 
registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-40 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-41 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 6a155a7a7abd0aa44e7d192f106c9eff9ea99b93 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 06:55:05 +0000 Subject: [PATCH 259/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index b714f69..746ff7d 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-40 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-41 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 77beacec539d9960e3b0e84be82201642fcf2f56 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 03:55:08 -0300 Subject: [PATCH 260/684] keycloak: switch realm job to kcadm --- services/keycloak/realm-settings-job.yaml | 118 ++++------------------ 1 file changed, 19 insertions(+), 99 deletions(-) diff --git a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml index 2952e1c..22843cb 100644 --- a/services/keycloak/realm-settings-job.yaml +++ b/services/keycloak/realm-settings-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-realm-settings-3 + name: keycloak-realm-settings-4 namespace: sso spec: backoffLimit: 2 @@ -21,9 +21,9 @@ spec: 
restartPolicy: OnFailure containers: - name: configure - image: python:3.11-alpine + image: quay.io/keycloak/keycloak:26.0.7 env: - - name: KEYCLOAK_URL + - name: KEYCLOAK_SERVER value: http://keycloak.sso.svc.cluster.local - name: KEYCLOAK_REALM value: atlas @@ -53,99 +53,19 @@ spec: args: - | set -euo pipefail - python - <<'PY' - import json - import os - import time - import urllib.error - import urllib.parse - import urllib.request - - base_url = os.environ["KEYCLOAK_URL"].rstrip("/") - realm = os.environ["KEYCLOAK_REALM"] - admin_user = os.environ["KEYCLOAK_ADMIN_USER"] - admin_password = os.environ["KEYCLOAK_ADMIN_PASSWORD"] - - smtp_defaults = { - "host": os.environ["KEYCLOAK_SMTP_HOST"], - "port": os.environ["KEYCLOAK_SMTP_PORT"], - "from": os.environ["KEYCLOAK_SMTP_FROM"], - "fromDisplayName": os.environ["KEYCLOAK_SMTP_FROM_NAME"], - "replyTo": os.environ["KEYCLOAK_SMTP_REPLY_TO"], - "replyToDisplayName": os.environ["KEYCLOAK_SMTP_REPLY_TO_NAME"], - "auth": "false", - "starttls": "false", - "ssl": "false", - } - - def request(path, method="GET", data=None, headers=None): - if headers is None: - headers = {} - payload = None - if data is not None: - payload = json.dumps(data).encode() - headers = dict(headers) - headers["Content-Type"] = "application/json" - req = urllib.request.Request( - f"{base_url}{path}", - data=payload, - headers=headers, - method=method, - ) - return urllib.request.urlopen(req, timeout=10) - - for _ in range(60): - try: - with request("/health/ready") as resp: - if resp.status == 200: - break - except Exception: - time.sleep(2) - else: - raise SystemExit("Keycloak API did not become ready in time") - - token_data = urllib.parse.urlencode( - { - "grant_type": "password", - "client_id": "admin-cli", - "username": admin_user, - "password": admin_password, - } - ).encode() - token_req = urllib.request.Request( - f"{base_url}/realms/master/protocol/openid-connect/token", - data=token_data, - headers={"Content-Type": 
"application/x-www-form-urlencoded"}, - method="POST", - ) - with urllib.request.urlopen(token_req, timeout=10) as resp: - token_body = json.loads(resp.read().decode()) - access_token = token_body["access_token"] - auth_headers = {"Authorization": f"Bearer {access_token}"} - - with request(f"/admin/realms/{realm}", headers=auth_headers) as resp: - realm_data = json.loads(resp.read().decode()) - - changed = False - if not realm_data.get("resetPasswordAllowed", False): - realm_data["resetPasswordAllowed"] = True - changed = True - - smtp = realm_data.get("smtpServer") or {} - if not smtp.get("host"): - smtp.update(smtp_defaults) - realm_data["smtpServer"] = smtp - changed = True - - if not changed: - raise SystemExit(0) - - with request( - f"/admin/realms/{realm}", - method="PUT", - data=realm_data, - headers=auth_headers, - ) as resp: - if resp.status not in (200, 204): - raise SystemExit(f"Unexpected response: {resp.status}") - PY + /opt/keycloak/bin/kcadm.sh config credentials \ + --server "${KEYCLOAK_SERVER}" \ + --realm master \ + --user "${KEYCLOAK_ADMIN_USER}" \ + --password "${KEYCLOAK_ADMIN_PASSWORD}" + /opt/keycloak/bin/kcadm.sh update "realms/${KEYCLOAK_REALM}" \ + -s resetPasswordAllowed=true \ + -s "smtpServer.host=${KEYCLOAK_SMTP_HOST}" \ + -s "smtpServer.port=${KEYCLOAK_SMTP_PORT}" \ + -s "smtpServer.from=${KEYCLOAK_SMTP_FROM}" \ + -s "smtpServer.fromDisplayName=${KEYCLOAK_SMTP_FROM_NAME}" \ + -s "smtpServer.replyTo=${KEYCLOAK_SMTP_REPLY_TO}" \ + -s "smtpServer.replyToDisplayName=${KEYCLOAK_SMTP_REPLY_TO_NAME}" \ + -s smtpServer.auth=false \ + -s smtpServer.starttls=false \ + -s smtpServer.ssl=false -- 2.47.2 From 9fa081ca36e17a582cf8358c38cd3a2e07b81d68 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 03:58:37 -0300 Subject: [PATCH 261/684] keycloak: set realm smtp server --- services/keycloak/realm-settings-job.yaml | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git 
a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml index 22843cb..a6a3087 100644 --- a/services/keycloak/realm-settings-job.yaml +++ b/services/keycloak/realm-settings-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-realm-settings-4 + name: keycloak-realm-settings-5 namespace: sso spec: backoffLimit: 2 @@ -58,14 +58,10 @@ spec: --realm master \ --user "${KEYCLOAK_ADMIN_USER}" \ --password "${KEYCLOAK_ADMIN_PASSWORD}" + smtp_json="$(cat < Date: Fri, 2 Jan 2026 04:03:27 -0300 Subject: [PATCH 262/684] keycloak: apply realm smtp via api --- services/keycloak/realm-settings-job.yaml | 73 ++++++++++++++++++----- 1 file changed, 59 insertions(+), 14 deletions(-) diff --git a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml index a6a3087..48569ae 100644 --- a/services/keycloak/realm-settings-job.yaml +++ b/services/keycloak/realm-settings-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-realm-settings-5 + name: keycloak-realm-settings-6 namespace: sso spec: backoffLimit: 2 @@ -21,7 +21,7 @@ spec: restartPolicy: OnFailure containers: - name: configure - image: quay.io/keycloak/keycloak:26.0.7 + image: python:3.11-alpine env: - name: KEYCLOAK_SERVER value: http://keycloak.sso.svc.cluster.local @@ -53,15 +53,60 @@ spec: args: - | set -euo pipefail - /opt/keycloak/bin/kcadm.sh config credentials \ - --server "${KEYCLOAK_SERVER}" \ - --realm master \ - --user "${KEYCLOAK_ADMIN_USER}" \ - --password "${KEYCLOAK_ADMIN_PASSWORD}" - smtp_json="$(cat < Date: Fri, 2 Jan 2026 07:33:56 +0000 Subject: [PATCH 263/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 30a72e5..a2786ab 100644 --- 
a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-41 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-42 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From d44b759f0b3cb7c87b5eb69598bc91314f900310 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 07:35:08 +0000 Subject: [PATCH 264/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 746ff7d..c0db7d7 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-41 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-42 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From d1294a0dc95830234e1269acd52d790daaf44ca0 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 12:46:18 +0000 Subject: [PATCH 265/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index a2786ab..83e6893 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ 
b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-42 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-43 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 30373c19e728272926d7a8e10eeaa5b48dca0394 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 12:47:30 +0000 Subject: [PATCH 266/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index c0db7d7..a0ee2fd 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-42 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-43 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From b06ae2c89d10d7e5f1b3df94a23cfd19c89e7884 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 13:33:22 +0000 Subject: [PATCH 267/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 83e6893..fb22d51 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: 
harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-43 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-44 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From f05a8a220062ab022f4407a06cf0759f43c5bc66 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 13:34:33 +0000 Subject: [PATCH 268/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index a0ee2fd..5bf60ec 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-43 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-44 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 9c627087eb572a88ab4cf8604afa40b53a1e064f Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 14:18:25 +0000 Subject: [PATCH 269/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index fb22d51..2772d7d 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: 
registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-44 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-45 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 78afccc53ae8ceed72f6d189090c8a43591617cf Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 14:19:37 +0000 Subject: [PATCH 270/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 5bf60ec..c134051 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-44 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-45 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From f9462aae109ae330833b63126ed52d84b76720c3 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 15:17:30 +0000 Subject: [PATCH 271/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 2772d7d..8961d27 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-45 # {"$imagepolicy": 
"bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-46 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From a9b7f8604639af92f5b0edcb4c9c33e9e2c315a8 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 15:18:41 +0000 Subject: [PATCH 272/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index c134051..abf0b19 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-45 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-46 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 750f1a2cbf0221b2aea68c9459acd2e9a22d0b31 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 16:12:33 +0000 Subject: [PATCH 273/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 8961d27..9cc24d0 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-46 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: 
registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-47 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From ee90817040e5d207c224280ca666369b45088cc4 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 16:13:45 +0000 Subject: [PATCH 274/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index abf0b19..406c5db 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-46 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-47 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From de14d68fc9bcbf9e615a6d06dc1eb32d647da3b1 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 13:18:11 -0300 Subject: [PATCH 275/684] sso: codify openldap bootstrap and keycloak federation --- services/keycloak/kustomization.yaml | 1 + services/keycloak/ldap-federation-job.yaml | 250 +++++++++++++++++++++ services/openldap/bootstrap-job.yaml | 63 ++++++ services/openldap/kustomization.yaml | 1 + 4 files changed, 315 insertions(+) create mode 100644 services/keycloak/ldap-federation-job.yaml create mode 100644 services/openldap/bootstrap-job.yaml diff --git a/services/keycloak/kustomization.yaml b/services/keycloak/kustomization.yaml index 1e5fd38..5fb05ef 100644 --- a/services/keycloak/kustomization.yaml +++ b/services/keycloak/kustomization.yaml @@ -7,5 +7,6 @@ resources: - pvc.yaml - deployment.yaml - realm-settings-job.yaml + - 
ldap-federation-job.yaml - service.yaml - ingress.yaml diff --git a/services/keycloak/ldap-federation-job.yaml b/services/keycloak/ldap-federation-job.yaml new file mode 100644 index 0000000..ad90ac7 --- /dev/null +++ b/services/keycloak/ldap-federation-job.yaml @@ -0,0 +1,250 @@ +# services/keycloak/ldap-federation-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: keycloak-ldap-federation-1 + namespace: sso +spec: + backoffLimit: 2 + template: + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: ["rpi5","rpi4"] + - key: node-role.kubernetes.io/worker + operator: Exists + restartPolicy: OnFailure + containers: + - name: configure + image: python:3.11-alpine + imagePullPolicy: IfNotPresent + env: + - name: KEYCLOAK_SERVER + value: http://keycloak.sso.svc.cluster.local + - name: KEYCLOAK_REALM + value: atlas + - name: KEYCLOAK_ADMIN_USER + valueFrom: + secretKeyRef: + name: keycloak-admin + key: username + - name: KEYCLOAK_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: keycloak-admin + key: password + - name: LDAP_URL + value: ldap://openldap.sso.svc.cluster.local:389 + - name: LDAP_BIND_DN + value: cn=admin,dc=bstein,dc=dev + - name: LDAP_BIND_PASSWORD + valueFrom: + secretKeyRef: + name: openldap-admin + key: LDAP_ADMIN_PASSWORD + - name: LDAP_USERS_DN + value: ou=users,dc=bstein,dc=dev + - name: LDAP_GROUPS_DN + value: ou=groups,dc=bstein,dc=dev + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + python - <<'PY' + import json + import os + import time + import urllib.parse + import urllib.request + + base_url = os.environ["KEYCLOAK_SERVER"].rstrip("/") + realm = os.environ["KEYCLOAK_REALM"] + admin_user = os.environ["KEYCLOAK_ADMIN_USER"] + admin_password = os.environ["KEYCLOAK_ADMIN_PASSWORD"] + + ldap_url = os.environ["LDAP_URL"] + ldap_bind_dn = os.environ["LDAP_BIND_DN"] + ldap_bind_password = 
os.environ["LDAP_BIND_PASSWORD"] + ldap_users_dn = os.environ["LDAP_USERS_DN"] + ldap_groups_dn = os.environ["LDAP_GROUPS_DN"] + + def http_json(method: str, url: str, token: str, payload=None): + data = None + headers = {"Authorization": f"Bearer {token}"} + if payload is not None: + data = json.dumps(payload).encode() + headers["Content-Type"] = "application/json" + req = urllib.request.Request(url, data=data, headers=headers, method=method) + with urllib.request.urlopen(req, timeout=30) as resp: + body = resp.read() + if not body: + return resp.status, None, dict(resp.headers) + return resp.status, json.loads(body.decode()), dict(resp.headers) + + def get_token(): + token_data = urllib.parse.urlencode( + { + "grant_type": "password", + "client_id": "admin-cli", + "username": admin_user, + "password": admin_password, + } + ).encode() + token_req = urllib.request.Request( + f"{base_url}/realms/master/protocol/openid-connect/token", + data=token_data, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + method="POST", + ) + with urllib.request.urlopen(token_req, timeout=30) as resp: + token_body = json.loads(resp.read().decode()) + return token_body["access_token"] + + def wait_for_keycloak(): + for _ in range(60): + try: + token = get_token() + if token: + return token + except Exception: + time.sleep(2) + raise SystemExit("Keycloak not ready") + + token = wait_for_keycloak() + + # Find existing LDAP user federation provider (if any) + status, components, _ = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/components?type=org.keycloak.storage.UserStorageProvider", + token, + ) + if status != 200: + raise SystemExit(f"Unexpected components response: {status}") + components = components or [] + + ldap_component = None + for c in components: + if c.get("providerId") == "ldap" and c.get("name") in ("openldap", "ldap"): + ldap_component = c + break + ldap_component_id = ldap_component["id"] if ldap_component else None + + desired = { + "name": 
"openldap", + "providerId": "ldap", + "providerType": "org.keycloak.storage.UserStorageProvider", + "parentId": realm, + "config": { + "enabled": ["true"], + "priority": ["0"], + "importEnabled": ["true"], + "editMode": ["WRITABLE"], + "syncRegistrations": ["true"], + "vendor": ["other"], + "connectionUrl": [ldap_url], + "bindDn": [ldap_bind_dn], + "bindCredential": [ldap_bind_password], + "authType": ["simple"], + "usersDn": [ldap_users_dn], + "searchScope": ["1"], + "pagination": ["true"], + "usernameLDAPAttribute": ["uid"], + "rdnLDAPAttribute": ["uid"], + "uuidLDAPAttribute": ["entryUUID"], + "userObjectClasses": ["inetOrgPerson, organizationalPerson, person, top"], + "trustEmail": ["true"], + "useTruststoreSpi": ["never"], + "connectionPooling": ["true"], + "cachePolicy": ["DEFAULT"], + "useKerberosForPasswordAuthentication": ["false"], + "allowKerberosAuthentication": ["false"], + }, + } + + if ldap_component: + desired["id"] = ldap_component["id"] + print(f"Updating LDAP federation provider: {desired['id']}") + status, _, _ = http_json( + "PUT", + f"{base_url}/admin/realms/{realm}/components/{desired['id']}", + token, + desired, + ) + if status not in (200, 204): + raise SystemExit(f"Unexpected update status: {status}") + else: + print("Creating LDAP federation provider") + status, _, headers = http_json( + "POST", + f"{base_url}/admin/realms/{realm}/components", + token, + desired, + ) + if status not in (201, 204): + raise SystemExit(f"Unexpected create status: {status}") + location = headers.get("Location", "") + if location: + ldap_component_id = location.rstrip("/").split("/")[-1] + + # Ensure a basic LDAP group mapper exists (optional but harmless). 
+ if not ldap_component_id: + print("WARNING: unable to determine LDAP component id; skipping group mapper") + raise SystemExit(0) + + status, components, _ = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/components?type=org.keycloak.storage.ldap.mappers.LDAPStorageMapper", + token, + ) + components = components or [] + group_mapper = None + for c in components: + if c.get("name") == "openldap-groups" and c.get("parentId") == ldap_component_id: + group_mapper = c + break + + mapper_payload = { + "name": "openldap-groups", + "providerId": "group-ldap-mapper", + "providerType": "org.keycloak.storage.ldap.mappers.LDAPStorageMapper", + "parentId": ldap_component_id, + "config": { + "groups.dn": [ldap_groups_dn], + "group.name.ldap.attribute": ["cn"], + "group.object.classes": ["groupOfNames"], + "membership.ldap.attribute": ["member"], + "membership.attribute.type": ["DN"], + "mode": ["LDAP_ONLY"], + "user.roles.retrieve.strategy": ["LOAD_GROUPS_BY_MEMBER_ATTRIBUTE"], + "preserve.group.inheritance": ["true"], + }, + } + + if group_mapper: + mapper_payload["id"] = group_mapper["id"] + mapper_payload["parentId"] = group_mapper.get("parentId", mapper_payload["parentId"]) + print(f"Updating LDAP group mapper: {mapper_payload['id']}") + status, _, _ = http_json( + "PUT", + f"{base_url}/admin/realms/{realm}/components/{mapper_payload['id']}", + token, + mapper_payload, + ) + if status not in (200, 204): + raise SystemExit(f"Unexpected group mapper update status: {status}") + else: + print("Creating LDAP group mapper") + status, _, _ = http_json( + "POST", + f"{base_url}/admin/realms/{realm}/components", + token, + mapper_payload, + ) + if status not in (201, 204): + raise SystemExit(f"Unexpected group mapper create status: {status}") + PY diff --git a/services/openldap/bootstrap-job.yaml b/services/openldap/bootstrap-job.yaml new file mode 100644 index 0000000..2f94f88 --- /dev/null +++ b/services/openldap/bootstrap-job.yaml @@ -0,0 +1,63 @@ +# 
services/openldap/bootstrap-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: openldap-bootstrap-1 + namespace: sso +spec: + backoffLimit: 3 + template: + spec: + restartPolicy: OnFailure + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" + containers: + - name: bootstrap + image: docker.io/osixia/openldap:1.5.0 + imagePullPolicy: IfNotPresent + env: + - name: LDAP_DOMAIN + value: bstein.dev + - name: LDAP_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: openldap-admin + key: LDAP_ADMIN_PASSWORD + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + + domain="${LDAP_DOMAIN}" + base_dn="$(printf '%s' "${domain}" | awk -F. '{for (i=1;i<=NF;i++) printf("%sdc=%s", (i==1?"":","), $i)}')" + admin_dn="cn=admin,${base_dn}" + ldap_uri="ldap://openldap.sso.svc.cluster.local:389" + + echo "Waiting for OpenLDAP..." + for i in $(seq 1 60); do + if ldapsearch -x -H "${ldap_uri}" -b "${base_dn}" -s base '(objectClass=*)' dn >/dev/null 2>&1; then + break + fi + sleep 2 + done + + ensure_ou() { + local ou_name="${1}" + local ou_dn="ou=${ou_name},${base_dn}" + + if ldapsearch -x -H "${ldap_uri}" -D "${admin_dn}" -w "${LDAP_ADMIN_PASSWORD}" -b "${ou_dn}" -s base '(objectClass=organizationalUnit)' dn >/dev/null 2>&1; then + echo "OU ${ou_name} exists" + return 0 + fi + + echo "Creating OU ${ou_name}" + cat < Date: Fri, 2 Jan 2026 13:25:30 -0300 Subject: [PATCH 276/684] sso: fix openldap bootstrap job --- services/openldap/bootstrap-job.yaml | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/services/openldap/bootstrap-job.yaml b/services/openldap/bootstrap-job.yaml index 2f94f88..31fa382 100644 --- a/services/openldap/bootstrap-job.yaml +++ b/services/openldap/bootstrap-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: openldap-bootstrap-1 + name: openldap-bootstrap-2 namespace: sso spec: backoffLimit: 3 @@ -29,18 +29,24 @@ spec: - | set -euo pipefail - 
domain="${LDAP_DOMAIN}" - base_dn="$(printf '%s' "${domain}" | awk -F. '{for (i=1;i<=NF;i++) printf("%sdc=%s", (i==1?"":","), $i)}')" + base_dn="dc=bstein,dc=dev" admin_dn="cn=admin,${base_dn}" - ldap_uri="ldap://openldap.sso.svc.cluster.local:389" + ldap_uri="ldap://openldap-0.openldap.sso.svc.cluster.local:389" echo "Waiting for OpenLDAP..." - for i in $(seq 1 60); do + i=0 + while [ "${i}" -lt 60 ]; do if ldapsearch -x -H "${ldap_uri}" -b "${base_dn}" -s base '(objectClass=*)' dn >/dev/null 2>&1; then + echo "OpenLDAP is ready" break fi sleep 2 + i=$((i+1)) done + if ! ldapsearch -x -H "${ldap_uri}" -b "${base_dn}" -s base '(objectClass=*)' dn >/dev/null 2>&1; then + echo "OpenLDAP did not become ready in time" >&2 + exit 1 + fi ensure_ou() { local ou_name="${1}" -- 2.47.2 From 5ae9bf578eabc05086c8cdb70f45f53f2baf621f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 13:34:16 -0300 Subject: [PATCH 277/684] sso: make openldap bootstrap POSIX sh --- services/openldap/bootstrap-job.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/openldap/bootstrap-job.yaml b/services/openldap/bootstrap-job.yaml index 31fa382..c41a7a0 100644 --- a/services/openldap/bootstrap-job.yaml +++ b/services/openldap/bootstrap-job.yaml @@ -27,7 +27,7 @@ spec: command: ["/bin/sh", "-c"] args: - | - set -euo pipefail + set -eu base_dn="dc=bstein,dc=dev" admin_dn="cn=admin,${base_dn}" @@ -49,8 +49,8 @@ spec: fi ensure_ou() { - local ou_name="${1}" - local ou_dn="ou=${ou_name},${base_dn}" + ou_name="${1}" + ou_dn="ou=${ou_name},${base_dn}" if ldapsearch -x -H "${ldap_uri}" -D "${admin_dn}" -w "${LDAP_ADMIN_PASSWORD}" -b "${ou_dn}" -s base '(objectClass=organizationalUnit)' dn >/dev/null 2>&1; then echo "OU ${ou_name} exists" -- 2.47.2 From 2c86a6d95fbf564cf216d09f52877c8c21546ee0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 13:40:11 -0300 Subject: [PATCH 278/684] sso: bump openldap bootstrap job --- 
services/openldap/bootstrap-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/openldap/bootstrap-job.yaml b/services/openldap/bootstrap-job.yaml index c41a7a0..bbfaf5f 100644 --- a/services/openldap/bootstrap-job.yaml +++ b/services/openldap/bootstrap-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: openldap-bootstrap-2 + name: openldap-bootstrap-3 namespace: sso spec: backoffLimit: 3 -- 2.47.2 From d70b685f2794564086f9ffc5c5f0fe6980b8b9b7 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 13:50:02 -0300 Subject: [PATCH 279/684] sso: remove openldap bootstrap job --- services/openldap/bootstrap-job.yaml | 69 ---------------------------- services/openldap/kustomization.yaml | 1 - 2 files changed, 70 deletions(-) delete mode 100644 services/openldap/bootstrap-job.yaml diff --git a/services/openldap/bootstrap-job.yaml b/services/openldap/bootstrap-job.yaml deleted file mode 100644 index bbfaf5f..0000000 --- a/services/openldap/bootstrap-job.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# services/openldap/bootstrap-job.yaml -apiVersion: batch/v1 -kind: Job -metadata: - name: openldap-bootstrap-3 - namespace: sso -spec: - backoffLimit: 3 - template: - spec: - restartPolicy: OnFailure - nodeSelector: - kubernetes.io/arch: arm64 - node-role.kubernetes.io/worker: "true" - containers: - - name: bootstrap - image: docker.io/osixia/openldap:1.5.0 - imagePullPolicy: IfNotPresent - env: - - name: LDAP_DOMAIN - value: bstein.dev - - name: LDAP_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: openldap-admin - key: LDAP_ADMIN_PASSWORD - command: ["/bin/sh", "-c"] - args: - - | - set -eu - - base_dn="dc=bstein,dc=dev" - admin_dn="cn=admin,${base_dn}" - ldap_uri="ldap://openldap-0.openldap.sso.svc.cluster.local:389" - - echo "Waiting for OpenLDAP..." 
- i=0 - while [ "${i}" -lt 60 ]; do - if ldapsearch -x -H "${ldap_uri}" -b "${base_dn}" -s base '(objectClass=*)' dn >/dev/null 2>&1; then - echo "OpenLDAP is ready" - break - fi - sleep 2 - i=$((i+1)) - done - if ! ldapsearch -x -H "${ldap_uri}" -b "${base_dn}" -s base '(objectClass=*)' dn >/dev/null 2>&1; then - echo "OpenLDAP did not become ready in time" >&2 - exit 1 - fi - - ensure_ou() { - ou_name="${1}" - ou_dn="ou=${ou_name},${base_dn}" - - if ldapsearch -x -H "${ldap_uri}" -D "${admin_dn}" -w "${LDAP_ADMIN_PASSWORD}" -b "${ou_dn}" -s base '(objectClass=organizationalUnit)' dn >/dev/null 2>&1; then - echo "OU ${ou_name} exists" - return 0 - fi - - echo "Creating OU ${ou_name}" - cat < Date: Fri, 2 Jan 2026 14:02:05 -0300 Subject: [PATCH 280/684] sso: fix keycloak ldap provider parentId --- services/keycloak/ldap-federation-job.yaml | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/services/keycloak/ldap-federation-job.yaml b/services/keycloak/ldap-federation-job.yaml index ad90ac7..f25ff13 100644 --- a/services/keycloak/ldap-federation-job.yaml +++ b/services/keycloak/ldap-federation-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-ldap-federation-1 + name: keycloak-ldap-federation-2 namespace: sso spec: backoffLimit: 2 @@ -117,6 +117,16 @@ spec: token = wait_for_keycloak() + # Keycloak component "parentId" must be the realm UUID, not the realm name. 
+ status, realm_rep, _ = http_json( + "GET", + f"{base_url}/admin/realms/{realm}", + token, + ) + if status != 200 or not realm_rep or not realm_rep.get("id"): + raise SystemExit(f"Unable to resolve realm id for {realm} (status={status})") + realm_id = realm_rep["id"] + # Find existing LDAP user federation provider (if any) status, components, _ = http_json( "GET", @@ -138,7 +148,7 @@ spec: "name": "openldap", "providerId": "ldap", "providerType": "org.keycloak.storage.UserStorageProvider", - "parentId": realm, + "parentId": realm_id, "config": { "enabled": ["true"], "priority": ["0"], -- 2.47.2 From 1346ccd31b65a4219eae1a2913c055c969f9e6b9 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 14:12:20 -0300 Subject: [PATCH 281/684] keycloak: repair ldap federation parentId --- services/keycloak/ldap-federation-job.yaml | 41 +++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/services/keycloak/ldap-federation-job.yaml b/services/keycloak/ldap-federation-job.yaml index f25ff13..f993fef 100644 --- a/services/keycloak/ldap-federation-job.yaml +++ b/services/keycloak/ldap-federation-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-ldap-federation-2 + name: keycloak-ldap-federation-3 namespace: sso spec: backoffLimit: 2 @@ -127,6 +127,45 @@ spec: raise SystemExit(f"Unable to resolve realm id for {realm} (status={status})") realm_id = realm_rep["id"] + # Some historical LDAP federation components were created with parentId=. + # That makes realm resolution null in Keycloak internals and breaks authentication. 
+ status, all_components, _ = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/components", + token, + ) + if status != 200: + raise SystemExit(f"Unexpected components response: {status}") + all_components = all_components or [] + + for c in all_components: + if c.get("providerId") != "ldap": + continue + if c.get("providerType") != "org.keycloak.storage.UserStorageProvider": + continue + if c.get("parentId") == realm_id: + continue + cid = c.get("id") + if not cid: + continue + print(f"Fixing LDAP federation parentId for {cid} (was {c.get('parentId')})") + status, comp, _ = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/components/{cid}", + token, + ) + if status != 200 or not comp: + raise SystemExit(f"Unable to fetch component {cid} (status={status})") + comp["parentId"] = realm_id + status, _, _ = http_json( + "PUT", + f"{base_url}/admin/realms/{realm}/components/{cid}", + token, + comp, + ) + if status not in (200, 204): + raise SystemExit(f"Unexpected parentId repair status for {cid}: {status}") + # Find existing LDAP user federation provider (if any) status, components, _ = http_json( "GET", -- 2.47.2 From c36d318d81c36ff5246e5d81523552b26101bb84 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 16:17:53 -0300 Subject: [PATCH 282/684] vaultwarden: add flux kustomization --- .../applications/kustomization.yaml | 1 + .../vaultwarden/kustomization.yaml | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 clusters/atlas/flux-system/applications/vaultwarden/kustomization.yaml diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index a503520..d944938 100644 --- a/clusters/atlas/flux-system/applications/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/kustomization.yaml @@ -4,6 +4,7 @@ kind: Kustomization resources: - gitea/kustomization.yaml - vault/kustomization.yaml + - vaultwarden/kustomization.yaml - 
comms/kustomization.yaml - communication/kustomization.yaml - crypto/kustomization.yaml diff --git a/clusters/atlas/flux-system/applications/vaultwarden/kustomization.yaml b/clusters/atlas/flux-system/applications/vaultwarden/kustomization.yaml new file mode 100644 index 0000000..9cf4d08 --- /dev/null +++ b/clusters/atlas/flux-system/applications/vaultwarden/kustomization.yaml @@ -0,0 +1,19 @@ +# clusters/atlas/flux-system/applications/vaultwarden/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: vaultwarden + namespace: flux-system +spec: + interval: 10m + sourceRef: + kind: GitRepository + name: flux-system + namespace: flux-system + path: ./services/vaultwarden + targetNamespace: vaultwarden + prune: true + wait: true + dependsOn: + - name: helm + - name: traefik -- 2.47.2 From e5f41cfa2b799f92c23274a6ffdc34c117826dba Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 16:26:48 -0300 Subject: [PATCH 283/684] vaultwarden: suspend flux kustomization --- .../flux-system/applications/vaultwarden/kustomization.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/clusters/atlas/flux-system/applications/vaultwarden/kustomization.yaml b/clusters/atlas/flux-system/applications/vaultwarden/kustomization.yaml index 9cf4d08..eda2a30 100644 --- a/clusters/atlas/flux-system/applications/vaultwarden/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/vaultwarden/kustomization.yaml @@ -6,6 +6,7 @@ metadata: namespace: flux-system spec: interval: 10m + suspend: true sourceRef: kind: GitRepository name: flux-system -- 2.47.2 From e2e76592a0ccddfc0368d1066506352ed5cd2452 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 16:57:42 -0300 Subject: [PATCH 284/684] keycloak: enable debug logging --- services/keycloak/deployment.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index 09599ba..f772710 100644 --- 
a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -102,6 +102,8 @@ spec: value: "9000" - name: KC_HTTP_MANAGEMENT_BIND_ADDRESS value: 0.0.0.0 + - name: KC_LOG_LEVEL + value: DEBUG - name: KC_HEALTH_ENABLED value: "true" - name: KC_METRICS_ENABLED -- 2.47.2 From 23a9e1ec30666721e8961b9767aa55cb9c8aec46 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 17:02:59 -0300 Subject: [PATCH 285/684] keycloak: use recreate strategy with pvc --- services/keycloak/deployment.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index f772710..96875f4 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -8,6 +8,8 @@ metadata: app: keycloak spec: replicas: 1 + strategy: + type: Recreate selector: matchLabels: app: keycloak -- 2.47.2 From 22b7e7aa666def11c1a8901024709ba105a1e5fd Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 17:09:24 -0300 Subject: [PATCH 286/684] keycloak: clear rollingUpdate for recreate --- services/keycloak/deployment.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index 96875f4..1545d3d 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -10,6 +10,7 @@ spec: replicas: 1 strategy: type: Recreate + rollingUpdate: null selector: matchLabels: app: keycloak -- 2.47.2 From 22545326429cd70e33040f984aa5dc03d218b0a1 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 17:15:37 -0300 Subject: [PATCH 287/684] keycloak: roll update with no surge --- services/keycloak/deployment.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index 1545d3d..0ead0a8 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -9,8 +9,10 @@ metadata: spec: replicas: 1 
strategy: - type: Recreate - rollingUpdate: null + type: RollingUpdate + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 selector: matchLabels: app: keycloak -- 2.47.2 From a92a51c6c50999bfbdeff2671e0ced565acfa353 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 20:46:53 +0000 Subject: [PATCH 288/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 9cc24d0..e1f8122 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-47 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-48 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From ee0aee71f8cabfe7052a89a69d316c317a5e8686 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 20:48:04 +0000 Subject: [PATCH 289/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 406c5db..a23cd7a 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-47 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-48 # {"$imagepolicy": 
"bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 5e3cfee3d5650d19d6acbb372afbff37813b4c5e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 18:02:37 -0300 Subject: [PATCH 290/684] bstein-dev-home: read vaultwarden admin token --- services/bstein-dev-home/rbac.yaml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/services/bstein-dev-home/rbac.yaml b/services/bstein-dev-home/rbac.yaml index a6fcd03..6717b45 100644 --- a/services/bstein-dev-home/rbac.yaml +++ b/services/bstein-dev-home/rbac.yaml @@ -27,3 +27,28 @@ subjects: - kind: ServiceAccount name: bstein-dev-home namespace: bstein-dev-home +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: bstein-dev-home-vaultwarden-admin-token-reader + namespace: vaultwarden +rules: + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get"] + resourceNames: ["vaultwarden-admin"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: bstein-dev-home-vaultwarden-admin-token-reader + namespace: vaultwarden +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: bstein-dev-home-vaultwarden-admin-token-reader +subjects: + - kind: ServiceAccount + name: bstein-dev-home + namespace: bstein-dev-home -- 2.47.2 From b509234aeeabe27c0c2cd3f597ab4c5760477beb Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 18:05:17 -0300 Subject: [PATCH 291/684] bstein-dev-home: allow vaultwarden admin secret read --- services/bstein-dev-home/rbac.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/services/bstein-dev-home/rbac.yaml b/services/bstein-dev-home/rbac.yaml index 6717b45..cbc8050 100644 --- a/services/bstein-dev-home/rbac.yaml +++ b/services/bstein-dev-home/rbac.yaml @@ -29,6 +29,29 @@ subjects: namespace: bstein-dev-home --- apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: 
bstein-dev-home-vaultwarden-admin-secret-reader +rules: + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get"] + resourceNames: ["vaultwarden-admin"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: bstein-dev-home-vaultwarden-admin-secret-reader +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: bstein-dev-home-vaultwarden-admin-secret-reader +subjects: + - kind: ServiceAccount + name: bstein-dev-home + namespace: bstein-dev-home +--- +apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: bstein-dev-home-vaultwarden-admin-token-reader -- 2.47.2 From 503a9264c551fb11553fedc4174432b7e21afecd Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 18:45:45 -0300 Subject: [PATCH 292/684] keycloak: cleanup LDAP federation --- services/keycloak/ldap-federation-job.yaml | 75 ++++++++++++++++++++-- 1 file changed, 69 insertions(+), 6 deletions(-) diff --git a/services/keycloak/ldap-federation-job.yaml b/services/keycloak/ldap-federation-job.yaml index f993fef..5c59fff 100644 --- a/services/keycloak/ldap-federation-job.yaml +++ b/services/keycloak/ldap-federation-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-ldap-federation-3 + name: keycloak-ldap-federation-4 namespace: sso spec: backoffLimit: 2 @@ -60,6 +60,7 @@ spec: import os import time import urllib.parse + import urllib.error import urllib.request base_url = os.environ["KEYCLOAK_SERVER"].rstrip("/") @@ -176,11 +177,21 @@ spec: raise SystemExit(f"Unexpected components response: {status}") components = components or [] - ldap_component = None - for c in components: - if c.get("providerId") == "ldap" and c.get("name") in ("openldap", "ldap"): - ldap_component = c - break + ldap_components = [c for c in components if c.get("providerId") == "ldap" and c.get("id")] + + # Select a canonical LDAP federation provider deterministically. 
+ # Duplicate LDAP providers can cause Keycloak admin/user queries to fail if any one of them is misconfigured. + candidates = [] + for c in ldap_components: + if c.get("name") not in ("openldap", "ldap"): + continue + cfg = c.get("config") or {} + if (cfg.get("connectionUrl") or [None])[0] == ldap_url: + candidates.append(c) + if not candidates: + candidates = [c for c in ldap_components if c.get("name") in ("openldap", "ldap")] + candidates.sort(key=lambda x: x.get("id", "")) + ldap_component = candidates[0] if candidates else None ldap_component_id = ldap_component["id"] if ldap_component else None desired = { @@ -296,4 +307,56 @@ spec: ) if status not in (201, 204): raise SystemExit(f"Unexpected group mapper create status: {status}") + + # Cleanup duplicate LDAP federation providers and their child components (mappers, etc). + # Keep only the canonical provider we updated/created above. + try: + status, fresh_components, _ = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/components", + token, + ) + if status != 200: + raise Exception(f"unexpected components status {status}") + fresh_components = fresh_components or [] + + dup_provider_ids = [] + for c in fresh_components: + if c.get("providerId") != "ldap": + continue + if c.get("providerType") != "org.keycloak.storage.UserStorageProvider": + continue + cid = c.get("id") + if not cid or cid == ldap_component_id: + continue + dup_provider_ids.append(cid) + + if dup_provider_ids: + for pid in dup_provider_ids: + # Delete child components first. 
+ for child in fresh_components: + if child.get("parentId") != pid: + continue + child_id = child.get("id") + if not child_id: + continue + try: + http_json( + "DELETE", + f"{base_url}/admin/realms/{realm}/components/{child_id}", + token, + ) + except urllib.error.HTTPError as e: + print(f"WARNING: failed to delete LDAP child component {child_id} (status={e.code})") + try: + http_json( + "DELETE", + f"{base_url}/admin/realms/{realm}/components/{pid}", + token, + ) + except urllib.error.HTTPError as e: + print(f"WARNING: failed to delete duplicate LDAP provider {pid} (status={e.code})") + print(f"Cleaned up {len(dup_provider_ids)} duplicate LDAP federation providers") + except Exception as e: + print(f"WARNING: LDAP cleanup failed (continuing): {e}") PY -- 2.47.2 From 82b2c95bf0d8f9ab1a4d52a434bb97608b01ae1c Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 22:23:00 +0000 Subject: [PATCH 293/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index e1f8122..ae6e37d 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-48 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-49 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 3f1780daedd7fe8c798c44e94af60da36a5a83a1 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 22:24:11 +0000 Subject: [PATCH 294/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index a23cd7a..5359773 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-48 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-49 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 54d324f55533902a279ed83d7ae6822ba6cbce9a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 20:01:51 -0300 Subject: [PATCH 295/684] keycloak(atlas): harden realm settings job --- services/keycloak/realm-settings-job.yaml | 81 ++++++++++++++++++----- 1 file changed, 63 insertions(+), 18 deletions(-) diff --git a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml index 48569ae..3ec64a3 100644 --- a/services/keycloak/realm-settings-job.yaml +++ b/services/keycloak/realm-settings-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-realm-settings-6 + name: keycloak-realm-settings-7 namespace: sso spec: backoffLimit: 2 @@ -64,6 +64,19 @@ spec: admin_user = os.environ["KEYCLOAK_ADMIN_USER"] admin_password = os.environ["KEYCLOAK_ADMIN_PASSWORD"] + def http_json(method: str, url: str, token: str, payload=None): + data = None + headers = {"Authorization": f"Bearer {token}"} + if payload is not None: + data = json.dumps(payload).encode() + headers["Content-Type"] = "application/json" + req = urllib.request.Request(url, data=data, headers=headers, method=method) + with urllib.request.urlopen(req, timeout=30) as resp: + body = resp.read() + if not body: + return resp.status, None + return resp.status, 
json.loads(body.decode()) + token_data = urllib.parse.urlencode( { "grant_type": "password", @@ -82,9 +95,17 @@ spec: token_body = json.loads(resp.read().decode()) access_token = token_body["access_token"] - payload = { - "resetPasswordAllowed": True, - "smtpServer": { + # Update realm settings safely by fetching the full realm representation first. + realm_url = f"{base_url}/admin/realms/{realm}" + status, realm_rep = http_json("GET", realm_url, access_token) + if status != 200 or not realm_rep: + raise SystemExit(f"Unable to fetch realm {realm} (status={status})") + + realm_rep["resetPasswordAllowed"] = True + + smtp = realm_rep.get("smtpServer") or {} + smtp.update( + { "host": os.environ["KEYCLOAK_SMTP_HOST"], "port": os.environ["KEYCLOAK_SMTP_PORT"], "from": os.environ["KEYCLOAK_SMTP_FROM"], @@ -94,19 +115,43 @@ spec: "auth": "false", "starttls": "false", "ssl": "false", - }, - } - - update_req = urllib.request.Request( - f"{base_url}/admin/realms/{realm}", - data=json.dumps(payload).encode(), - headers={ - "Authorization": f"Bearer {access_token}", - "Content-Type": "application/json", - }, - method="PUT", + } ) - with urllib.request.urlopen(update_req, timeout=10) as resp: - if resp.status not in (200, 204): - raise SystemExit(f"Unexpected response: {resp.status}") + realm_rep["smtpServer"] = smtp + + status, _ = http_json("PUT", realm_url, access_token, realm_rep) + if status not in (200, 204): + raise SystemExit(f"Unexpected realm update response: {status}") + + # Disable Identity Provider Redirector in the browser flow for this realm. 
+ status, executions = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/authentication/flows/browser/executions", + access_token, + ) + if status == 200 and executions: + for ex in executions: + if ex.get("providerId") != "identity-provider-redirector": + continue + ex_id = ex.get("id") + if not ex_id: + continue + status, ex_rep = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/authentication/executions/{ex_id}", + access_token, + ) + if status != 200 or not ex_rep: + raise SystemExit(f"Unable to fetch browser execution {ex_id} (status={status})") + if ex_rep.get("requirement") == "DISABLED": + continue + ex_rep["requirement"] = "DISABLED" + status, _ = http_json( + "PUT", + f"{base_url}/admin/realms/{realm}/authentication/executions/{ex_id}", + access_token, + ex_rep, + ) + if status not in (200, 204): + raise SystemExit(f"Unexpected execution update response for {ex_id}: {status}") PY -- 2.47.2 From 21d8fc37882695e5536e6f2fb93858aacd67f025 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 20:04:47 -0300 Subject: [PATCH 296/684] keycloak(atlas): retry realm settings job --- services/keycloak/realm-settings-job.yaml | 34 ++++++++++++++++------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml index 3ec64a3..f18ac6f 100644 --- a/services/keycloak/realm-settings-job.yaml +++ b/services/keycloak/realm-settings-job.yaml @@ -2,10 +2,10 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-realm-settings-7 + name: keycloak-realm-settings-8 namespace: sso spec: - backoffLimit: 2 + backoffLimit: 0 template: spec: affinity: @@ -18,7 +18,7 @@ spec: values: ["rpi5","rpi4"] - key: node-role.kubernetes.io/worker operator: Exists - restartPolicy: OnFailure + restartPolicy: Never containers: - name: configure image: python:3.11-alpine @@ -57,6 +57,7 @@ spec: import json import os import urllib.parse + import urllib.error import urllib.request 
base_url = os.environ["KEYCLOAK_SERVER"].rstrip("/") @@ -71,11 +72,20 @@ spec: data = json.dumps(payload).encode() headers["Content-Type"] = "application/json" req = urllib.request.Request(url, data=data, headers=headers, method=method) - with urllib.request.urlopen(req, timeout=30) as resp: - body = resp.read() - if not body: - return resp.status, None - return resp.status, json.loads(body.decode()) + try: + with urllib.request.urlopen(req, timeout=30) as resp: + body = resp.read() + if not body: + return resp.status, None + return resp.status, json.loads(body.decode()) + except urllib.error.HTTPError as exc: + raw = exc.read() + if not raw: + return exc.code, None + try: + return exc.code, json.loads(raw.decode()) + except Exception: + return exc.code, {"raw": raw.decode(errors="replace")} token_data = urllib.parse.urlencode( { @@ -91,8 +101,12 @@ spec: headers={"Content-Type": "application/x-www-form-urlencoded"}, method="POST", ) - with urllib.request.urlopen(token_req, timeout=10) as resp: - token_body = json.loads(resp.read().decode()) + try: + with urllib.request.urlopen(token_req, timeout=10) as resp: + token_body = json.loads(resp.read().decode()) + except urllib.error.HTTPError as exc: + body = exc.read().decode(errors="replace") + raise SystemExit(f"Token request failed: status={exc.code} body={body}") access_token = token_body["access_token"] # Update realm settings safely by fetching the full realm representation first. 
-- 2.47.2 From 0f26bd508efd857e9788fe838faf3411db22d30f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 20:09:05 -0300 Subject: [PATCH 297/684] keycloak(atlas): disable browser IdP redirector --- services/keycloak/realm-settings-job.yaml | 24 ++++++++--------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml index f18ac6f..2b106d1 100644 --- a/services/keycloak/realm-settings-job.yaml +++ b/services/keycloak/realm-settings-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-realm-settings-8 + name: keycloak-realm-settings-9 namespace: sso spec: backoffLimit: 0 @@ -147,25 +147,17 @@ spec: for ex in executions: if ex.get("providerId") != "identity-provider-redirector": continue - ex_id = ex.get("id") - if not ex_id: + if ex.get("requirement") == "DISABLED": continue - status, ex_rep = http_json( - "GET", - f"{base_url}/admin/realms/{realm}/authentication/executions/{ex_id}", - access_token, - ) - if status != 200 or not ex_rep: - raise SystemExit(f"Unable to fetch browser execution {ex_id} (status={status})") - if ex_rep.get("requirement") == "DISABLED": - continue - ex_rep["requirement"] = "DISABLED" + ex["requirement"] = "DISABLED" status, _ = http_json( "PUT", - f"{base_url}/admin/realms/{realm}/authentication/executions/{ex_id}", + f"{base_url}/admin/realms/{realm}/authentication/flows/browser/executions", access_token, - ex_rep, + ex, ) if status not in (200, 204): - raise SystemExit(f"Unexpected execution update response for {ex_id}: {status}") + raise SystemExit( + f"Unexpected execution update response for identity-provider-redirector: {status}" + ) PY -- 2.47.2 From 4e1ec914f68312a8acf53dbd65d02909f72133b9 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 23:27:05 +0000 Subject: [PATCH 298/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index ae6e37d..0ae0215 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-49 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-50 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 727d8cfd48c877cf00f4e695906ed5726f6c41e5 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Fri, 2 Jan 2026 23:27:16 +0000 Subject: [PATCH 299/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 5359773..b1b47c8 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-49 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-50 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API -- 2.47.2 From 5437cebb9e363482880609afe040481c6edd6b2c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 21:03:44 -0300 Subject: [PATCH 300/684] sso: provision vaultwarden users --- scripts/vaultwarden_cred_sync.py | 98 +++++++++++++++++++ .../bstein-dev-home/backend-deployment.yaml | 2 +- 
services/bstein-dev-home/kustomization.yaml | 9 ++ .../vaultwarden-cred-sync-cronjob.yaml | 57 +++++++++++ services/keycloak/ldap-federation-job.yaml | 2 +- services/keycloak/realm-settings-job.yaml | 2 +- 6 files changed, 167 insertions(+), 3 deletions(-) create mode 100644 scripts/vaultwarden_cred_sync.py create mode 100644 services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml diff --git a/scripts/vaultwarden_cred_sync.py b/scripts/vaultwarden_cred_sync.py new file mode 100644 index 0000000..8f844de --- /dev/null +++ b/scripts/vaultwarden_cred_sync.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 + +from __future__ import annotations + +import sys +from typing import Any, Iterable + +import httpx + +from atlas_portal import settings +from atlas_portal.keycloak import admin_client +from atlas_portal.vaultwarden import invite_user + + +def _iter_keycloak_users(page_size: int = 200) -> Iterable[dict[str, Any]]: + client = admin_client() + if not client.ready(): + raise RuntimeError("keycloak admin client not configured") + + url = f"{settings.KEYCLOAK_ADMIN_URL}/admin/realms/{settings.KEYCLOAK_REALM}/users" + first = 0 + while True: + headers = client.headers() + params = {"first": str(first), "max": str(page_size)} + with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http: + resp = http.get(url, params=params, headers=headers) + resp.raise_for_status() + payload = resp.json() + + if not isinstance(payload, list) or not payload: + return + + for item in payload: + if isinstance(item, dict): + yield item + + if len(payload) < page_size: + return + first += page_size + + +def _email_for_user(user: dict[str, Any]) -> str: + email = (user.get("email") if isinstance(user.get("email"), str) else "") or "" + if email.strip(): + return email.strip() + username = (user.get("username") if isinstance(user.get("username"), str) else "") or "" + username = username.strip() + if not username: + return "" + return f"{username}@{settings.MAILU_DOMAIN}" + + +def main() 
-> int: + processed = 0 + created = 0 + skipped = 0 + failures = 0 + + for user in _iter_keycloak_users(): + username = (user.get("username") if isinstance(user.get("username"), str) else "") or "" + username = username.strip() + if not username: + skipped += 1 + continue + + enabled = user.get("enabled") + if enabled is False: + skipped += 1 + continue + + if user.get("serviceAccountClientId") or username.startswith("service-account-"): + skipped += 1 + continue + + email = _email_for_user(user) + if not email: + print(f"skip {username}: missing email", file=sys.stderr) + skipped += 1 + continue + + processed += 1 + result = invite_user(email) + if result.ok: + created += 1 + print(f"ok {username}: {result.status}") + else: + failures += 1 + print(f"err {username}: {result.status} {result.detail}", file=sys.stderr) + + print( + f"done processed={processed} created_or_present={created} skipped={skipped} failures={failures}", + file=sys.stderr, + ) + return 0 if failures == 0 else 2 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index b1b47c8..053c986 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -71,7 +71,7 @@ spec: name: atlas-portal-db key: PORTAL_DATABASE_URL - name: HTTP_CHECK_TIMEOUT_SEC - value: "10" + value: "20" - name: ACCESS_REQUEST_SUBMIT_RATE_LIMIT value: "30" - name: ACCESS_REQUEST_SUBMIT_RATE_WINDOW_SEC diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index 99b9443..3f3ebc0 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -13,4 +13,13 @@ resources: - frontend-service.yaml - backend-deployment.yaml - backend-service.yaml + - vaultwarden-cred-sync-cronjob.yaml - ingress.yaml + +configMapGenerator: + - name: vaultwarden-cred-sync-script + 
namespace: bstein-dev-home + files: + - vaultwarden_cred_sync.py=../../scripts/vaultwarden_cred_sync.py + options: + disableNameSuffixHash: true diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml new file mode 100644 index 0000000..4acf673 --- /dev/null +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -0,0 +1,57 @@ +# services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: vaultwarden-cred-sync + namespace: bstein-dev-home +spec: + schedule: "*/15 * * * *" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 3 + jobTemplate: + spec: + backoffLimit: 0 + template: + spec: + serviceAccountName: bstein-dev-home + restartPolicy: Never + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" + imagePullSecrets: + - name: harbor-bstein-robot + containers: + - name: sync + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-49 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + imagePullPolicy: Always + command: + - python + - /scripts/vaultwarden_cred_sync.py + env: + - name: KEYCLOAK_ENABLED + value: "true" + - name: KEYCLOAK_REALM + value: atlas + - name: KEYCLOAK_ADMIN_URL + value: http://keycloak.sso.svc.cluster.local + - name: KEYCLOAK_ADMIN_REALM + value: atlas + - name: KEYCLOAK_ADMIN_CLIENT_ID + value: bstein-dev-home-admin + - name: KEYCLOAK_ADMIN_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: bstein-dev-home-keycloak-admin + key: client_secret + - name: HTTP_CHECK_TIMEOUT_SEC + value: "20" + volumeMounts: + - name: vaultwarden-cred-sync-script + mountPath: /scripts + readOnly: true + volumes: + - name: vaultwarden-cred-sync-script + configMap: + name: vaultwarden-cred-sync-script + defaultMode: 0555 diff --git a/services/keycloak/ldap-federation-job.yaml b/services/keycloak/ldap-federation-job.yaml index 
5c59fff..9650468 100644 --- a/services/keycloak/ldap-federation-job.yaml +++ b/services/keycloak/ldap-federation-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-ldap-federation-4 + name: keycloak-ldap-federation-5 namespace: sso spec: backoffLimit: 2 diff --git a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml index 2b106d1..ae9b8d1 100644 --- a/services/keycloak/realm-settings-job.yaml +++ b/services/keycloak/realm-settings-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-realm-settings-9 + name: keycloak-realm-settings-10 namespace: sso spec: backoffLimit: 0 -- 2.47.2 From efb226fe075780c1a9978ddd4a985aee5cfa31f7 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 00:05:12 +0000 Subject: [PATCH 301/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 4acf673..685a598 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-49 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-50 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From bb49d584f5d5606ee86a8fdfcf5fc30ae5f18b35 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 00:09:08 +0000 Subject: [PATCH 302/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 0ae0215..a13ecf8 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-50 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-51 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 479cb81b3e5595d19a62292e97c43e4419d7f0c9 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 00:09:19 +0000 Subject: [PATCH 303/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 053c986..3893043 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-50 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-51 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 685a598..50f6592 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: 
harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-50 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-51 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From c9d9a28c036403589842bb9d49642274f04deb76 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 2 Jan 2026 21:11:21 -0300 Subject: [PATCH 304/684] portal: fix vaultwarden sync job env --- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 50f6592..4360f6b 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -29,6 +29,8 @@ spec: - python - /scripts/vaultwarden_cred_sync.py env: + - name: PYTHONPATH + value: /app - name: KEYCLOAK_ENABLED value: "true" - name: KEYCLOAK_REALM -- 2.47.2 From c62e142a87cce949e58eca062927d7934e733d3e Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 03:46:24 +0000 Subject: [PATCH 305/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index a13ecf8..d349f1b 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-51 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-52 # {"$imagepolicy": 
"bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 14a9a8403ac408411e74224e54785197383d0b92 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 03:47:34 +0000 Subject: [PATCH 306/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 3893043..97cff6c 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-51 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-52 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 4360f6b..ba13c35 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-51 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-52 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 64138ea04599f81697a1672ec5fcdd0f96b82f19 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 04:03:25 +0000 Subject: [PATCH 307/684] chore(bstein-dev-home): automated image update --- 
services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index d349f1b..3989a23 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-52 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-53 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From c080d39375e4dba0af63c63acf0db832fd2582f7 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 04:04:36 +0000 Subject: [PATCH 308/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 97cff6c..78e6f07 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-52 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-53 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index ba13c35..db6ca7b 100644 --- 
a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-52 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-53 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 10e322e853f59b8fbe5804f6298bff20e6325069 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 01:08:53 -0300 Subject: [PATCH 309/684] keycloak(atlas): default TOTP required action --- services/keycloak/realm-settings-job.yaml | 29 ++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml index ae9b8d1..cb05a6d 100644 --- a/services/keycloak/realm-settings-job.yaml +++ b/services/keycloak/realm-settings-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-realm-settings-10 + name: keycloak-realm-settings-11 namespace: sso spec: backoffLimit: 0 @@ -137,6 +137,33 @@ spec: if status not in (200, 204): raise SystemExit(f"Unexpected realm update response: {status}") + # Ensure MFA is on by default for newly-created users. 
+ status, required_actions = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/authentication/required-actions", + access_token, + ) + if status == 200 and isinstance(required_actions, list): + for action in required_actions: + if not isinstance(action, dict): + continue + if action.get("alias") != "CONFIGURE_TOTP": + continue + if action.get("enabled") is True and action.get("defaultAction") is True: + break + action["enabled"] = True + action["defaultAction"] = True + status, _ = http_json( + "PUT", + f"{base_url}/admin/realms/{realm}/authentication/required-actions/CONFIGURE_TOTP", + access_token, + action, + ) + if status not in (200, 204): + raise SystemExit( + f"Unexpected required-action update response for CONFIGURE_TOTP: {status}" + ) + # Disable Identity Provider Redirector in the browser flow for this realm. status, executions = http_json( "GET", -- 2.47.2 From e6eff8165ab7a7d596e8f1d8bbca3e8bdd1ab086 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 02:35:47 -0300 Subject: [PATCH 310/684] mailu: sync via mailu_email attribute --- .gitignore | 2 + scripts/mailu_sync.py | 47 ++++++++++++++++------- scripts/nextcloud-mail-sync.sh | 27 +++++++++---- scripts/tests/test_mailu_sync.py | 11 ++++-- services/nextcloud/mail-sync-cronjob.yaml | 2 + 5 files changed, 63 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index 88b0632..7bf3646 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ *.md !README.md +__pycache__/ +*.py[cod] diff --git a/scripts/mailu_sync.py b/scripts/mailu_sync.py index 4d08a62..74b170a 100644 --- a/scripts/mailu_sync.py +++ b/scripts/mailu_sync.py @@ -110,13 +110,33 @@ def random_password(): alphabet = string.ascii_letters + string.digits return "".join(secrets.choice(alphabet) for _ in range(24)) +def get_attribute_value(attributes, key): + raw = (attributes or {}).get(key) + if isinstance(raw, list): + return raw[0] if raw else None + if isinstance(raw, str): + return raw + return None + + +def 
resolve_mailu_email(user, attributes): + explicit = get_attribute_value(attributes, "mailu_email") + if explicit: + return explicit + + email = user.get("email") or "" + if "@" in email and email.lower().endswith(f"@{MAILU_DOMAIN.lower()}"): + return email + + return f"{user['username']}@{MAILU_DOMAIN}" + def ensure_mailu_user(cursor, email, password, display_name): localpart, domain = email.split("@", 1) if domain.lower() != MAILU_DOMAIN.lower(): return hashed = bcrypt_sha256.hash(password) - now = datetime.datetime.utcnow() + now = datetime.datetime.now(datetime.timezone.utc) cursor.execute( """ INSERT INTO "user" ( @@ -167,30 +187,29 @@ def main(): for user in users: attrs = user.get("attributes", {}) or {} - app_pw_value = attrs.get("mailu_app_password") - if isinstance(app_pw_value, list): - app_pw = app_pw_value[0] if app_pw_value else None - elif isinstance(app_pw_value, str): - app_pw = app_pw_value - else: - app_pw = None + app_pw = get_attribute_value(attrs, "mailu_app_password") + mailu_email = resolve_mailu_email(user, attrs) - email = user.get("email") - if not email: - email = f"{user['username']}@{MAILU_DOMAIN}" + needs_update = False + if not get_attribute_value(attrs, "mailu_email"): + attrs["mailu_email"] = [mailu_email] + needs_update = True if not app_pw: app_pw = random_password() attrs["mailu_app_password"] = [app_pw] + needs_update = True + + if needs_update: kc_update_attributes(token, user, attrs) - log(f"Set mailu_app_password for {email}") + log(f"Updated Mailu attributes for {mailu_email}") display_name = " ".join( part for part in [user.get("firstName"), user.get("lastName")] if part ).strip() - ensure_mailu_user(cursor, email, app_pw, display_name) - log(f"Synced mailbox for {email}") + ensure_mailu_user(cursor, mailu_email, app_pw, display_name) + log(f"Synced mailbox for {mailu_email}") cursor.close() conn.close() diff --git a/scripts/nextcloud-mail-sync.sh b/scripts/nextcloud-mail-sync.sh index 4476e7f..e31da48 100755 --- 
a/scripts/nextcloud-mail-sync.sh +++ b/scripts/nextcloud-mail-sync.sh @@ -5,6 +5,7 @@ KC_BASE="${KC_BASE:?}" KC_REALM="${KC_REALM:?}" KC_ADMIN_USER="${KC_ADMIN_USER:?}" KC_ADMIN_PASS="${KC_ADMIN_PASS:?}" +MAILU_DOMAIN="${MAILU_DOMAIN:?}" if ! command -v jq >/dev/null 2>&1; then apt-get update && apt-get install -y jq curl >/dev/null @@ -45,16 +46,26 @@ users=$(curl -s -H "Authorization: Bearer ${token}" \ echo "${users}" | jq -c '.[]' | while read -r user; do username=$(echo "${user}" | jq -r '.username') - email=$(echo "${user}" | jq -r '.email // empty') + keycloak_email=$(echo "${user}" | jq -r '.email // empty') + mailu_email=$(echo "${user}" | jq -r '(.attributes.mailu_email[0] // .attributes.mailu_email // empty)') app_pw=$(echo "${user}" | jq -r '(.attributes.mailu_app_password[0] // .attributes.mailu_app_password // empty)') - [[ -z "${email}" || -z "${app_pw}" ]] && continue - if account_exists "${username}" "${email}"; then - echo "Skipping ${email}, already exists" + + if [[ -z "${mailu_email}" ]]; then + if [[ -n "${keycloak_email}" && "${keycloak_email,,}" == *"@${MAILU_DOMAIN,,}" ]]; then + mailu_email="${keycloak_email}" + else + mailu_email="${username}@${MAILU_DOMAIN}" + fi + fi + + [[ -z "${mailu_email}" || -z "${app_pw}" ]] && continue + if account_exists "${username}" "${mailu_email}"; then + echo "Skipping ${mailu_email}, already exists" continue fi - echo "Syncing ${email}" + echo "Syncing ${mailu_email}" /usr/sbin/runuser -u www-data -- php occ mail:account:create \ - "${username}" "${username}" "${email}" \ - mail.bstein.dev 993 ssl "${email}" "${app_pw}" \ - mail.bstein.dev 587 tls "${email}" "${app_pw}" || true + "${username}" "${username}" "${mailu_email}" \ + mail.bstein.dev 993 ssl "${mailu_email}" "${app_pw}" \ + mail.bstein.dev 587 tls "${mailu_email}" "${app_pw}" || true done diff --git a/scripts/tests/test_mailu_sync.py b/scripts/tests/test_mailu_sync.py index 41616b2..9e5f383 100644 --- a/scripts/tests/test_mailu_sync.py +++ 
b/scripts/tests/test_mailu_sync.py @@ -102,7 +102,8 @@ def test_kc_get_users_paginates(monkeypatch): sync.SESSION = _PagedSession() users = sync.kc_get_users("tok") assert [u["id"] for u in users] == ["u1", "u2"] - assert sync.SESSION.calls == 2 + # Pagination stops when results < page size. + assert sync.SESSION.calls == 1 def test_ensure_mailu_user_skips_foreign_domain(monkeypatch): @@ -119,6 +120,7 @@ def test_ensure_mailu_user_skips_foreign_domain(monkeypatch): def test_ensure_mailu_user_upserts(monkeypatch): sync = load_sync_module(monkeypatch) + monkeypatch.setattr(sync.bcrypt_sha256, "hash", lambda password: f"hash:{password}") captured = {} class _Cursor: @@ -134,6 +136,7 @@ def test_ensure_mailu_user_upserts(monkeypatch): def test_main_generates_password_and_upserts(monkeypatch): sync = load_sync_module(monkeypatch) + monkeypatch.setattr(sync.bcrypt_sha256, "hash", lambda password: f"hash:{password}") users = [ {"id": "u1", "username": "user1", "email": "user1@example.com", "attributes": {}}, {"id": "u2", "username": "user2", "email": "user2@example.com", "attributes": {"mailu_app_password": ["keepme"]}}, @@ -176,6 +179,6 @@ def test_main_generates_password_and_upserts(monkeypatch): sync.main() - # Should attempt two inserts (third user skipped due to domain mismatch) - assert len(updated) == 1 # only one missing attr was backfilled - assert conns and len(conns[0]._cursor.executions) == 2 + # Always backfill mailu_email, even if Keycloak recovery email is external. 
+ assert len(updated) == 3 + assert conns and len(conns[0]._cursor.executions) == 3 diff --git a/services/nextcloud/mail-sync-cronjob.yaml b/services/nextcloud/mail-sync-cronjob.yaml index 52dc3ea..809bc78 100644 --- a/services/nextcloud/mail-sync-cronjob.yaml +++ b/services/nextcloud/mail-sync-cronjob.yaml @@ -25,6 +25,8 @@ spec: value: https://sso.bstein.dev - name: KC_REALM value: atlas + - name: MAILU_DOMAIN + value: bstein.dev - name: KC_ADMIN_USER valueFrom: secretKeyRef: -- 2.47.2 From 76bb48eac12c6a1f281e28df14eec3f7d5a8af60 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 05:40:32 +0000 Subject: [PATCH 311/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 3989a23..c10a2fc 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-53 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-54 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 23ebcbaf9298979cb6c63f7fbbc4353f89d6c812 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 05:41:43 +0000 Subject: [PATCH 312/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 78e6f07..8862580 100644 --- 
a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-53 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-54 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index db6ca7b..1a6f98e 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-53 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-54 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From b1706397b6f6573535342abd00814414ccd66b00 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 06:16:34 +0000 Subject: [PATCH 313/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index c10a2fc..82d0f39 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-54 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: 
registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-55 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 5c618c65608c03c67febd44392225071c8e26574 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 06:17:45 +0000 Subject: [PATCH 314/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 8862580..98c833f 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-54 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-55 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 1a6f98e..6f25361 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-54 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-55 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 0b211520cbdc3bc3f9494518aad16cae6ecd0f2f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 03:32:38 
-0300 Subject: [PATCH 315/684] keycloak: allow mailu_email + groups --- services/keycloak/realm-settings-job.yaml | 52 ++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml index cb05a6d..bd4fa25 100644 --- a/services/keycloak/realm-settings-job.yaml +++ b/services/keycloak/realm-settings-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-realm-settings-11 + name: keycloak-realm-settings-12 namespace: sso spec: backoffLimit: 0 @@ -137,6 +137,56 @@ spec: if status not in (200, 204): raise SystemExit(f"Unexpected realm update response: {status}") + # Ensure required custom user-profile attributes exist. + profile_url = f"{base_url}/admin/realms/{realm}/users/profile" + status, profile = http_json("GET", profile_url, access_token) + if status == 200 and isinstance(profile, dict): + attrs = profile.get("attributes") + if not isinstance(attrs, list): + attrs = [] + has_mailu_email = any( + isinstance(item, dict) and item.get("name") == "mailu_email" for item in attrs + ) + if not has_mailu_email: + attrs.append( + { + "name": "mailu_email", + "displayName": "Atlas Mailbox", + "multivalued": False, + "annotations": {"group": "user-metadata"}, + "permissions": {"view": ["admin"], "edit": ["admin"]}, + "validations": {"email": {}, "length": {"max": 255}}, + } + ) + profile["attributes"] = attrs + status, _ = http_json("PUT", profile_url, access_token, profile) + if status not in (200, 204): + raise SystemExit(f"Unexpected user-profile update response: {status}") + + # Ensure basic realm groups exist for provisioning. 
+ for group_name in ("dev", "admin"): + status, groups = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/groups?search={urllib.parse.quote(group_name)}", + access_token, + ) + exists = False + if status == 200 and isinstance(groups, list): + for item in groups: + if isinstance(item, dict) and item.get("name") == group_name: + exists = True + break + if exists: + continue + status, _ = http_json( + "POST", + f"{base_url}/admin/realms/{realm}/groups", + access_token, + {"name": group_name}, + ) + if status not in (201, 204): + raise SystemExit(f"Unexpected group create response for {group_name}: {status}") + # Ensure MFA is on by default for newly-created users. status, required_actions = http_json( "GET", -- 2.47.2 From 335906aafcdf17dad5afc0b222bcdaa3f37ddd2b Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 07:13:39 +0000 Subject: [PATCH 316/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 82d0f39..4fd9316 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-55 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-56 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From cc677eb7f3be24e99a2a0752b637cc0dec2c2cd3 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 07:14:49 +0000 Subject: [PATCH 317/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 
services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 98c833f..b4f5dfe 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-55 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-56 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 6f25361..a83fbb0 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-55 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-56 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From fff9ffbba7bcaa161e2f0ccfaf998807adc6363e Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 07:32:40 +0000 Subject: [PATCH 318/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 4fd9316..c75e1b4 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ 
b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-56 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-57 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 6360012155a1c3948e757266fcc2b822f46677ac Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 07:33:50 +0000 Subject: [PATCH 319/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index b4f5dfe..8bb6ab1 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-56 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-57 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index a83fbb0..422be8c 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-56 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: 
registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-57 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From dd3b940ee747ea78f65dfa9fca191758ef012d6e Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 08:00:42 +0000 Subject: [PATCH 320/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index c75e1b4..6f30068 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-57 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-58 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From a7222878c37b7b55ec22246b317abb6a4dee9ad5 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 08:01:52 +0000 Subject: [PATCH 321/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 8bb6ab1..892ad87 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-57 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: 
registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-58 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 422be8c..412c722 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-57 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-58 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 25ce112c82df134287d8787f086fe8e7545e171e Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 08:15:43 +0000 Subject: [PATCH 322/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 6f30068..9db13f2 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-58 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-59 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 034acdaaf22600cbb3ce3e6db8d3646c733851e3 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 08:16:54 +0000 Subject: [PATCH 323/684] chore(bstein-dev-home): 
automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 892ad87..9824d3e 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-58 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-59 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 412c722..7e8253d 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-58 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-59 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 747b6aacb6504d8d34e2eb7cb3733a835b61294a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 06:14:56 -0300 Subject: [PATCH 324/684] keycloak: set bstein mailu_email --- services/keycloak/kustomization.yaml | 1 + services/keycloak/user-overrides-job.yaml | 145 ++++++++++++++++++++++ 2 files changed, 146 insertions(+) create mode 100644 services/keycloak/user-overrides-job.yaml diff --git a/services/keycloak/kustomization.yaml 
b/services/keycloak/kustomization.yaml index 5fb05ef..05b410d 100644 --- a/services/keycloak/kustomization.yaml +++ b/services/keycloak/kustomization.yaml @@ -8,5 +8,6 @@ resources: - deployment.yaml - realm-settings-job.yaml - ldap-federation-job.yaml + - user-overrides-job.yaml - service.yaml - ingress.yaml diff --git a/services/keycloak/user-overrides-job.yaml b/services/keycloak/user-overrides-job.yaml new file mode 100644 index 0000000..43813ee --- /dev/null +++ b/services/keycloak/user-overrides-job.yaml @@ -0,0 +1,145 @@ +# services/keycloak/user-overrides-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: keycloak-user-overrides-1 + namespace: sso +spec: + backoffLimit: 0 + template: + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: ["rpi5", "rpi4"] + - key: node-role.kubernetes.io/worker + operator: Exists + restartPolicy: Never + containers: + - name: configure + image: python:3.11-alpine + env: + - name: KEYCLOAK_SERVER + value: http://keycloak.sso.svc.cluster.local + - name: KEYCLOAK_REALM + value: atlas + - name: KEYCLOAK_ADMIN_USER + valueFrom: + secretKeyRef: + name: keycloak-admin + key: username + - name: KEYCLOAK_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: keycloak-admin + key: password + - name: OVERRIDE_USERNAME + value: bstein + - name: OVERRIDE_MAILU_EMAIL + value: brad@bstein.dev + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + python - <<'PY' + import json + import os + import urllib.parse + import urllib.error + import urllib.request + + base_url = os.environ["KEYCLOAK_SERVER"].rstrip("/") + realm = os.environ["KEYCLOAK_REALM"] + admin_user = os.environ["KEYCLOAK_ADMIN_USER"] + admin_password = os.environ["KEYCLOAK_ADMIN_PASSWORD"] + + override_username = os.environ["OVERRIDE_USERNAME"].strip() + override_mailu_email = os.environ["OVERRIDE_MAILU_EMAIL"].strip() + if not override_username 
or not override_mailu_email: + raise SystemExit("Missing override inputs") + + def http_json(method: str, url: str, token: str, payload=None): + data = None + headers = {"Authorization": f"Bearer {token}"} + if payload is not None: + data = json.dumps(payload).encode() + headers["Content-Type"] = "application/json" + req = urllib.request.Request(url, data=data, headers=headers, method=method) + try: + with urllib.request.urlopen(req, timeout=30) as resp: + body = resp.read() + if not body: + return resp.status, None + return resp.status, json.loads(body.decode()) + except urllib.error.HTTPError as exc: + raw = exc.read() + if not raw: + return exc.code, None + try: + return exc.code, json.loads(raw.decode()) + except Exception: + return exc.code, {"raw": raw.decode(errors="replace")} + + token_data = urllib.parse.urlencode( + { + "grant_type": "password", + "client_id": "admin-cli", + "username": admin_user, + "password": admin_password, + } + ).encode() + token_req = urllib.request.Request( + f"{base_url}/realms/master/protocol/openid-connect/token", + data=token_data, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + method="POST", + ) + try: + with urllib.request.urlopen(token_req, timeout=10) as resp: + token_body = json.loads(resp.read().decode()) + except urllib.error.HTTPError as exc: + body = exc.read().decode(errors="replace") + raise SystemExit(f"Token request failed: status={exc.code} body={body}") + access_token = token_body["access_token"] + + # Find target user id. 
+ status, users = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/users?username={urllib.parse.quote(override_username)}&exact=true&max=1", + access_token, + ) + if status != 200 or not isinstance(users, list) or not users: + raise SystemExit(f"User not found: {override_username}") + user = users[0] if isinstance(users[0], dict) else None + user_id = (user or {}).get("id") or "" + if not user_id: + raise SystemExit("User id missing") + + # Fetch full user and update only attributes. + status, full = http_json("GET", f"{base_url}/admin/realms/{realm}/users/{user_id}", access_token) + if status != 200 or not isinstance(full, dict): + raise SystemExit("Unable to fetch user") + + attrs = full.get("attributes") or {} + if not isinstance(attrs, dict): + attrs = {} + existing = attrs.get("mailu_email") + if isinstance(existing, list) and existing and existing[0] == override_mailu_email: + raise SystemExit(0) + if isinstance(existing, str) and existing == override_mailu_email: + raise SystemExit(0) + + attrs["mailu_email"] = [override_mailu_email] + status, _ = http_json( + "PUT", + f"{base_url}/admin/realms/{realm}/users/{user_id}", + access_token, + {"attributes": attrs}, + ) + if status not in (200, 204): + raise SystemExit(f"Unexpected user update response: {status}") + PY -- 2.47.2 From 2d17d03b3d7ade2947447a7c2ec0825604e0f541 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 09:28:48 +0000 Subject: [PATCH 325/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 9db13f2..fbe1e83 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: 
registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-59 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-60 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From e82be4955b08eaa32c73d9604dbb0c26327fa95e Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 09:29:59 +0000 Subject: [PATCH 326/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 9824d3e..a3a9a7a 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-59 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-60 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 7e8253d..a6b3c3c 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-59 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-60 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - 
python -- 2.47.2 From 9d8c11385001fea3f18475c9221a5ad47396c4c0 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 09:52:50 +0000 Subject: [PATCH 327/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index fbe1e83..3d08cd8 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-60 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-61 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 51f94194be0a2f7aa090f9a8ac01834d8458979d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 06:52:53 -0300 Subject: [PATCH 328/684] fix(nextcloud): dedupe + update mail accounts --- scripts/nextcloud-mail-sync.sh | 89 ++++++++++++++++++++++++++++------ 1 file changed, 75 insertions(+), 14 deletions(-) diff --git a/scripts/nextcloud-mail-sync.sh b/scripts/nextcloud-mail-sync.sh index e31da48..9865176 100755 --- a/scripts/nextcloud-mail-sync.sh +++ b/scripts/nextcloud-mail-sync.sh @@ -11,19 +11,33 @@ if ! command -v jq >/dev/null 2>&1; then apt-get update && apt-get install -y jq curl >/dev/null fi -account_exists() { +list_mail_accounts() { local user_id="${1}" - local email="${2}" + local export_out # Nextcloud Mail does not provide a list command; export is safe (does not print passwords). - local export - if ! export=$(/usr/sbin/runuser -u www-data -- php occ mail:account:export "${user_id}" 2>/dev/null); then + if ! 
export_out=$(/usr/sbin/runuser -u www-data -- php occ mail:account:export "${user_id}" 2>/dev/null); then echo "WARN: unable to export mail accounts for ${user_id}; skipping sync for safety" >&2 - return 0 + return 1 fi - # Output formatting varies by Nextcloud/Mail versions and locale; match by email address. - grep -Fq -- "${email}" <<<"${export}" + # The export output is human-readable and includes blocks like: + # Account 10: + # - E-Mail: user@example.com + # Extract "account-id email" pairs. + awk ' + /^Account[[:space:]]+[0-9]+:/ { + id=$2; + sub(/:$/, "", id); + next; + } + id != "" && /@/ { + if (match($0, /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/, m)) { + printf("%s\t%s\n", id, m[0]); + id=""; + } + } + ' <<<"${export_out}" | sort -u } token=$( @@ -59,13 +73,60 @@ echo "${users}" | jq -c '.[]' | while read -r user; do fi [[ -z "${mailu_email}" || -z "${app_pw}" ]] && continue - if account_exists "${username}" "${mailu_email}"; then - echo "Skipping ${mailu_email}, already exists" + + if ! accounts=$(list_mail_accounts "${username}"); then continue fi - echo "Syncing ${mailu_email}" - /usr/sbin/runuser -u www-data -- php occ mail:account:create \ - "${username}" "${username}" "${mailu_email}" \ - mail.bstein.dev 993 ssl "${mailu_email}" "${app_pw}" \ - mail.bstein.dev 587 tls "${mailu_email}" "${app_pw}" || true + + # Manage only internal Mailu-domain accounts; leave any external accounts untouched. 
+ mailu_accounts=$(awk -v d="${MAILU_DOMAIN,,}" 'tolower($2) ~ ("@" d "$") {print}' <<<"${accounts}" || true) + + desired_email="${mailu_email}" + primary_id="" + primary_email="" + + if [[ -n "${mailu_accounts}" ]]; then + while IFS=$'\t' read -r account_id account_email; do + if [[ -z "${primary_id}" ]]; then + primary_id="${account_id}" + primary_email="${account_email}" + fi + if [[ "${account_email,,}" == "${desired_email,,}" ]]; then + primary_id="${account_id}" + primary_email="${account_email}" + break + fi + done <<<"${mailu_accounts}" + + echo "Updating ${username} mail account ${primary_id} (${primary_email})" + /usr/sbin/runuser -u www-data -- php occ mail:account:update -q "${primary_id}" \ + --name "${username}" \ + --email "${desired_email}" \ + --imap-host mail.bstein.dev \ + --imap-port 993 \ + --imap-ssl-mode ssl \ + --imap-user "${desired_email}" \ + --imap-password "${app_pw}" \ + --smtp-host mail.bstein.dev \ + --smtp-port 587 \ + --smtp-ssl-mode tls \ + --smtp-user "${desired_email}" \ + --smtp-password "${app_pw}" \ + --auth-method password >/dev/null 2>&1 || true + + # Remove any extra Mailu-domain accounts for this user to prevent duplicates. 
+ while IFS=$'\t' read -r account_id account_email; do + if [[ "${account_id}" == "${primary_id}" ]]; then + continue + fi + echo "Deleting extra mail account ${account_id} (${account_email})" + /usr/sbin/runuser -u www-data -- php occ mail:account:delete -q "${account_id}" >/dev/null 2>&1 || true + done <<<"${mailu_accounts}" + else + echo "Creating mail account for ${username} (${desired_email})" + /usr/sbin/runuser -u www-data -- php occ mail:account:create -q \ + "${username}" "${username}" "${desired_email}" \ + mail.bstein.dev 993 ssl "${desired_email}" "${app_pw}" \ + mail.bstein.dev 587 tls "${desired_email}" "${app_pw}" password >/dev/null 2>&1 || true + fi done -- 2.47.2 From b06dcb226320b439034180bde12dc58b4413f612 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 09:54:01 +0000 Subject: [PATCH 329/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index a3a9a7a..545cbdb 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-60 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-61 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index a6b3c3c..fb7265b 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ 
b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-60 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-61 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From a76d94443328257920879079e944ddc851c400be Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 07:03:43 -0300 Subject: [PATCH 330/684] nextcloud-mail-sync: manage CronJob via Flux --- services/nextcloud-mail-sync/cronjob.yaml | 69 +++++++++++++++++++ .../nextcloud-mail-sync/kustomization.yaml | 2 + 2 files changed, 71 insertions(+) create mode 100644 services/nextcloud-mail-sync/cronjob.yaml diff --git a/services/nextcloud-mail-sync/cronjob.yaml b/services/nextcloud-mail-sync/cronjob.yaml new file mode 100644 index 0000000..55a593d --- /dev/null +++ b/services/nextcloud-mail-sync/cronjob.yaml @@ -0,0 +1,69 @@ +# services/nextcloud-mail-sync/cronjob.yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: nextcloud-mail-sync + namespace: nextcloud +spec: + schedule: "0 5 * * *" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 3 + failedJobsHistoryLimit: 1 + jobTemplate: + spec: + template: + spec: + restartPolicy: OnFailure + securityContext: + runAsUser: 0 + runAsGroup: 0 + containers: + - name: mail-sync + image: nextcloud:29-apache + imagePullPolicy: IfNotPresent + command: + - /bin/bash + - /sync/sync.sh + env: + - name: KC_BASE + value: https://sso.bstein.dev + - name: KC_REALM + value: atlas + - name: KC_ADMIN_USER + valueFrom: + secretKeyRef: + name: nextcloud-keycloak-admin + key: username + - name: KC_ADMIN_PASS + valueFrom: + secretKeyRef: + name: nextcloud-keycloak-admin + key: password + - name: MAILU_DOMAIN + value: bstein.dev + resources: + requests: + cpu: 100m + memory: 256Mi + 
limits: + cpu: 500m + memory: 512Mi + volumeMounts: + - name: nextcloud-app + mountPath: /var/www/html + - name: nextcloud-user-data + mountPath: /data/userdata + - name: sync-script + mountPath: /sync/sync.sh + subPath: sync.sh + volumes: + - name: nextcloud-app + persistentVolumeClaim: + claimName: nextcloud-app + - name: nextcloud-user-data + persistentVolumeClaim: + claimName: nextcloud-user-data + - name: sync-script + configMap: + name: nextcloud-mail-sync-script + defaultMode: 0755 diff --git a/services/nextcloud-mail-sync/kustomization.yaml b/services/nextcloud-mail-sync/kustomization.yaml index cc1fa68..c349f1e 100644 --- a/services/nextcloud-mail-sync/kustomization.yaml +++ b/services/nextcloud-mail-sync/kustomization.yaml @@ -2,6 +2,8 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization namespace: nextcloud +resources: + - cronjob.yaml configMapGenerator: - name: nextcloud-mail-sync-script files: -- 2.47.2 From c165087edac8f875ded093aab8074eab43965eac Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 07:06:30 -0300 Subject: [PATCH 331/684] fix(nextcloud-mail-sync): portable email parsing --- scripts/nextcloud-mail-sync.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/nextcloud-mail-sync.sh b/scripts/nextcloud-mail-sync.sh index 9865176..34a3b89 100755 --- a/scripts/nextcloud-mail-sync.sh +++ b/scripts/nextcloud-mail-sync.sh @@ -32,8 +32,8 @@ list_mail_accounts() { next; } id != "" && /@/ { - if (match($0, /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/, m)) { - printf("%s\t%s\n", id, m[0]); + if (match($0, /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/)) { + printf("%s\t%s\n", id, substr($0, RSTART, RLENGTH)); id=""; } } -- 2.47.2 From 6cd63b067dbebcae206ac09455aa9e3ac668979b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 07:13:58 -0300 Subject: [PATCH 332/684] fix(nextcloud-mail-sync): capture occ export output reliably --- scripts/nextcloud-mail-sync.sh | 3 ++- 1 file 
changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/nextcloud-mail-sync.sh b/scripts/nextcloud-mail-sync.sh index 34a3b89..1d64685 100755 --- a/scripts/nextcloud-mail-sync.sh +++ b/scripts/nextcloud-mail-sync.sh @@ -16,7 +16,8 @@ list_mail_accounts() { local export_out # Nextcloud Mail does not provide a list command; export is safe (does not print passwords). - if ! export_out=$(/usr/sbin/runuser -u www-data -- php occ mail:account:export "${user_id}" 2>/dev/null); then + # Some occ commands emit to stderr; capture both streams so we don't mis-detect "no accounts". + if ! export_out=$(/usr/sbin/runuser -u www-data -- php occ mail:account:export "${user_id}" 2>&1); then echo "WARN: unable to export mail accounts for ${user_id}; skipping sync for safety" >&2 return 1 fi -- 2.47.2 From c7c2e03ea21f725f2e950cfe3cf41e94f8e703db Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 07:18:50 -0300 Subject: [PATCH 333/684] fix(nextcloud-mail-sync): mawk-compatible email regex --- scripts/nextcloud-mail-sync.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/nextcloud-mail-sync.sh b/scripts/nextcloud-mail-sync.sh index 1d64685..2efe58a 100755 --- a/scripts/nextcloud-mail-sync.sh +++ b/scripts/nextcloud-mail-sync.sh @@ -33,7 +33,8 @@ list_mail_accounts() { next; } id != "" && /@/ { - if (match($0, /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/)) { + # Keep the regex simple (mawk doesn't support interval expressions like {2,}). 
+ if (match($0, /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+/)) { printf("%s\t%s\n", id, substr($0, RSTART, RLENGTH)); id=""; } -- 2.47.2 From caa23e6f1c62ab468ccff72ab1bcec7f453ed46d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 07:39:45 -0300 Subject: [PATCH 334/684] fix(nextcloud-mail-sync): fix bash syntax --- scripts/nextcloud-mail-sync.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/nextcloud-mail-sync.sh b/scripts/nextcloud-mail-sync.sh index 2efe58a..dd606d9 100755 --- a/scripts/nextcloud-mail-sync.sh +++ b/scripts/nextcloud-mail-sync.sh @@ -33,7 +33,7 @@ list_mail_accounts() { next; } id != "" && /@/ { - # Keep the regex simple (mawk doesn't support interval expressions like {2,}). + # Keep the regex simple (mawk does not support interval expressions like {2,}). if (match($0, /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+/)) { printf("%s\t%s\n", id, substr($0, RSTART, RLENGTH)); id=""; -- 2.47.2 From 51b0a88a62efd875f2589a9f2543ee9b3a9e8ff6 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 07:44:24 -0300 Subject: [PATCH 335/684] nextcloud: delegate mail sync to separate kustomization --- services/nextcloud/kustomization.yaml | 6 --- services/nextcloud/mail-sync-cronjob.yaml | 60 ----------------------- 2 files changed, 66 deletions(-) delete mode 100644 services/nextcloud/mail-sync-cronjob.yaml diff --git a/services/nextcloud/kustomization.yaml b/services/nextcloud/kustomization.yaml index 5e3b414..66b00d4 100644 --- a/services/nextcloud/kustomization.yaml +++ b/services/nextcloud/kustomization.yaml @@ -10,7 +10,6 @@ resources: - service.yaml - ingress.yaml - cronjob.yaml - - mail-sync-cronjob.yaml - maintenance-cronjob.yaml configMapGenerator: - name: nextcloud-maintenance-script @@ -18,8 +17,3 @@ configMapGenerator: - maintenance.sh=../../scripts/nextcloud-maintenance.sh options: disableNameSuffixHash: true - - name: nextcloud-mail-sync-script - files: - - sync.sh=../../scripts/nextcloud-mail-sync.sh - options: - 
disableNameSuffixHash: true diff --git a/services/nextcloud/mail-sync-cronjob.yaml b/services/nextcloud/mail-sync-cronjob.yaml deleted file mode 100644 index 809bc78..0000000 --- a/services/nextcloud/mail-sync-cronjob.yaml +++ /dev/null @@ -1,60 +0,0 @@ -# services/nextcloud/mail-sync-cronjob.yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: nextcloud-mail-sync - namespace: nextcloud -spec: - schedule: "0 5 * * *" - concurrencyPolicy: Forbid - jobTemplate: - spec: - template: - spec: - restartPolicy: OnFailure - securityContext: - runAsUser: 0 - runAsGroup: 0 - containers: - - name: mail-sync - image: nextcloud:29-apache - imagePullPolicy: IfNotPresent - command: ["/bin/bash", "/sync/sync.sh"] - env: - - name: KC_BASE - value: https://sso.bstein.dev - - name: KC_REALM - value: atlas - - name: MAILU_DOMAIN - value: bstein.dev - - name: KC_ADMIN_USER - valueFrom: - secretKeyRef: - name: nextcloud-keycloak-admin - key: username - - name: KC_ADMIN_PASS - valueFrom: - secretKeyRef: - name: nextcloud-keycloak-admin - key: password - volumeMounts: - - name: nextcloud-data - mountPath: /var/www/html - - name: sync-script - mountPath: /sync/sync.sh - subPath: sync.sh - resources: - requests: - cpu: 100m - memory: 256Mi - limits: - cpu: 500m - memory: 512Mi - volumes: - - name: nextcloud-data - persistentVolumeClaim: - claimName: nextcloud-data - - name: sync-script - configMap: - name: nextcloud-mail-sync-script - defaultMode: 0755 -- 2.47.2 From 91106ee2981c978a5106db2ada07945ea5736ca7 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 12:18:29 -0300 Subject: [PATCH 336/684] nextcloud: per-user mail sync + portal RBAC --- scripts/nextcloud-mail-sync.sh | 80 ++++++++++++++++++++++++++++-- services/bstein-dev-home/rbac.yaml | 31 ++++++++++++ 2 files changed, 106 insertions(+), 5 deletions(-) diff --git a/scripts/nextcloud-mail-sync.sh b/scripts/nextcloud-mail-sync.sh index dd606d9..a3ca3b6 100755 --- a/scripts/nextcloud-mail-sync.sh +++ 
b/scripts/nextcloud-mail-sync.sh @@ -6,6 +6,7 @@ KC_REALM="${KC_REALM:?}" KC_ADMIN_USER="${KC_ADMIN_USER:?}" KC_ADMIN_PASS="${KC_ADMIN_PASS:?}" MAILU_DOMAIN="${MAILU_DOMAIN:?}" +ONLY_USERNAME="${ONLY_USERNAME:-}" if ! command -v jq >/dev/null 2>&1; then apt-get update && apt-get install -y jq curl >/dev/null @@ -57,11 +58,52 @@ fi cd /var/www/html -users=$(curl -s -H "Authorization: Bearer ${token}" \ - "${KC_BASE}/admin/realms/${KC_REALM}/users?max=2000") +kc_users_url="${KC_BASE}/admin/realms/${KC_REALM}/users?max=2000" +if [[ -n "${ONLY_USERNAME}" ]]; then + username_q=$(jq -nr --arg v "${ONLY_USERNAME}" '$v|@uri') + kc_users_url="${KC_BASE}/admin/realms/${KC_REALM}/users?username=${username_q}&exact=true&max=1" +fi -echo "${users}" | jq -c '.[]' | while read -r user; do - username=$(echo "${user}" | jq -r '.username') +users=$(curl -s -H "Authorization: Bearer ${token}" "${kc_users_url}") + +kc_set_user_mail_meta() { + local user_id="${1}" + local primary_email="${2}" + local mailu_account_count="${3}" + local synced_at="${4}" + + # Fetch the full user representation so we don't accidentally clobber attributes. + local user_json updated_json + if ! 
user_json=$(curl -fsS -H "Authorization: Bearer ${token}" \ + "${KC_BASE}/admin/realms/${KC_REALM}/users/${user_id}"); then + echo "WARN: unable to fetch Keycloak user ${user_id} for metadata writeback" >&2 + return 1 + fi + + updated_json=$( + jq -c \ + --arg primary_email "${primary_email}" \ + --arg mailu_account_count "${mailu_account_count}" \ + --arg synced_at "${synced_at}" \ + ' + .attributes = (.attributes // {}) | + .attributes.nextcloud_mail_primary_email = [$primary_email] | + .attributes.nextcloud_mail_account_count = [$mailu_account_count] | + .attributes.nextcloud_mail_synced_at = [$synced_at] | + del(.access) + ' <<<"${user_json}" + ) + + curl -fsS -X PUT \ + -H "Authorization: Bearer ${token}" \ + -H "Content-Type: application/json" \ + -d "${updated_json}" \ + "${KC_BASE}/admin/realms/${KC_REALM}/users/${user_id}" >/dev/null +} + +while read -r user; do + user_id=$(jq -r '.id' <<<"${user}") + username=$(jq -r '.username' <<<"${user}") keycloak_email=$(echo "${user}" | jq -r '.email // empty') mailu_email=$(echo "${user}" | jq -r '(.attributes.mailu_email[0] // .attributes.mailu_email // empty)') app_pw=$(echo "${user}" | jq -r '(.attributes.mailu_app_password[0] // .attributes.mailu_app_password // empty)') @@ -131,4 +173,32 @@ echo "${users}" | jq -c '.[]' | while read -r user; do mail.bstein.dev 993 ssl "${desired_email}" "${app_pw}" \ mail.bstein.dev 587 tls "${desired_email}" "${app_pw}" password >/dev/null 2>&1 || true fi -done + + # Write non-secret metadata back to Keycloak for UI introspection and onboarding gating. 
+ synced_at=$(date -u +"%Y-%m-%dT%H:%M:%SZ") + if accounts_after=$(list_mail_accounts "${username}"); then + mailu_accounts_after=$(awk -v d="${MAILU_DOMAIN,,}" 'tolower($2) ~ ("@" d "$") {print}' <<<"${accounts_after}" || true) + if [[ -n "${mailu_accounts_after}" ]]; then + mailu_account_count=$(printf '%s\n' "${mailu_accounts_after}" | wc -l | tr -d ' ') + else + mailu_account_count="0" + fi + primary_email_after="" + if [[ -n "${mailu_accounts_after}" ]]; then + while IFS=$'\t' read -r _account_id account_email; do + if [[ "${account_email,,}" == "${desired_email,,}" ]]; then + primary_email_after="${account_email}" + break + fi + if [[ -z "${primary_email_after}" ]]; then + primary_email_after="${account_email}" + fi + done <<<"${mailu_accounts_after}" + fi + else + mailu_account_count="0" + primary_email_after="" + fi + + kc_set_user_mail_meta "${user_id}" "${primary_email_after}" "${mailu_account_count}" "${synced_at}" || true +done < <(jq -c '.[]' <<<"${users}") diff --git a/services/bstein-dev-home/rbac.yaml b/services/bstein-dev-home/rbac.yaml index cbc8050..f97ed24 100644 --- a/services/bstein-dev-home/rbac.yaml +++ b/services/bstein-dev-home/rbac.yaml @@ -75,3 +75,34 @@ subjects: - kind: ServiceAccount name: bstein-dev-home namespace: bstein-dev-home +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: bstein-dev-home-nextcloud-mail-sync + namespace: nextcloud +rules: + - apiGroups: ["batch"] + resources: ["cronjobs"] + verbs: ["get"] + resourceNames: ["nextcloud-mail-sync"] + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["create", "get", "list", "watch"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: bstein-dev-home-nextcloud-mail-sync + namespace: nextcloud +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: bstein-dev-home-nextcloud-mail-sync +subjects: + - kind: ServiceAccount + name: 
bstein-dev-home + namespace: bstein-dev-home -- 2.47.2 From 565fad4522e2f5a0227d9c5a51862d6397d3e58c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 12:22:41 -0300 Subject: [PATCH 337/684] nextcloud-mail-sync: portal RBAC --- .../nextcloud-mail-sync/kustomization.yaml | 1 + services/nextcloud-mail-sync/portal-rbac.yaml | 29 +++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 services/nextcloud-mail-sync/portal-rbac.yaml diff --git a/services/nextcloud-mail-sync/kustomization.yaml b/services/nextcloud-mail-sync/kustomization.yaml index c349f1e..fb18550 100644 --- a/services/nextcloud-mail-sync/kustomization.yaml +++ b/services/nextcloud-mail-sync/kustomization.yaml @@ -4,6 +4,7 @@ kind: Kustomization namespace: nextcloud resources: - cronjob.yaml + - portal-rbac.yaml configMapGenerator: - name: nextcloud-mail-sync-script files: diff --git a/services/nextcloud-mail-sync/portal-rbac.yaml b/services/nextcloud-mail-sync/portal-rbac.yaml new file mode 100644 index 0000000..dc9a4e4 --- /dev/null +++ b/services/nextcloud-mail-sync/portal-rbac.yaml @@ -0,0 +1,29 @@ +# services/nextcloud-mail-sync/portal-rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: bstein-dev-home-nextcloud-mail-sync +rules: + - apiGroups: ["batch"] + resources: ["cronjobs"] + verbs: ["get"] + resourceNames: ["nextcloud-mail-sync"] + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["create", "get", "list", "watch"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: bstein-dev-home-nextcloud-mail-sync +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: bstein-dev-home-nextcloud-mail-sync +subjects: + - kind: ServiceAccount + name: bstein-dev-home + namespace: bstein-dev-home -- 2.47.2 From db17c95ee0ed582cd925518140edea562ef521aa Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 15:23:13 +0000 Subject: 
[PATCH 338/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 3d08cd8..f5aabcc 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-61 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-62 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 73728bcc09610b6937f61af74eb0956f0622c2e8 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 15:23:24 +0000 Subject: [PATCH 339/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 545cbdb..a52740f 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-61 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-62 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 
fb7265b..581cbfc 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-61 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-62 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From c8f9b59e4a91053e9f28edfaeab883c1cffb5994 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 12:36:00 -0300 Subject: [PATCH 340/684] keycloak: allow nextcloud mail profile attrs --- services/keycloak/realm-settings-job.yaml | 70 ++++++++++++++++++----- 1 file changed, 55 insertions(+), 15 deletions(-) diff --git a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml index bd4fa25..db08722 100644 --- a/services/keycloak/realm-settings-job.yaml +++ b/services/keycloak/realm-settings-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-realm-settings-12 + name: keycloak-realm-settings-13 namespace: sso spec: backoffLimit: 0 @@ -144,20 +144,60 @@ spec: attrs = profile.get("attributes") if not isinstance(attrs, list): attrs = [] - has_mailu_email = any( - isinstance(item, dict) and item.get("name") == "mailu_email" for item in attrs - ) - if not has_mailu_email: - attrs.append( - { - "name": "mailu_email", - "displayName": "Atlas Mailbox", - "multivalued": False, - "annotations": {"group": "user-metadata"}, - "permissions": {"view": ["admin"], "edit": ["admin"]}, - "validations": {"email": {}, "length": {"max": 255}}, - } - ) + + required_attrs = [ + { + "name": "mailu_email", + "displayName": "Atlas Mailbox", + "multivalued": False, + "annotations": {"group": "user-metadata"}, + "permissions": {"view": ["admin"], "edit": ["admin"]}, + "validations": 
{"email": {}, "length": {"max": 255}}, + }, + { + "name": "mailu_app_password", + "displayName": "Atlas Mail App Password", + "multivalued": False, + "annotations": {"group": "user-metadata"}, + "permissions": {"view": ["admin"], "edit": ["admin"]}, + "validations": {"length": {"max": 255}}, + }, + { + "name": "nextcloud_mail_primary_email", + "displayName": "Nextcloud Mail Primary Email", + "multivalued": False, + "annotations": {"group": "user-metadata"}, + "permissions": {"view": ["admin"], "edit": ["admin"]}, + "validations": {"email": {}, "length": {"max": 255}}, + }, + { + "name": "nextcloud_mail_account_count", + "displayName": "Nextcloud Mail Account Count", + "multivalued": False, + "annotations": {"group": "user-metadata"}, + "permissions": {"view": ["admin"], "edit": ["admin"]}, + "validations": {"length": {"max": 32}}, + }, + { + "name": "nextcloud_mail_synced_at", + "displayName": "Nextcloud Mail Last Synced", + "multivalued": False, + "annotations": {"group": "user-metadata"}, + "permissions": {"view": ["admin"], "edit": ["admin"]}, + "validations": {"length": {"max": 64}}, + }, + ] + + def has_attr(name: str) -> bool: + return any(isinstance(item, dict) and item.get("name") == name for item in attrs) + + updated = False + for attr in required_attrs: + if not has_attr(attr.get("name", "")): + attrs.append(attr) + updated = True + + if updated: profile["attributes"] = attrs status, _ = http_json("PUT", profile_url, access_token, profile) if status not in (200, 204): -- 2.47.2 From f1d1e1bd7d7665d63cccbfb8acb603b2e3bdb0fb Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 14:29:28 -0300 Subject: [PATCH 341/684] keycloak: enable token exchange --- services/keycloak/deployment.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index 0ead0a8..e632d7d 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -103,6 +103,8 @@ spec: value: 
xforwarded - name: KC_HTTP_ENABLED value: "true" + - name: KC_FEATURES + value: token-exchange - name: KC_HTTP_MANAGEMENT_PORT value: "9000" - name: KC_HTTP_MANAGEMENT_BIND_ADDRESS -- 2.47.2 From e09589ec357bbb496c256f2dca934691040f660a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 14:35:23 -0300 Subject: [PATCH 342/684] keycloak: add portal e2e client --- services/keycloak/kustomization.yaml | 1 + services/keycloak/portal-e2e-client-job.yaml | 247 +++++++++++++++++++ 2 files changed, 248 insertions(+) create mode 100644 services/keycloak/portal-e2e-client-job.yaml diff --git a/services/keycloak/kustomization.yaml b/services/keycloak/kustomization.yaml index 05b410d..eb554ab 100644 --- a/services/keycloak/kustomization.yaml +++ b/services/keycloak/kustomization.yaml @@ -7,6 +7,7 @@ resources: - pvc.yaml - deployment.yaml - realm-settings-job.yaml + - portal-e2e-client-job.yaml - ldap-federation-job.yaml - user-overrides-job.yaml - service.yaml diff --git a/services/keycloak/portal-e2e-client-job.yaml b/services/keycloak/portal-e2e-client-job.yaml new file mode 100644 index 0000000..2a22edf --- /dev/null +++ b/services/keycloak/portal-e2e-client-job.yaml @@ -0,0 +1,247 @@ +# services/keycloak/portal-e2e-client-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: keycloak-portal-e2e-client-1 + namespace: sso +spec: + backoffLimit: 0 + template: + spec: + restartPolicy: Never + containers: + - name: configure + image: python:3.11-alpine + env: + - name: KEYCLOAK_SERVER + value: http://keycloak.sso.svc.cluster.local + - name: KEYCLOAK_REALM + value: atlas + - name: KEYCLOAK_ADMIN_USER + valueFrom: + secretKeyRef: + name: keycloak-admin + key: username + - name: KEYCLOAK_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: keycloak-admin + key: password + - name: PORTAL_E2E_CLIENT_ID + valueFrom: + secretKeyRef: + name: portal-e2e-client + key: client_id + - name: PORTAL_E2E_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: portal-e2e-client + 
key: client_secret + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + python - <<'PY' + import json + import os + import urllib.parse + import urllib.error + import urllib.request + + base_url = os.environ["KEYCLOAK_SERVER"].rstrip("/") + realm = os.environ["KEYCLOAK_REALM"] + admin_user = os.environ["KEYCLOAK_ADMIN_USER"] + admin_password = os.environ["KEYCLOAK_ADMIN_PASSWORD"] + e2e_client_id = os.environ["PORTAL_E2E_CLIENT_ID"] + e2e_client_secret = os.environ["PORTAL_E2E_CLIENT_SECRET"] + + def http_json(method: str, url: str, token: str, payload=None): + data = None + headers = {"Authorization": f"Bearer {token}"} + if payload is not None: + data = json.dumps(payload).encode() + headers["Content-Type"] = "application/json" + req = urllib.request.Request(url, data=data, headers=headers, method=method) + try: + with urllib.request.urlopen(req, timeout=30) as resp: + body = resp.read() + if not body: + return resp.status, None + return resp.status, json.loads(body.decode()) + except urllib.error.HTTPError as exc: + raw = exc.read() + if not raw: + return exc.code, None + try: + return exc.code, json.loads(raw.decode()) + except Exception: + return exc.code, {"raw": raw.decode(errors="replace")} + + def get_admin_token() -> str: + token_data = urllib.parse.urlencode( + { + "grant_type": "password", + "client_id": "admin-cli", + "username": admin_user, + "password": admin_password, + } + ).encode() + req = urllib.request.Request( + f"{base_url}/realms/master/protocol/openid-connect/token", + data=token_data, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=15) as resp: + body = json.loads(resp.read().decode()) + except urllib.error.HTTPError as exc: + raw = exc.read().decode(errors="replace") + raise SystemExit(f"Token request failed: status={exc.code} body={raw}") + return body["access_token"] + + token = get_admin_token() + + # Ensure the confidential client for 
E2E token exchange exists with service accounts enabled. + status, clients = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/clients?clientId={urllib.parse.quote(e2e_client_id)}", + token, + ) + if status != 200 or not isinstance(clients, list): + raise SystemExit(f"Unexpected clients lookup response: {status}") + + client_uuid = None + if clients: + for item in clients: + if isinstance(item, dict) and item.get("clientId") == e2e_client_id: + client_uuid = item.get("id") + break + + desired_rep = { + "clientId": e2e_client_id, + "enabled": True, + "protocol": "openid-connect", + "publicClient": False, + "serviceAccountsEnabled": True, + "standardFlowEnabled": False, + "directAccessGrantsEnabled": False, + "implicitFlowEnabled": False, + "secret": e2e_client_secret, + "attributes": { + "oauth2.device.authorization.grant.enabled": "false", + "oauth2.token.exchange.grant.enabled": "true", + }, + } + + if not client_uuid: + status, resp = http_json( + "POST", + f"{base_url}/admin/realms/{realm}/clients", + token, + desired_rep, + ) + if status not in (201, 204): + raise SystemExit(f"Client create failed (status={status}) resp={resp}") + status, clients = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/clients?clientId={urllib.parse.quote(e2e_client_id)}", + token, + ) + if status != 200 or not isinstance(clients, list) or not clients: + raise SystemExit("Unable to refetch client after creation") + client_uuid = clients[0].get("id") + + # Update existing client with desired settings (idempotent). 
+ status, client_rep = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/clients/{client_uuid}", + token, + ) + if status != 200 or not isinstance(client_rep, dict): + raise SystemExit(f"Unable to fetch client representation (status={status})") + + updated = False + for key in ("enabled", "serviceAccountsEnabled", "standardFlowEnabled", "directAccessGrantsEnabled", "implicitFlowEnabled"): + if client_rep.get(key) != desired_rep.get(key): + client_rep[key] = desired_rep.get(key) + updated = True + if client_rep.get("publicClient") is not False: + client_rep["publicClient"] = False + updated = True + if client_rep.get("secret") != desired_rep.get("secret"): + client_rep["secret"] = desired_rep.get("secret") + updated = True + + attrs = client_rep.get("attributes") or {} + for k, v in desired_rep["attributes"].items(): + if attrs.get(k) != v: + attrs[k] = v + updated = True + client_rep["attributes"] = attrs + + if updated: + status, resp = http_json( + "PUT", + f"{base_url}/admin/realms/{realm}/clients/{client_uuid}", + token, + client_rep, + ) + if status not in (200, 204): + raise SystemExit(f"Client update failed (status={status}) resp={resp}") + + # Give the service account user minimal realm-management roles for impersonation + user lookup. 
+ status, svc_user = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/clients/{client_uuid}/service-account-user", + token, + ) + if status != 200 or not isinstance(svc_user, dict) or not svc_user.get("id"): + raise SystemExit(f"Unable to fetch service account user (status={status})") + svc_user_id = svc_user["id"] + + status, rm_clients = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/clients?clientId=realm-management", + token, + ) + if status != 200 or not isinstance(rm_clients, list) or not rm_clients: + raise SystemExit("Unable to find realm-management client") + rm_uuid = rm_clients[0].get("id") + if not rm_uuid: + raise SystemExit("realm-management client has no id") + + wanted_roles = ("query-users", "view-users", "impersonation") + role_reps = [] + for role_name in wanted_roles: + status, role = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/clients/{rm_uuid}/roles/{urllib.parse.quote(role_name)}", + token, + ) + if status != 200 or not isinstance(role, dict): + raise SystemExit(f"Unable to fetch role {role_name} (status={status})") + role_reps.append({"id": role.get("id"), "name": role.get("name")}) + + status, assigned = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/users/{svc_user_id}/role-mappings/clients/{rm_uuid}", + token, + ) + assigned_names = set() + if status == 200 and isinstance(assigned, list): + for r in assigned: + if isinstance(r, dict) and r.get("name"): + assigned_names.add(r["name"]) + + missing = [r for r in role_reps if r.get("name") and r["name"] not in assigned_names] + if missing: + status, resp = http_json( + "POST", + f"{base_url}/admin/realms/{realm}/users/{svc_user_id}/role-mappings/clients/{rm_uuid}", + token, + missing, + ) + if status not in (200, 204): + raise SystemExit(f"Role mapping update failed (status={status}) resp={resp}") + PY -- 2.47.2 From b21a79dad73c4d42ca12f7b81ec11dfced2456b3 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 14:48:28 -0300 Subject: [PATCH 
343/684] keycloak: allow token exchange to portal --- services/keycloak/kustomization.yaml | 1 + .../portal-e2e-target-client-job.yaml | 138 ++++++++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 services/keycloak/portal-e2e-target-client-job.yaml diff --git a/services/keycloak/kustomization.yaml b/services/keycloak/kustomization.yaml index eb554ab..80c504c 100644 --- a/services/keycloak/kustomization.yaml +++ b/services/keycloak/kustomization.yaml @@ -8,6 +8,7 @@ resources: - deployment.yaml - realm-settings-job.yaml - portal-e2e-client-job.yaml + - portal-e2e-target-client-job.yaml - ldap-federation-job.yaml - user-overrides-job.yaml - service.yaml diff --git a/services/keycloak/portal-e2e-target-client-job.yaml b/services/keycloak/portal-e2e-target-client-job.yaml new file mode 100644 index 0000000..45b3980 --- /dev/null +++ b/services/keycloak/portal-e2e-target-client-job.yaml @@ -0,0 +1,138 @@ +# services/keycloak/portal-e2e-target-client-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: keycloak-portal-e2e-target-1 + namespace: sso +spec: + backoffLimit: 0 + template: + spec: + restartPolicy: Never + containers: + - name: configure + image: python:3.11-alpine + env: + - name: KEYCLOAK_SERVER + value: http://keycloak.sso.svc.cluster.local + - name: KEYCLOAK_REALM + value: atlas + - name: KEYCLOAK_ADMIN_USER + valueFrom: + secretKeyRef: + name: keycloak-admin + key: username + - name: KEYCLOAK_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: keycloak-admin + key: password + - name: TARGET_CLIENT_ID + value: bstein-dev-home + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + python - <<'PY' + import json + import os + import urllib.parse + import urllib.error + import urllib.request + + base_url = os.environ["KEYCLOAK_SERVER"].rstrip("/") + realm = os.environ["KEYCLOAK_REALM"] + admin_user = os.environ["KEYCLOAK_ADMIN_USER"] + admin_password = os.environ["KEYCLOAK_ADMIN_PASSWORD"] + target_client_id = 
os.environ["TARGET_CLIENT_ID"] + + def http_json(method: str, url: str, token: str, payload=None): + data = None + headers = {"Authorization": f"Bearer {token}"} + if payload is not None: + data = json.dumps(payload).encode() + headers["Content-Type"] = "application/json" + req = urllib.request.Request(url, data=data, headers=headers, method=method) + try: + with urllib.request.urlopen(req, timeout=30) as resp: + body = resp.read() + if not body: + return resp.status, None + return resp.status, json.loads(body.decode()) + except urllib.error.HTTPError as exc: + raw = exc.read() + if not raw: + return exc.code, None + try: + return exc.code, json.loads(raw.decode()) + except Exception: + return exc.code, {"raw": raw.decode(errors="replace")} + + def get_admin_token() -> str: + token_data = urllib.parse.urlencode( + { + "grant_type": "password", + "client_id": "admin-cli", + "username": admin_user, + "password": admin_password, + } + ).encode() + req = urllib.request.Request( + f"{base_url}/realms/master/protocol/openid-connect/token", + data=token_data, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=15) as resp: + body = json.loads(resp.read().decode()) + except urllib.error.HTTPError as exc: + raw = exc.read().decode(errors="replace") + raise SystemExit(f"Token request failed: status={exc.code} body={raw}") + return body["access_token"] + + token = get_admin_token() + + status, clients = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/clients?clientId={urllib.parse.quote(target_client_id)}", + token, + ) + if status != 200 or not isinstance(clients, list) or not clients: + raise SystemExit(f"Unable to find target client {target_client_id!r} (status={status})") + + client_uuid = None + for item in clients: + if isinstance(item, dict) and item.get("clientId") == target_client_id: + client_uuid = item.get("id") + break + if not client_uuid: + raise SystemExit(f"Target 
client {target_client_id!r} has no id") + + status, client_rep = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/clients/{client_uuid}", + token, + ) + if status != 200 or not isinstance(client_rep, dict): + raise SystemExit(f"Unable to fetch client representation (status={status})") + + attrs = client_rep.get("attributes") or {} + updated = False + if attrs.get("oauth2.token.exchange.grant.enabled") != "true": + attrs["oauth2.token.exchange.grant.enabled"] = "true" + updated = True + client_rep["attributes"] = attrs + + if updated: + status, resp = http_json( + "PUT", + f"{base_url}/admin/realms/{realm}/clients/{client_uuid}", + token, + client_rep, + ) + if status not in (200, 204): + raise SystemExit(f"Client update failed (status={status}) resp={resp}") + + print(f"OK: ensured token exchange enabled on client {target_client_id}") + PY -- 2.47.2 From df959ee17d39518dc8eb17d50a33e7a71a1848b3 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 15:43:07 -0300 Subject: [PATCH 344/684] keycloak: enable fine-grained token exchange authz --- services/keycloak/deployment.yaml | 2 +- services/keycloak/kustomization.yaml | 1 + ...al-e2e-token-exchange-permissions-job.yaml | 235 ++++++++++++++++++ 3 files changed, 237 insertions(+), 1 deletion(-) create mode 100644 services/keycloak/portal-e2e-token-exchange-permissions-job.yaml diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index e632d7d..48cf5e0 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -104,7 +104,7 @@ spec: - name: KC_HTTP_ENABLED value: "true" - name: KC_FEATURES - value: token-exchange + value: token-exchange,admin-fine-grained-authz - name: KC_HTTP_MANAGEMENT_PORT value: "9000" - name: KC_HTTP_MANAGEMENT_BIND_ADDRESS diff --git a/services/keycloak/kustomization.yaml b/services/keycloak/kustomization.yaml index 80c504c..f3f91ab 100644 --- a/services/keycloak/kustomization.yaml +++ 
b/services/keycloak/kustomization.yaml @@ -9,6 +9,7 @@ resources: - realm-settings-job.yaml - portal-e2e-client-job.yaml - portal-e2e-target-client-job.yaml + - portal-e2e-token-exchange-permissions-job.yaml - ldap-federation-job.yaml - user-overrides-job.yaml - service.yaml diff --git a/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml b/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml new file mode 100644 index 0000000..2610f6d --- /dev/null +++ b/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml @@ -0,0 +1,235 @@ +# services/keycloak/portal-e2e-token-exchange-permissions-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: keycloak-portal-e2e-token-exchange-permissions-1 + namespace: sso +spec: + backoffLimit: 0 + template: + spec: + restartPolicy: Never + containers: + - name: configure + image: python:3.11-alpine + env: + - name: KEYCLOAK_SERVER + value: http://keycloak.sso.svc.cluster.local + - name: KEYCLOAK_REALM + value: atlas + - name: KEYCLOAK_ADMIN_USER + valueFrom: + secretKeyRef: + name: keycloak-admin + key: username + - name: KEYCLOAK_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: keycloak-admin + key: password + - name: PORTAL_E2E_CLIENT_ID + value: test-portal-e2e + - name: TARGET_CLIENT_ID + value: bstein-dev-home + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + python - <<'PY' + import json + import os + import re + import urllib.parse + import urllib.error + import urllib.request + from typing import Any + + base_url = os.environ["KEYCLOAK_SERVER"].rstrip("/") + realm = os.environ["KEYCLOAK_REALM"] + admin_user = os.environ["KEYCLOAK_ADMIN_USER"] + admin_password = os.environ["KEYCLOAK_ADMIN_PASSWORD"] + e2e_client_id = os.environ["PORTAL_E2E_CLIENT_ID"] + target_client_id = os.environ["TARGET_CLIENT_ID"] + + uuid_re = re.compile(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", re.IGNORECASE) + + def is_uuid(value: str) -> bool: + return 
bool(uuid_re.match(value)) + + def http_json(method: str, url: str, token: str, payload: Any | None = None): + data = None + headers = {"Authorization": f"Bearer {token}"} + if payload is not None: + data = json.dumps(payload).encode() + headers["Content-Type"] = "application/json" + req = urllib.request.Request(url, data=data, headers=headers, method=method) + try: + with urllib.request.urlopen(req, timeout=30) as resp: + body = resp.read() + if not body: + return resp.status, None + return resp.status, json.loads(body.decode()) + except urllib.error.HTTPError as exc: + raw = exc.read() + if not raw: + return exc.code, None + try: + return exc.code, json.loads(raw.decode()) + except Exception: + return exc.code, {"raw": raw.decode(errors="replace")} + + def get_admin_token() -> str: + token_data = urllib.parse.urlencode( + { + "grant_type": "password", + "client_id": "admin-cli", + "username": admin_user, + "password": admin_password, + } + ).encode() + req = urllib.request.Request( + f"{base_url}/realms/master/protocol/openid-connect/token", + data=token_data, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=15) as resp: + body = json.loads(resp.read().decode()) + except urllib.error.HTTPError as exc: + raw = exc.read().decode(errors="replace") + raise SystemExit(f"Token request failed: status={exc.code} body={raw}") + return body["access_token"] + + def find_client_uuid(token: str, client_id: str) -> str: + status, clients = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/clients?clientId={urllib.parse.quote(client_id)}", + token, + ) + if status != 200 or not isinstance(clients, list) or not clients: + raise SystemExit(f"Unable to find client {client_id!r} (status={status})") + for item in clients: + if isinstance(item, dict) and item.get("clientId") == client_id and item.get("id"): + return item["id"] + raise SystemExit(f"Client {client_id!r} has no id") + + token = 
get_admin_token() + + rm_uuid = find_client_uuid(token, "realm-management") + e2e_uuid = find_client_uuid(token, e2e_client_id) + target_uuid = find_client_uuid(token, target_client_id) + + def enable_and_get_permissions(url: str) -> dict[str, Any]: + status, resp = http_json("PUT", url, token, {"enabled": True}) + if status not in (200, 204): + raise SystemExit(f"Failed enabling permissions at {url} (status={status}) resp={resp}") + status, perms = http_json("GET", url, token) + if status != 200 or not isinstance(perms, dict): + raise SystemExit(f"Failed reading permissions at {url} (status={status}) resp={perms}") + return perms + + users_perms = enable_and_get_permissions(f"{base_url}/admin/realms/{realm}/users-management-permissions") + users_scope_perms = users_perms.get("scopePermissions") or {} + if not isinstance(users_scope_perms, dict): + raise SystemExit("Users management permissions missing scopePermissions") + impersonate_perm_id = users_scope_perms.get("impersonate") or users_scope_perms.get("impersonation") + if not impersonate_perm_id: + keys = sorted(k for k in users_scope_perms.keys()) + raise SystemExit(f"Users permissions missing impersonate scope (have: {keys})") + + target_perms = enable_and_get_permissions( + f"{base_url}/admin/realms/{realm}/clients/{target_uuid}/management/permissions" + ) + target_scope_perms = target_perms.get("scopePermissions") or {} + if not isinstance(target_scope_perms, dict): + raise SystemExit("Target client permissions missing scopePermissions") + token_exchange_perm_id = target_scope_perms.get("token-exchange") + if not token_exchange_perm_id: + keys = sorted(k for k in target_scope_perms.keys()) + raise SystemExit(f"Target client permissions missing token-exchange scope (have: {keys})") + + policy_name = "test-portal-e2e-token-exchange" + status, policies = http_json( + "GET", + 
f"{base_url}/admin/realms/{realm}/clients/{rm_uuid}/authz/resource-server/policy/search?name={urllib.parse.quote(policy_name)}&fields=id,name,type,config", + token, + ) + policy = None + if status == 200 and isinstance(policies, list): + for item in policies: + if isinstance(item, dict) and item.get("name") == policy_name: + policy = item + break + + if policy is None: + create_rep: dict[str, Any] = { + "name": policy_name, + "type": "client", + "logic": "POSITIVE", + "decisionStrategy": "UNANIMOUS", + "config": {"clients": json.dumps([e2e_uuid])}, + } + status, created = http_json( + "POST", + f"{base_url}/admin/realms/{realm}/clients/{rm_uuid}/authz/resource-server/policy", + token, + create_rep, + ) + if status != 201 or not isinstance(created, dict) or not created.get("id"): + raise SystemExit(f"Failed creating policy {policy_name!r} (status={status}) resp={created}") + policy = created + + policy_id = policy.get("id") + if not isinstance(policy_id, str) or not policy_id: + raise SystemExit(f"Policy {policy_name!r} missing id") + + def patch_permission(permission_id: str): + candidates = [ + f"{base_url}/admin/realms/{realm}/clients/{rm_uuid}/authz/resource-server/permission/scope/{permission_id}", + f"{base_url}/admin/realms/{realm}/clients/{rm_uuid}/authz/resource-server/permission/resource/{permission_id}", + f"{base_url}/admin/realms/{realm}/clients/{rm_uuid}/authz/resource-server/permission/{permission_id}", + ] + perm = None + url_used = None + for url in candidates: + st, body = http_json("GET", url, token) + if st == 200 and isinstance(body, dict): + perm = body + url_used = url + break + if perm is None or url_used is None: + raise SystemExit(f"Unable to fetch permission {permission_id} via expected endpoints") + + policies_field = perm.get("policies") + if isinstance(policies_field, list): + policies_list = [p for p in policies_field if isinstance(p, str)] + else: + policies_list = [] + + use_ids = any(is_uuid(p) for p in policies_list) + entry = 
policy_id if use_ids else policy_name + if entry in policies_list: + return + + policies_list.append(entry) + perm["policies"] = policies_list + st, body = http_json("PUT", url_used, token, perm) + if st in (200, 204): + return + + # Retry once with the other identifier form. + alt_entry = policy_name if entry == policy_id else policy_id + if alt_entry not in policies_list: + perm["policies"] = [p for p in policies_list if p != entry] + [alt_entry] + st2, body2 = http_json("PUT", url_used, token, perm) + if st2 in (200, 204): + return + raise SystemExit(f"Failed updating permission {permission_id} (status={st2}) resp={body2}") + raise SystemExit(f"Failed updating permission {permission_id} (status={st}) resp={body}") + + patch_permission(str(impersonate_perm_id)) + patch_permission(str(token_exchange_perm_id)) + + print("OK: configured token exchange permissions for portal E2E client") + PY -- 2.47.2 From 1f2bddc7fea6f6eeaaa86ab0775bd435f043c76a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 15:45:04 -0300 Subject: [PATCH 345/684] keycloak: retry token exchange permissions job --- ...al-e2e-token-exchange-permissions-job.yaml | 31 +++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml b/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml index 2610f6d..e62aff1 100644 --- a/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml +++ b/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml @@ -2,10 +2,10 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-portal-e2e-token-exchange-permissions-1 + name: keycloak-portal-e2e-token-exchange-permissions-2 namespace: sso spec: - backoffLimit: 0 + backoffLimit: 6 template: spec: restartPolicy: Never @@ -39,6 +39,7 @@ spec: import json import os import re + import time import urllib.parse import urllib.error import urllib.request @@ -79,6 +80,7 @@ spec: return exc.code, {"raw": 
raw.decode(errors="replace")} def get_admin_token() -> str: + last_error: str | None = None token_data = urllib.parse.urlencode( { "grant_type": "password", @@ -93,13 +95,24 @@ spec: headers={"Content-Type": "application/x-www-form-urlencoded"}, method="POST", ) - try: - with urllib.request.urlopen(req, timeout=15) as resp: - body = json.loads(resp.read().decode()) - except urllib.error.HTTPError as exc: - raw = exc.read().decode(errors="replace") - raise SystemExit(f"Token request failed: status={exc.code} body={raw}") - return body["access_token"] + for attempt in range(1, 61): + try: + with urllib.request.urlopen(req, timeout=15) as resp: + body = json.loads(resp.read().decode()) + token = body.get("access_token") + if isinstance(token, str) and token: + return token + last_error = "missing access_token" + except urllib.error.HTTPError as exc: + # Treat transient startup errors as retryable. + if exc.code in (404, 429, 500, 502, 503, 504): + last_error = f"http {exc.code}" + else: + raise SystemExit(f"Token request failed: status={exc.code}") + except urllib.error.URLError as exc: + last_error = str(exc.reason) + time.sleep(2) + raise SystemExit(f"Token request failed after retries: {last_error}") def find_client_uuid(token: str, client_id: str) -> str: status, clients = http_json( -- 2.47.2 From cb37756f5ff492851233c0b25e909f5a47feb64d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 15:46:26 -0300 Subject: [PATCH 346/684] keycloak: fix token exchange permission patching --- .../keycloak/portal-e2e-token-exchange-permissions-job.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml b/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml index e62aff1..f372b20 100644 --- a/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml +++ b/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: 
Job metadata: - name: keycloak-portal-e2e-token-exchange-permissions-2 + name: keycloak-portal-e2e-token-exchange-permissions-3 namespace: sso spec: backoffLimit: 6 @@ -228,7 +228,7 @@ spec: policies_list.append(entry) perm["policies"] = policies_list st, body = http_json("PUT", url_used, token, perm) - if st in (200, 204): + if st in (200, 201, 204): return # Retry once with the other identifier form. @@ -236,7 +236,7 @@ spec: if alt_entry not in policies_list: perm["policies"] = [p for p in policies_list if p != entry] + [alt_entry] st2, body2 = http_json("PUT", url_used, token, perm) - if st2 in (200, 204): + if st2 in (200, 201, 204): return raise SystemExit(f"Failed updating permission {permission_id} (status={st2}) resp={body2}") raise SystemExit(f"Failed updating permission {permission_id} (status={st}) resp={body}") -- 2.47.2 From 3f19d01d0044b9d2a038edb14d04d9b1b871af88 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 15:48:40 -0300 Subject: [PATCH 347/684] keycloak: make token exchange permissions job idempotent --- ...al-e2e-token-exchange-permissions-job.yaml | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml b/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml index f372b20..a7fd79d 100644 --- a/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml +++ b/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-portal-e2e-token-exchange-permissions-3 + name: keycloak-portal-e2e-token-exchange-permissions-4 namespace: sso spec: backoffLimit: 6 @@ -189,9 +189,23 @@ spec: token, create_rep, ) - if status != 201 or not isinstance(created, dict) or not created.get("id"): - raise SystemExit(f"Failed creating policy {policy_name!r} (status={status}) resp={created}") - policy = created + if status == 409: + status, policies = http_json( + 
"GET", + f"{base_url}/admin/realms/{realm}/clients/{rm_uuid}/authz/resource-server/policy/search?name={urllib.parse.quote(policy_name)}&fields=id,name,type,config", + token, + ) + if status == 200 and isinstance(policies, list): + for item in policies: + if isinstance(item, dict) and item.get("name") == policy_name: + policy = item + break + if policy is None: + raise SystemExit(f"Policy {policy_name!r} exists but could not be retrieved") + else: + if status != 201 or not isinstance(created, dict) or not created.get("id"): + raise SystemExit(f"Failed creating policy {policy_name!r} (status={status}) resp={created}") + policy = created policy_id = policy.get("id") if not isinstance(policy_id, str) or not policy_id: -- 2.47.2 From e73baa6ecd0956dcb204a8559af909637b73b59c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 15:50:43 -0300 Subject: [PATCH 348/684] keycloak: robust policy lookup for token exchange job --- ...al-e2e-token-exchange-permissions-job.yaml | 63 +++++++++++-------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml b/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml index a7fd79d..104d6f0 100644 --- a/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml +++ b/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-portal-e2e-token-exchange-permissions-4 + name: keycloak-portal-e2e-token-exchange-permissions-5 namespace: sso spec: backoffLimit: 6 @@ -163,17 +163,35 @@ spec: raise SystemExit(f"Target client permissions missing token-exchange scope (have: {keys})") policy_name = "test-portal-e2e-token-exchange" - status, policies = http_json( - "GET", - f"{base_url}/admin/realms/{realm}/clients/{rm_uuid}/authz/resource-server/policy/search?name={urllib.parse.quote(policy_name)}&fields=id,name,type,config", - token, - ) - policy = None - if status == 
200 and isinstance(policies, list): - for item in policies: - if isinstance(item, dict) and item.get("name") == policy_name: - policy = item - break + policy_base_url = f"{base_url}/admin/realms/{realm}/clients/{rm_uuid}/authz/resource-server/policy" + + def find_policy_by_name(name: str): + urls = [ + f"{policy_base_url}/search?name={urllib.parse.quote(name)}&fields=id,name,type,config", + f"{policy_base_url}/search?name={urllib.parse.quote(name)}", + policy_base_url, + ] + for url in urls: + st, body = http_json("GET", url, token) + if st != 200: + continue + items = None + if isinstance(body, list): + items = body + elif isinstance(body, dict): + for key in ("policies", "items", "data"): + value = body.get(key) + if isinstance(value, list): + items = value + break + if not isinstance(items, list): + continue + for item in items: + if isinstance(item, dict) and item.get("name") == name and item.get("id"): + return item + return None + + policy = find_policy_by_name(policy_name) if policy is None: create_rep: dict[str, Any] = { @@ -185,27 +203,18 @@ spec: } status, created = http_json( "POST", - f"{base_url}/admin/realms/{realm}/clients/{rm_uuid}/authz/resource-server/policy", + policy_base_url, token, create_rep, ) - if status == 409: - status, policies = http_json( - "GET", - f"{base_url}/admin/realms/{realm}/clients/{rm_uuid}/authz/resource-server/policy/search?name={urllib.parse.quote(policy_name)}&fields=id,name,type,config", - token, - ) - if status == 200 and isinstance(policies, list): - for item in policies: - if isinstance(item, dict) and item.get("name") == policy_name: - policy = item - break + if status == 201 and isinstance(created, dict) and created.get("id"): + policy = created + elif status == 409: + policy = find_policy_by_name(policy_name) if policy is None: raise SystemExit(f"Policy {policy_name!r} exists but could not be retrieved") else: - if status != 201 or not isinstance(created, dict) or not created.get("id"): - raise SystemExit(f"Failed 
creating policy {policy_name!r} (status={status}) resp={created}") - policy = created + raise SystemExit(f"Failed creating policy {policy_name!r} (status={status}) resp={created}") policy_id = policy.get("id") if not isinstance(policy_id, str) or not policy_id: -- 2.47.2 From 70980a2ca95314c9397b8415a81f16b84a2f3aac Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 15:58:44 -0300 Subject: [PATCH 349/684] keycloak: add token exchange E2E smoke test --- scripts/tests/test_portal_token_exchange.py | 157 ++++++++++++++++++ services/keycloak/kustomization.yaml | 7 + .../portal-e2e-token-exchange-test-job.yaml | 52 ++++++ 3 files changed, 216 insertions(+) create mode 100644 scripts/tests/test_portal_token_exchange.py create mode 100644 services/keycloak/portal-e2e-token-exchange-test-job.yaml diff --git a/scripts/tests/test_portal_token_exchange.py b/scripts/tests/test_portal_token_exchange.py new file mode 100644 index 0000000..8332005 --- /dev/null +++ b/scripts/tests/test_portal_token_exchange.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python3 +import base64 +import json +import os +import sys +import time +import urllib.parse +import urllib.error +import urllib.request + + +def _require_env(name: str) -> str: + value = os.environ.get(name) + if not value: + raise SystemExit(f"missing required env var: {name}") + return value + + +def _post_form(url: str, data: dict[str, str], token: str | None = None, timeout_s: int = 30) -> dict: + body = urllib.parse.urlencode(data).encode() + headers = {"Content-Type": "application/x-www-form-urlencoded"} + if token: + headers["Authorization"] = f"Bearer {token}" + req = urllib.request.Request(url, data=body, headers=headers, method="POST") + try: + with urllib.request.urlopen(req, timeout=timeout_s) as resp: + payload = resp.read().decode() + return json.loads(payload) if payload else {} + except urllib.error.HTTPError as exc: + raw = exc.read().decode(errors="replace") + raise SystemExit(f"HTTP {exc.code} from {url}: 
{raw}") + + +def _get_json(url: str, token: str, timeout_s: int = 30) -> object: + req = urllib.request.Request(url, headers={"Authorization": f"Bearer {token}"}, method="GET") + try: + with urllib.request.urlopen(req, timeout=timeout_s) as resp: + payload = resp.read().decode() + return json.loads(payload) if payload else None + except urllib.error.HTTPError as exc: + raw = exc.read().decode(errors="replace") + raise SystemExit(f"HTTP {exc.code} from {url}: {raw}") + + +def _decode_jwt_without_verification(jwt: str) -> dict: + parts = jwt.split(".") + if len(parts) < 2: + return {} + padded = parts[1] + "=" * (-len(parts[1]) % 4) + try: + return json.loads(base64.urlsafe_b64decode(padded.encode()).decode()) + except Exception: + return {} + +def _is_retryable_failure(message: str) -> bool: + retryable_markers = ( + "HTTP 401 ", + "HTTP 403 ", + "HTTP 404 ", + "HTTP 409 ", + "HTTP 429 ", + "HTTP 500 ", + "HTTP 502 ", + "HTTP 503 ", + "HTTP 504 ", + "timed out", + "Temporary failure", + "Connection refused", + ) + return any(marker in message for marker in retryable_markers) + + +def main() -> int: + keycloak_base = _require_env("KEYCLOAK_SERVER").rstrip("/") + realm = os.environ.get("KEYCLOAK_REALM", "atlas") + client_id = _require_env("PORTAL_E2E_CLIENT_ID") + client_secret = _require_env("PORTAL_E2E_CLIENT_SECRET") + target_client_id = os.environ.get("TARGET_CLIENT_ID", "bstein-dev-home") + impersonate_username = os.environ.get("IMPERSONATE_USERNAME", "robotuser") + + token_url = f"{keycloak_base}/realms/{realm}/protocol/openid-connect/token" + admin_users_url = f"{keycloak_base}/admin/realms/{realm}/users" + + def run_once() -> None: + token_payload = _post_form( + token_url, + {"grant_type": "client_credentials", "client_id": client_id, "client_secret": client_secret}, + ) + access_token = token_payload.get("access_token") + if not isinstance(access_token, str) or not access_token: + raise SystemExit("client credentials token missing access_token") + + users = 
_get_json( + f"{admin_users_url}?{urllib.parse.urlencode({'username': impersonate_username, 'exact': 'true'})}", + access_token, + ) + if not isinstance(users, list) or not users: + raise SystemExit(f"unable to locate user {impersonate_username!r} via admin API") + user_id = users[0].get("id") + if not isinstance(user_id, str) or not user_id: + raise SystemExit(f"user {impersonate_username!r} missing id") + + exchange_payload = _post_form( + token_url, + { + "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange", + "client_id": client_id, + "client_secret": client_secret, + "subject_token": access_token, + "requested_subject": user_id, + "audience": target_client_id, + }, + ) + exchanged = exchange_payload.get("access_token") + if not isinstance(exchanged, str) or not exchanged: + raise SystemExit("token exchange response missing access_token") + + claims = _decode_jwt_without_verification(exchanged) + aud = claims.get("aud") + if aud is None: + raise SystemExit("token exchange access_token missing aud claim") + if isinstance(aud, str): + aud_ok = aud == target_client_id + elif isinstance(aud, list): + aud_ok = target_client_id in aud + else: + aud_ok = False + if not aud_ok: + raise SystemExit(f"token exchange aud mismatch (expected {target_client_id!r})") + + deadline_seconds = int(os.environ.get("RETRY_DEADLINE_SECONDS", "300")) + retry_interval_seconds = int(os.environ.get("RETRY_INTERVAL_SECONDS", "5")) + deadline_at = time.monotonic() + deadline_seconds + last_error: str | None = None + + while True: + try: + run_once() + print("PASS: token exchange works") + return 0 + except SystemExit as exc: + message = str(exc) + last_error = message or last_error + if time.monotonic() >= deadline_at: + raise + if not _is_retryable_failure(message): + raise + time.sleep(retry_interval_seconds) + except Exception as exc: + last_error = str(exc) or last_error + if time.monotonic() >= deadline_at: + raise SystemExit(str(exc)) + time.sleep(retry_interval_seconds) + 
+ +if __name__ == "__main__": + sys.exit(main()) diff --git a/services/keycloak/kustomization.yaml b/services/keycloak/kustomization.yaml index f3f91ab..8f5b0a5 100644 --- a/services/keycloak/kustomization.yaml +++ b/services/keycloak/kustomization.yaml @@ -10,7 +10,14 @@ resources: - portal-e2e-client-job.yaml - portal-e2e-target-client-job.yaml - portal-e2e-token-exchange-permissions-job.yaml + - portal-e2e-token-exchange-test-job.yaml - ldap-federation-job.yaml - user-overrides-job.yaml - service.yaml - ingress.yaml +generatorOptions: + disableNameSuffixHash: true +configMapGenerator: + - name: portal-e2e-tests + files: + - test_portal_token_exchange.py=../../scripts/tests/test_portal_token_exchange.py diff --git a/services/keycloak/portal-e2e-token-exchange-test-job.yaml b/services/keycloak/portal-e2e-token-exchange-test-job.yaml new file mode 100644 index 0000000..ab43303 --- /dev/null +++ b/services/keycloak/portal-e2e-token-exchange-test-job.yaml @@ -0,0 +1,52 @@ +# services/keycloak/portal-e2e-token-exchange-test-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: keycloak-portal-e2e-token-exchange-test-1 + namespace: sso +spec: + backoffLimit: 6 + ttlSecondsAfterFinished: 3600 + template: + spec: + restartPolicy: Never + containers: + - name: test + image: python:3.11-alpine + env: + - name: KEYCLOAK_SERVER + value: http://keycloak.sso.svc.cluster.local + - name: KEYCLOAK_REALM + value: atlas + - name: TARGET_CLIENT_ID + value: bstein-dev-home + - name: IMPERSONATE_USERNAME + value: robotuser + - name: RETRY_DEADLINE_SECONDS + value: "300" + - name: RETRY_INTERVAL_SECONDS + value: "5" + - name: PORTAL_E2E_CLIENT_ID + valueFrom: + secretKeyRef: + name: portal-e2e-client + key: client_id + - name: PORTAL_E2E_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: portal-e2e-client + key: client_secret + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + python /scripts/test_portal_token_exchange.py + volumeMounts: + - name: tests + mountPath: 
/scripts + readOnly: true + volumes: + - name: tests + configMap: + name: portal-e2e-tests + defaultMode: 0555 -- 2.47.2 From c386ff7c7acdb71471585790a49d2da2dd6c111a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 16:55:02 -0300 Subject: [PATCH 350/684] vaultwarden: disable signups and sync invites --- scripts/vaultwarden_cred_sync.py | 74 ++++++++++++++++++++++++++-- services/vaultwarden/deployment.yaml | 2 + 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/scripts/vaultwarden_cred_sync.py b/scripts/vaultwarden_cred_sync.py index 8f844de..4fb3f8a 100644 --- a/scripts/vaultwarden_cred_sync.py +++ b/scripts/vaultwarden_cred_sync.py @@ -3,6 +3,7 @@ from __future__ import annotations import sys +import time from typing import Any, Iterable import httpx @@ -12,6 +13,11 @@ from atlas_portal.keycloak import admin_client from atlas_portal.vaultwarden import invite_user +VAULTWARDEN_EMAIL_ATTR = "vaultwarden_email" +VAULTWARDEN_STATUS_ATTR = "vaultwarden_status" +VAULTWARDEN_SYNCED_AT_ATTR = "vaultwarden_synced_at" + + def _iter_keycloak_users(page_size: int = 200) -> Iterable[dict[str, Any]]: client = admin_client() if not client.ready(): @@ -39,17 +45,60 @@ def _iter_keycloak_users(page_size: int = 200) -> Iterable[dict[str, Any]]: first += page_size -def _email_for_user(user: dict[str, Any]) -> str: - email = (user.get("email") if isinstance(user.get("email"), str) else "") or "" - if email.strip(): - return email.strip() +def _extract_attr(attrs: Any, key: str) -> str: + if not isinstance(attrs, dict): + return "" + raw = attrs.get(key) + if isinstance(raw, list): + for item in raw: + if isinstance(item, str) and item.strip(): + return item.strip() + return "" + if isinstance(raw, str) and raw.strip(): + return raw.strip() + return "" + + +def _vaultwarden_email_for_user(user: dict[str, Any]) -> str: username = (user.get("username") if isinstance(user.get("username"), str) else "") or "" username = username.strip() if not username: return 
"" + + attrs = user.get("attributes") + vaultwarden_email = _extract_attr(attrs, VAULTWARDEN_EMAIL_ATTR) + if vaultwarden_email: + return vaultwarden_email + + mailu_email = _extract_attr(attrs, "mailu_email") + if mailu_email: + return mailu_email + + email = (user.get("email") if isinstance(user.get("email"), str) else "") or "" + email = email.strip() + if email and email.lower().endswith(f"@{settings.MAILU_DOMAIN.lower()}"): + return email + return f"{username}@{settings.MAILU_DOMAIN}" +def _set_user_attribute_if_missing(username: str, user: dict[str, Any], key: str, value: str) -> None: + value = (value or "").strip() + if not value: + return + existing = _extract_attr(user.get("attributes"), key) + if existing: + return + admin_client().set_user_attribute(username, key, value) + + +def _set_user_attribute(username: str, key: str, value: str) -> None: + value = (value or "").strip() + if not value: + return + admin_client().set_user_attribute(username, key, value) + + def main() -> int: processed = 0 created = 0 @@ -72,20 +121,35 @@ def main() -> int: skipped += 1 continue - email = _email_for_user(user) + email = _vaultwarden_email_for_user(user) if not email: print(f"skip {username}: missing email", file=sys.stderr) skipped += 1 continue + try: + _set_user_attribute_if_missing(username, user, VAULTWARDEN_EMAIL_ATTR, email) + except Exception: + pass + processed += 1 result = invite_user(email) if result.ok: created += 1 print(f"ok {username}: {result.status}") + try: + _set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, result.status) + _set_user_attribute(username, VAULTWARDEN_SYNCED_AT_ATTR, time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())) + except Exception: + pass else: failures += 1 print(f"err {username}: {result.status} {result.detail}", file=sys.stderr) + try: + _set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, result.status) + _set_user_attribute(username, VAULTWARDEN_SYNCED_AT_ATTR, time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())) + 
except Exception: + pass print( f"done processed={processed} created_or_present={created} skipped={skipped} failures={failures}", diff --git a/services/vaultwarden/deployment.yaml b/services/vaultwarden/deployment.yaml index 175cbca..210b8aa 100644 --- a/services/vaultwarden/deployment.yaml +++ b/services/vaultwarden/deployment.yaml @@ -19,6 +19,8 @@ spec: image: vaultwarden/server:1.33.2 env: - name: SIGNUPS_ALLOWED + value: "false" + - name: INVITATIONS_ALLOWED value: "true" - name: DATABASE_URL valueFrom: -- 2.47.2 From db27242ce17da182faf7e508d67ab23b5011fe3f Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 19:58:33 +0000 Subject: [PATCH 351/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index f5aabcc..ad110bd 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-62 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-63 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 6be16eed1df96f8cd10a4b079dfed37555961980 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 19:59:44 +0000 Subject: [PATCH 352/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index a52740f..e847ab0 100644 
--- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-62 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-63 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 581cbfc..ef3d5eb 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-62 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-63 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 2ee8f7da88afb530679cac027a2dc02239770ccd Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 17:00:01 -0300 Subject: [PATCH 353/684] flux: resume vaultwarden --- .../flux-system/applications/vaultwarden/kustomization.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusters/atlas/flux-system/applications/vaultwarden/kustomization.yaml b/clusters/atlas/flux-system/applications/vaultwarden/kustomization.yaml index eda2a30..783d5e1 100644 --- a/clusters/atlas/flux-system/applications/vaultwarden/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/vaultwarden/kustomization.yaml @@ -6,7 +6,7 @@ metadata: namespace: flux-system spec: interval: 10m - suspend: true + suspend: false sourceRef: kind: GitRepository name: flux-system -- 2.47.2 From 
c11a663d051216ee2f9586461f643afc1e0a69a5 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 17:07:48 -0300 Subject: [PATCH 354/684] vaultwarden: use Recreate strategy --- services/vaultwarden/deployment.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/vaultwarden/deployment.yaml b/services/vaultwarden/deployment.yaml index 210b8aa..1cd35bd 100644 --- a/services/vaultwarden/deployment.yaml +++ b/services/vaultwarden/deployment.yaml @@ -6,6 +6,8 @@ metadata: namespace: vaultwarden spec: replicas: 1 + strategy: + type: Recreate selector: matchLabels: app: vaultwarden -- 2.47.2 From e1deeb1853252c4e56370d0b9bc3bbccde07e1c0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 17:12:46 -0300 Subject: [PATCH 355/684] vaultwarden: avoid RWO multi-attach rollout --- services/vaultwarden/deployment.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/services/vaultwarden/deployment.yaml b/services/vaultwarden/deployment.yaml index 1cd35bd..578dd08 100644 --- a/services/vaultwarden/deployment.yaml +++ b/services/vaultwarden/deployment.yaml @@ -7,7 +7,10 @@ metadata: spec: replicas: 1 strategy: - type: Recreate + type: RollingUpdate + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 selector: matchLabels: app: vaultwarden -- 2.47.2 From dbd14fac8b6d3ea2ebdf69a8e50a563a3640b1dd Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 20:28:35 +0000 Subject: [PATCH 356/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index ad110bd..55db7d6 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: 
registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-63 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-64 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 148bba0fd6e4a378da777c4aafb2f27db7cfca50 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 20:29:46 +0000 Subject: [PATCH 357/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index e847ab0..d411ddf 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-63 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-64 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index ef3d5eb..7367ed3 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-63 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-64 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - 
python -- 2.47.2 From b7c8b4693da977c29245f6944a189ae8ca42e1ad Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 17:44:24 -0300 Subject: [PATCH 358/684] vaultwarden: enable SMTP via Mailu --- services/vaultwarden/deployment.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/services/vaultwarden/deployment.yaml b/services/vaultwarden/deployment.yaml index 578dd08..b43a750 100644 --- a/services/vaultwarden/deployment.yaml +++ b/services/vaultwarden/deployment.yaml @@ -27,6 +27,18 @@ spec: value: "false" - name: INVITATIONS_ALLOWED value: "true" + - name: DOMAIN + value: "https://vault.bstein.dev" + - name: SMTP_HOST + value: "mailu-front.mailu-mailserver.svc.cluster.local" + - name: SMTP_PORT + value: "25" + - name: SMTP_SECURITY + value: "starttls" + - name: SMTP_FROM + value: "postmaster@bstein.dev" + - name: SMTP_FROM_NAME + value: "Atlas Vaultwarden" - name: DATABASE_URL valueFrom: secretKeyRef: -- 2.47.2 From 12348258fa6b45aac639e901982ee6fae6a7eba3 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 17:54:27 -0300 Subject: [PATCH 359/684] vaultwarden: allow internal SMTP TLS --- services/vaultwarden/deployment.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/services/vaultwarden/deployment.yaml b/services/vaultwarden/deployment.yaml index b43a750..9e65c22 100644 --- a/services/vaultwarden/deployment.yaml +++ b/services/vaultwarden/deployment.yaml @@ -35,6 +35,10 @@ spec: value: "25" - name: SMTP_SECURITY value: "starttls" + - name: SMTP_ACCEPT_INVALID_HOSTNAMES + value: "true" + - name: SMTP_ACCEPT_INVALID_CERTS + value: "true" - name: SMTP_FROM value: "postmaster@bstein.dev" - name: SMTP_FROM_NAME -- 2.47.2 From 51a733096f96e27c8202a9b20b5fb4df59ee96b9 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 18:18:31 -0300 Subject: [PATCH 360/684] vaultwarden: make cred sync idempotent --- scripts/vaultwarden_cred_sync.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff 
--git a/scripts/vaultwarden_cred_sync.py b/scripts/vaultwarden_cred_sync.py index 4fb3f8a..3850249 100644 --- a/scripts/vaultwarden_cred_sync.py +++ b/scripts/vaultwarden_cred_sync.py @@ -27,7 +27,9 @@ def _iter_keycloak_users(page_size: int = 200) -> Iterable[dict[str, Any]]: first = 0 while True: headers = client.headers() - params = {"first": str(first), "max": str(page_size)} + # We need attributes for idempotency (vaultwarden_status/vaultwarden_email). Keycloak defaults to a + # brief representation which may omit these. + params = {"first": str(first), "max": str(page_size), "briefRepresentation": "false"} with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http: resp = http.get(url, params=params, headers=headers) resp.raise_for_status() @@ -79,7 +81,9 @@ def _vaultwarden_email_for_user(user: dict[str, Any]) -> str: if email and email.lower().endswith(f"@{settings.MAILU_DOMAIN.lower()}"): return email - return f"{username}@{settings.MAILU_DOMAIN}" + # Don't guess an internal mailbox address until Mailu sync has run and stored mailu_email. + # This avoids spamming Vaultwarden invites that can never be delivered (unknown recipient). + return "" def _set_user_attribute_if_missing(username: str, user: dict[str, Any], key: str, value: str) -> None: @@ -121,6 +125,7 @@ def main() -> int: skipped += 1 continue + current_status = _extract_attr(user.get("attributes"), VAULTWARDEN_STATUS_ATTR) email = _vaultwarden_email_for_user(user) if not email: print(f"skip {username}: missing email", file=sys.stderr) @@ -132,6 +137,12 @@ def main() -> int: except Exception: pass + # If we've already successfully invited or confirmed presence, do not re-invite on every cron run. + # Vaultwarden returns 409 for "already exists", which is idempotent but noisy and can trigger rate limits. 
+ if current_status in {"invited", "already_present"}: + skipped += 1 + continue + processed += 1 result = invite_user(email) if result.ok: -- 2.47.2 From e8fab60d89bdc37bc07aaeff7a5e91829d45f53f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 18:21:04 -0300 Subject: [PATCH 361/684] vaultwarden: skip reinvite when status set --- scripts/vaultwarden_cred_sync.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/scripts/vaultwarden_cred_sync.py b/scripts/vaultwarden_cred_sync.py index 3850249..5dbf22f 100644 --- a/scripts/vaultwarden_cred_sync.py +++ b/scripts/vaultwarden_cred_sync.py @@ -125,15 +125,25 @@ def main() -> int: skipped += 1 continue - current_status = _extract_attr(user.get("attributes"), VAULTWARDEN_STATUS_ATTR) - email = _vaultwarden_email_for_user(user) + # Fetch the full user payload so we can reliably read attributes (and skip re-invites). + user_id = (user.get("id") if isinstance(user.get("id"), str) else "") or "" + user_id = user_id.strip() + full_user = user + if user_id: + try: + full_user = admin_client().get_user(user_id) + except Exception: + full_user = user + + current_status = _extract_attr(full_user.get("attributes"), VAULTWARDEN_STATUS_ATTR) + email = _vaultwarden_email_for_user(full_user) if not email: print(f"skip {username}: missing email", file=sys.stderr) skipped += 1 continue try: - _set_user_attribute_if_missing(username, user, VAULTWARDEN_EMAIL_ATTR, email) + _set_user_attribute_if_missing(username, full_user, VAULTWARDEN_EMAIL_ATTR, email) except Exception: pass -- 2.47.2 From ab658fa064442bb0fd0488bcafe026ec603e7eac Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 18:25:48 -0300 Subject: [PATCH 362/684] keycloak: allow vaultwarden user attributes --- services/keycloak/realm-settings-job.yaml | 24 +++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml index 
db08722..bbf15c9 100644 --- a/services/keycloak/realm-settings-job.yaml +++ b/services/keycloak/realm-settings-job.yaml @@ -146,6 +146,30 @@ spec: attrs = [] required_attrs = [ + { + "name": "vaultwarden_email", + "displayName": "Vaultwarden Email", + "multivalued": False, + "annotations": {"group": "user-metadata"}, + "permissions": {"view": ["admin"], "edit": ["admin"]}, + "validations": {"email": {}, "length": {"max": 255}}, + }, + { + "name": "vaultwarden_status", + "displayName": "Vaultwarden Status", + "multivalued": False, + "annotations": {"group": "user-metadata"}, + "permissions": {"view": ["admin"], "edit": ["admin"]}, + "validations": {"length": {"max": 64}}, + }, + { + "name": "vaultwarden_synced_at", + "displayName": "Vaultwarden Last Synced", + "multivalued": False, + "annotations": {"group": "user-metadata"}, + "permissions": {"view": ["admin"], "edit": ["admin"]}, + "validations": {"length": {"max": 64}}, + }, { "name": "mailu_email", "displayName": "Atlas Mailbox", -- 2.47.2 From b63b724b521ac327fc2225b375a17996f20287bd Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 18:27:29 -0300 Subject: [PATCH 363/684] keycloak: rerun realm settings job --- services/keycloak/realm-settings-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml index bbf15c9..af595c1 100644 --- a/services/keycloak/realm-settings-job.yaml +++ b/services/keycloak/realm-settings-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-realm-settings-13 + name: keycloak-realm-settings-14 namespace: sso spec: backoffLimit: 0 -- 2.47.2 From c5fa1b5a381921fdd421338efdecbc1eac9d5c32 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 18:43:25 -0300 Subject: [PATCH 364/684] vaultwarden: backfill synced_at --- scripts/vaultwarden_cred_sync.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git 
a/scripts/vaultwarden_cred_sync.py b/scripts/vaultwarden_cred_sync.py index 5dbf22f..d259b31 100644 --- a/scripts/vaultwarden_cred_sync.py +++ b/scripts/vaultwarden_cred_sync.py @@ -136,6 +136,7 @@ def main() -> int: full_user = user current_status = _extract_attr(full_user.get("attributes"), VAULTWARDEN_STATUS_ATTR) + current_synced_at = _extract_attr(full_user.get("attributes"), VAULTWARDEN_SYNCED_AT_ATTR) email = _vaultwarden_email_for_user(full_user) if not email: print(f"skip {username}: missing email", file=sys.stderr) @@ -150,6 +151,15 @@ def main() -> int: # If we've already successfully invited or confirmed presence, do not re-invite on every cron run. # Vaultwarden returns 409 for "already exists", which is idempotent but noisy and can trigger rate limits. if current_status in {"invited", "already_present"}: + if not current_synced_at: + try: + _set_user_attribute( + username, + VAULTWARDEN_SYNCED_AT_ATTR, + time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), + ) + except Exception: + pass skipped += 1 continue -- 2.47.2 From 558cab9a0b2877c8ebf3fc5844e66fdcefb2090c Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 22:55:46 +0000 Subject: [PATCH 365/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 55db7d6..6527c74 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-64 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-65 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 
2.47.2 From dd473b8a8c6873836ba74cb63f8c7ea1d4532083 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 22:56:57 +0000 Subject: [PATCH 366/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index d411ddf..5d972bb 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-64 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-65 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 7367ed3..60e16cb 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-64 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-65 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 762164aed44d5c708ddad2b08d6f9be17326db99 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 20:02:53 -0300 Subject: [PATCH 367/684] bstein-dev-home: reduce lab status probe timeout --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 5d972bb..97178dd 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -71,7 +71,7 @@ spec: name: atlas-portal-db key: PORTAL_DATABASE_URL - name: HTTP_CHECK_TIMEOUT_SEC - value: "20" + value: "2" - name: ACCESS_REQUEST_SUBMIT_RATE_LIMIT value: "30" - name: ACCESS_REQUEST_SUBMIT_RATE_WINDOW_SEC -- 2.47.2 From aad5a29986fda040d45b5626d73e2cfb0b513d1b Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 23:40:49 +0000 Subject: [PATCH 368/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 6527c74..7814730 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-65 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-66 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From eb560a38fa2be0b343b5dc34e8065b8868d6ad0f Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 3 Jan 2026 23:42:00 +0000 Subject: [PATCH 369/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 97178dd..d608eed 100644 --- 
a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-65 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-66 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 60e16cb..9787a02 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-65 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-66 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From d2d4b601f3b9d8413ac9a25f14380b4e79e16193 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 00:51:54 +0000 Subject: [PATCH 370/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 7814730..8d7122a 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-66 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: 
registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-67 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 4b522035321d5611d02a7ae7fab9f99a03f62fe1 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 00:53:05 +0000 Subject: [PATCH 371/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index d608eed..be4edb7 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-66 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-67 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 9787a02..b9bb933 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-66 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-67 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From f628d2768bd6c8402abd4efd39b148b9201eaa3f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 21:52:37 
-0300 Subject: [PATCH 372/684] bstein-dev-home: add onboarding e2e job --- scripts/tests/test_portal_onboarding_flow.py | 191 ++++++++++++++++++ services/bstein-dev-home/kustomization.yaml | 7 + .../portal-onboarding-e2e-test-job.yaml | 54 +++++ 3 files changed, 252 insertions(+) create mode 100644 scripts/tests/test_portal_onboarding_flow.py create mode 100644 services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml diff --git a/scripts/tests/test_portal_onboarding_flow.py b/scripts/tests/test_portal_onboarding_flow.py new file mode 100644 index 0000000..52d0d7f --- /dev/null +++ b/scripts/tests/test_portal_onboarding_flow.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 +import json +import os +import sys +import time +import urllib.error +import urllib.parse +import urllib.request + +import psycopg + + +def _env(name: str, default: str | None = None) -> str: + value = os.environ.get(name, default) + if value is None or value == "": + raise SystemExit(f"missing required env var: {name}") + return value + + +def _post_json(url: str, payload: dict, timeout_s: int = 30) -> dict: + body = json.dumps(payload).encode() + req = urllib.request.Request( + url, + data=body, + headers={"Content-Type": "application/json"}, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=timeout_s) as resp: + raw = resp.read().decode() + return json.loads(raw) if raw else {} + except urllib.error.HTTPError as exc: + raw = exc.read().decode(errors="replace") + raise SystemExit(f"HTTP {exc.code} from {url}: {raw}") + + +def _post_form(url: str, data: dict[str, str], timeout_s: int = 30) -> dict: + body = urllib.parse.urlencode(data).encode() + req = urllib.request.Request( + url, + data=body, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=timeout_s) as resp: + raw = resp.read().decode() + return json.loads(raw) if raw else {} + except urllib.error.HTTPError as exc: + raw = 
exc.read().decode(errors="replace") + raise SystemExit(f"HTTP {exc.code} from {url}: {raw}") + + +def _get_json(url: str, headers: dict[str, str] | None = None, timeout_s: int = 30) -> object: + req = urllib.request.Request(url, headers=headers or {}, method="GET") + try: + with urllib.request.urlopen(req, timeout=timeout_s) as resp: + raw = resp.read().decode() + return json.loads(raw) if raw else None + except urllib.error.HTTPError as exc: + raw = exc.read().decode(errors="replace") + raise SystemExit(f"HTTP {exc.code} from {url}: {raw}") + + +def _keycloak_admin_token(keycloak_base: str, realm: str, client_id: str, client_secret: str) -> str: + token_url = f"{keycloak_base.rstrip('/')}/realms/{realm}/protocol/openid-connect/token" + payload = _post_form( + token_url, + { + "grant_type": "client_credentials", + "client_id": client_id, + "client_secret": client_secret, + }, + timeout_s=20, + ) + token = payload.get("access_token") + if not isinstance(token, str) or not token: + raise SystemExit("keycloak admin token response missing access_token") + return token + + +def _keycloak_find_user(keycloak_base: str, realm: str, token: str, username: str) -> dict | None: + url = f"{keycloak_base.rstrip('/')}/admin/realms/{realm}/users?{urllib.parse.urlencode({'username': username, 'exact': 'true', 'max': '1'})}" + users = _get_json(url, headers={"Authorization": f"Bearer {token}"}, timeout_s=20) + if not isinstance(users, list) or not users: + return None + user = users[0] + return user if isinstance(user, dict) else None + + +def _keycloak_get_user(keycloak_base: str, realm: str, token: str, user_id: str) -> dict: + url = f"{keycloak_base.rstrip('/')}/admin/realms/{realm}/users/{urllib.parse.quote(user_id, safe='')}" + data = _get_json(url, headers={"Authorization": f"Bearer {token}"}, timeout_s=20) + if not isinstance(data, dict): + raise SystemExit("unexpected keycloak user payload") + return data + + +def main() -> int: + portal_base = 
_env("PORTAL_BASE_URL").rstrip("/") + db_url = _env("PORTAL_DATABASE_URL") + + keycloak_base = _env("KEYCLOAK_ADMIN_URL").rstrip("/") + realm = _env("KEYCLOAK_REALM", "atlas") + kc_admin_client_id = _env("KEYCLOAK_ADMIN_CLIENT_ID") + kc_admin_client_secret = _env("KEYCLOAK_ADMIN_CLIENT_SECRET") + + username_prefix = os.environ.get("E2E_USERNAME_PREFIX", "e2e-user") + now = int(time.time()) + username = f"{username_prefix}-{now}" + email = f"{username}@example.invalid" + + submit = _post_json( + f"{portal_base}/api/access/request", + {"username": username, "email": email, "note": "portal onboarding e2e"}, + timeout_s=20, + ) + + request_code = submit.get("request_code") + if not isinstance(request_code, str) or not request_code: + raise SystemExit(f"request submit did not return request_code: {submit}") + + with psycopg.connect(db_url, autocommit=True) as conn: + # Bypass the emailed token by marking the request as verified and pending (same as /api/access/request/verify). + conn.execute( + """ + UPDATE access_requests + SET status = 'pending', + email_verified_at = NOW(), + email_verification_token_hash = NULL + WHERE request_code = %s AND status = 'pending_email_verification' + """, + (request_code,), + ) + # Simulate admin approval. 
+ conn.execute( + """ + UPDATE access_requests + SET status = 'accounts_building', + decided_at = NOW(), + decided_by = 'portal-e2e' + WHERE request_code = %s AND status = 'pending' + """, + (request_code,), + ) + + status_url = f"{portal_base}/api/access/request/status" + deadline_s = int(os.environ.get("E2E_DEADLINE_SECONDS", "600")) + interval_s = int(os.environ.get("E2E_POLL_SECONDS", "10")) + deadline_at = time.monotonic() + deadline_s + + last_status = None + while True: + status_payload = _post_json(status_url, {"request_code": request_code}, timeout_s=60) + status = status_payload.get("status") + if isinstance(status, str): + last_status = status + + if status in ("awaiting_onboarding", "ready"): + break + if status in ("denied", "unknown"): + raise SystemExit(f"request transitioned to unexpected terminal status: {status_payload}") + if time.monotonic() >= deadline_at: + raise SystemExit(f"timed out waiting for provisioning to finish (last status={last_status})") + time.sleep(interval_s) + + token = _keycloak_admin_token(keycloak_base, realm, kc_admin_client_id, kc_admin_client_secret) + user = _keycloak_find_user(keycloak_base, realm, token, username) + if not user: + raise SystemExit("expected Keycloak user was not created") + user_id = user.get("id") + if not isinstance(user_id, str) or not user_id: + raise SystemExit("created user missing id") + + full = _keycloak_get_user(keycloak_base, realm, token, user_id) + required_actions = full.get("requiredActions") or [] + required: set[str] = set() + if isinstance(required_actions, list): + required = {a for a in required_actions if isinstance(a, str)} + + missing = [name for name in ("UPDATE_PASSWORD", "CONFIGURE_TOTP") if name not in required] + if missing: + raise SystemExit(f"Keycloak user missing required actions {missing}: requiredActions={sorted(required)}") + + print(f"PASS: onboarding provisioning completed for {request_code} ({username})") + return 0 + + +if __name__ == "__main__": + 
sys.exit(main()) + diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index 3f3ebc0..2b710d1 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -14,6 +14,7 @@ resources: - backend-deployment.yaml - backend-service.yaml - vaultwarden-cred-sync-cronjob.yaml + - portal-onboarding-e2e-test-job.yaml - ingress.yaml configMapGenerator: @@ -23,3 +24,9 @@ configMapGenerator: - vaultwarden_cred_sync.py=../../scripts/vaultwarden_cred_sync.py options: disableNameSuffixHash: true + - name: portal-onboarding-e2e-tests + namespace: bstein-dev-home + files: + - test_portal_onboarding_flow.py=../../scripts/tests/test_portal_onboarding_flow.py + options: + disableNameSuffixHash: true diff --git a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml new file mode 100644 index 0000000..14f2519 --- /dev/null +++ b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml @@ -0,0 +1,54 @@ +# services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: portal-onboarding-e2e-test-1 + namespace: bstein-dev-home +spec: + backoffLimit: 0 + template: + spec: + restartPolicy: Never + containers: + - name: test + image: python:3.11-slim + env: + - name: PORTAL_BASE_URL + value: http://bstein-dev-home-backend.bstein-dev-home.svc.cluster.local:8080 + - name: PORTAL_DATABASE_URL + valueFrom: + secretKeyRef: + name: atlas-portal-db + key: PORTAL_DATABASE_URL + - name: KEYCLOAK_ADMIN_URL + value: http://keycloak.sso.svc.cluster.local + - name: KEYCLOAK_REALM + value: atlas + - name: KEYCLOAK_ADMIN_CLIENT_ID + value: bstein-dev-home-admin + - name: KEYCLOAK_ADMIN_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: bstein-dev-home-keycloak-admin + key: client_secret + - name: E2E_USERNAME_PREFIX + value: e2e-portal + - name: E2E_DEADLINE_SECONDS + value: "600" + - name: 
E2E_POLL_SECONDS + value: "10" + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + python -m pip install --no-cache-dir 'psycopg[binary]==3.2.5' + python /scripts/test_portal_onboarding_flow.py + volumeMounts: + - name: tests + mountPath: /scripts + readOnly: true + volumes: + - name: tests + configMap: + name: portal-onboarding-e2e-tests + defaultMode: 0555 -- 2.47.2 From 8cdd5fa1ba5df91deb5623611ba4eb09dcf7922c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 22:11:57 -0300 Subject: [PATCH 373/684] bstein-dev-home: fix onboarding e2e job url --- services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml index 14f2519..5e1f0ce 100644 --- a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml +++ b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: portal-onboarding-e2e-test-1 + name: portal-onboarding-e2e-test-2 namespace: bstein-dev-home spec: backoffLimit: 0 @@ -14,7 +14,7 @@ spec: image: python:3.11-slim env: - name: PORTAL_BASE_URL - value: http://bstein-dev-home-backend.bstein-dev-home.svc.cluster.local:8080 + value: http://bstein-dev-home-backend.bstein-dev-home.svc.cluster.local - name: PORTAL_DATABASE_URL valueFrom: secretKeyRef: -- 2.47.2 From 6bda60676073fe7ad46d14aace3034b315aba2e0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 22:27:33 -0300 Subject: [PATCH 374/684] test: stabilize portal onboarding e2e --- scripts/tests/test_portal_onboarding_flow.py | 34 ++++++++++++++----- .../portal-onboarding-e2e-test-job.yaml | 2 +- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/scripts/tests/test_portal_onboarding_flow.py b/scripts/tests/test_portal_onboarding_flow.py index 52d0d7f..6f94b6e 100644 --- 
a/scripts/tests/test_portal_onboarding_flow.py +++ b/scripts/tests/test_portal_onboarding_flow.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import http.client import json import os import sys @@ -110,11 +111,19 @@ def main() -> int: username = f"{username_prefix}-{now}" email = f"{username}@example.invalid" - submit = _post_json( - f"{portal_base}/api/access/request", - {"username": username, "email": email, "note": "portal onboarding e2e"}, - timeout_s=20, - ) + submit_url = f"{portal_base}/api/access/request" + submit_payload = {"username": username, "email": email, "note": "portal onboarding e2e"} + submit = None + for attempt in range(1, 6): + try: + submit = _post_json(submit_url, submit_payload, timeout_s=20) + break + except (http.client.RemoteDisconnected, TimeoutError, urllib.error.URLError) as exc: + if attempt == 5: + raise SystemExit(f"portal submit failed after {attempt} attempts: {exc}") + time.sleep(2) + if not isinstance(submit, dict): + raise SystemExit("portal submit did not return json") request_code = submit.get("request_code") if not isinstance(request_code, str) or not request_code: @@ -150,8 +159,17 @@ def main() -> int: deadline_at = time.monotonic() + deadline_s last_status = None + last_error = None while True: - status_payload = _post_json(status_url, {"request_code": request_code}, timeout_s=60) + try: + status_payload = _post_json(status_url, {"request_code": request_code}, timeout_s=60) + last_error = None + except (http.client.RemoteDisconnected, TimeoutError, urllib.error.URLError) as exc: + last_error = str(exc) + if time.monotonic() >= deadline_at: + raise SystemExit(f"timed out waiting for provisioning to finish (last error={last_error})") + time.sleep(interval_s) + continue status = status_payload.get("status") if isinstance(status, str): last_status = status @@ -161,7 +179,8 @@ def main() -> int: if status in ("denied", "unknown"): raise SystemExit(f"request transitioned to unexpected terminal status: {status_payload}") if 
time.monotonic() >= deadline_at: - raise SystemExit(f"timed out waiting for provisioning to finish (last status={last_status})") + suffix = f" (last error={last_error})" if last_error else "" + raise SystemExit(f"timed out waiting for provisioning to finish (last status={last_status}){suffix}") time.sleep(interval_s) token = _keycloak_admin_token(keycloak_base, realm, kc_admin_client_id, kc_admin_client_secret) @@ -188,4 +207,3 @@ def main() -> int: if __name__ == "__main__": sys.exit(main()) - diff --git a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml index 5e1f0ce..3b1fcc7 100644 --- a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml +++ b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: portal-onboarding-e2e-test-2 + name: portal-onboarding-e2e-test-3 namespace: bstein-dev-home spec: backoffLimit: 0 -- 2.47.2 From 300873f74352da24454d4c459a9fce1e93d86fdb Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 3 Jan 2026 22:34:39 -0300 Subject: [PATCH 375/684] bstein-dev-home: relax health probe timeouts --- services/bstein-dev-home/backend-deployment.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index be4edb7..531cfb8 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -89,12 +89,14 @@ spec: port: http initialDelaySeconds: 2 periodSeconds: 5 + timeoutSeconds: 3 livenessProbe: httpGet: path: /api/healthz port: http initialDelaySeconds: 10 periodSeconds: 10 + timeoutSeconds: 3 resources: requests: cpu: 50m -- 2.47.2 From 7407d42f98f282e1c8b3b2f80c73a997ca6b11dc Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 03:45:07 +0000 Subject: [PATCH 376/684] chore(bstein-dev-home): automated image update --- 
services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 8d7122a..a976121 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-67 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-68 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From d21f18d92006d7ea6cd7ce1117f7a32626d9d009 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 03:46:18 +0000 Subject: [PATCH 377/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 531cfb8..b54d8dc 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-67 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-68 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index b9bb933..0d2bb40 100644 --- 
a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-67 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-68 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From cadb0daba040b186636279c56cd0a859bd7d8e67 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 Jan 2026 00:41:45 -0300 Subject: [PATCH 378/684] tests: add Keycloak email probe --- .../test_keycloak_execute_actions_email.py | 127 ++++++++++++++++++ scripts/tests/test_portal_onboarding_flow.py | 2 +- .../portal-onboarding-e2e-test-job.yaml | 2 +- services/keycloak/kustomization.yaml | 2 + ...al-e2e-execute-actions-email-test-job.yaml | 52 +++++++ 5 files changed, 183 insertions(+), 2 deletions(-) create mode 100644 scripts/tests/test_keycloak_execute_actions_email.py create mode 100644 services/keycloak/portal-e2e-execute-actions-email-test-job.yaml diff --git a/scripts/tests/test_keycloak_execute_actions_email.py b/scripts/tests/test_keycloak_execute_actions_email.py new file mode 100644 index 0000000..040d7d3 --- /dev/null +++ b/scripts/tests/test_keycloak_execute_actions_email.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 +import json +import os +import sys +import urllib.error +import urllib.parse +import urllib.request + + +def _require_env(name: str) -> str: + value = os.environ.get(name) + if not value: + raise SystemExit(f"missing required env var: {name}") + return value + + +def _post_form(url: str, data: dict[str, str], timeout_s: int = 30) -> dict: + body = urllib.parse.urlencode(data).encode() + req = urllib.request.Request( + url, + data=body, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + method="POST", + ) + try: + 
with urllib.request.urlopen(req, timeout=timeout_s) as resp: + payload = resp.read().decode() + return json.loads(payload) if payload else {} + except urllib.error.HTTPError as exc: + raw = exc.read().decode(errors="replace") + raise SystemExit(f"HTTP {exc.code} from {url}: {raw}") + + +def _request_json(method: str, url: str, token: str, payload: object | None = None, timeout_s: int = 30) -> tuple[int, object | None]: + data = None + headers = {"Authorization": f"Bearer {token}"} + if payload is not None: + data = json.dumps(payload).encode() + headers["Content-Type"] = "application/json" + req = urllib.request.Request(url, data=data, headers=headers, method=method) + try: + with urllib.request.urlopen(req, timeout=timeout_s) as resp: + body = resp.read() + if not body: + return resp.status, None + return resp.status, json.loads(body.decode()) + except urllib.error.HTTPError as exc: + raw = exc.read().decode(errors="replace") + raise SystemExit(f"HTTP {exc.code} from {url}: {raw}") + + +def main() -> int: + keycloak_base = _require_env("KEYCLOAK_SERVER").rstrip("/") + realm = os.environ.get("KEYCLOAK_REALM", "atlas") + client_id = _require_env("PORTAL_E2E_CLIENT_ID") + client_secret = _require_env("PORTAL_E2E_CLIENT_SECRET") + + probe_username = os.environ.get("E2E_PROBE_USERNAME", "e2e-smtp-probe") + probe_email = os.environ.get("E2E_PROBE_EMAIL", "e2e-smtp-probe@bstein.dev") + + execute_client_id = os.environ.get("EXECUTE_ACTIONS_CLIENT_ID", "bstein-dev-home") + execute_redirect_uri = os.environ.get("EXECUTE_ACTIONS_REDIRECT_URI", "https://bstein.dev/") + + token_url = f"{keycloak_base}/realms/{realm}/protocol/openid-connect/token" + admin_users_url = f"{keycloak_base}/admin/realms/{realm}/users" + + token_payload = _post_form( + token_url, + {"grant_type": "client_credentials", "client_id": client_id, "client_secret": client_secret}, + timeout_s=30, + ) + access_token = token_payload.get("access_token") + if not isinstance(access_token, str) or not 
access_token: + raise SystemExit("client credentials token missing access_token") + + status, users = _request_json( + "GET", + f"{admin_users_url}?{urllib.parse.urlencode({'username': probe_username, 'exact': 'true'})}", + access_token, + timeout_s=30, + ) + if status != 200 or not isinstance(users, list): + raise SystemExit("unexpected admin API response when searching for probe user") + + if not users: + create_payload = { + "username": probe_username, + "enabled": False, + "email": probe_email, + "emailVerified": True, + } + status, _ = _request_json("POST", admin_users_url, access_token, create_payload, timeout_s=30) + if status not in (201, 204): + raise SystemExit(f"unexpected status creating probe user: {status}") + status, users = _request_json( + "GET", + f"{admin_users_url}?{urllib.parse.urlencode({'username': probe_username, 'exact': 'true'})}", + access_token, + timeout_s=30, + ) + if status != 200 or not isinstance(users, list) or not users: + raise SystemExit("failed to refetch probe user after creation") + + user_id = users[0].get("id") + if not isinstance(user_id, str) or not user_id: + raise SystemExit("probe user missing id") + + # Trigger an email to validate Keycloak SMTP integration. 
+ query = urllib.parse.urlencode( + { + "client_id": execute_client_id, + "redirect_uri": execute_redirect_uri, + "lifespan": "600", + } + ) + url = f"{admin_users_url}/{urllib.parse.quote(user_id)}/execute-actions-email?{query}" + status, _ = _request_json("PUT", url, access_token, ["UPDATE_PASSWORD"], timeout_s=30) + if status != 204: + raise SystemExit(f"unexpected status from execute-actions-email: {status}") + + print("PASS: Keycloak execute-actions-email succeeded") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) + diff --git a/scripts/tests/test_portal_onboarding_flow.py b/scripts/tests/test_portal_onboarding_flow.py index 6f94b6e..a69c0c0 100644 --- a/scripts/tests/test_portal_onboarding_flow.py +++ b/scripts/tests/test_portal_onboarding_flow.py @@ -197,7 +197,7 @@ def main() -> int: if isinstance(required_actions, list): required = {a for a in required_actions if isinstance(a, str)} - missing = [name for name in ("UPDATE_PASSWORD", "CONFIGURE_TOTP") if name not in required] + missing = [name for name in ("UPDATE_PASSWORD", "VERIFY_EMAIL", "CONFIGURE_TOTP") if name not in required] if missing: raise SystemExit(f"Keycloak user missing required actions {missing}: requiredActions={sorted(required)}") diff --git a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml index 3b1fcc7..0b05da4 100644 --- a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml +++ b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: portal-onboarding-e2e-test-3 + name: portal-onboarding-e2e-test-4 namespace: bstein-dev-home spec: backoffLimit: 0 diff --git a/services/keycloak/kustomization.yaml b/services/keycloak/kustomization.yaml index 8f5b0a5..b9266b8 100644 --- a/services/keycloak/kustomization.yaml +++ b/services/keycloak/kustomization.yaml @@ -11,6 +11,7 @@ resources: - portal-e2e-target-client-job.yaml - 
portal-e2e-token-exchange-permissions-job.yaml - portal-e2e-token-exchange-test-job.yaml + - portal-e2e-execute-actions-email-test-job.yaml - ldap-federation-job.yaml - user-overrides-job.yaml - service.yaml @@ -21,3 +22,4 @@ configMapGenerator: - name: portal-e2e-tests files: - test_portal_token_exchange.py=../../scripts/tests/test_portal_token_exchange.py + - test_keycloak_execute_actions_email.py=../../scripts/tests/test_keycloak_execute_actions_email.py diff --git a/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml b/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml new file mode 100644 index 0000000..3775984 --- /dev/null +++ b/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml @@ -0,0 +1,52 @@ +# services/keycloak/portal-e2e-execute-actions-email-test-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: keycloak-portal-e2e-execute-actions-email-1 + namespace: sso +spec: + backoffLimit: 3 + template: + spec: + restartPolicy: Never + containers: + - name: test + image: python:3.11-alpine + env: + - name: KEYCLOAK_SERVER + value: http://keycloak.sso.svc.cluster.local + - name: KEYCLOAK_REALM + value: atlas + - name: PORTAL_E2E_CLIENT_ID + valueFrom: + secretKeyRef: + name: portal-e2e-client + key: client_id + - name: PORTAL_E2E_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: portal-e2e-client + key: client_secret + - name: E2E_PROBE_USERNAME + value: e2e-smtp-probe + - name: E2E_PROBE_EMAIL + value: e2e-smtp-probe@bstein.dev + - name: EXECUTE_ACTIONS_CLIENT_ID + value: bstein-dev-home + - name: EXECUTE_ACTIONS_REDIRECT_URI + value: https://bstein.dev/ + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + python /scripts/test_keycloak_execute_actions_email.py + volumeMounts: + - name: tests + mountPath: /scripts + readOnly: true + volumes: + - name: tests + configMap: + name: portal-e2e-tests + defaultMode: 0555 + -- 2.47.2 From eb11eaff4e025886bfe2c1416d27d58880faeff9 Mon Sep 17 00:00:00 2001 
From: Brad Stein Date: Sun, 4 Jan 2026 00:58:02 -0300 Subject: [PATCH 379/684] keycloak: allow e2e client execute-actions-email --- .../test_keycloak_execute_actions_email.py | 94 ++++++++++++------- services/keycloak/portal-e2e-client-job.yaml | 4 +- ...al-e2e-execute-actions-email-test-job.yaml | 3 +- 3 files changed, 65 insertions(+), 36 deletions(-) diff --git a/scripts/tests/test_keycloak_execute_actions_email.py b/scripts/tests/test_keycloak_execute_actions_email.py index 040d7d3..14aa320 100644 --- a/scripts/tests/test_keycloak_execute_actions_email.py +++ b/scripts/tests/test_keycloak_execute_actions_email.py @@ -2,6 +2,7 @@ import json import os import sys +import time import urllib.error import urllib.parse import urllib.request @@ -46,7 +47,10 @@ def _request_json(method: str, url: str, token: str, payload: object | None = No return resp.status, json.loads(body.decode()) except urllib.error.HTTPError as exc: raw = exc.read().decode(errors="replace") - raise SystemExit(f"HTTP {exc.code} from {url}: {raw}") + try: + return exc.code, json.loads(raw) if raw else None + except json.JSONDecodeError: + return exc.code, raw def main() -> int: @@ -64,23 +68,34 @@ def main() -> int: token_url = f"{keycloak_base}/realms/{realm}/protocol/openid-connect/token" admin_users_url = f"{keycloak_base}/admin/realms/{realm}/users" - token_payload = _post_form( - token_url, - {"grant_type": "client_credentials", "client_id": client_id, "client_secret": client_secret}, - timeout_s=30, - ) - access_token = token_payload.get("access_token") - if not isinstance(access_token, str) or not access_token: - raise SystemExit("client credentials token missing access_token") + def get_access_token() -> str: + token_payload = _post_form( + token_url, + {"grant_type": "client_credentials", "client_id": client_id, "client_secret": client_secret}, + timeout_s=30, + ) + access_token = token_payload.get("access_token") + if not isinstance(access_token, str) or not access_token: + raise 
SystemExit("client credentials token missing access_token") + return access_token - status, users = _request_json( - "GET", - f"{admin_users_url}?{urllib.parse.urlencode({'username': probe_username, 'exact': 'true'})}", - access_token, - timeout_s=30, - ) - if status != 200 or not isinstance(users, list): - raise SystemExit("unexpected admin API response when searching for probe user") + access_token = get_access_token() + + users: list | None = None + search_url = f"{admin_users_url}?{urllib.parse.urlencode({'username': probe_username, 'exact': 'true'})}" + for attempt in range(1, 11): + status, body = _request_json("GET", search_url, access_token, timeout_s=30) + if status == 200 and isinstance(body, list): + users = body + break + if status == 403 and attempt < 10: + time.sleep(3) + access_token = get_access_token() + continue + raise SystemExit(f"unexpected admin API response when searching for probe user (status={status} body={body})") + + if users is None: + raise SystemExit("probe user search did not return a list response") if not users: create_payload = { @@ -89,17 +104,27 @@ def main() -> int: "email": probe_email, "emailVerified": True, } - status, _ = _request_json("POST", admin_users_url, access_token, create_payload, timeout_s=30) - if status not in (201, 204): - raise SystemExit(f"unexpected status creating probe user: {status}") - status, users = _request_json( - "GET", - f"{admin_users_url}?{urllib.parse.urlencode({'username': probe_username, 'exact': 'true'})}", - access_token, - timeout_s=30, - ) - if status != 200 or not isinstance(users, list) or not users: - raise SystemExit("failed to refetch probe user after creation") + for attempt in range(1, 6): + status, body = _request_json("POST", admin_users_url, access_token, create_payload, timeout_s=30) + if status in (201, 204): + break + if status == 403 and attempt < 5: + time.sleep(3) + access_token = get_access_token() + continue + raise SystemExit(f"unexpected status creating probe user: 
{status} body={body}") + + # Refetch. + for attempt in range(1, 11): + status, body = _request_json("GET", search_url, access_token, timeout_s=30) + if status == 200 and isinstance(body, list) and body: + users = body + break + if status == 403 and attempt < 10: + time.sleep(3) + access_token = get_access_token() + continue + raise SystemExit(f"failed to refetch probe user after creation (status={status} body={body})") user_id = users[0].get("id") if not isinstance(user_id, str) or not user_id: @@ -114,9 +139,15 @@ def main() -> int: } ) url = f"{admin_users_url}/{urllib.parse.quote(user_id)}/execute-actions-email?{query}" - status, _ = _request_json("PUT", url, access_token, ["UPDATE_PASSWORD"], timeout_s=30) - if status != 204: - raise SystemExit(f"unexpected status from execute-actions-email: {status}") + for attempt in range(1, 6): + status, body = _request_json("PUT", url, access_token, ["UPDATE_PASSWORD"], timeout_s=30) + if status == 204: + break + if status == 403 and attempt < 5: + time.sleep(3) + access_token = get_access_token() + continue + raise SystemExit(f"unexpected status from execute-actions-email: {status} body={body}") print("PASS: Keycloak execute-actions-email succeeded") return 0 @@ -124,4 +155,3 @@ def main() -> int: if __name__ == "__main__": sys.exit(main()) - diff --git a/services/keycloak/portal-e2e-client-job.yaml b/services/keycloak/portal-e2e-client-job.yaml index 2a22edf..7f6c5dd 100644 --- a/services/keycloak/portal-e2e-client-job.yaml +++ b/services/keycloak/portal-e2e-client-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-portal-e2e-client-1 + name: keycloak-portal-e2e-client-2 namespace: sso spec: backoffLimit: 0 @@ -211,7 +211,7 @@ spec: if not rm_uuid: raise SystemExit("realm-management client has no id") - wanted_roles = ("query-users", "view-users", "impersonation") + wanted_roles = ("query-users", "view-users", "manage-users", "impersonation") role_reps = [] for role_name in wanted_roles: 
status, role = http_json( diff --git a/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml b/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml index 3775984..931760c 100644 --- a/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml +++ b/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-portal-e2e-execute-actions-email-1 + name: keycloak-portal-e2e-execute-actions-email-2 namespace: sso spec: backoffLimit: 3 @@ -49,4 +49,3 @@ spec: configMap: name: portal-e2e-tests defaultMode: 0555 - -- 2.47.2 From 7cbbb7e193c94133eb97e36aa491424512c0fb07 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 Jan 2026 00:59:24 -0300 Subject: [PATCH 380/684] test: fix keycloak execute-actions-email probe --- scripts/tests/test_keycloak_execute_actions_email.py | 10 +++++++++- .../portal-e2e-execute-actions-email-test-job.yaml | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/scripts/tests/test_keycloak_execute_actions_email.py b/scripts/tests/test_keycloak_execute_actions_email.py index 14aa320..2828f85 100644 --- a/scripts/tests/test_keycloak_execute_actions_email.py +++ b/scripts/tests/test_keycloak_execute_actions_email.py @@ -100,7 +100,7 @@ def main() -> int: if not users: create_payload = { "username": probe_username, - "enabled": False, + "enabled": True, "email": probe_email, "emailVerified": True, } @@ -130,6 +130,14 @@ def main() -> int: if not isinstance(user_id, str) or not user_id: raise SystemExit("probe user missing id") + # execute-actions-email requires the user to be enabled. 
+ status, user = _request_json("GET", f"{admin_users_url}/{urllib.parse.quote(user_id)}", access_token, timeout_s=30) + if status == 200 and isinstance(user, dict) and user.get("enabled") is False: + user["enabled"] = True + status, body = _request_json("PUT", f"{admin_users_url}/{urllib.parse.quote(user_id)}", access_token, user, timeout_s=30) + if status not in (200, 204): + raise SystemExit(f"unexpected status enabling probe user: {status} body={body}") + # Trigger an email to validate Keycloak SMTP integration. query = urllib.parse.urlencode( { diff --git a/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml b/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml index 931760c..dec3b87 100644 --- a/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml +++ b/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-portal-e2e-execute-actions-email-2 + name: keycloak-portal-e2e-execute-actions-email-3 namespace: sso spec: backoffLimit: 3 -- 2.47.2 From 38b4935e1de51a069bab44648eef23014772e763 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 Jan 2026 01:06:05 -0300 Subject: [PATCH 381/684] test: send execute-actions-email to existing mailbox --- .../keycloak/portal-e2e-execute-actions-email-test-job.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml b/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml index dec3b87..565aac8 100644 --- a/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml +++ b/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-portal-e2e-execute-actions-email-3 + name: keycloak-portal-e2e-execute-actions-email-4 namespace: sso spec: backoffLimit: 3 @@ -30,7 +30,7 @@ spec: - name: E2E_PROBE_USERNAME value: e2e-smtp-probe - name: 
E2E_PROBE_EMAIL - value: e2e-smtp-probe@bstein.dev + value: robot@bstein.dev - name: EXECUTE_ACTIONS_CLIENT_ID value: bstein-dev-home - name: EXECUTE_ACTIONS_REDIRECT_URI -- 2.47.2 From a7f68ddddb3fda52e5ab609c06f9460bdc77edc5 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 Jan 2026 01:08:17 -0300 Subject: [PATCH 382/684] test: ensure smtp probe user has email --- .../test_keycloak_execute_actions_email.py | 45 ++++++++++++++++--- ...al-e2e-execute-actions-email-test-job.yaml | 2 +- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/scripts/tests/test_keycloak_execute_actions_email.py b/scripts/tests/test_keycloak_execute_actions_email.py index 2828f85..7d89a2e 100644 --- a/scripts/tests/test_keycloak_execute_actions_email.py +++ b/scripts/tests/test_keycloak_execute_actions_email.py @@ -130,13 +130,46 @@ def main() -> int: if not isinstance(user_id, str) or not user_id: raise SystemExit("probe user missing id") - # execute-actions-email requires the user to be enabled. - status, user = _request_json("GET", f"{admin_users_url}/{urllib.parse.quote(user_id)}", access_token, timeout_s=30) - if status == 200 and isinstance(user, dict) and user.get("enabled") is False: + # execute-actions-email requires the user to be enabled and have an email configured. 
+ user_url = f"{admin_users_url}/{urllib.parse.quote(user_id)}" + user: dict | None = None + for attempt in range(1, 6): + status, body = _request_json("GET", user_url, access_token, timeout_s=30) + if status == 200 and isinstance(body, dict): + user = body + break + if status == 403 and attempt < 5: + time.sleep(3) + access_token = get_access_token() + continue + raise SystemExit(f"unexpected status fetching probe user: {status} body={body}") + + if user is None: + raise SystemExit("probe user fetch did not return a user object") + + needs_update = False + if user.get("enabled") is False: user["enabled"] = True - status, body = _request_json("PUT", f"{admin_users_url}/{urllib.parse.quote(user_id)}", access_token, user, timeout_s=30) - if status not in (200, 204): - raise SystemExit(f"unexpected status enabling probe user: {status} body={body}") + needs_update = True + + if user.get("email") != probe_email: + user["email"] = probe_email + needs_update = True + + if user.get("emailVerified") is not True: + user["emailVerified"] = True + needs_update = True + + if needs_update: + for attempt in range(1, 6): + status, body = _request_json("PUT", user_url, access_token, user, timeout_s=30) + if status in (200, 204): + break + if status == 403 and attempt < 5: + time.sleep(3) + access_token = get_access_token() + continue + raise SystemExit(f"unexpected status updating probe user: {status} body={body}") # Trigger an email to validate Keycloak SMTP integration. 
query = urllib.parse.urlencode( diff --git a/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml b/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml index 565aac8..877dd55 100644 --- a/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml +++ b/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-portal-e2e-execute-actions-email-4 + name: keycloak-portal-e2e-execute-actions-email-5 namespace: sso spec: backoffLimit: 3 -- 2.47.2 From 04b730dbab6281de1fcda62e892bb8c149c0e46d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 Jan 2026 01:48:46 -0300 Subject: [PATCH 383/684] tests(portal): verify access requests via email --- scripts/tests/test_portal_onboarding_flow.py | 144 ++++++++++++++++-- .../bstein-dev-home/backend-deployment.yaml | 2 + .../portal-onboarding-e2e-test-job.yaml | 6 +- 3 files changed, 137 insertions(+), 15 deletions(-) diff --git a/scripts/tests/test_portal_onboarding_flow.py b/scripts/tests/test_portal_onboarding_flow.py index a69c0c0..739f5b9 100644 --- a/scripts/tests/test_portal_onboarding_flow.py +++ b/scripts/tests/test_portal_onboarding_flow.py @@ -1,7 +1,11 @@ #!/usr/bin/env python3 +import email import http.client +import imaplib import json import os +import re +import ssl import sys import time import urllib.error @@ -97,6 +101,89 @@ def _keycloak_get_user(keycloak_base: str, realm: str, token: str, user_id: str) return data +def _extract_attr(attributes: object, key: str) -> str: + if not isinstance(attributes, dict): + return "" + value = attributes.get(key) + if isinstance(value, list) and value and isinstance(value[0], str): + return value[0] + if isinstance(value, str): + return value + return "" + + +def _imap_wait_for_verify_token( + *, + host: str, + port: int, + username: str, + password: str, + request_code: str, + deadline_sec: int, +) -> str: + ssl_context = ssl._create_unverified_context() + 
deadline_at = time.monotonic() + deadline_sec + + with imaplib.IMAP4_SSL(host, port, ssl_context=ssl_context) as client: + client.login(username, password) + client.select("INBOX") + + while time.monotonic() < deadline_at: + status, data = client.search(None, "TEXT", request_code) + if status == "OK" and data and data[0]: + ids = data[0].split() + msg_id = ids[-1] + fetch_status, msg_data = client.fetch(msg_id, "(RFC822)") + if fetch_status != "OK" or not msg_data: + time.sleep(2) + continue + + raw = msg_data[0][1] if isinstance(msg_data[0], tuple) and len(msg_data[0]) > 1 else None + if not isinstance(raw, (bytes, bytearray)): + time.sleep(2) + continue + + message = email.message_from_bytes(raw) + body = None + if message.is_multipart(): + for part in message.walk(): + if part.get_content_type() == "text/plain": + payload = part.get_payload(decode=True) + if isinstance(payload, (bytes, bytearray)): + body = payload.decode(errors="replace") + break + else: + payload = message.get_payload(decode=True) + if isinstance(payload, (bytes, bytearray)): + body = payload.decode(errors="replace") + + if not body: + time.sleep(2) + continue + + url = None + for line in body.splitlines(): + candidate = line.strip() + if "verify=" in candidate and candidate.startswith("http"): + url = candidate + break + if not url: + match = re.search(r"https?://\\S+verify=\\S+", body) + url = match.group(0) if match else None + if not url: + time.sleep(2) + continue + + parsed = urllib.parse.urlparse(url) + query = urllib.parse.parse_qs(parsed.query) + token = query.get("verify", [""])[0] + if isinstance(token, str) and token: + return token + time.sleep(2) + + raise SystemExit("verification email not found before deadline") + + def main() -> int: portal_base = _env("PORTAL_BASE_URL").rstrip("/") db_url = _env("PORTAL_DATABASE_URL") @@ -106,13 +193,38 @@ def main() -> int: kc_admin_client_id = _env("KEYCLOAK_ADMIN_CLIENT_ID") kc_admin_client_secret = _env("KEYCLOAK_ADMIN_CLIENT_SECRET") + 
contact_email = os.environ.get("E2E_CONTACT_EMAIL", "robotuser@bstein.dev").strip() + if not contact_email: + raise SystemExit("E2E_CONTACT_EMAIL must not be empty") + + imap_host = os.environ.get("E2E_IMAP_HOST", "mailu-front.mailu-mailserver.svc.cluster.local").strip() + imap_port = int(os.environ.get("E2E_IMAP_PORT", "993")) + imap_keycloak_username = os.environ.get("E2E_IMAP_KEYCLOAK_USERNAME", "robotuser").strip() + imap_wait_sec = int(os.environ.get("E2E_IMAP_WAIT_SECONDS", "90")) + + token = _keycloak_admin_token(keycloak_base, realm, kc_admin_client_id, kc_admin_client_secret) + mailbox_user = _keycloak_find_user(keycloak_base, realm, token, imap_keycloak_username) + if not mailbox_user: + raise SystemExit(f"unable to locate Keycloak mailbox user {imap_keycloak_username!r}") + mailbox_user_id = mailbox_user.get("id") + if not isinstance(mailbox_user_id, str) or not mailbox_user_id: + raise SystemExit("mailbox user missing id") + + mailbox_full = _keycloak_get_user(keycloak_base, realm, token, mailbox_user_id) + mailbox_attrs = mailbox_full.get("attributes") + mailu_email = _extract_attr(mailbox_attrs, "mailu_email") + if not mailu_email: + mailu_email = contact_email + mailu_password = _extract_attr(mailbox_attrs, "mailu_app_password") + if not mailu_password: + raise SystemExit(f"Keycloak user {imap_keycloak_username!r} missing mailu_app_password attribute") + username_prefix = os.environ.get("E2E_USERNAME_PREFIX", "e2e-user") now = int(time.time()) username = f"{username_prefix}-{now}" - email = f"{username}@example.invalid" submit_url = f"{portal_base}/api/access/request" - submit_payload = {"username": username, "email": email, "note": "portal onboarding e2e"} + submit_payload = {"username": username, "email": contact_email, "note": "portal onboarding e2e"} submit = None for attempt in range(1, 6): try: @@ -129,18 +241,23 @@ def main() -> int: if not isinstance(request_code, str) or not request_code: raise SystemExit(f"request submit did not return 
request_code: {submit}") + verify_token = _imap_wait_for_verify_token( + host=imap_host, + port=imap_port, + username=mailu_email, + password=mailu_password, + request_code=request_code, + deadline_sec=imap_wait_sec, + ) + verify_resp = _post_json( + f"{portal_base}/api/access/request/verify", + {"request_code": request_code, "token": verify_token}, + timeout_s=30, + ) + if not isinstance(verify_resp, dict) or verify_resp.get("ok") is not True: + raise SystemExit(f"unexpected verify response: {verify_resp}") + with psycopg.connect(db_url, autocommit=True) as conn: - # Bypass the emailed token by marking the request as verified and pending (same as /api/access/request/verify). - conn.execute( - """ - UPDATE access_requests - SET status = 'pending', - email_verified_at = NOW(), - email_verification_token_hash = NULL - WHERE request_code = %s AND status = 'pending_email_verification' - """, - (request_code,), - ) # Simulate admin approval. conn.execute( """ @@ -183,7 +300,6 @@ def main() -> int: raise SystemExit(f"timed out waiting for provisioning to finish (last status={last_status}){suffix}") time.sleep(interval_s) - token = _keycloak_admin_token(keycloak_base, realm, kc_admin_client_id, kc_admin_client_secret) user = _keycloak_find_user(keycloak_base, realm, token, username) if not user: raise SystemExit("expected Keycloak user was not created") diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index b54d8dc..b927e3a 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -80,6 +80,8 @@ spec: value: "120" - name: ACCESS_REQUEST_STATUS_RATE_WINDOW_SEC value: "60" + - name: ACCESS_REQUEST_INTERNAL_EMAIL_ALLOWLIST + value: robotuser@bstein.dev ports: - name: http containerPort: 8080 diff --git a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml index 0b05da4..4fc5b31 
100644 --- a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml +++ b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: portal-onboarding-e2e-test-4 + name: portal-onboarding-e2e-test-5 namespace: bstein-dev-home spec: backoffLimit: 0 @@ -33,6 +33,10 @@ spec: key: client_secret - name: E2E_USERNAME_PREFIX value: e2e-portal + - name: E2E_CONTACT_EMAIL + value: robotuser@bstein.dev + - name: E2E_IMAP_KEYCLOAK_USERNAME + value: robotuser - name: E2E_DEADLINE_SECONDS value: "600" - name: E2E_POLL_SECONDS -- 2.47.2 From d7c44e65a66ad4824972f610e3d15fae5f6e8abe Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 04:53:11 +0000 Subject: [PATCH 384/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index a976121..24f275a 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-68 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-69 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From d2fa996b8ab029c76dd62620a80ce92ed13c4ad5 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 04:55:22 +0000 Subject: [PATCH 385/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml 
b/services/bstein-dev-home/backend-deployment.yaml index b927e3a..d703286 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-68 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-69 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always env: - name: AI_CHAT_API diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 0d2bb40..a2749bf 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-68 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-69 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From d0e088e50ae9725fce41b7cbd10a82eb3326ad2f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 Jan 2026 01:57:53 -0300 Subject: [PATCH 386/684] tests(portal): rerun onboarding e2e job --- services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml index 4fc5b31..d727355 100644 --- a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml +++ b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: portal-onboarding-e2e-test-5 + name: 
portal-onboarding-e2e-test-6 namespace: bstein-dev-home spec: backoffLimit: 0 -- 2.47.2 From 4b77f909af647d855b555f1538440af684579e2d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 Jan 2026 02:09:36 -0300 Subject: [PATCH 387/684] tests(portal): refresh keycloak token during e2e --- scripts/tests/test_portal_onboarding_flow.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/tests/test_portal_onboarding_flow.py b/scripts/tests/test_portal_onboarding_flow.py index 739f5b9..9fe934c 100644 --- a/scripts/tests/test_portal_onboarding_flow.py +++ b/scripts/tests/test_portal_onboarding_flow.py @@ -300,6 +300,9 @@ def main() -> int: raise SystemExit(f"timed out waiting for provisioning to finish (last status={last_status}){suffix}") time.sleep(interval_s) + # Refresh admin token (it may expire during the provisioning wait). + token = _keycloak_admin_token(keycloak_base, realm, kc_admin_client_id, kc_admin_client_secret) + user = _keycloak_find_user(keycloak_base, realm, token, username) if not user: raise SystemExit("expected Keycloak user was not created") -- 2.47.2 From bbb15a6532ca9ba588040270da3d9de5fa604795 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 Jan 2026 02:09:59 -0300 Subject: [PATCH 388/684] tests(portal): rerun onboarding e2e job (7) --- services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml index d727355..15c0b01 100644 --- a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml +++ b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: portal-onboarding-e2e-test-6 + name: portal-onboarding-e2e-test-7 namespace: bstein-dev-home spec: backoffLimit: 0 -- 2.47.2 From 4a841a1660f080cc58381e631140720e057b1bfe Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 
Jan 2026 02:25:40 -0300 Subject: [PATCH 389/684] fix(bstein-dev-home): harden backend gunicorn --- .../bstein-dev-home/backend-deployment.yaml | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index d703286..34c5d17 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -26,6 +26,15 @@ spec: - name: backend image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-69 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always + command: ["gunicorn"] + args: + - -b + - 0.0.0.0:8080 + - --workers + - "2" + - --timeout + - "180" + - app:app env: - name: AI_CHAT_API value: http://ollama.ai.svc.cluster.local:11434 @@ -101,8 +110,8 @@ spec: timeoutSeconds: 3 resources: requests: - cpu: 50m - memory: 64Mi + cpu: 100m + memory: 128Mi limits: - cpu: 300m - memory: 256Mi + cpu: 500m + memory: 512Mi -- 2.47.2 From 0b96894e7a05dd8993624c9d51c2d2ee8c533dfa Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 Jan 2026 02:26:42 -0300 Subject: [PATCH 390/684] tests(portal): rerun onboarding e2e job (8) --- services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml index 15c0b01..3227ded 100644 --- a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml +++ b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: portal-onboarding-e2e-test-7 + name: portal-onboarding-e2e-test-8 namespace: bstein-dev-home spec: backoffLimit: 0 -- 2.47.2 From c298946ce0d36762296fd06860a277fcc16eedbb Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 Jan 2026 02:58:44 -0300 Subject: [PATCH 391/684] 
test(portal): approve requests via admin API --- scripts/tests/test_portal_onboarding_flow.py | 98 +++++++++++++++---- .../portal-onboarding-e2e-test-job.yaml | 22 +++-- 2 files changed, 95 insertions(+), 25 deletions(-) diff --git a/scripts/tests/test_portal_onboarding_flow.py b/scripts/tests/test_portal_onboarding_flow.py index 9fe934c..504cfd1 100644 --- a/scripts/tests/test_portal_onboarding_flow.py +++ b/scripts/tests/test_portal_onboarding_flow.py @@ -12,8 +12,6 @@ import urllib.error import urllib.parse import urllib.request -import psycopg - def _env(name: str, default: str | None = None) -> str: value = os.environ.get(name, default) @@ -67,7 +65,29 @@ def _get_json(url: str, headers: dict[str, str] | None = None, timeout_s: int = raise SystemExit(f"HTTP {exc.code} from {url}: {raw}") -def _keycloak_admin_token(keycloak_base: str, realm: str, client_id: str, client_secret: str) -> str: +def _request_json( + method: str, + url: str, + token: str, + payload: dict | None = None, + timeout_s: int = 30, +) -> dict: + data = None + headers = {"Authorization": f"Bearer {token}"} + if payload is not None: + data = json.dumps(payload).encode() + headers["Content-Type"] = "application/json" + req = urllib.request.Request(url, data=data, headers=headers, method=method) + try: + with urllib.request.urlopen(req, timeout=timeout_s) as resp: + raw = resp.read().decode() + return json.loads(raw) if raw else {} + except urllib.error.HTTPError as exc: + raw = exc.read().decode(errors="replace") + raise SystemExit(f"HTTP {exc.code} from {url}: {raw}") + + +def _keycloak_client_token(keycloak_base: str, realm: str, client_id: str, client_secret: str) -> str: token_url = f"{keycloak_base.rstrip('/')}/realms/{realm}/protocol/openid-connect/token" payload = _post_form( token_url, @@ -84,6 +104,35 @@ def _keycloak_admin_token(keycloak_base: str, realm: str, client_id: str, client return token +def _keycloak_token_exchange( + *, + keycloak_base: str, + realm: str, + client_id: str, 
+ client_secret: str, + subject_token: str, + requested_subject: str, + audience: str, +) -> str: + token_url = f"{keycloak_base.rstrip('/')}/realms/{realm}/protocol/openid-connect/token" + payload = _post_form( + token_url, + { + "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange", + "client_id": client_id, + "client_secret": client_secret, + "subject_token": subject_token, + "requested_subject": requested_subject, + "audience": audience, + }, + timeout_s=20, + ) + token = payload.get("access_token") + if not isinstance(token, str) or not token: + raise SystemExit("keycloak token exchange response missing access_token") + return token + + def _keycloak_find_user(keycloak_base: str, realm: str, token: str, username: str) -> dict | None: url = f"{keycloak_base.rstrip('/')}/admin/realms/{realm}/users?{urllib.parse.urlencode({'username': username, 'exact': 'true', 'max': '1'})}" users = _get_json(url, headers={"Authorization": f"Bearer {token}"}, timeout_s=20) @@ -186,12 +235,15 @@ def _imap_wait_for_verify_token( def main() -> int: portal_base = _env("PORTAL_BASE_URL").rstrip("/") - db_url = _env("PORTAL_DATABASE_URL") keycloak_base = _env("KEYCLOAK_ADMIN_URL").rstrip("/") realm = _env("KEYCLOAK_REALM", "atlas") kc_admin_client_id = _env("KEYCLOAK_ADMIN_CLIENT_ID") kc_admin_client_secret = _env("KEYCLOAK_ADMIN_CLIENT_SECRET") + portal_e2e_client_id = _env("PORTAL_E2E_CLIENT_ID") + portal_e2e_client_secret = _env("PORTAL_E2E_CLIENT_SECRET") + portal_target_client_id = os.environ.get("PORTAL_TARGET_CLIENT_ID", "bstein-dev-home").strip() or "bstein-dev-home" + portal_admin_username = os.environ.get("E2E_PORTAL_ADMIN_USERNAME", "bstein").strip() or "bstein" contact_email = os.environ.get("E2E_CONTACT_EMAIL", "robotuser@bstein.dev").strip() if not contact_email: @@ -202,7 +254,7 @@ def main() -> int: imap_keycloak_username = os.environ.get("E2E_IMAP_KEYCLOAK_USERNAME", "robotuser").strip() imap_wait_sec = int(os.environ.get("E2E_IMAP_WAIT_SECONDS", "90")) - 
token = _keycloak_admin_token(keycloak_base, realm, kc_admin_client_id, kc_admin_client_secret) + token = _keycloak_client_token(keycloak_base, realm, kc_admin_client_id, kc_admin_client_secret) mailbox_user = _keycloak_find_user(keycloak_base, realm, token, imap_keycloak_username) if not mailbox_user: raise SystemExit(f"unable to locate Keycloak mailbox user {imap_keycloak_username!r}") @@ -257,18 +309,28 @@ def main() -> int: if not isinstance(verify_resp, dict) or verify_resp.get("ok") is not True: raise SystemExit(f"unexpected verify response: {verify_resp}") - with psycopg.connect(db_url, autocommit=True) as conn: - # Simulate admin approval. - conn.execute( - """ - UPDATE access_requests - SET status = 'accounts_building', - decided_at = NOW(), - decided_by = 'portal-e2e' - WHERE request_code = %s AND status = 'pending' - """, - (request_code,), - ) + portal_admin = _keycloak_find_user(keycloak_base, realm, token, portal_admin_username) + if not portal_admin: + raise SystemExit(f"unable to locate portal admin user {portal_admin_username!r} via Keycloak admin API") + portal_admin_user_id = portal_admin.get("id") + if not isinstance(portal_admin_user_id, str) or not portal_admin_user_id: + raise SystemExit("portal admin user missing id") + + e2e_subject_token = _keycloak_client_token(keycloak_base, realm, portal_e2e_client_id, portal_e2e_client_secret) + portal_bearer = _keycloak_token_exchange( + keycloak_base=keycloak_base, + realm=realm, + client_id=portal_e2e_client_id, + client_secret=portal_e2e_client_secret, + subject_token=e2e_subject_token, + requested_subject=portal_admin_user_id, + audience=portal_target_client_id, + ) + + approve_url = f"{portal_base}/api/admin/access/requests/{urllib.parse.quote(username, safe='')}/approve" + approve_resp = _request_json("POST", approve_url, portal_bearer, payload=None, timeout_s=60) + if not isinstance(approve_resp, dict) or approve_resp.get("ok") is not True: + raise SystemExit(f"unexpected approval response: 
{approve_resp}") status_url = f"{portal_base}/api/access/request/status" deadline_s = int(os.environ.get("E2E_DEADLINE_SECONDS", "600")) @@ -301,7 +363,7 @@ def main() -> int: time.sleep(interval_s) # Refresh admin token (it may expire during the provisioning wait). - token = _keycloak_admin_token(keycloak_base, realm, kc_admin_client_id, kc_admin_client_secret) + token = _keycloak_client_token(keycloak_base, realm, kc_admin_client_id, kc_admin_client_secret) user = _keycloak_find_user(keycloak_base, realm, token, username) if not user: diff --git a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml index 3227ded..568457d 100644 --- a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml +++ b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: portal-onboarding-e2e-test-8 + name: portal-onboarding-e2e-test-9 namespace: bstein-dev-home spec: backoffLimit: 0 @@ -15,11 +15,6 @@ spec: env: - name: PORTAL_BASE_URL value: http://bstein-dev-home-backend.bstein-dev-home.svc.cluster.local - - name: PORTAL_DATABASE_URL - valueFrom: - secretKeyRef: - name: atlas-portal-db - key: PORTAL_DATABASE_URL - name: KEYCLOAK_ADMIN_URL value: http://keycloak.sso.svc.cluster.local - name: KEYCLOAK_REALM @@ -31,6 +26,20 @@ spec: secretKeyRef: name: bstein-dev-home-keycloak-admin key: client_secret + - name: PORTAL_E2E_CLIENT_ID + valueFrom: + secretKeyRef: + name: portal-e2e-client + key: client_id + - name: PORTAL_E2E_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: portal-e2e-client + key: client_secret + - name: PORTAL_TARGET_CLIENT_ID + value: bstein-dev-home + - name: E2E_PORTAL_ADMIN_USERNAME + value: bstein - name: E2E_USERNAME_PREFIX value: e2e-portal - name: E2E_CONTACT_EMAIL @@ -45,7 +54,6 @@ spec: args: - | set -euo pipefail - python -m pip install --no-cache-dir 'psycopg[binary]==3.2.5' python 
/scripts/test_portal_onboarding_flow.py volumeMounts: - name: tests -- 2.47.2 From b9d2fa8277b8442d930a8bbceee4ee9c5df4c5bb Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 Jan 2026 03:01:56 -0300 Subject: [PATCH 392/684] test(portal): improve e2e auth errors --- scripts/tests/test_portal_onboarding_flow.py | 33 ++++++++++++------- .../portal-onboarding-e2e-test-job.yaml | 2 +- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/scripts/tests/test_portal_onboarding_flow.py b/scripts/tests/test_portal_onboarding_flow.py index 504cfd1..2b5ff68 100644 --- a/scripts/tests/test_portal_onboarding_flow.py +++ b/scripts/tests/test_portal_onboarding_flow.py @@ -100,7 +100,7 @@ def _keycloak_client_token(keycloak_base: str, realm: str, client_id: str, clien ) token = payload.get("access_token") if not isinstance(token, str) or not token: - raise SystemExit("keycloak admin token response missing access_token") + raise SystemExit("keycloak token response missing access_token") return token @@ -254,7 +254,10 @@ def main() -> int: imap_keycloak_username = os.environ.get("E2E_IMAP_KEYCLOAK_USERNAME", "robotuser").strip() imap_wait_sec = int(os.environ.get("E2E_IMAP_WAIT_SECONDS", "90")) - token = _keycloak_client_token(keycloak_base, realm, kc_admin_client_id, kc_admin_client_secret) + try: + token = _keycloak_client_token(keycloak_base, realm, kc_admin_client_id, kc_admin_client_secret) + except SystemExit as exc: + raise SystemExit(f"failed to fetch keycloak token for admin client {kc_admin_client_id!r}: {exc}") mailbox_user = _keycloak_find_user(keycloak_base, realm, token, imap_keycloak_username) if not mailbox_user: raise SystemExit(f"unable to locate Keycloak mailbox user {imap_keycloak_username!r}") @@ -316,16 +319,22 @@ def main() -> int: if not isinstance(portal_admin_user_id, str) or not portal_admin_user_id: raise SystemExit("portal admin user missing id") - e2e_subject_token = _keycloak_client_token(keycloak_base, realm, portal_e2e_client_id, 
portal_e2e_client_secret) - portal_bearer = _keycloak_token_exchange( - keycloak_base=keycloak_base, - realm=realm, - client_id=portal_e2e_client_id, - client_secret=portal_e2e_client_secret, - subject_token=e2e_subject_token, - requested_subject=portal_admin_user_id, - audience=portal_target_client_id, - ) + try: + e2e_subject_token = _keycloak_client_token(keycloak_base, realm, portal_e2e_client_id, portal_e2e_client_secret) + except SystemExit as exc: + raise SystemExit(f"failed to fetch keycloak token for E2E client {portal_e2e_client_id!r}: {exc}") + try: + portal_bearer = _keycloak_token_exchange( + keycloak_base=keycloak_base, + realm=realm, + client_id=portal_e2e_client_id, + client_secret=portal_e2e_client_secret, + subject_token=e2e_subject_token, + requested_subject=portal_admin_user_id, + audience=portal_target_client_id, + ) + except SystemExit as exc: + raise SystemExit(f"failed to exchange token for portal approval as {portal_admin_username!r}: {exc}") approve_url = f"{portal_base}/api/admin/access/requests/{urllib.parse.quote(username, safe='')}/approve" approve_resp = _request_json("POST", approve_url, portal_bearer, payload=None, timeout_s=60) diff --git a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml index 568457d..6b9eb7d 100644 --- a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml +++ b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: portal-onboarding-e2e-test-9 + name: portal-onboarding-e2e-test-10 namespace: bstein-dev-home spec: backoffLimit: 0 -- 2.47.2 From c53d310c5925aa5a68554b5bd6a7c098d74bafe1 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 Jan 2026 03:27:32 -0300 Subject: [PATCH 393/684] test(portal): use external Keycloak URL --- services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml index 6b9eb7d..fd56523 100644 --- a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml +++ b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml @@ -16,7 +16,7 @@ spec: - name: PORTAL_BASE_URL value: http://bstein-dev-home-backend.bstein-dev-home.svc.cluster.local - name: KEYCLOAK_ADMIN_URL - value: http://keycloak.sso.svc.cluster.local + value: https://sso.bstein.dev - name: KEYCLOAK_REALM value: atlas - name: KEYCLOAK_ADMIN_CLIENT_ID -- 2.47.2 From 17a9a7e245148a1b240f11bde63dc18e31f94a1d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 Jan 2026 03:35:26 -0300 Subject: [PATCH 394/684] test(portal): sync e2e client secret --- scripts/sso_portal_e2e_client_secret_sync.sh | 20 ++++++++++++ services/bstein-dev-home/kustomization.yaml | 1 + .../portal-e2e-client-secret-sync-rbac.yaml | 24 ++++++++++++++ services/keycloak/kustomization.yaml | 5 +++ ...portal-e2e-client-secret-sync-cronjob.yaml | 32 +++++++++++++++++++ .../portal-e2e-client-secret-sync-rbac.yaml | 31 ++++++++++++++++++ 6 files changed, 113 insertions(+) create mode 100755 scripts/sso_portal_e2e_client_secret_sync.sh create mode 100644 services/bstein-dev-home/portal-e2e-client-secret-sync-rbac.yaml create mode 100644 services/keycloak/portal-e2e-client-secret-sync-cronjob.yaml create mode 100644 services/keycloak/portal-e2e-client-secret-sync-rbac.yaml diff --git a/scripts/sso_portal_e2e_client_secret_sync.sh b/scripts/sso_portal_e2e_client_secret_sync.sh new file mode 100755 index 0000000..bf944ca --- /dev/null +++ b/scripts/sso_portal_e2e_client_secret_sync.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +set -euo pipefail + +SOURCE_NAMESPACE="${SOURCE_NAMESPACE:-sso}" +DEST_NAMESPACE="${DEST_NAMESPACE:-bstein-dev-home}" +SECRET_NAME="${SECRET_NAME:-portal-e2e-client}" + +client_id="$(kubectl -n "${SOURCE_NAMESPACE}" get secret "${SECRET_NAME}" -o 
jsonpath='{.data.client_id}')" +client_secret="$(kubectl -n "${SOURCE_NAMESPACE}" get secret "${SECRET_NAME}" -o jsonpath='{.data.client_secret}')" + +cat </dev/null +apiVersion: v1 +kind: Secret +metadata: + name: ${SECRET_NAME} +type: Opaque +data: + client_id: ${client_id} + client_secret: ${client_secret} +EOF diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index 2b710d1..3a423ef 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -6,6 +6,7 @@ resources: - namespace.yaml - image.yaml - rbac.yaml + - portal-e2e-client-secret-sync-rbac.yaml - chat-ai-gateway-configmap.yaml - chat-ai-gateway-deployment.yaml - chat-ai-gateway-service.yaml diff --git a/services/bstein-dev-home/portal-e2e-client-secret-sync-rbac.yaml b/services/bstein-dev-home/portal-e2e-client-secret-sync-rbac.yaml new file mode 100644 index 0000000..045bd0a --- /dev/null +++ b/services/bstein-dev-home/portal-e2e-client-secret-sync-rbac.yaml @@ -0,0 +1,24 @@ +# services/bstein-dev-home/portal-e2e-client-secret-sync-rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: portal-e2e-client-secret-sync-target + namespace: bstein-dev-home +rules: + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "create", "patch", "update"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: portal-e2e-client-secret-sync-target + namespace: bstein-dev-home +subjects: + - kind: ServiceAccount + name: portal-e2e-client-secret-sync + namespace: sso +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: portal-e2e-client-secret-sync-target diff --git a/services/keycloak/kustomization.yaml b/services/keycloak/kustomization.yaml index b9266b8..24490de 100644 --- a/services/keycloak/kustomization.yaml +++ b/services/keycloak/kustomization.yaml @@ -8,6 +8,8 @@ resources: - deployment.yaml - realm-settings-job.yaml - 
portal-e2e-client-job.yaml + - portal-e2e-client-secret-sync-rbac.yaml + - portal-e2e-client-secret-sync-cronjob.yaml - portal-e2e-target-client-job.yaml - portal-e2e-token-exchange-permissions-job.yaml - portal-e2e-token-exchange-test-job.yaml @@ -23,3 +25,6 @@ configMapGenerator: files: - test_portal_token_exchange.py=../../scripts/tests/test_portal_token_exchange.py - test_keycloak_execute_actions_email.py=../../scripts/tests/test_keycloak_execute_actions_email.py + - name: portal-e2e-client-secret-sync-script + files: + - sso_portal_e2e_client_secret_sync.sh=../../scripts/sso_portal_e2e_client_secret_sync.sh diff --git a/services/keycloak/portal-e2e-client-secret-sync-cronjob.yaml b/services/keycloak/portal-e2e-client-secret-sync-cronjob.yaml new file mode 100644 index 0000000..cbe47b9 --- /dev/null +++ b/services/keycloak/portal-e2e-client-secret-sync-cronjob.yaml @@ -0,0 +1,32 @@ +# services/keycloak/portal-e2e-client-secret-sync-cronjob.yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: portal-e2e-client-secret-sync + namespace: sso +spec: + schedule: "*/10 * * * *" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 3 + jobTemplate: + spec: + backoffLimit: 1 + template: + spec: + serviceAccountName: portal-e2e-client-secret-sync + restartPolicy: Never + containers: + - name: sync + image: bitnami/kubectl:1.33.1 + command: ["/usr/bin/env", "bash"] + args: ["/scripts/sso_portal_e2e_client_secret_sync.sh"] + volumeMounts: + - name: script + mountPath: /scripts + readOnly: true + volumes: + - name: script + configMap: + name: portal-e2e-client-secret-sync-script + defaultMode: 0555 diff --git a/services/keycloak/portal-e2e-client-secret-sync-rbac.yaml b/services/keycloak/portal-e2e-client-secret-sync-rbac.yaml new file mode 100644 index 0000000..e2d39bb --- /dev/null +++ b/services/keycloak/portal-e2e-client-secret-sync-rbac.yaml @@ -0,0 +1,31 @@ +# services/keycloak/portal-e2e-client-secret-sync-rbac.yaml 
+apiVersion: v1 +kind: ServiceAccount +metadata: + name: portal-e2e-client-secret-sync + namespace: sso +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: portal-e2e-client-secret-sync-source + namespace: sso +rules: + - apiGroups: [""] + resources: ["secrets"] + resourceNames: ["portal-e2e-client"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: portal-e2e-client-secret-sync-source + namespace: sso +subjects: + - kind: ServiceAccount + name: portal-e2e-client-secret-sync + namespace: sso +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: portal-e2e-client-secret-sync-source -- 2.47.2 From 55606e5b7063d3b3e64dfdc2ef3a26ba6a780dbd Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 Jan 2026 03:40:13 -0300 Subject: [PATCH 395/684] fix(portal): pin kubectl image digest --- services/keycloak/portal-e2e-client-secret-sync-cronjob.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/keycloak/portal-e2e-client-secret-sync-cronjob.yaml b/services/keycloak/portal-e2e-client-secret-sync-cronjob.yaml index cbe47b9..8bb7e55 100644 --- a/services/keycloak/portal-e2e-client-secret-sync-cronjob.yaml +++ b/services/keycloak/portal-e2e-client-secret-sync-cronjob.yaml @@ -18,7 +18,7 @@ spec: restartPolicy: Never containers: - name: sync - image: bitnami/kubectl:1.33.1 + image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 command: ["/usr/bin/env", "bash"] args: ["/scripts/sso_portal_e2e_client_secret_sync.sh"] volumeMounts: -- 2.47.2 From 6f784c94a4a58e8b90d62fc426c6f1014f696789 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 10:35:36 +0000 Subject: [PATCH 396/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml 
b/services/bstein-dev-home/frontend-deployment.yaml index 24f275a..7ee115b 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-69 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-70 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 333481bd6766ad1c4668b17a3e094b39f9deb01d Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 10:36:47 +0000 Subject: [PATCH 397/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 34c5d17..d39a35d 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-69 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-70 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index a2749bf..9d1112a 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: 
registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-69 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-70 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 1a8b3ce304c77bf84be74862fdfd328d5876003a Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 11:27:40 +0000 Subject: [PATCH 398/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 7ee115b..cbedc0f 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-70 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-71 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 20332f7029d6f0fa33373c7d9ca2e9b1c33edcea Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 11:28:50 +0000 Subject: [PATCH 399/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index d39a35d..1d3d0e3 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: 
registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-70 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-71 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 9d1112a..b20105f 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-70 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-71 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 6eeff1271cd526890588f82b3650c629d47d1d7b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 Jan 2026 08:35:49 -0300 Subject: [PATCH 400/684] test(portal): stop requiring totp --- scripts/tests/test_portal_onboarding_flow.py | 6 +++++- .../bstein-dev-home/portal-onboarding-e2e-test-job.yaml | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/tests/test_portal_onboarding_flow.py b/scripts/tests/test_portal_onboarding_flow.py index 2b5ff68..a34c36d 100644 --- a/scripts/tests/test_portal_onboarding_flow.py +++ b/scripts/tests/test_portal_onboarding_flow.py @@ -387,9 +387,13 @@ def main() -> int: if isinstance(required_actions, list): required = {a for a in required_actions if isinstance(a, str)} - missing = [name for name in ("UPDATE_PASSWORD", "VERIFY_EMAIL", "CONFIGURE_TOTP") if name not in required] + missing = [name for name in ("UPDATE_PASSWORD", "VERIFY_EMAIL") if name not in required] if missing: raise SystemExit(f"Keycloak user missing required actions 
{missing}: requiredActions={sorted(required)}") + if "CONFIGURE_TOTP" in required: + raise SystemExit( + f"Keycloak user should not require CONFIGURE_TOTP at first login: requiredActions={sorted(required)}" + ) print(f"PASS: onboarding provisioning completed for {request_code} ({username})") return 0 diff --git a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml index fd56523..3170f86 100644 --- a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml +++ b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: portal-onboarding-e2e-test-10 + name: portal-onboarding-e2e-test-11 namespace: bstein-dev-home spec: backoffLimit: 0 -- 2.47.2 From b82e2b99dbeb9f96bd81d25b6640290d31061ad9 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 11:49:41 +0000 Subject: [PATCH 401/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index cbedc0f..7ec8d39 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-71 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-72 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From b28fdece0aa8ee399ac7ea32773f118cb9016215 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 11:50:52 +0000 Subject: [PATCH 402/684] chore(bstein-dev-home): automated image update --- 
services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 1d3d0e3..5b29016 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-71 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-72 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index b20105f..0a52ecb 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-71 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-72 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 31994f9243016b3cc951ecc87f0b2e83e495c38c Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 12:21:44 +0000 Subject: [PATCH 403/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 7ec8d39..5dc6c6d 100644 --- 
a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-72 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-73 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 9ff8fc9e72f29703c8a76e3de54aa98997f4292d Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 12:22:54 +0000 Subject: [PATCH 404/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 5b29016..b8cfd20 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-72 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-73 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 0a52ecb..2adc2ef 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-72 # {"$imagepolicy": 
"bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-73 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From e66e782e4af0dbe66e37c09a3674a1c5fc919a01 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 Jan 2026 09:38:51 -0300 Subject: [PATCH 405/684] portal: add test user cleanup tool --- scripts/test_user_cleanup.py | 276 +++++++++++++++++++++++++++++++++++ scripts/test_user_cleanup.sh | 18 +++ 2 files changed, 294 insertions(+) create mode 100755 scripts/test_user_cleanup.py create mode 100755 scripts/test_user_cleanup.sh diff --git a/scripts/test_user_cleanup.py b/scripts/test_user_cleanup.py new file mode 100755 index 0000000..d29b775 --- /dev/null +++ b/scripts/test_user_cleanup.py @@ -0,0 +1,276 @@ +#!/usr/bin/env python3 + +from __future__ import annotations + +import argparse +import sys +from collections import defaultdict +from dataclasses import dataclass +from typing import Any, Iterable +from urllib.parse import quote + +import httpx + +from atlas_portal import db, settings +from atlas_portal.keycloak import admin_client + + +@dataclass(frozen=True) +class KeycloakUser: + id: str + username: str + + +@dataclass(frozen=True) +class PortalRequest: + request_code: str + username: str + status: str + + +def _dedupe_by_id(users: Iterable[KeycloakUser]) -> list[KeycloakUser]: + seen: set[str] = set() + out: list[KeycloakUser] = [] + for user in users: + if user.id in seen: + continue + seen.add(user.id) + out.append(user) + return out + + +def _iter_keycloak_users_for_prefix(prefix: str, max_results: int) -> list[KeycloakUser]: + client = admin_client() + if not client.ready(): + raise RuntimeError("keycloak admin client not configured in this environment") + + url = f"{settings.KEYCLOAK_ADMIN_URL}/admin/realms/{settings.KEYCLOAK_REALM}/users" + # Keycloak can return false positives for search; we do a strict prefix match client-side. 
+ params = {"search": prefix, "max": str(max_results), "briefRepresentation": "true"} + with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http: + resp = http.get(url, params=params, headers=client.headers()) + resp.raise_for_status() + payload = resp.json() + + if not isinstance(payload, list): + return [] + + found: list[KeycloakUser] = [] + for item in payload: + if not isinstance(item, dict): + continue + username = item.get("username") + user_id = item.get("id") + if not isinstance(username, str) or not isinstance(user_id, str): + continue + if not username.startswith(prefix): + continue + if username.startswith("service-account-"): + continue + found.append(KeycloakUser(id=user_id, username=username)) + return found + + +def _find_keycloak_users(prefixes: list[str], max_results: int, protected: set[str]) -> list[KeycloakUser]: + matches: list[KeycloakUser] = [] + for prefix in prefixes: + matches.extend(_iter_keycloak_users_for_prefix(prefix, max_results=max_results)) + + deduped = _dedupe_by_id(matches) + return [user for user in deduped if user.username not in protected] + + +def _delete_keycloak_users(users: list[KeycloakUser]) -> None: + if not users: + return + + client = admin_client() + if not client.ready(): + raise RuntimeError("keycloak admin client not configured in this environment") + + base = f"{settings.KEYCLOAK_ADMIN_URL}/admin/realms/{settings.KEYCLOAK_REALM}/users" + with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http: + for user in users: + url = f"{base}/{quote(user.id, safe='')}" + resp = http.delete(url, headers=client.headers()) + # Deleting a non-existent user is treated as success for idempotency. 
+ if resp.status_code == 404: + continue + resp.raise_for_status() + + +def _find_portal_requests(prefixes: list[str], max_results: int) -> list[PortalRequest]: + if not db.configured(): + return [] + + like_prefixes = [f"{prefix}%" for prefix in prefixes] + rows: list[dict[str, Any]] = [] + with db.connect() as conn: + for like in like_prefixes: + cursor = conn.execute( + """ + SELECT request_code, username, status + FROM access_requests + WHERE username LIKE %s + ORDER BY created_at DESC + LIMIT %s + """, + (like, max_results), + ) + batch = cursor.fetchall() + if isinstance(batch, list): + rows.extend([r for r in batch if isinstance(r, dict)]) + + out: list[PortalRequest] = [] + for row in rows: + request_code = row.get("request_code") + username = row.get("username") + status = row.get("status") + if not isinstance(request_code, str) or not isinstance(username, str) or not isinstance(status, str): + continue + out.append(PortalRequest(request_code=request_code, username=username, status=status)) + return out + + +def _delete_portal_requests(prefixes: list[str]) -> int: + if not db.configured(): + return 0 + + like_prefixes = [f"{prefix}%" for prefix in prefixes] + deleted = 0 + with db.connect() as conn: + for like in like_prefixes: + cursor = conn.execute("DELETE FROM access_requests WHERE username LIKE %s", (like,)) + deleted += cursor.rowcount or 0 + return deleted + + +def _summarize_portal_requests(rows: list[PortalRequest]) -> dict[str, int]: + counts: dict[str, int] = defaultdict(int) + for row in rows: + counts[row.status] += 1 + return dict(counts) + + +def _parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser( + prog="test_user_cleanup", + description=( + "Manual-only cleanup for test users/requests. " + "This script is intended to be run inside the bstein-dev-home backend container." + ), + ) + parser.add_argument( + "--prefix", + action="append", + required=True, + help="Username prefix to target (repeatable). 
Example: --prefix test-", + ) + parser.add_argument( + "--max", + type=int, + default=500, + help="Maximum users/requests to enumerate per prefix (default: 500).", + ) + parser.add_argument( + "--apply", + action="store_true", + help="Apply deletions (default is dry-run). Requires --confirm.", + ) + parser.add_argument( + "--confirm", + default="", + help="Required when using --apply. Must exactly equal the comma-separated prefix list.", + ) + parser.add_argument( + "--skip-keycloak", + action="store_true", + help="Skip deleting Keycloak users.", + ) + parser.add_argument( + "--skip-portal", + action="store_true", + help="Skip deleting portal (DB) access requests.", + ) + parser.add_argument( + "--protect", + action="append", + default=[], + help="Extra usernames to never delete (repeatable).", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="List matched usernames/request codes.", + ) + return parser.parse_args(argv) + + +def main(argv: list[str]) -> int: + args = _parse_args(argv) + prefixes = sorted({p.strip() for p in args.prefix if p.strip()}) + if not prefixes: + print("error: no valid --prefix values provided", file=sys.stderr) + return 2 + + expected_confirm = ",".join(prefixes) + protected = {"bstein", "robotuser", *[p.strip() for p in args.protect if p.strip()]} + + if args.apply and args.confirm != expected_confirm: + print( + f"error: refusing to apply without --confirm '{expected_confirm}' (got '{args.confirm}')", + file=sys.stderr, + ) + return 2 + + keycloak_users: list[KeycloakUser] = [] + portal_requests: list[PortalRequest] = [] + + if not args.skip_keycloak: + keycloak_users = _find_keycloak_users(prefixes, max_results=args.max, protected=protected) + + if not args.skip_portal: + portal_requests = _find_portal_requests(prefixes, max_results=args.max) + + print(f"prefixes: {expected_confirm}") + print(f"mode: {'APPLY' if args.apply else 'DRY-RUN'}") + if protected: + print(f"protected usernames: {', 
'.join(sorted(protected))}") + + if not args.skip_keycloak: + print(f"keycloak users matched: {len(keycloak_users)}") + if args.verbose and keycloak_users: + for user in sorted(keycloak_users, key=lambda u: u.username): + print(f" - {user.username}") + + if not args.skip_portal: + print(f"portal requests matched: {len(portal_requests)}") + if portal_requests: + summary = _summarize_portal_requests(portal_requests) + summary_str = ", ".join(f"{k}={v}" for k, v in sorted(summary.items())) + print(f" statuses: {summary_str}") + if args.verbose and portal_requests: + for req in portal_requests[: min(50, len(portal_requests))]: + print(f" - {req.request_code} ({req.status})") + if len(portal_requests) > 50: + print(f" ... and {len(portal_requests) - 50} more") + + if not args.apply: + print("dry-run complete (no changes made)") + return 0 + + if not args.skip_portal: + deleted = _delete_portal_requests(prefixes) + print(f"deleted portal requests: {deleted}") + + if not args.skip_keycloak: + _delete_keycloak_users(keycloak_users) + print(f"deleted keycloak users: {len(keycloak_users)}") + + print("done") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) + diff --git a/scripts/test_user_cleanup.sh b/scripts/test_user_cleanup.sh new file mode 100755 index 0000000..346aedc --- /dev/null +++ b/scripts/test_user_cleanup.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Manual-only helper to run `scripts/test_user_cleanup.py` inside the portal backend container. 
+# +# Usage (dry-run): +# scripts/test_user_cleanup.sh --prefix test- +# +# Usage (apply): +# scripts/test_user_cleanup.sh --prefix test- --apply --confirm test- + +NS="${PORTAL_NAMESPACE:-bstein-dev-home}" +TARGET="${PORTAL_BACKEND_EXEC_TARGET:-deploy/bstein-dev-home-backend}" + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" + +cat "${SCRIPT_DIR}/test_user_cleanup.py" | kubectl -n "${NS}" exec -i "${TARGET}" -- python - "$@" + -- 2.47.2 From 58f36edf92e5affabddc411d39beb68b2ddeed8b Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 15:34:57 +0000 Subject: [PATCH 406/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 5dc6c6d..6cc3e62 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-73 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-74 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 291073884a615b6a4d0323e3d733009eefdbcf9c Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 15:36:08 +0000 Subject: [PATCH 407/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index b8cfd20..4aa7257 100644 --- 
a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-73 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-74 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 2adc2ef..ecf4b34 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-73 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-74 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 64cdcec3645401217675d5c7893b42a7b8205d17 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 16:04:59 +0000 Subject: [PATCH 408/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 6cc3e62..e030132 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-74 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: 
registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-75 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 513dce99b6fe85618ff833444165fbcd8b3c77b7 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 16:06:10 +0000 Subject: [PATCH 409/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 4aa7257..cac82b9 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-74 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-75 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index ecf4b34..ed2cba5 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-74 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-75 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From f735dba10df6a5062c9d9fef752a7c5dfb2abc15 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 
16:15:00 +0000 Subject: [PATCH 410/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index e030132..b84bb81 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-75 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-76 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 211504d47a4d16ecd95b39e15086d808de6c3ae7 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sun, 4 Jan 2026 16:16:11 +0000 Subject: [PATCH 411/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index cac82b9..9e56073 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-75 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-76 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml 
b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index ed2cba5..962538e 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-75 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-76 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From cf0e5bfc89d41faa14dc0b9471d9650512ea68f6 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Mon, 5 Jan 2026 01:03:38 +0000 Subject: [PATCH 412/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index b84bb81..3b76ae6 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-76 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-77 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 70ed083d96a1598bb24ceb89ca4862f7aca00d37 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Mon, 5 Jan 2026 01:04:49 +0000 Subject: [PATCH 413/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 9e56073..4fe671a 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-76 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-77 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 962538e..2474a12 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-76 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-77 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 4c59fccedf4805982be61a0f1593186b44afb0bb Mon Sep 17 00:00:00 2001 From: flux-bot Date: Mon, 5 Jan 2026 01:54:42 +0000 Subject: [PATCH 414/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 3b76ae6..fd81a00 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: 
registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-77 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-78 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From eff9bfb761643b95b5d5a8721fd711dc879a13f9 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Mon, 5 Jan 2026 01:55:52 +0000 Subject: [PATCH 415/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 4fe671a..a5d78e3 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-77 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-78 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 2474a12..df9bbee 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-77 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-78 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - 
python -- 2.47.2 From 2e52956155d683f475539ba5c351ebe8e144ad39 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 Jan 2026 23:00:40 -0300 Subject: [PATCH 416/684] test(portal): align onboarding E2E with vaultwarden-first flow --- scripts/tests/test_portal_onboarding_flow.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/scripts/tests/test_portal_onboarding_flow.py b/scripts/tests/test_portal_onboarding_flow.py index a34c36d..c75ec56 100644 --- a/scripts/tests/test_portal_onboarding_flow.py +++ b/scripts/tests/test_portal_onboarding_flow.py @@ -387,14 +387,21 @@ def main() -> int: if isinstance(required_actions, list): required = {a for a in required_actions if isinstance(a, str)} - missing = [name for name in ("UPDATE_PASSWORD", "VERIFY_EMAIL") if name not in required] - if missing: - raise SystemExit(f"Keycloak user missing required actions {missing}: requiredActions={sorted(required)}") - if "CONFIGURE_TOTP" in required: + unexpected = sorted(required.intersection({"UPDATE_PASSWORD", "VERIFY_EMAIL", "CONFIGURE_TOTP"})) + if unexpected: raise SystemExit( - f"Keycloak user should not require CONFIGURE_TOTP at first login: requiredActions={sorted(required)}" + "Keycloak user should not require actions at first login " + f"(Vaultwarden-first onboarding): unexpected requiredActions={unexpected} full={sorted(required)}" ) + email_verified = full.get("emailVerified") + if email_verified is not True: + raise SystemExit(f"Keycloak user should have emailVerified=true: emailVerified={email_verified!r}") + + kc_email = full.get("email") + if isinstance(kc_email, str) and contact_email and kc_email != contact_email: + raise SystemExit(f"Keycloak user email mismatch: expected {contact_email!r} got {kc_email!r}") + print(f"PASS: onboarding provisioning completed for {request_code} ({username})") return 0 -- 2.47.2 From 0805dbc5e9a673bf47e5269ae1c4ee6ec9486c97 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 4 Jan 2026 23:04:50 -0300 
Subject: [PATCH 417/684] test(portal): tolerate slow approval endpoint --- scripts/tests/test_portal_onboarding_flow.py | 22 ++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/scripts/tests/test_portal_onboarding_flow.py b/scripts/tests/test_portal_onboarding_flow.py index c75ec56..9c5124a 100644 --- a/scripts/tests/test_portal_onboarding_flow.py +++ b/scripts/tests/test_portal_onboarding_flow.py @@ -337,8 +337,26 @@ def main() -> int: raise SystemExit(f"failed to exchange token for portal approval as {portal_admin_username!r}: {exc}") approve_url = f"{portal_base}/api/admin/access/requests/{urllib.parse.quote(username, safe='')}/approve" - approve_resp = _request_json("POST", approve_url, portal_bearer, payload=None, timeout_s=60) - if not isinstance(approve_resp, dict) or approve_resp.get("ok") is not True: + approve_timeout_s = int(os.environ.get("E2E_APPROVE_TIMEOUT_SECONDS", "180")) + approve_attempts = int(os.environ.get("E2E_APPROVE_ATTEMPTS", "3")) + approve_resp = None + approve_error = None + for attempt in range(1, approve_attempts + 1): + try: + approve_resp = _request_json("POST", approve_url, portal_bearer, payload=None, timeout_s=approve_timeout_s) + approve_error = None + break + except (http.client.RemoteDisconnected, TimeoutError, urllib.error.URLError) as exc: + approve_error = str(exc) + if attempt == approve_attempts: + break + time.sleep(3) + if approve_resp is None: + print( + "WARNING: portal approval request did not return a response; " + f"continuing to poll status (last_error={approve_error})" + ) + elif not isinstance(approve_resp, dict) or approve_resp.get("ok") is not True: raise SystemExit(f"unexpected approval response: {approve_resp}") status_url = f"{portal_base}/api/access/request/status" -- 2.47.2 From 010a0b5e225df1656bebbb5e97886800721cfeec Mon Sep 17 00:00:00 2001 From: flux-bot Date: Mon, 5 Jan 2026 02:38:45 +0000 Subject: [PATCH 418/684] chore(bstein-dev-home): automated image update --- 
services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index fd81a00..27442f1 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-78 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-79 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 5470002e3e518768a0a5d6d9bf9cf44ba3e906a7 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Mon, 5 Jan 2026 02:39:56 +0000 Subject: [PATCH 419/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index a5d78e3..a40a5e1 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-78 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-79 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index df9bbee..d5090e5 100644 --- 
a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-78 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-79 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 3ede68867645e0ea8f41496d28f8b5a763729647 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Mon, 5 Jan 2026 03:10:47 +0000 Subject: [PATCH 420/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 27442f1..fed2a6e 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-79 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-80 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 38eceaadfa91e92f88d29b976c06bc9058cad050 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Mon, 5 Jan 2026 03:11:58 +0000 Subject: [PATCH 421/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index a40a5e1..c0fd5d0 100644 --- 
a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-79 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-80 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index d5090e5..2b968b6 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-79 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-80 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 08c54d3d012d618194fa30a2e7cf26140f1b0306 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 5 Jan 2026 00:25:22 -0300 Subject: [PATCH 422/684] scripts: add atlas test cleanup --- scripts/test_atlas_user_cleanup.py | 434 +++++++++++++++++++++++++++++ 1 file changed, 434 insertions(+) create mode 100755 scripts/test_atlas_user_cleanup.py diff --git a/scripts/test_atlas_user_cleanup.py b/scripts/test_atlas_user_cleanup.py new file mode 100755 index 0000000..2ca837a --- /dev/null +++ b/scripts/test_atlas_user_cleanup.py @@ -0,0 +1,434 @@ +#!/usr/bin/env python3 +"""Clean up Atlas test users and portal requests (manual-only). + +Default behavior is DRY RUN. This script is intended for operators to clean up +test accounts created via the bstein-dev-home onboarding portal. 
+ +Targets (best-effort): + - Keycloak users in realm "atlas" + - Atlas portal Postgres rows (access_requests + dependent tables) + - Vaultwarden users/invites created by the portal + +Safety: + - Requires an explicit username prefix (e.g. "test-") + - Dry-run unless --apply is set + - Validates prefixes to a conservative charset +""" + +from __future__ import annotations + +import argparse +import base64 +import json +import os +import re +import subprocess +import sys +import time +import urllib.parse +import urllib.request +from dataclasses import dataclass +from typing import Any, Iterable + + +_SAFE_PREFIX_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]{0,63}$") + + +@dataclass(frozen=True) +class KeycloakUser: + user_id: str + username: str + email: str + + +@dataclass(frozen=True) +class PortalRequestRow: + request_code: str + username: str + status: str + + +@dataclass(frozen=True) +class VaultwardenUser: + user_id: str + email: str + status: int + + +def _run(cmd: list[str], *, input_bytes: bytes | None = None) -> str: + proc = subprocess.run( + cmd, + input=input_bytes, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=False, + ) + if proc.returncode != 0: + stderr = proc.stderr.decode("utf-8", errors="replace").strip() + raise RuntimeError(f"command failed ({proc.returncode}): {' '.join(cmd)}\n{stderr}") + return proc.stdout.decode("utf-8", errors="replace") + + +def _kubectl_get_secret_value(namespace: str, name: str, key: str) -> str: + raw_b64 = _run( + [ + "kubectl", + "-n", + namespace, + "get", + "secret", + name, + "-o", + f"jsonpath={{.data.{key}}}", + ] + ).strip() + if not raw_b64: + raise RuntimeError(f"secret {namespace}/{name} key {key} is empty") + return base64.b64decode(raw_b64).decode("utf-8").strip() + + +def _kubectl_first_pod(namespace: str) -> str: + raw = _run( + [ + "kubectl", + "-n", + namespace, + "get", + "pods", + "-o", + "json", + ] + ) + data = json.loads(raw) + items = data.get("items") or [] + if not 
isinstance(items, list) or not items: + raise RuntimeError(f"no pods found in namespace {namespace}") + pod_name = items[0].get("metadata", {}).get("name") + if not isinstance(pod_name, str) or not pod_name: + raise RuntimeError(f"unexpected pod list in namespace {namespace}") + return pod_name + + +def _validate_prefixes(prefixes: list[str]) -> list[str]: + cleaned: list[str] = [] + for prefix in prefixes: + prefix = prefix.strip() + if not prefix: + continue + if not _SAFE_PREFIX_RE.match(prefix): + raise SystemExit( + f"invalid prefix '{prefix}': must match {_SAFE_PREFIX_RE.pattern} (alnum plus ._-)" + ) + cleaned.append(prefix) + if not cleaned: + raise SystemExit("at least one --prefix is required") + return cleaned + + +def _starts_with_any(value: str, prefixes: Iterable[str]) -> bool: + return any(value.startswith(p) for p in prefixes) + + +def _keycloak_token(server: str, realm: str, client_id: str, client_secret: str) -> str: + data = urllib.parse.urlencode( + { + "grant_type": "client_credentials", + "client_id": client_id, + "client_secret": client_secret, + } + ).encode("utf-8") + req = urllib.request.Request( + f"{server}/realms/{realm}/protocol/openid-connect/token", + data=data, + method="POST", + ) + req.add_header("Content-Type", "application/x-www-form-urlencoded") + with urllib.request.urlopen(req, timeout=15) as resp: + payload = json.loads(resp.read().decode("utf-8")) + token = payload.get("access_token") + if not isinstance(token, str) or not token: + raise RuntimeError("failed to obtain keycloak access token") + return token + + +def _keycloak_list_users(server: str, realm: str, token: str, search: str) -> list[KeycloakUser]: + query = urllib.parse.urlencode({"max": "1000", "search": search}) + req = urllib.request.Request(f"{server}/admin/realms/{realm}/users?{query}", method="GET") + req.add_header("Authorization", f"Bearer {token}") + with urllib.request.urlopen(req, timeout=30) as resp: + payload = json.loads(resp.read().decode("utf-8")) 
+ if not isinstance(payload, list): + raise RuntimeError("unexpected keycloak users response") + users: list[KeycloakUser] = [] + for item in payload: + if not isinstance(item, dict): + continue + user_id = item.get("id") + username = item.get("username") or "" + email = item.get("email") or "" + if not isinstance(user_id, str) or not user_id: + continue + if not isinstance(username, str): + continue + users.append(KeycloakUser(user_id=user_id, username=username, email=str(email))) + return users + + +def _keycloak_delete_user(server: str, realm: str, token: str, user_id: str) -> None: + req = urllib.request.Request(f"{server}/admin/realms/{realm}/users/{user_id}", method="DELETE") + req.add_header("Authorization", f"Bearer {token}") + try: + with urllib.request.urlopen(req, timeout=30) as resp: + _ = resp.read() + except urllib.error.HTTPError as exc: + if exc.code == 404: + return + raise + + +def _psql_json(portal_db_url: str, sql: str) -> list[dict[str, Any]]: + postgres_pod = _kubectl_first_pod("postgres") + out = _run( + [ + "kubectl", + "-n", + "postgres", + "exec", + "-i", + postgres_pod, + "--", + "psql", + portal_db_url, + "-At", + "-F", + "\t", + "-c", + sql, + ] + ) + rows: list[dict[str, Any]] = [] + for line in out.splitlines(): + parts = line.split("\t") + rows.append({"cols": parts}) + return rows + + +def _portal_list_requests(portal_db_url: str, prefixes: list[str]) -> list[PortalRequestRow]: + clauses = " OR ".join([f"username LIKE '{p}%'" for p in prefixes]) + sql = ( + "SELECT request_code, username, status " + "FROM access_requests " + f"WHERE {clauses} " + "ORDER BY created_at DESC;" + ) + raw_rows = _psql_json(portal_db_url, sql) + parsed: list[PortalRequestRow] = [] + for row in raw_rows: + cols = row.get("cols") or [] + if len(cols) < 3: + continue + parsed.append(PortalRequestRow(request_code=cols[0], username=cols[1], status=cols[2])) + return parsed + + +def _portal_delete_requests(portal_db_url: str, prefixes: list[str]) -> int: + 
clauses = " OR ".join([f"username LIKE '{p}%'" for p in prefixes]) + sql = f"DELETE FROM access_requests WHERE {clauses};" + postgres_pod = _kubectl_first_pod("postgres") + out = _run( + [ + "kubectl", + "-n", + "postgres", + "exec", + "-i", + postgres_pod, + "--", + "psql", + portal_db_url, + "-c", + sql, + ] + ) + # psql prints "DELETE " + match = re.search(r"DELETE\\s+(\\d+)", out) + return int(match.group(1)) if match else 0 + + +def _vaultwarden_admin_cookie(admin_token: str, base_url: str) -> str: + data = urllib.parse.urlencode({"token": admin_token}).encode("utf-8") + req = urllib.request.Request(f"{base_url}/admin", data=data, method="POST") + req.add_header("Content-Type", "application/x-www-form-urlencoded") + with urllib.request.urlopen(req, timeout=10) as resp: + set_cookie = resp.headers.get("Set-Cookie") or "" + cookie = set_cookie.split(";", 1)[0].strip() + if not cookie: + raise RuntimeError("vaultwarden admin cookie missing") + return cookie + + +def _vaultwarden_list_users(base_url: str, cookie: str) -> list[VaultwardenUser]: + req = urllib.request.Request(f"{base_url}/admin/users", method="GET") + req.add_header("Cookie", cookie) + with urllib.request.urlopen(req, timeout=30) as resp: + payload = json.loads(resp.read().decode("utf-8")) + if not isinstance(payload, list): + raise RuntimeError("unexpected vaultwarden /admin/users response") + users: list[VaultwardenUser] = [] + for item in payload: + if not isinstance(item, dict): + continue + user_id = item.get("id") + email = item.get("email") + status = item.get("_status") + if not isinstance(user_id, str) or not user_id: + continue + if not isinstance(email, str) or not email: + continue + if not isinstance(status, int): + status = -1 + users.append(VaultwardenUser(user_id=user_id, email=email, status=status)) + return users + + +def _vaultwarden_delete_user(base_url: str, cookie: str, user_id: str) -> None: + req = urllib.request.Request(f"{base_url}/admin/users/{user_id}", method="DELETE") + 
req.add_header("Cookie", cookie) + try: + with urllib.request.urlopen(req, timeout=30) as resp: + _ = resp.read() + except urllib.error.HTTPError as exc: + if exc.code in {404}: + return + if exc.code == 429: + raise RuntimeError("vaultwarden admin rate limited (HTTP 429)") from exc + raise + + +def _port_forward(namespace: str, target: str, local_port: int, remote_port: int) -> subprocess.Popen[bytes]: + # Keep stdout/stderr muted to avoid leaking internal details in output. + return subprocess.Popen( + [ + "kubectl", + "-n", + namespace, + "port-forward", + target, + f"{local_port}:{remote_port}", + "--address", + "127.0.0.1", + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--prefix", + action="append", + default=[], + help="Username prefix to match (repeatable). Example: --prefix test-", + ) + parser.add_argument( + "--apply", + action="store_true", + help="Actually delete; otherwise dry-run only.", + ) + parser.add_argument("--skip-keycloak", action="store_true", help="Skip Keycloak user deletion.") + parser.add_argument("--skip-portal-db", action="store_true", help="Skip portal DB cleanup.") + parser.add_argument("--skip-vaultwarden", action="store_true", help="Skip Vaultwarden cleanup.") + args = parser.parse_args() + + prefixes = _validate_prefixes(args.prefix) + apply = bool(args.apply) + + print("Atlas test-user cleanup") + print("prefixes:", ", ".join(prefixes)) + print("mode:", "APPLY (destructive)" if apply else "DRY RUN (no changes)") + print() + + if not args.skip_portal_db: + portal_db_url = _kubectl_get_secret_value("bstein-dev-home", "atlas-portal-db", "PORTAL_DATABASE_URL") + requests = _portal_list_requests(portal_db_url, prefixes) + print(f"Portal DB: {len(requests)} access_requests matched") + for row in requests[:50]: + print(f" {row.request_code}\t{row.status}\t{row.username}") + if len(requests) > 50: + print(f" ... 
and {len(requests) - 50} more") + if apply and requests: + deleted = _portal_delete_requests(portal_db_url, prefixes) + print(f"Portal DB: deleted {deleted} access_requests (cascade removes tasks/steps/artifacts).") + print() + + if not args.skip_keycloak: + kc_server = os.getenv("KEYCLOAK_PUBLIC_URL", "https://sso.bstein.dev").rstrip("/") + kc_realm = os.getenv("KEYCLOAK_REALM", "atlas") + kc_client_id = os.getenv("KEYCLOAK_ADMIN_CLIENT_ID", "bstein-dev-home-admin") + kc_client_secret = _kubectl_get_secret_value( + "bstein-dev-home", "bstein-dev-home-keycloak-admin", "client_secret" + ) + token = _keycloak_token(kc_server, kc_realm, kc_client_id, kc_client_secret) + found: dict[str, KeycloakUser] = {} + for prefix in prefixes: + for user in _keycloak_list_users(kc_server, kc_realm, token, prefix): + if not _starts_with_any(user.username, prefixes): + continue + found[user.user_id] = user + users = list(found.values()) + users.sort(key=lambda u: u.username) + print(f"Keycloak: {len(users)} users matched") + for user in users[:50]: + email = user.email or "-" + print(f" {user.username}\t{email}\t{user.user_id}") + if len(users) > 50: + print(f" ... 
and {len(users) - 50} more") + if apply and users: + for user in users: + _keycloak_delete_user(kc_server, kc_realm, token, user.user_id) + print(f"Keycloak: deleted {len(users)} users.") + print() + + if not args.skip_vaultwarden: + pf = _port_forward("vaultwarden", "svc/vaultwarden-service", 18081, 80) + try: + # wait briefly for the port-forward to come up + for _ in range(30): + try: + urllib.request.urlopen("http://127.0.0.1:18081/", timeout=1).read(1) + break + except Exception: + time.sleep(0.2) + + admin_token = _kubectl_get_secret_value("vaultwarden", "vaultwarden-admin", "ADMIN_TOKEN") + base_url = "http://127.0.0.1:18081" + cookie = _vaultwarden_admin_cookie(admin_token, base_url) + users = _vaultwarden_list_users(base_url, cookie) + matched: list[VaultwardenUser] = [] + for user in users: + local = user.email.split("@", 1)[0] + if _starts_with_any(local, prefixes): + matched.append(user) + matched.sort(key=lambda u: u.email) + print(f"Vaultwarden: {len(matched)} users matched") + for user in matched[:50]: + print(f" {user.email}\tstatus={user.status}\t{user.user_id}") + if len(matched) > 50: + print(f" ... 
and {len(matched) - 50} more") + if apply and matched: + for user in matched: + _vaultwarden_delete_user(base_url, cookie, user.user_id) + print(f"Vaultwarden: deleted {len(matched)} users.") + print() + finally: + pf.terminate() + try: + pf.wait(timeout=3) + except Exception: + pf.kill() + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) -- 2.47.2 From 089e8155ae8afa56b359be2d375dbbba2cc954b2 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Mon, 5 Jan 2026 05:32:52 +0000 Subject: [PATCH 423/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index fed2a6e..27b9c86 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-80 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-81 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 86c6c5a0f8ef7a475ef6adb20c1d46fdd7380ccf Mon Sep 17 00:00:00 2001 From: flux-bot Date: Mon, 5 Jan 2026 05:34:16 +0000 Subject: [PATCH 424/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index c0fd5d0..000ce2e 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: 
harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-80 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-81 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 2b968b6..4036f5d 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-80 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-81 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 53585597878f7a24125ccc4d3719e03eb3954632 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Mon, 5 Jan 2026 05:47:12 +0000 Subject: [PATCH 425/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 27b9c86..8bdc718 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-81 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-82 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http 
-- 2.47.2 From 9d9c2830f7f6eb67c5bb89bcdf60da3c0ad4da08 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Mon, 5 Jan 2026 05:48:17 +0000 Subject: [PATCH 426/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 000ce2e..a51850d 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-81 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-82 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 4036f5d..8bbfb2f 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-81 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-82 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 3e2a90e37707e6e9e63434cf12f7c76277b87014 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Mon, 5 Jan 2026 05:59:13 +0000 Subject: [PATCH 427/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 8bdc718..27c0d07 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-82 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-83 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From ad7ac5b38df1f511591650425f5b8fa6ddab90ed Mon Sep 17 00:00:00 2001 From: flux-bot Date: Mon, 5 Jan 2026 06:00:18 +0000 Subject: [PATCH 428/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index a51850d..bc3f541 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-82 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-83 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 8bbfb2f..b3deefb 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ 
b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-82 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-83 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 5877611b4fdf2bb3b7aff9811b1a54e862a16e1a Mon Sep 17 00:00:00 2001 From: flux-bot Date: Mon, 5 Jan 2026 06:19:15 +0000 Subject: [PATCH 429/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 27c0d07..2a717a3 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-83 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-84 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 55b25fbfd6262783aefc19cd7fc3ec8d704ab163 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Mon, 5 Jan 2026 06:20:19 +0000 Subject: [PATCH 430/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index bc3f541..3a8ac2f 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ 
b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-83 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-84 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index b3deefb..1c1e1a9 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-83 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-84 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 89d47cba79313b296fb09d00552a15e1034412e2 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 5 Jan 2026 12:24:12 -0300 Subject: [PATCH 431/684] scripts: harden atlas cleanup script --- scripts/test_atlas_user_cleanup.py | 91 +++++++++++++++++++++++++++--- 1 file changed, 83 insertions(+), 8 deletions(-) diff --git a/scripts/test_atlas_user_cleanup.py b/scripts/test_atlas_user_cleanup.py index 2ca837a..41ba708 100755 --- a/scripts/test_atlas_user_cleanup.py +++ b/scripts/test_atlas_user_cleanup.py @@ -12,6 +12,7 @@ Targets (best-effort): Safety: - Requires an explicit username prefix (e.g. 
"test-") - Dry-run unless --apply is set + - --apply requires an explicit --confirm guard - Validates prefixes to a conservative charset """ @@ -259,8 +260,13 @@ def _vaultwarden_admin_cookie(admin_token: str, base_url: str) -> str: data = urllib.parse.urlencode({"token": admin_token}).encode("utf-8") req = urllib.request.Request(f"{base_url}/admin", data=data, method="POST") req.add_header("Content-Type", "application/x-www-form-urlencoded") - with urllib.request.urlopen(req, timeout=10) as resp: - set_cookie = resp.headers.get("Set-Cookie") or "" + try: + with urllib.request.urlopen(req, timeout=10) as resp: + set_cookie = resp.headers.get("Set-Cookie") or "" + except urllib.error.HTTPError as exc: + if exc.code == 429: + raise RuntimeError("vaultwarden admin rate limited (HTTP 429)") from exc + raise cookie = set_cookie.split(";", 1)[0].strip() if not cookie: raise RuntimeError("vaultwarden admin cookie missing") @@ -270,8 +276,13 @@ def _vaultwarden_admin_cookie(admin_token: str, base_url: str) -> str: def _vaultwarden_list_users(base_url: str, cookie: str) -> list[VaultwardenUser]: req = urllib.request.Request(f"{base_url}/admin/users", method="GET") req.add_header("Cookie", cookie) - with urllib.request.urlopen(req, timeout=30) as resp: - payload = json.loads(resp.read().decode("utf-8")) + try: + with urllib.request.urlopen(req, timeout=30) as resp: + payload = json.loads(resp.read().decode("utf-8")) + except urllib.error.HTTPError as exc: + if exc.code == 429: + raise RuntimeError("vaultwarden admin rate limited (HTTP 429)") from exc + raise if not isinstance(payload, list): raise RuntimeError("unexpected vaultwarden /admin/users response") users: list[VaultwardenUser] = [] @@ -336,17 +347,49 @@ def main() -> int: action="store_true", help="Actually delete; otherwise dry-run only.", ) + parser.add_argument( + "--confirm", + default="", + help=( + "Required when using --apply. Must exactly equal the comma-separated " + "sorted prefix list (e.g. 
'atlas-,bob-,e2e-,test-')." + ), + ) parser.add_argument("--skip-keycloak", action="store_true", help="Skip Keycloak user deletion.") parser.add_argument("--skip-portal-db", action="store_true", help="Skip portal DB cleanup.") parser.add_argument("--skip-vaultwarden", action="store_true", help="Skip Vaultwarden cleanup.") + parser.add_argument( + "--protect-keycloak-username", + action="append", + default=[], + help="Keycloak usernames that must never be deleted (repeatable).", + ) + parser.add_argument( + "--protect-vaultwarden-email", + action="append", + default=[], + help="Vaultwarden emails that must never be deleted (repeatable).", + ) args = parser.parse_args() - prefixes = _validate_prefixes(args.prefix) + prefixes = sorted(set(_validate_prefixes(args.prefix))) apply = bool(args.apply) + expected_confirm = ",".join(prefixes) + protected_keycloak = {"bstein", "robotuser", *[u.strip() for u in args.protect_keycloak_username if u.strip()]} + protected_vaultwarden = {e.strip() for e in args.protect_vaultwarden_email if e.strip()} + + if apply and args.confirm != expected_confirm: + raise SystemExit( + f"refusing to apply without --confirm '{expected_confirm}' (got '{args.confirm}')" + ) print("Atlas test-user cleanup") - print("prefixes:", ", ".join(prefixes)) + print("prefixes:", expected_confirm) print("mode:", "APPLY (destructive)" if apply else "DRY RUN (no changes)") + if protected_keycloak: + print("protected keycloak usernames:", ", ".join(sorted(protected_keycloak))) + if protected_vaultwarden: + print("protected vaultwarden emails:", ", ".join(sorted(protected_vaultwarden))) print() if not args.skip_portal_db: @@ -375,6 +418,8 @@ def main() -> int: for user in _keycloak_list_users(kc_server, kc_realm, token, prefix): if not _starts_with_any(user.username, prefixes): continue + if user.username in protected_keycloak: + continue found[user.user_id] = user users = list(found.values()) users.sort(key=lambda u: u.username) @@ -403,12 +448,42 @@ def main() 
-> int: admin_token = _kubectl_get_secret_value("vaultwarden", "vaultwarden-admin", "ADMIN_TOKEN") base_url = "http://127.0.0.1:18081" - cookie = _vaultwarden_admin_cookie(admin_token, base_url) - users = _vaultwarden_list_users(base_url, cookie) + try: + cookie = "" + for attempt in range(7): + try: + cookie = _vaultwarden_admin_cookie(admin_token, base_url) + break + except RuntimeError as exc: + if "rate limited" in str(exc).lower(): + time.sleep(min(60.0, 2.0**attempt)) + continue + raise + if not cookie: + raise RuntimeError("vaultwarden admin login repeatedly rate limited") + + users: list[VaultwardenUser] = [] + for attempt in range(7): + try: + users = _vaultwarden_list_users(base_url, cookie) + break + except RuntimeError as exc: + if "rate limited" in str(exc).lower(): + time.sleep(min(60.0, 2.0**attempt)) + continue + raise + if not users: + raise RuntimeError("vaultwarden user list unavailable (possibly rate limited)") + except RuntimeError as exc: + print(f"Vaultwarden: ERROR: {exc}") + print() + return 1 matched: list[VaultwardenUser] = [] for user in users: local = user.email.split("@", 1)[0] if _starts_with_any(local, prefixes): + if user.email in protected_vaultwarden: + continue matched.append(user) matched.sort(key=lambda u: u.email) print(f"Vaultwarden: {len(matched)} users matched") -- 2.47.2 From 28a5d53c98618a977587af8b583ecd0266f18e4c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 5 Jan 2026 13:30:33 -0300 Subject: [PATCH 432/684] monitoring(dashboards): tune namespace share metrics --- scripts/dashboards_render_atlas.py | 17 +++++++++++------ services/monitoring/dashboards/atlas-gpu.json | 2 +- .../monitoring/dashboards/atlas-overview.json | 10 +++++----- services/monitoring/grafana-dashboard-gpu.yaml | 2 +- .../monitoring/grafana-dashboard-overview.yaml | 10 +++++----- 5 files changed, 23 insertions(+), 18 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 34a108a..c08b493 100644 
--- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -46,6 +46,8 @@ PERCENT_THRESHOLDS = { ], } +NAMESPACE_CPU_WINDOW = "1m" + # --------------------------------------------------------------------------- # Cluster metadata # --------------------------------------------------------------------------- @@ -172,7 +174,7 @@ def node_io_expr(scope=""): def namespace_selector(scope_var): - return f'namespace!="",pod!="",container!="",{scope_var}' + return f'namespace!="",pod!="",container!="",container!="POD",{scope_var}' def namespace_gpu_selector(scope_var): @@ -180,7 +182,10 @@ def namespace_gpu_selector(scope_var): def namespace_cpu_raw(scope_var): - return f"sum(rate(container_cpu_usage_seconds_total{{{namespace_selector(scope_var)}}}[5m])) by (namespace)" + return ( + "sum(rate(container_cpu_usage_seconds_total" + f"{{{namespace_selector(scope_var)}}}[{NAMESPACE_CPU_WINDOW}])) by (namespace)" + ) def namespace_ram_raw(scope_var): @@ -942,7 +947,7 @@ def build_overview(): namespace_cpu_share_expr(cpu_scope), {"h": 9, "w": 8, "x": 0, "y": 16}, links=namespace_scope_links("namespace_scope_cpu"), - description="Use panel links to switch namespace scope.", + description="Values are normalized within the selected scope; use panel links to switch scope.", ) ) panels.append( @@ -952,7 +957,7 @@ def build_overview(): namespace_gpu_share_expr(gpu_scope), {"h": 9, "w": 8, "x": 8, "y": 16}, links=namespace_scope_links("namespace_scope_gpu"), - description="Use panel links to switch namespace scope.", + description="Values are normalized within the selected scope; use panel links to switch scope.", ) ) panels.append( @@ -962,7 +967,7 @@ def build_overview(): namespace_ram_share_expr(ram_scope), {"h": 9, "w": 8, "x": 16, "y": 16}, links=namespace_scope_links("namespace_scope_ram"), - description="Use panel links to switch namespace scope.", + description="Values are normalized within the selected scope; use panel links to switch scope.", ) ) @@ 
-1783,7 +1788,7 @@ def build_gpu_dashboard(): namespace_gpu_share_expr(gpu_scope), {"h": 8, "w": 12, "x": 0, "y": 0}, links=namespace_scope_links("namespace_scope_gpu"), - description="Use panel links to switch namespace scope.", + description="Values are normalized within the selected scope; use panel links to switch scope.", ) ) panels.append( diff --git a/services/monitoring/dashboards/atlas-gpu.json b/services/monitoring/dashboards/atlas-gpu.json index d4ad913..2e71045 100644 --- a/services/monitoring/dashboards/atlas-gpu.json +++ b/services/monitoring/dashboards/atlas-gpu.json @@ -71,7 +71,7 @@ "targetBlank": false } ], - "description": "Use panel links to switch namespace scope." + "description": "Values are normalized within the selected scope; use panel links to switch scope." }, { "id": 2, diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index ce1b0a3..109988b 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -1086,7 +1086,7 @@ }, "targets": [ { - "expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ), 1)", + "expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_cpu}[1m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_cpu}[1m])) by (namespace) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1137,7 +1137,7 @@ "targetBlank": false } ], - "description": "Use panel links to switch namespace scope." 
+ "description": "Values are normalized within the selected scope; use panel links to switch scope." }, { "id": 12, @@ -1206,7 +1206,7 @@ "targetBlank": false } ], - "description": "Use panel links to switch namespace scope." + "description": "Values are normalized within the selected scope; use panel links to switch scope." }, { "id": 13, @@ -1224,7 +1224,7 @@ }, "targets": [ { - "expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ), 1)", + "expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_ram}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_ram}) by (namespace) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1275,7 +1275,7 @@ "targetBlank": false } ], - "description": "Use panel links to switch namespace scope." + "description": "Values are normalized within the selected scope; use panel links to switch scope." }, { "id": 14, diff --git a/services/monitoring/grafana-dashboard-gpu.yaml b/services/monitoring/grafana-dashboard-gpu.yaml index 41b4734..56965eb 100644 --- a/services/monitoring/grafana-dashboard-gpu.yaml +++ b/services/monitoring/grafana-dashboard-gpu.yaml @@ -80,7 +80,7 @@ data: "targetBlank": false } ], - "description": "Use panel links to switch namespace scope." + "description": "Values are normalized within the selected scope; use panel links to switch scope." 
}, { "id": 2, diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 557d120..a13ec6f 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -1095,7 +1095,7 @@ data: }, "targets": [ { - "expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ), 1)", + "expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_cpu}[1m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_cpu}[1m])) by (namespace) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1146,7 +1146,7 @@ data: "targetBlank": false } ], - "description": "Use panel links to switch namespace scope." + "description": "Values are normalized within the selected scope; use panel links to switch scope." }, { "id": 12, @@ -1215,7 +1215,7 @@ data: "targetBlank": false } ], - "description": "Use panel links to switch namespace scope." + "description": "Values are normalized within the selected scope; use panel links to switch scope." 
}, { "id": 13, @@ -1233,7 +1233,7 @@ data: }, "targets": [ { - "expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ), 1)", + "expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_ram}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_ram}) by (namespace) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1284,7 +1284,7 @@ data: "targetBlank": false } ], - "description": "Use panel links to switch namespace scope." + "description": "Values are normalized within the selected scope; use panel links to switch scope." }, { "id": 14, -- 2.47.2 From a4105c68dbfa8435b359a6c5d75dbfcfb6d11203 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 5 Jan 2026 13:51:25 -0300 Subject: [PATCH 433/684] scripts: add vaultwarden test cleanup --- scripts/test_vaultwarden_user_cleanup.py | 318 +++++++++++++++++++++++ scripts/test_vaultwarden_user_cleanup.sh | 15 ++ 2 files changed, 333 insertions(+) create mode 100755 scripts/test_vaultwarden_user_cleanup.py create mode 100755 scripts/test_vaultwarden_user_cleanup.sh diff --git a/scripts/test_vaultwarden_user_cleanup.py b/scripts/test_vaultwarden_user_cleanup.py new file mode 100755 index 0000000..93b7aa5 --- /dev/null +++ b/scripts/test_vaultwarden_user_cleanup.py @@ -0,0 +1,318 @@ +#!/usr/bin/env python3 +"""Clean up Vaultwarden test users and invites (manual-only). + +This script deletes Vaultwarden rows directly from the Postgres database. It is +intended only for removing test fallout (e.g. e2e-*, test-*) and is deliberately +conservative: + +- Requires one or more explicit email prefixes (repeatable). 
+- Dry-run by default; --apply requires an exact --confirm guard. +- Refuses to delete any user with dependent data in Vaultwarden tables. +- Supports a protected email allowlist to prevent catastrophic mistakes. + +Example (dry-run): + scripts/test_vaultwarden_user_cleanup.py --prefix e2e- + +Example (apply): + scripts/test_vaultwarden_user_cleanup.py --prefix e2e- --apply --confirm e2e- +""" + +from __future__ import annotations + +import argparse +import json +import re +import subprocess +import sys +from dataclasses import dataclass +from typing import Iterable, Sequence + + +_SAFE_PREFIX_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]{0,63}$") +_UUID_RE = re.compile(r"^[0-9a-fA-F-]{32,36}$") + + +@dataclass(frozen=True) +class VaultwardenUser: + uuid: str + email: str + dependent_rows: int + + +def _run(cmd: Sequence[str], *, input_bytes: bytes | None = None) -> str: + proc = subprocess.run( + list(cmd), + input=input_bytes, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=False, + ) + if proc.returncode != 0: + stderr = proc.stderr.decode("utf-8", errors="replace").strip() + raise RuntimeError(f"command failed ({proc.returncode}): {' '.join(cmd)}\n{stderr}") + return proc.stdout.decode("utf-8", errors="replace") + + +def _kubectl_first_pod(namespace: str) -> str: + raw = _run(["kubectl", "-n", namespace, "get", "pods", "-o", "json"]) + data = json.loads(raw) + items = data.get("items") or [] + if not isinstance(items, list) or not items: + raise RuntimeError(f"no pods found in namespace {namespace}") + name = items[0].get("metadata", {}).get("name") + if not isinstance(name, str) or not name: + raise RuntimeError(f"unexpected pod list in namespace {namespace}") + return name + + +def _psql(sql: str) -> str: + pod = _kubectl_first_pod("postgres") + return _run( + [ + "kubectl", + "-n", + "postgres", + "exec", + "-i", + pod, + "--", + "psql", + "-U", + "postgres", + "-d", + "vaultwarden", + "-At", + "-F", + "\t", + "-c", + sql, + ] + ) + + +def 
_validate_prefixes(prefixes: Iterable[str]) -> list[str]: + cleaned: list[str] = [] + for prefix in prefixes: + prefix = prefix.strip() + if not prefix: + continue + if not _SAFE_PREFIX_RE.match(prefix): + raise SystemExit( + f"invalid prefix '{prefix}': must match {_SAFE_PREFIX_RE.pattern} (alnum plus ._-)" + ) + if not prefix.endswith("-"): + raise SystemExit(f"refusing prefix '{prefix}': must end with '-' for safety") + cleaned.append(prefix) + if not cleaned: + raise SystemExit("at least one --prefix is required") + return sorted(set(cleaned)) + + +def _parse_rows(tsv: str) -> list[list[str]]: + rows: list[list[str]] = [] + for line in tsv.splitlines(): + line = line.strip() + if not line: + continue + rows.append(line.split("\t")) + return rows + + +def _sql_or_email_prefixes(prefixes: list[str]) -> str: + # prefixes validated to safe charset; safe to interpolate. + clauses = [f"email LIKE '{p}%'" for p in prefixes] + return " OR ".join(clauses) if clauses else "FALSE" + + +def _sql_quote(value: str) -> str: + return "'" + value.replace("'", "''") + "'" + + +def _sql_text_array(values: Iterable[str]) -> str: + items = ",".join(_sql_quote(v) for v in values) + return f"ARRAY[{items}]::text[]" + + +def _list_users(prefixes: list[str], protected: set[str]) -> list[VaultwardenUser]: + clause = _sql_or_email_prefixes(prefixes) + sql = f""" + WITH candidates AS ( + SELECT uuid, email + FROM users + WHERE enabled + AND ({clause}) + AND email <> ALL({_sql_text_array(sorted(protected))}) + ) + SELECT + candidates.uuid, + candidates.email, + ( + (SELECT COUNT(*) FROM auth_requests WHERE user_uuid = candidates.uuid) + + (SELECT COUNT(*) FROM ciphers WHERE user_uuid = candidates.uuid) + + (SELECT COUNT(*) FROM devices WHERE user_uuid = candidates.uuid) + + (SELECT COUNT(*) FROM emergency_access WHERE grantor_uuid = candidates.uuid OR grantee_uuid = candidates.uuid) + + (SELECT COUNT(*) FROM favorites WHERE user_uuid = candidates.uuid) + + (SELECT COUNT(*) FROM folders 
WHERE user_uuid = candidates.uuid) + + (SELECT COUNT(*) FROM sends WHERE user_uuid = candidates.uuid) + + (SELECT COUNT(*) FROM twofactor WHERE user_uuid = candidates.uuid) + + (SELECT COUNT(*) FROM twofactor_incomplete WHERE user_uuid = candidates.uuid) + + (SELECT COUNT(*) FROM users_collections WHERE user_uuid = candidates.uuid) + + (SELECT COUNT(*) FROM users_organizations WHERE user_uuid = candidates.uuid) + ) AS dependent_rows + FROM candidates + ORDER BY candidates.email; + """ + out = _psql(sql) + users: list[VaultwardenUser] = [] + for row in _parse_rows(out): + if len(row) < 3: + continue + uuid, email, dep_raw = row[0].strip(), row[1].strip(), row[2].strip() + if not uuid or not email: + continue + if not _UUID_RE.match(uuid): + continue + try: + dep = int(dep_raw) + except ValueError: + dep = 0 + users.append(VaultwardenUser(uuid=uuid, email=email, dependent_rows=dep)) + return users + + +def _list_invitations(prefixes: list[str], protected: set[str]) -> list[str]: + clause = _sql_or_email_prefixes(prefixes) + protected_clause = "" + if protected: + protected_clause = f"AND email <> ALL({_sql_text_array(sorted(protected))})" + sql = f"SELECT email FROM invitations WHERE ({clause}) {protected_clause} ORDER BY email;" + out = _psql(sql) + invites: list[str] = [] + for row in _parse_rows(out): + if not row: + continue + email = row[0].strip() + if email: + invites.append(email) + return invites + + +def _delete_invitations(emails: list[str]) -> int: + if not emails: + return 0 + email_list = ",".join(_sql_quote(e) for e in emails) + sql = f"DELETE FROM invitations WHERE email IN ({email_list});" + out = _psql(sql) + match = re.search(r"DELETE\s+(\d+)", out) + return int(match.group(1)) if match else 0 + + +def _delete_users(uuids: list[str]) -> int: + if not uuids: + return 0 + uuid_list = ",".join(_sql_quote(u) for u in uuids) + sql = f"DELETE FROM users WHERE uuid IN ({uuid_list});" + out = _psql(sql) + match = re.search(r"DELETE\s+(\d+)", out) + return 
int(match.group(1)) if match else 0 + + +def _parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser( + prog="test_vaultwarden_user_cleanup", + description="Manual-only cleanup for Vaultwarden test users/invites (DB-level).", + ) + parser.add_argument( + "--prefix", + action="append", + required=True, + help="Email prefix to target (repeatable). Example: --prefix e2e-", + ) + parser.add_argument( + "--apply", + action="store_true", + help="Apply deletions (default is dry-run). Requires --confirm.", + ) + parser.add_argument( + "--confirm", + default="", + help="Required when using --apply. Must exactly equal the comma-separated prefix list.", + ) + parser.add_argument( + "--protect-email", + action="append", + default=[], + help="Vaultwarden emails that must never be deleted (repeatable).", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="List matched emails (and invitation emails).", + ) + return parser.parse_args(argv) + + +def main(argv: list[str]) -> int: + args = _parse_args(argv) + prefixes = _validate_prefixes(args.prefix) + expected_confirm = ",".join(prefixes) + + protected = {e.strip() for e in args.protect_email if e.strip()} + protected |= { + "brad@bstein.dev", + "edstein87@outlook.com", + "indifox8@gmail.com", + "mgs.stein@gmail.com", + "patriot87@gmail.com", + } + + if args.apply and args.confirm != expected_confirm: + print( + f"error: refusing to apply without --confirm '{expected_confirm}' (got '{args.confirm}')", + file=sys.stderr, + ) + return 2 + + users = _list_users(prefixes, protected=protected) + invites = _list_invitations(prefixes, protected=protected) + + print(f"prefixes: {expected_confirm}") + print(f"mode: {'APPLY' if args.apply else 'DRY-RUN'}") + if protected: + print(f"protected emails: {', '.join(sorted(protected))}") + print(f"vaultwarden users matched: {len(users)}") + print(f"vaultwarden invitations matched: {len(invites)}") + + if args.verbose: + for user in users[: 
min(100, len(users))]: + print(f" user: {user.email} (deps={user.dependent_rows})") + if len(users) > 100: + print(f" ... and {len(users) - 100} more users") + for email in invites[: min(100, len(invites))]: + print(f" invite: {email}") + if len(invites) > 100: + print(f" ... and {len(invites) - 100} more invitations") + + unsafe = [u for u in users if u.dependent_rows > 0] + if unsafe: + print("refusing to delete users with dependent data:", file=sys.stderr) + for user in unsafe[: min(50, len(unsafe))]: + print(f" - {user.email} deps={user.dependent_rows}", file=sys.stderr) + if len(unsafe) > 50: + print(f" ... and {len(unsafe) - 50} more", file=sys.stderr) + return 2 + + if not args.apply: + print("dry-run complete (no changes made)") + return 0 + + deleted_invites = _delete_invitations(invites) + deleted_users = _delete_users([u.uuid for u in users]) + print(f"deleted vaultwarden invitations: {deleted_invites}") + print(f"deleted vaultwarden users: {deleted_users}") + print("done") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/scripts/test_vaultwarden_user_cleanup.sh b/scripts/test_vaultwarden_user_cleanup.sh new file mode 100755 index 0000000..f21f85e --- /dev/null +++ b/scripts/test_vaultwarden_user_cleanup.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Manual-only helper to clean Vaultwarden test users and invites from Postgres. 
+# +# Usage (dry-run): +# scripts/test_vaultwarden_user_cleanup.sh --prefix e2e- +# +# Usage (apply): +# scripts/test_vaultwarden_user_cleanup.sh --prefix e2e- --apply --confirm e2e- + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" + +python3 "${SCRIPT_DIR}/test_vaultwarden_user_cleanup.py" "$@" + -- 2.47.2 From 6195005206d9620d00c99a97659683901c639bb0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 5 Jan 2026 21:21:47 -0300 Subject: [PATCH 434/684] mailu: disable unauthenticated pod relay --- services/mailu/helmrelease.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/mailu/helmrelease.yaml b/services/mailu/helmrelease.yaml index a40f047..339884b 100644 --- a/services/mailu/helmrelease.yaml +++ b/services/mailu/helmrelease.yaml @@ -220,6 +220,7 @@ spec: smtp_use_tls: "yes" smtp_tls_security_level: "encrypt" smtp_sasl_security_options: "noanonymous" + smtpd_relay_restrictions: "permit_sasl_authenticated, reject_unauth_destination" redis: enabled: true architecture: standalone -- 2.47.2 From ec208fe0f6950dd81610d62e4d348bd0c36668f2 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 5 Jan 2026 21:27:19 -0300 Subject: [PATCH 435/684] mailu: remove pod network relay --- services/mailu/helmrelease.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/mailu/helmrelease.yaml b/services/mailu/helmrelease.yaml index 339884b..32e3068 100644 --- a/services/mailu/helmrelease.yaml +++ b/services/mailu/helmrelease.yaml @@ -217,10 +217,10 @@ spec: nodeSelector: hardware: rpi4 overrides: - smtp_use_tls: "yes" - smtp_tls_security_level: "encrypt" - smtp_sasl_security_options: "noanonymous" - smtpd_relay_restrictions: "permit_sasl_authenticated, reject_unauth_destination" + postfix.cf: | + smtpd_relay_restrictions = permit_sasl_authenticated, reject_unauth_destination + podAnnotations: + bstein.dev/restarted-at: "2026-01-06T00:00:00Z" redis: enabled: true architecture: standalone -- 2.47.2 From 
d132917d9e735c57f2e08b0ec4063274df33785d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 5 Jan 2026 21:44:29 -0300 Subject: [PATCH 436/684] monitoring: add Postmark bounce exporter --- scripts/monitoring_postmark_exporter.py | 120 +++++++++++++++++ .../monitoring_render_postmark_exporter.py | 35 +++++ services/monitoring/kustomization.yaml | 3 + .../postmark-exporter-deployment.yaml | 63 +++++++++ .../monitoring/postmark-exporter-script.yaml | 127 ++++++++++++++++++ .../monitoring/postmark-exporter-service.yaml | 18 +++ 6 files changed, 366 insertions(+) create mode 100644 scripts/monitoring_postmark_exporter.py create mode 100644 scripts/monitoring_render_postmark_exporter.py create mode 100644 services/monitoring/postmark-exporter-deployment.yaml create mode 100644 services/monitoring/postmark-exporter-script.yaml create mode 100644 services/monitoring/postmark-exporter-service.yaml diff --git a/scripts/monitoring_postmark_exporter.py b/scripts/monitoring_postmark_exporter.py new file mode 100644 index 0000000..dc0cac9 --- /dev/null +++ b/scripts/monitoring_postmark_exporter.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 + +import datetime as dt +import os +import time +from dataclasses import dataclass + +import requests +from prometheus_client import Gauge, Info, start_http_server + + +@dataclass(frozen=True) +class Window: + label: str + days: int + + +WINDOWS = [ + Window("1d", 1), + Window("7d", 7), +] + +API_BASE = os.environ.get("POSTMARK_API_BASE", "https://api.postmarkapp.com").rstrip("/") +POLL_INTERVAL_SECONDS = int(os.environ.get("POLL_INTERVAL_SECONDS", "60")) +LISTEN_ADDRESS = os.environ.get("LISTEN_ADDRESS", "0.0.0.0") +LISTEN_PORT = int(os.environ.get("LISTEN_PORT", "8000")) + +PRIMARY_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN", "").strip() +FALLBACK_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN_FALLBACK", "").strip() + +EXPORTER_INFO = Info("postmark_exporter", "Exporter build info") +EXPORTER_INFO.info( + { + "api_base": API_BASE, + 
"windows": ",".join(window.label for window in WINDOWS), + } +) + +POSTMARK_API_UP = Gauge("postmark_api_up", "Whether Postmark API is reachable (1) or not (0)") +POSTMARK_LAST_SUCCESS = Gauge( + "postmark_last_success_timestamp_seconds", + "Unix timestamp of the last successful Postmark stats refresh", +) +POSTMARK_REQUEST_ERRORS = Gauge( + "postmark_request_errors_total", + "Total Postmark stats request errors since exporter start", +) + +POSTMARK_OUTBOUND_SENT = Gauge( + "postmark_outbound_sent", + "Outbound emails sent within the selected window", + labelnames=("window",), +) +POSTMARK_OUTBOUND_BOUNCED = Gauge( + "postmark_outbound_bounced", + "Outbound emails bounced within the selected window", + labelnames=("window",), +) +POSTMARK_OUTBOUND_BOUNCE_RATE = Gauge( + "postmark_outbound_bounce_rate", + "Outbound bounce rate percentage within the selected window", + labelnames=("window",), +) + + +def fetch_outbound_stats(token: str, window: Window) -> dict: + today = dt.date.today() + fromdate = today - dt.timedelta(days=window.days) + params = {"fromdate": fromdate.isoformat(), "todate": today.isoformat()} + headers = { + "Accept": "application/json", + "X-Postmark-Server-Token": token, + } + response = requests.get( + f"{API_BASE}/stats/outbound", + headers=headers, + params=params, + timeout=15, + ) + response.raise_for_status() + return response.json() + + +def update_metrics(token: str) -> None: + for window in WINDOWS: + data = fetch_outbound_stats(token, window) + sent = int(data.get("Sent", 0) or 0) + bounced = int(data.get("Bounced", 0) or 0) + rate = (bounced / sent * 100.0) if sent else 0.0 + POSTMARK_OUTBOUND_SENT.labels(window=window.label).set(sent) + POSTMARK_OUTBOUND_BOUNCED.labels(window=window.label).set(bounced) + POSTMARK_OUTBOUND_BOUNCE_RATE.labels(window=window.label).set(rate) + + +def main() -> None: + if not PRIMARY_TOKEN and not FALLBACK_TOKEN: + raise SystemExit("POSTMARK_SERVER_TOKEN or POSTMARK_SERVER_TOKEN_FALLBACK is required") + + 
start_http_server(LISTEN_PORT, addr=LISTEN_ADDRESS) + + tokens = [token for token in (PRIMARY_TOKEN, FALLBACK_TOKEN) if token] + token_index = 0 + + while True: + token = tokens[token_index % len(tokens)] + token_index += 1 + try: + update_metrics(token) + POSTMARK_API_UP.set(1) + POSTMARK_LAST_SUCCESS.set(time.time()) + except Exception as exc: # noqa: BLE001 + POSTMARK_API_UP.set(0) + POSTMARK_REQUEST_ERRORS.inc() + print(f"postmark_exporter: refresh failed: {exc}", flush=True) + time.sleep(POLL_INTERVAL_SECONDS) + + +if __name__ == "__main__": + main() + diff --git a/scripts/monitoring_render_postmark_exporter.py b/scripts/monitoring_render_postmark_exporter.py new file mode 100644 index 0000000..b0a458a --- /dev/null +++ b/scripts/monitoring_render_postmark_exporter.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 + +from pathlib import Path + + +def indent(text: str, spaces: int) -> str: + prefix = " " * spaces + return "".join(prefix + line if line.strip("\n") else line for line in text.splitlines(keepends=True)) + + +def main() -> None: + root = Path(__file__).resolve().parents[1] + source = root / "scripts" / "monitoring_postmark_exporter.py" + target = root / "services" / "monitoring" / "postmark-exporter-script.yaml" + + payload = source.read_text(encoding="utf-8") + if not payload.endswith("\n"): + payload += "\n" + + yaml = ( + f"# services/monitoring/postmark-exporter-script.yaml\n" + f"apiVersion: v1\n" + f"kind: ConfigMap\n" + f"metadata:\n" + f" name: postmark-exporter-script\n" + f"data:\n" + f" monitoring_postmark_exporter.py: |\n" + f"{indent(payload, 4)}" + ) + + target.write_text(yaml, encoding="utf-8") + + +if __name__ == "__main__": + main() diff --git a/services/monitoring/kustomization.yaml b/services/monitoring/kustomization.yaml index ad53bb5..4d33ccf 100644 --- a/services/monitoring/kustomization.yaml +++ b/services/monitoring/kustomization.yaml @@ -5,6 +5,7 @@ namespace: monitoring resources: - namespace.yaml - rbac.yaml + - 
postmark-exporter-script.yaml - grafana-dashboard-overview.yaml - grafana-dashboard-pods.yaml - grafana-dashboard-nodes.yaml @@ -12,6 +13,8 @@ resources: - grafana-dashboard-network.yaml - grafana-dashboard-gpu.yaml - dcgm-exporter.yaml + - postmark-exporter-service.yaml + - postmark-exporter-deployment.yaml - grafana-folders.yaml - helmrelease.yaml - grafana-org-bootstrap.yaml diff --git a/services/monitoring/postmark-exporter-deployment.yaml b/services/monitoring/postmark-exporter-deployment.yaml new file mode 100644 index 0000000..80b18ec --- /dev/null +++ b/services/monitoring/postmark-exporter-deployment.yaml @@ -0,0 +1,63 @@ +# services/monitoring/postmark-exporter-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: postmark-exporter +spec: + replicas: 1 + selector: + matchLabels: + app: postmark-exporter + template: + metadata: + labels: + app: postmark-exporter + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8000" + prometheus.io/path: "/metrics" + spec: + containers: + - name: exporter + image: python:3.12-alpine + imagePullPolicy: IfNotPresent + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + pip install --no-cache-dir prometheus-client==0.22.1 requests==2.32.3 + exec python /app/monitoring_postmark_exporter.py + env: + - name: POSTMARK_SERVER_TOKEN + valueFrom: + secretKeyRef: + name: postmark-exporter + key: relay-username + - name: POSTMARK_SERVER_TOKEN_FALLBACK + valueFrom: + secretKeyRef: + name: postmark-exporter + key: relay-password + - name: POLL_INTERVAL_SECONDS + value: "60" + - name: LISTEN_PORT + value: "8000" + ports: + - name: http + containerPort: 8000 + volumeMounts: + - name: script + mountPath: /app + readOnly: true + resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 250m + memory: 256Mi + volumes: + - name: script + configMap: + name: postmark-exporter-script + diff --git a/services/monitoring/postmark-exporter-script.yaml 
b/services/monitoring/postmark-exporter-script.yaml
new file mode 100644
index 0000000..3d753fa
--- /dev/null
+++ b/services/monitoring/postmark-exporter-script.yaml
@@ -0,0 +1,144 @@
+# services/monitoring/postmark-exporter-script.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: postmark-exporter-script
+data:
+  monitoring_postmark_exporter.py: |
+    #!/usr/bin/env python3
+    """Export Postmark outbound-mail statistics as Prometheus metrics."""
+
+    import datetime as dt
+    import os
+    import time
+    from dataclasses import dataclass
+
+    import requests
+    from prometheus_client import Counter, Gauge, Info, start_http_server
+
+
+    @dataclass(frozen=True)
+    class Window:
+        # Label used as the Prometheus "window" label value (e.g. "1d").
+        label: str
+        # Number of days covered by the stats query.
+        days: int
+
+
+    WINDOWS = [
+        Window("1d", 1),
+        Window("7d", 7),
+    ]
+
+    API_BASE = os.environ.get("POSTMARK_API_BASE", "https://api.postmarkapp.com").rstrip("/")
+    POLL_INTERVAL_SECONDS = int(os.environ.get("POLL_INTERVAL_SECONDS", "60"))
+    LISTEN_ADDRESS = os.environ.get("LISTEN_ADDRESS", "0.0.0.0")
+    LISTEN_PORT = int(os.environ.get("LISTEN_PORT", "8000"))
+
+    PRIMARY_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN", "").strip()
+    FALLBACK_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN_FALLBACK", "").strip()
+
+    EXPORTER_INFO = Info("postmark_exporter", "Exporter build info")
+    EXPORTER_INFO.info(
+        {
+            "api_base": API_BASE,
+            "windows": ",".join(window.label for window in WINDOWS),
+        }
+    )
+
+    POSTMARK_API_UP = Gauge("postmark_api_up", "Whether Postmark API is reachable (1) or not (0)")
+    POSTMARK_LAST_SUCCESS = Gauge(
+        "postmark_last_success_timestamp_seconds",
+        "Unix timestamp of the last successful Postmark stats refresh",
+    )
+    # Counter (not Gauge): the value only ever increases, so rate()/increase()
+    # work in PromQL and the exposed TYPE matches the _total suffix. The
+    # python client keeps the sample name postmark_request_errors_total.
+    POSTMARK_REQUEST_ERRORS = Counter(
+        "postmark_request_errors_total",
+        "Total Postmark stats request errors since exporter start",
+    )
+
+    POSTMARK_OUTBOUND_SENT = Gauge(
+        "postmark_outbound_sent",
+        "Outbound emails sent within the selected window",
+        labelnames=("window",),
+    )
+    POSTMARK_OUTBOUND_BOUNCED = Gauge(
+        "postmark_outbound_bounced",
+        "Outbound emails bounced within the selected window",
+        labelnames=("window",),
+    )
+    POSTMARK_OUTBOUND_BOUNCE_RATE = Gauge(
+        "postmark_outbound_bounce_rate",
+        "Outbound bounce rate percentage within the selected window",
+        labelnames=("window",),
+    )
+
+
+    def fetch_outbound_stats(token: str, window: Window) -> dict:
+        """Fetch /stats/outbound totals for the window; raises on HTTP/network errors."""
+        today = dt.date.today()
+        fromdate = today - dt.timedelta(days=window.days)
+        params = {"fromdate": fromdate.isoformat(), "todate": today.isoformat()}
+        headers = {
+            "Accept": "application/json",
+            "X-Postmark-Server-Token": token,
+        }
+        response = requests.get(
+            f"{API_BASE}/stats/outbound",
+            headers=headers,
+            params=params,
+            timeout=15,
+        )
+        response.raise_for_status()
+        return response.json()
+
+
+    def update_metrics(token: str) -> None:
+        """Refresh sent/bounced/bounce-rate gauges for every configured window."""
+        for window in WINDOWS:
+            data = fetch_outbound_stats(token, window)
+            sent = int(data.get("Sent", 0) or 0)
+            bounced = int(data.get("Bounced", 0) or 0)
+            # Avoid division by zero when nothing was sent in the window.
+            rate = (bounced / sent * 100.0) if sent else 0.0
+            POSTMARK_OUTBOUND_SENT.labels(window=window.label).set(sent)
+            POSTMARK_OUTBOUND_BOUNCED.labels(window=window.label).set(bounced)
+            POSTMARK_OUTBOUND_BOUNCE_RATE.labels(window=window.label).set(rate)
+
+
+    def main() -> None:
+        """Serve /metrics and refresh Postmark stats every POLL_INTERVAL_SECONDS."""
+        tokens = [token for token in (PRIMARY_TOKEN, FALLBACK_TOKEN) if token]
+        if not tokens:
+            raise SystemExit("POSTMARK_SERVER_TOKEN or POSTMARK_SERVER_TOKEN_FALLBACK is required")
+
+        start_http_server(LISTEN_PORT, addr=LISTEN_ADDRESS)
+
+        # Index of the last token that worked. The primary token is preferred;
+        # the fallback is only tried after the active token fails (true
+        # failover, not round-robin rotation).
+        active = 0
+
+        while True:
+            refreshed = False
+            for offset in range(len(tokens)):
+                index = (active + offset) % len(tokens)
+                try:
+                    update_metrics(tokens[index])
+                except Exception as exc:  # noqa: BLE001
+                    POSTMARK_REQUEST_ERRORS.inc()
+                    print(f"postmark_exporter: refresh failed: {exc}", flush=True)
+                    continue
+                active = index
+                refreshed = True
+                break
+            if refreshed:
+                POSTMARK_API_UP.set(1)
+                POSTMARK_LAST_SUCCESS.set(time.time())
+            else:
+                POSTMARK_API_UP.set(0)
+            time.sleep(POLL_INTERVAL_SECONDS)
+
+
+    if __name__ == "__main__":
+        main()
+
diff --git a/services/monitoring/postmark-exporter-service.yaml b/services/monitoring/postmark-exporter-service.yaml
new file mode 100644
index 0000000..957973a
--- 
/dev/null +++ b/services/monitoring/postmark-exporter-service.yaml @@ -0,0 +1,18 @@ +# services/monitoring/postmark-exporter-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: postmark-exporter + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8000" + prometheus.io/path: "/metrics" +spec: + type: ClusterIP + selector: + app: postmark-exporter + ports: + - name: http + port: 8000 + targetPort: http + -- 2.47.2 From 9be25e16fea347b421a0978e563ff2d98face54b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 5 Jan 2026 21:55:59 -0300 Subject: [PATCH 437/684] monitoring: add Postmark mail dashboard --- scripts/dashboards_render_atlas.py | 244 ++++++ .../monitoring/dashboards/atlas-mail.json | 688 +++++++++++++++++ .../monitoring/dashboards/atlas-overview.json | 288 ++++++++ .../monitoring/grafana-dashboard-mail.yaml | 697 ++++++++++++++++++ .../grafana-dashboard-overview.yaml | 288 ++++++++ services/monitoring/helmrelease.yaml | 9 + services/monitoring/kustomization.yaml | 1 + 7 files changed, 2215 insertions(+) create mode 100644 services/monitoring/dashboards/atlas-mail.json create mode 100644 services/monitoring/grafana-dashboard-mail.yaml diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index c08b493..ea2330c 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -917,6 +917,75 @@ def build_overview(): ) ) + mail_bounce_rate_thresholds = { + "mode": "absolute", + "steps": [ + {"color": "green", "value": None}, + {"color": "yellow", "value": 5}, + {"color": "orange", "value": 8}, + {"color": "red", "value": 10}, + ], + } + mail_bounce_count_thresholds = { + "mode": "absolute", + "steps": [ + {"color": "green", "value": None}, + {"color": "yellow", "value": 1}, + {"color": "orange", "value": 10}, + {"color": "red", "value": 100}, + ], + } + mail_api_thresholds = { + "mode": "absolute", + "steps": [ + {"color": "red", "value": None}, + {"color": "green", "value": 
1}, + ], + } + mail_overview_panels = [ + ( + 30, + "Mail Bounce Rate (1d)", + 'postmark_outbound_bounce_rate{window="1d"}', + "percent", + mail_bounce_rate_thresholds, + ), + ( + 31, + "Mail Bounced (1d)", + 'postmark_outbound_bounced{window="1d"}', + "none", + mail_bounce_count_thresholds, + ), + ( + 32, + "Mail Sent (1d)", + 'postmark_outbound_sent{window="1d"}', + "none", + None, + ), + ( + 33, + "Postmark API Up", + "postmark_api_up", + "none", + mail_api_thresholds, + ), + ] + for idx, (panel_id, title, expr, unit, thresholds) in enumerate(mail_overview_panels): + panels.append( + stat_panel( + panel_id, + title, + expr, + {"h": 2, "w": 6, "x": 6 * idx, "y": 8}, + unit=unit, + thresholds=thresholds, + decimals=1 if unit == "percent" else 0, + links=link_to("atlas-mail"), + ) + ) + storage_panels = [ (23, "Astreae Usage", astreae_usage_expr("/mnt/astreae"), "percent"), (24, "Asteria Usage", astreae_usage_expr("/mnt/asteria"), "percent"), @@ -1778,6 +1847,177 @@ def build_network_dashboard(): } +def build_mail_dashboard(): + panels = [] + + bounce_rate_thresholds = { + "mode": "absolute", + "steps": [ + {"color": "green", "value": None}, + {"color": "yellow", "value": 5}, + {"color": "orange", "value": 8}, + {"color": "red", "value": 10}, + ], + } + bounce_count_thresholds = { + "mode": "absolute", + "steps": [ + {"color": "green", "value": None}, + {"color": "yellow", "value": 1}, + {"color": "orange", "value": 10}, + {"color": "red", "value": 100}, + ], + } + api_thresholds = { + "mode": "absolute", + "steps": [ + {"color": "red", "value": None}, + {"color": "green", "value": 1}, + ], + } + + current_stats = [ + ( + 1, + "Bounce Rate (1d)", + 'postmark_outbound_bounce_rate{window="1d"}', + "percent", + bounce_rate_thresholds, + ), + ( + 2, + "Bounce Rate (7d)", + 'postmark_outbound_bounce_rate{window="7d"}', + "percent", + bounce_rate_thresholds, + ), + ( + 3, + "Bounced (1d)", + 'postmark_outbound_bounced{window="1d"}', + "none", + bounce_count_thresholds, + 
), + ( + 4, + "Bounced (7d)", + 'postmark_outbound_bounced{window="7d"}', + "none", + bounce_count_thresholds, + ), + ] + for idx, (panel_id, title, expr, unit, thresholds) in enumerate(current_stats): + panels.append( + stat_panel( + panel_id, + title, + expr, + {"h": 4, "w": 6, "x": 6 * idx, "y": 0}, + unit=unit, + thresholds=thresholds, + decimals=1 if unit == "percent" else 0, + ) + ) + + panels.append( + stat_panel( + 5, + "Sent (1d)", + 'postmark_outbound_sent{window="1d"}', + {"h": 4, "w": 6, "x": 0, "y": 4}, + decimals=0, + ) + ) + panels.append( + stat_panel( + 6, + "Sent (7d)", + 'postmark_outbound_sent{window="7d"}', + {"h": 4, "w": 6, "x": 6, "y": 4}, + decimals=0, + ) + ) + panels.append( + stat_panel( + 7, + "Postmark API Up", + "postmark_api_up", + {"h": 4, "w": 6, "x": 12, "y": 4}, + thresholds=api_thresholds, + decimals=0, + ) + ) + panels.append( + stat_panel( + 8, + "Last Success", + "postmark_last_success_timestamp_seconds", + {"h": 4, "w": 6, "x": 18, "y": 4}, + unit="dateTimeAsIso", + decimals=0, + ) + ) + + panels.append( + timeseries_panel( + 9, + "Bounce Rate (1d vs 7d)", + "postmark_outbound_bounce_rate", + {"h": 8, "w": 12, "x": 0, "y": 8}, + unit="percent", + legend="{{window}}", + legend_display="table", + legend_placement="right", + ) + ) + panels.append( + timeseries_panel( + 10, + "Bounced (1d vs 7d)", + "postmark_outbound_bounced", + {"h": 8, "w": 12, "x": 12, "y": 8}, + unit="none", + legend="{{window}}", + legend_display="table", + legend_placement="right", + ) + ) + panels.append( + timeseries_panel( + 11, + "Sent (1d vs 7d)", + "postmark_outbound_sent", + {"h": 8, "w": 12, "x": 0, "y": 16}, + unit="none", + legend="{{window}}", + legend_display="table", + legend_placement="right", + ) + ) + panels.append( + timeseries_panel( + 12, + "Exporter Errors", + "postmark_request_errors_total", + {"h": 8, "w": 12, "x": 12, "y": 16}, + unit="none", + ) + ) + + return { + "uid": "atlas-mail", + "title": "Atlas Mail", + "folderUid": 
PRIVATE_FOLDER, + "editable": True, + "panels": panels, + "time": {"from": "now-30d", "to": "now"}, + "annotations": {"list": []}, + "schemaVersion": 39, + "style": "dark", + "tags": ["atlas", "mail"], + } + + def build_gpu_dashboard(): panels = [] gpu_scope = "$namespace_scope_gpu" @@ -1867,6 +2107,10 @@ DASHBOARDS = { "builder": build_network_dashboard, "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-network.yaml", }, + "atlas-mail": { + "builder": build_mail_dashboard, + "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-mail.yaml", + }, "atlas-gpu": { "builder": build_gpu_dashboard, "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-gpu.yaml", diff --git a/services/monitoring/dashboards/atlas-mail.json b/services/monitoring/dashboards/atlas-mail.json new file mode 100644 index 0000000..1e73d76 --- /dev/null +++ b/services/monitoring/dashboards/atlas-mail.json @@ -0,0 +1,688 @@ +{ + "uid": "atlas-mail", + "title": "Atlas Mail", + "folderUid": "atlas-internal", + "editable": true, + "panels": [ + { + "id": 1, + "type": "stat", + "title": "Bounce Rate (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 0 + }, + "targets": [ + { + "expr": "postmark_outbound_bounce_rate{window=\"1d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5 + }, + { + "color": "orange", + "value": 8 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + 
}, + { + "id": 2, + "type": "stat", + "title": "Bounce Rate (7d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 0 + }, + "targets": [ + { + "expr": "postmark_outbound_bounce_rate{window=\"7d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5 + }, + { + "color": "orange", + "value": 8 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 3, + "type": "stat", + "title": "Bounced (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 0 + }, + "targets": [ + { + "expr": "postmark_outbound_bounced{window=\"1d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 10 + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 4, + "type": "stat", + "title": "Bounced (7d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + 
}, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 0 + }, + "targets": [ + { + "expr": "postmark_outbound_bounced{window=\"7d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 10 + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 5, + "type": "stat", + "title": "Sent (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 4 + }, + "targets": [ + { + "expr": "postmark_outbound_sent{window=\"1d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 6, + "type": "stat", + "title": "Sent (7d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 4 + }, + "targets": [ + { + "expr": "postmark_outbound_sent{window=\"7d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 7, + "type": "stat", + "title": "Postmark API Up", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 4 + }, + "targets": [ + { + "expr": "postmark_api_up", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 8, + "type": "stat", + "title": "Last Success", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 4 + }, + "targets": [ + { + "expr": "postmark_last_success_timestamp_seconds", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "dateTimeAsIso", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + 
"options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 9, + "type": "timeseries", + "title": "Bounce Rate (1d vs 7d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "targets": [ + { + "expr": "postmark_outbound_bounce_rate", + "refId": "A", + "legendFormat": "{{window}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 10, + "type": "timeseries", + "title": "Bounced (1d vs 7d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "targets": [ + { + "expr": "postmark_outbound_bounced", + "refId": "A", + "legendFormat": "{{window}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 11, + "type": "timeseries", + "title": "Sent (1d vs 7d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "targets": [ + { + "expr": "postmark_outbound_sent", + "refId": "A", + "legendFormat": "{{window}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 12, + "type": "timeseries", + "title": "Exporter Errors", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "targets": 
[ + { + "expr": "postmark_request_errors_total", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + } + ], + "time": { + "from": "now-30d", + "to": "now" + }, + "annotations": { + "list": [] + }, + "schemaVersion": 39, + "style": "dark", + "tags": [ + "atlas", + "mail" + ] +} diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 109988b..4938485 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -786,6 +786,294 @@ } ] }, + { + "id": 30, + "type": "stat", + "title": "Mail Bounce Rate (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 2, + "w": 6, + "x": 0, + "y": 8 + }, + "targets": [ + { + "expr": "postmark_outbound_bounce_rate{window=\"1d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5 + }, + { + "color": "orange", + "value": 8 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "links": [ + { + "title": "Open atlas-mail dashboard", + "url": "/d/atlas-mail", + "targetBlank": true + } + ] + }, + { + "id": 31, + "type": "stat", + "title": "Mail Bounced (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 2, + "w": 6, + "x": 6, + "y": 8 + }, 
+ "targets": [ + { + "expr": "postmark_outbound_bounced{window=\"1d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 10 + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "links": [ + { + "title": "Open atlas-mail dashboard", + "url": "/d/atlas-mail", + "targetBlank": true + } + ] + }, + { + "id": 32, + "type": "stat", + "title": "Mail Sent (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 2, + "w": 6, + "x": 12, + "y": 8 + }, + "targets": [ + { + "expr": "postmark_outbound_sent{window=\"1d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "links": [ + { + "title": "Open atlas-mail dashboard", + "url": "/d/atlas-mail", + "targetBlank": true + } + ] + }, + { + "id": 33, + "type": "stat", + "title": "Postmark API Up", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 2, + "w": 
6, + "x": 18, + "y": 8 + }, + "targets": [ + { + "expr": "postmark_api_up", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "links": [ + { + "title": "Open atlas-mail dashboard", + "url": "/d/atlas-mail", + "targetBlank": true + } + ] + }, { "id": 23, "type": "stat", diff --git a/services/monitoring/grafana-dashboard-mail.yaml b/services/monitoring/grafana-dashboard-mail.yaml new file mode 100644 index 0000000..6c96358 --- /dev/null +++ b/services/monitoring/grafana-dashboard-mail.yaml @@ -0,0 +1,697 @@ +# services/monitoring/grafana-dashboard-mail.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboard-mail + labels: + grafana_dashboard: "1" +data: + atlas-mail.json: | + { + "uid": "atlas-mail", + "title": "Atlas Mail", + "folderUid": "atlas-internal", + "editable": true, + "panels": [ + { + "id": 1, + "type": "stat", + "title": "Bounce Rate (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 0 + }, + "targets": [ + { + "expr": "postmark_outbound_bounce_rate{window=\"1d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5 + }, + { + "color": "orange", + "value": 8 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "percent", + 
"custom": { + "displayMode": "auto" + }, + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 2, + "type": "stat", + "title": "Bounce Rate (7d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 0 + }, + "targets": [ + { + "expr": "postmark_outbound_bounce_rate{window=\"7d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5 + }, + { + "color": "orange", + "value": 8 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 3, + "type": "stat", + "title": "Bounced (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 0 + }, + "targets": [ + { + "expr": "postmark_outbound_bounced{window=\"1d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 10 + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + 
"graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 4, + "type": "stat", + "title": "Bounced (7d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 0 + }, + "targets": [ + { + "expr": "postmark_outbound_bounced{window=\"7d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 10 + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 5, + "type": "stat", + "title": "Sent (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 4 + }, + "targets": [ + { + "expr": "postmark_outbound_sent{window=\"1d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 6, + "type": "stat", + 
"title": "Sent (7d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 4 + }, + "targets": [ + { + "expr": "postmark_outbound_sent{window=\"7d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 7, + "type": "stat", + "title": "Postmark API Up", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 4 + }, + "targets": [ + { + "expr": "postmark_api_up", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 8, + "type": "stat", + "title": "Last Success", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 4 + }, + "targets": [ + { + "expr": "postmark_last_success_timestamp_seconds", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, 
+ "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "dateTimeAsIso", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 9, + "type": "timeseries", + "title": "Bounce Rate (1d vs 7d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "targets": [ + { + "expr": "postmark_outbound_bounce_rate", + "refId": "A", + "legendFormat": "{{window}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 10, + "type": "timeseries", + "title": "Bounced (1d vs 7d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "targets": [ + { + "expr": "postmark_outbound_bounced", + "refId": "A", + "legendFormat": "{{window}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 11, + "type": "timeseries", + "title": "Sent (1d vs 7d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "targets": [ + { + "expr": "postmark_outbound_sent", + "refId": "A", + "legendFormat": "{{window}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "options": { + "legend": { + 
"displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 12, + "type": "timeseries", + "title": "Exporter Errors", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "targets": [ + { + "expr": "postmark_request_errors_total", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + } + ], + "time": { + "from": "now-30d", + "to": "now" + }, + "annotations": { + "list": [] + }, + "schemaVersion": 39, + "style": "dark", + "tags": [ + "atlas", + "mail" + ] + } diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index a13ec6f..7ebb687 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -795,6 +795,294 @@ data: } ] }, + { + "id": 30, + "type": "stat", + "title": "Mail Bounce Rate (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 2, + "w": 6, + "x": 0, + "y": 8 + }, + "targets": [ + { + "expr": "postmark_outbound_bounce_rate{window=\"1d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5 + }, + { + "color": "orange", + "value": 8 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": 
"value" + }, + "links": [ + { + "title": "Open atlas-mail dashboard", + "url": "/d/atlas-mail", + "targetBlank": true + } + ] + }, + { + "id": 31, + "type": "stat", + "title": "Mail Bounced (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 2, + "w": 6, + "x": 6, + "y": 8 + }, + "targets": [ + { + "expr": "postmark_outbound_bounced{window=\"1d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 10 + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "links": [ + { + "title": "Open atlas-mail dashboard", + "url": "/d/atlas-mail", + "targetBlank": true + } + ] + }, + { + "id": 32, + "type": "stat", + "title": "Mail Sent (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 2, + "w": 6, + "x": 12, + "y": 8 + }, + "targets": [ + { + "expr": "postmark_outbound_sent{window=\"1d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + 
"values": false + }, + "textMode": "value" + }, + "links": [ + { + "title": "Open atlas-mail dashboard", + "url": "/d/atlas-mail", + "targetBlank": true + } + ] + }, + { + "id": 33, + "type": "stat", + "title": "Postmark API Up", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 2, + "w": 6, + "x": 18, + "y": 8 + }, + "targets": [ + { + "expr": "postmark_api_up", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "links": [ + { + "title": "Open atlas-mail dashboard", + "url": "/d/atlas-mail", + "targetBlank": true + } + ] + }, { "id": 23, "type": "stat", diff --git a/services/monitoring/helmrelease.yaml b/services/monitoring/helmrelease.yaml index 9634405..40db9f4 100644 --- a/services/monitoring/helmrelease.yaml +++ b/services/monitoring/helmrelease.yaml @@ -371,6 +371,14 @@ spec: editable: true options: path: /var/lib/grafana/dashboards/network + - name: mail + orgId: 1 + folder: Atlas Internal + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards/mail dashboardsConfigMaps: overview: grafana-dashboard-overview overview-public: grafana-dashboard-overview @@ -379,6 +387,7 @@ spec: storage: grafana-dashboard-storage gpu: grafana-dashboard-gpu network: grafana-dashboard-network + mail: grafana-dashboard-mail extraConfigmapMounts: - name: grafana-folders mountPath: /etc/grafana/provisioning/folders diff --git a/services/monitoring/kustomization.yaml 
b/services/monitoring/kustomization.yaml index 4d33ccf..1b3887d 100644 --- a/services/monitoring/kustomization.yaml +++ b/services/monitoring/kustomization.yaml @@ -12,6 +12,7 @@ resources: - grafana-dashboard-storage.yaml - grafana-dashboard-network.yaml - grafana-dashboard-gpu.yaml + - grafana-dashboard-mail.yaml - dcgm-exporter.yaml - postmark-exporter-service.yaml - postmark-exporter-deployment.yaml -- 2.47.2 From 12b579d951c4c1d909b2fde6a037efa965f15450 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 5 Jan 2026 22:06:24 -0300 Subject: [PATCH 438/684] monitoring: add Postmark today window --- scripts/monitoring_postmark_exporter.py | 2 +- services/monitoring/postmark-exporter-script.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/monitoring_postmark_exporter.py b/scripts/monitoring_postmark_exporter.py index dc0cac9..ecae9f3 100644 --- a/scripts/monitoring_postmark_exporter.py +++ b/scripts/monitoring_postmark_exporter.py @@ -16,6 +16,7 @@ class Window: WINDOWS = [ + Window("today", 0), Window("1d", 1), Window("7d", 7), ] @@ -117,4 +118,3 @@ def main() -> None: if __name__ == "__main__": main() - diff --git a/services/monitoring/postmark-exporter-script.yaml b/services/monitoring/postmark-exporter-script.yaml index 3d753fa..884d963 100644 --- a/services/monitoring/postmark-exporter-script.yaml +++ b/services/monitoring/postmark-exporter-script.yaml @@ -23,6 +23,7 @@ data: WINDOWS = [ + Window("today", 0), Window("1d", 1), Window("7d", 7), ] @@ -124,4 +125,3 @@ data: if __name__ == "__main__": main() - -- 2.47.2 From 1fb56bae708fab107f48e31a53757e3c16978f3a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 5 Jan 2026 22:07:52 -0300 Subject: [PATCH 439/684] monitoring: restart postmark exporter --- services/monitoring/postmark-exporter-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/monitoring/postmark-exporter-deployment.yaml 
b/services/monitoring/postmark-exporter-deployment.yaml index 80b18ec..adadc4b 100644 --- a/services/monitoring/postmark-exporter-deployment.yaml +++ b/services/monitoring/postmark-exporter-deployment.yaml @@ -16,6 +16,7 @@ spec: prometheus.io/scrape: "true" prometheus.io/port: "8000" prometheus.io/path: "/metrics" + bstein.dev/restarted-at: "2026-01-06T00:00:00Z" spec: containers: - name: exporter @@ -60,4 +61,3 @@ spec: - name: script configMap: name: postmark-exporter-script - -- 2.47.2 From d5d2fc66b95875d75f6495f64cc602639425888c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 02:06:20 -0300 Subject: [PATCH 440/684] monitoring: refine mail stats and add send-limit usage --- scripts/dashboards_render_atlas.py | 117 +++++-- scripts/monitoring_postmark_exporter.py | 29 ++ .../monitoring/dashboards/atlas-mail.json | 300 ++++++++++++++++-- .../monitoring/dashboards/atlas-overview.json | 28 +- .../monitoring/grafana-dashboard-mail.yaml | 300 ++++++++++++++++-- .../grafana-dashboard-overview.yaml | 28 +- .../postmark-exporter-deployment.yaml | 8 + .../monitoring/postmark-exporter-script.yaml | 29 ++ 8 files changed, 741 insertions(+), 98 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index ea2330c..3cc9cb7 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -935,6 +935,15 @@ def build_overview(): {"color": "red", "value": 100}, ], } + mail_limit_thresholds = { + "mode": "absolute", + "steps": [ + {"color": "green", "value": None}, + {"color": "yellow", "value": 70}, + {"color": "orange", "value": 85}, + {"color": "red", "value": 95}, + ], + } mail_api_thresholds = { "mode": "absolute", "steps": [ @@ -946,28 +955,28 @@ def build_overview(): ( 30, "Mail Bounce Rate (1d)", - 'postmark_outbound_bounce_rate{window="1d"}', + 'max(postmark_outbound_bounce_rate{window="1d"})', "percent", mail_bounce_rate_thresholds, ), ( 31, "Mail Bounced (1d)", - 
'postmark_outbound_bounced{window="1d"}', + 'max(postmark_outbound_bounced{window="1d"})', "none", mail_bounce_count_thresholds, ), ( 32, - "Mail Sent (1d)", - 'postmark_outbound_sent{window="1d"}', - "none", - None, + "Mail Limit Used (30d)", + "max(postmark_sending_limit_used_percent)", + "percent", + mail_limit_thresholds, ), ( 33, "Postmark API Up", - "postmark_api_up", + "max(postmark_api_up)", "none", mail_api_thresholds, ), @@ -1875,33 +1884,42 @@ def build_mail_dashboard(): {"color": "green", "value": 1}, ], } + limit_thresholds = { + "mode": "absolute", + "steps": [ + {"color": "green", "value": None}, + {"color": "yellow", "value": 70}, + {"color": "orange", "value": 85}, + {"color": "red", "value": 95}, + ], + } current_stats = [ ( 1, "Bounce Rate (1d)", - 'postmark_outbound_bounce_rate{window="1d"}', + 'max(postmark_outbound_bounce_rate{window="1d"})', "percent", bounce_rate_thresholds, ), ( 2, "Bounce Rate (7d)", - 'postmark_outbound_bounce_rate{window="7d"}', + 'max(postmark_outbound_bounce_rate{window="7d"})', "percent", bounce_rate_thresholds, ), ( 3, "Bounced (1d)", - 'postmark_outbound_bounced{window="1d"}', + 'max(postmark_outbound_bounced{window="1d"})', "none", bounce_count_thresholds, ), ( 4, "Bounced (7d)", - 'postmark_outbound_bounced{window="7d"}', + 'max(postmark_outbound_bounced{window="7d"})', "none", bounce_count_thresholds, ), @@ -1923,7 +1941,7 @@ def build_mail_dashboard(): stat_panel( 5, "Sent (1d)", - 'postmark_outbound_sent{window="1d"}', + 'max(postmark_outbound_sent{window="1d"})', {"h": 4, "w": 6, "x": 0, "y": 4}, decimals=0, ) @@ -1932,7 +1950,7 @@ def build_mail_dashboard(): stat_panel( 6, "Sent (7d)", - 'postmark_outbound_sent{window="7d"}', + 'max(postmark_outbound_sent{window="7d"})', {"h": 4, "w": 6, "x": 6, "y": 4}, decimals=0, ) @@ -1940,30 +1958,69 @@ def build_mail_dashboard(): panels.append( stat_panel( 7, - "Postmark API Up", - "postmark_api_up", + "Limit Used (30d)", + "max(postmark_sending_limit_used_percent)", 
{"h": 4, "w": 6, "x": 12, "y": 4}, + thresholds=limit_thresholds, + unit="percent", + decimals=1, + ) + ) + panels.append( + stat_panel( + 8, + "Send Limit (30d)", + "max(postmark_sending_limit)", + {"h": 4, "w": 6, "x": 18, "y": 4}, + decimals=0, + ) + ) + + panels.append( + stat_panel( + 9, + "Postmark API Up", + "max(postmark_api_up)", + {"h": 4, "w": 6, "x": 0, "y": 8}, thresholds=api_thresholds, decimals=0, ) ) panels.append( stat_panel( - 8, + 10, "Last Success", - "postmark_last_success_timestamp_seconds", - {"h": 4, "w": 6, "x": 18, "y": 4}, + "max(postmark_last_success_timestamp_seconds)", + {"h": 4, "w": 6, "x": 6, "y": 8}, unit="dateTimeAsIso", decimals=0, ) ) + panels.append( + stat_panel( + 11, + "Exporter Errors", + "sum(postmark_request_errors_total)", + {"h": 4, "w": 6, "x": 12, "y": 8}, + decimals=0, + ) + ) + panels.append( + stat_panel( + 12, + "Limit Used (30d)", + "max(postmark_sending_limit_used)", + {"h": 4, "w": 6, "x": 18, "y": 8}, + decimals=0, + ) + ) panels.append( timeseries_panel( - 9, + 13, "Bounce Rate (1d vs 7d)", - "postmark_outbound_bounce_rate", - {"h": 8, "w": 12, "x": 0, "y": 8}, + "max by (window) (postmark_outbound_bounce_rate)", + {"h": 8, "w": 12, "x": 0, "y": 12}, unit="percent", legend="{{window}}", legend_display="table", @@ -1972,10 +2029,10 @@ def build_mail_dashboard(): ) panels.append( timeseries_panel( - 10, + 14, "Bounced (1d vs 7d)", - "postmark_outbound_bounced", - {"h": 8, "w": 12, "x": 12, "y": 8}, + "max by (window) (postmark_outbound_bounced)", + {"h": 8, "w": 12, "x": 12, "y": 12}, unit="none", legend="{{window}}", legend_display="table", @@ -1984,10 +2041,10 @@ def build_mail_dashboard(): ) panels.append( timeseries_panel( - 11, + 15, "Sent (1d vs 7d)", - "postmark_outbound_sent", - {"h": 8, "w": 12, "x": 0, "y": 16}, + "max by (window) (postmark_outbound_sent)", + {"h": 8, "w": 12, "x": 0, "y": 20}, unit="none", legend="{{window}}", legend_display="table", @@ -1996,10 +2053,10 @@ def 
build_mail_dashboard(): ) panels.append( timeseries_panel( - 12, + 16, "Exporter Errors", - "postmark_request_errors_total", - {"h": 8, "w": 12, "x": 12, "y": 16}, + "sum(postmark_request_errors_total)", + {"h": 8, "w": 12, "x": 12, "y": 20}, unit="none", ) ) diff --git a/scripts/monitoring_postmark_exporter.py b/scripts/monitoring_postmark_exporter.py index ecae9f3..2a51a54 100644 --- a/scripts/monitoring_postmark_exporter.py +++ b/scripts/monitoring_postmark_exporter.py @@ -19,6 +19,7 @@ WINDOWS = [ Window("today", 0), Window("1d", 1), Window("7d", 7), + Window("30d", 30), ] API_BASE = os.environ.get("POSTMARK_API_BASE", "https://api.postmarkapp.com").rstrip("/") @@ -28,6 +29,12 @@ LISTEN_PORT = int(os.environ.get("LISTEN_PORT", "8000")) PRIMARY_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN", "").strip() FALLBACK_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN_FALLBACK", "").strip() +LIMIT_WINDOW = os.environ.get("POSTMARK_SENDING_LIMIT_WINDOW", "30d").strip() +LIMIT_RAW = os.environ.get("POSTMARK_SENDING_LIMIT", "").strip() +try: + SENDING_LIMIT = float(LIMIT_RAW) if LIMIT_RAW else 0.0 +except ValueError: + SENDING_LIMIT = 0.0 EXPORTER_INFO = Info("postmark_exporter", "Exporter build info") EXPORTER_INFO.info( @@ -62,6 +69,18 @@ POSTMARK_OUTBOUND_BOUNCE_RATE = Gauge( "Outbound bounce rate percentage within the selected window", labelnames=("window",), ) +POSTMARK_SENDING_LIMIT_GAUGE = Gauge( + "postmark_sending_limit", + "Configured Postmark sending limit for the active account", +) +POSTMARK_SENDING_LIMIT_USED = Gauge( + "postmark_sending_limit_used", + "Messages sent within the configured send limit window", +) +POSTMARK_SENDING_LIMIT_USED_PERCENT = Gauge( + "postmark_sending_limit_used_percent", + "Percent of the configured send limit used within the limit window", +) def fetch_outbound_stats(token: str, window: Window) -> dict: @@ -83,15 +102,25 @@ def fetch_outbound_stats(token: str, window: Window) -> dict: def update_metrics(token: str) -> None: + 
sent_by_window = {} for window in WINDOWS: data = fetch_outbound_stats(token, window) sent = int(data.get("Sent", 0) or 0) bounced = int(data.get("Bounced", 0) or 0) rate = (bounced / sent * 100.0) if sent else 0.0 + sent_by_window[window.label] = sent POSTMARK_OUTBOUND_SENT.labels(window=window.label).set(sent) POSTMARK_OUTBOUND_BOUNCED.labels(window=window.label).set(bounced) POSTMARK_OUTBOUND_BOUNCE_RATE.labels(window=window.label).set(rate) + POSTMARK_SENDING_LIMIT_GAUGE.set(SENDING_LIMIT) + limit_window_sent = sent_by_window.get(LIMIT_WINDOW, 0) + POSTMARK_SENDING_LIMIT_USED.set(limit_window_sent) + if SENDING_LIMIT: + POSTMARK_SENDING_LIMIT_USED_PERCENT.set(limit_window_sent / SENDING_LIMIT * 100.0) + else: + POSTMARK_SENDING_LIMIT_USED_PERCENT.set(0.0) + def main() -> None: if not PRIMARY_TOKEN and not FALLBACK_TOKEN: diff --git a/services/monitoring/dashboards/atlas-mail.json b/services/monitoring/dashboards/atlas-mail.json index 1e73d76..a0b733d 100644 --- a/services/monitoring/dashboards/atlas-mail.json +++ b/services/monitoring/dashboards/atlas-mail.json @@ -20,7 +20,7 @@ }, "targets": [ { - "expr": "postmark_outbound_bounce_rate{window=\"1d\"}", + "expr": "max(postmark_outbound_bounce_rate{window=\"1d\"})", "refId": "A" } ], @@ -89,7 +89,7 @@ }, "targets": [ { - "expr": "postmark_outbound_bounce_rate{window=\"7d\"}", + "expr": "max(postmark_outbound_bounce_rate{window=\"7d\"})", "refId": "A" } ], @@ -158,7 +158,7 @@ }, "targets": [ { - "expr": "postmark_outbound_bounced{window=\"1d\"}", + "expr": "max(postmark_outbound_bounced{window=\"1d\"})", "refId": "A" } ], @@ -227,7 +227,7 @@ }, "targets": [ { - "expr": "postmark_outbound_bounced{window=\"7d\"}", + "expr": "max(postmark_outbound_bounced{window=\"7d\"})", "refId": "A" } ], @@ -296,7 +296,7 @@ }, "targets": [ { - "expr": "postmark_outbound_sent{window=\"1d\"}", + "expr": "max(postmark_outbound_sent{window=\"1d\"})", "refId": "A" } ], @@ -357,7 +357,7 @@ }, "targets": [ { - "expr": 
"postmark_outbound_sent{window=\"7d\"}", + "expr": "max(postmark_outbound_sent{window=\"7d\"})", "refId": "A" } ], @@ -405,7 +405,7 @@ { "id": 7, "type": "stat", - "title": "Postmark API Up", + "title": "Limit Used (30d)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -418,7 +418,137 @@ }, "targets": [ { - "expr": "postmark_api_up", + "expr": "max(postmark_sending_limit_used_percent)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 70 + }, + { + "color": "orange", + "value": 85 + }, + { + "color": "red", + "value": 95 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 8, + "type": "stat", + "title": "Send Limit (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 4 + }, + "targets": [ + { + "expr": "max(postmark_sending_limit)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 9, + "type": "stat", + "title": "Postmark API Up", + 
"datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 8 + }, + "targets": [ + { + "expr": "max(postmark_api_up)", "refId": "A" } ], @@ -464,7 +594,7 @@ } }, { - "id": 8, + "id": 10, "type": "stat", "title": "Last Success", "datasource": { @@ -474,12 +604,12 @@ "gridPos": { "h": 4, "w": 6, - "x": 18, - "y": 4 + "x": 6, + "y": 8 }, "targets": [ { - "expr": "postmark_last_success_timestamp_seconds", + "expr": "max(postmark_last_success_timestamp_seconds)", "refId": "A" } ], @@ -525,7 +655,129 @@ } }, { - "id": 9, + "id": 11, + "type": "stat", + "title": "Exporter Errors", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 8 + }, + "targets": [ + { + "expr": "sum(postmark_request_errors_total)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 12, + "type": "stat", + "title": "Limit Used (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 8 + }, + "targets": [ + { + "expr": "max(postmark_sending_limit_used)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + 
"custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 13, "type": "timeseries", "title": "Bounce Rate (1d vs 7d)", "datasource": { @@ -536,11 +788,11 @@ "h": 8, "w": 12, "x": 0, - "y": 8 + "y": 12 }, "targets": [ { - "expr": "postmark_outbound_bounce_rate", + "expr": "max by (window) (postmark_outbound_bounce_rate)", "refId": "A", "legendFormat": "{{window}}" } @@ -562,7 +814,7 @@ } }, { - "id": 10, + "id": 14, "type": "timeseries", "title": "Bounced (1d vs 7d)", "datasource": { @@ -573,11 +825,11 @@ "h": 8, "w": 12, "x": 12, - "y": 8 + "y": 12 }, "targets": [ { - "expr": "postmark_outbound_bounced", + "expr": "max by (window) (postmark_outbound_bounced)", "refId": "A", "legendFormat": "{{window}}" } @@ -599,7 +851,7 @@ } }, { - "id": 11, + "id": 15, "type": "timeseries", "title": "Sent (1d vs 7d)", "datasource": { @@ -610,11 +862,11 @@ "h": 8, "w": 12, "x": 0, - "y": 16 + "y": 20 }, "targets": [ { - "expr": "postmark_outbound_sent", + "expr": "max by (window) (postmark_outbound_sent)", "refId": "A", "legendFormat": "{{window}}" } @@ -636,7 +888,7 @@ } }, { - "id": 12, + "id": 16, "type": "timeseries", "title": "Exporter Errors", "datasource": { @@ -647,11 +899,11 @@ "h": 8, "w": 12, "x": 12, - "y": 16 + "y": 20 }, "targets": [ { - "expr": "postmark_request_errors_total", + "expr": "sum(postmark_request_errors_total)", "refId": "A" } ], diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 4938485..707cc30 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -802,7 +802,7 @@ }, "targets": [ { - "expr": "postmark_outbound_bounce_rate{window=\"1d\"}", + "expr": 
"max(postmark_outbound_bounce_rate{window=\"1d\"})", "refId": "A" } ], @@ -878,7 +878,7 @@ }, "targets": [ { - "expr": "postmark_outbound_bounced{window=\"1d\"}", + "expr": "max(postmark_outbound_bounced{window=\"1d\"})", "refId": "A" } ], @@ -941,7 +941,7 @@ { "id": 32, "type": "stat", - "title": "Mail Sent (1d)", + "title": "Mail Limit Used (30d)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -954,7 +954,7 @@ }, "targets": [ { - "expr": "postmark_outbound_sent{window=\"1d\"}", + "expr": "max(postmark_sending_limit_used_percent)", "refId": "A" } ], @@ -968,20 +968,28 @@ "mode": "absolute", "steps": [ { - "color": "rgba(115, 115, 115, 1)", + "color": "green", "value": null }, { - "color": "green", - "value": 1 + "color": "yellow", + "value": 70 + }, + { + "color": "orange", + "value": 85 + }, + { + "color": "red", + "value": 95 } ] }, - "unit": "none", + "unit": "percent", "custom": { "displayMode": "auto" }, - "decimals": 0 + "decimals": 1 }, "overrides": [] }, @@ -1022,7 +1030,7 @@ }, "targets": [ { - "expr": "postmark_api_up", + "expr": "max(postmark_api_up)", "refId": "A" } ], diff --git a/services/monitoring/grafana-dashboard-mail.yaml b/services/monitoring/grafana-dashboard-mail.yaml index 6c96358..f97ce60 100644 --- a/services/monitoring/grafana-dashboard-mail.yaml +++ b/services/monitoring/grafana-dashboard-mail.yaml @@ -29,7 +29,7 @@ data: }, "targets": [ { - "expr": "postmark_outbound_bounce_rate{window=\"1d\"}", + "expr": "max(postmark_outbound_bounce_rate{window=\"1d\"})", "refId": "A" } ], @@ -98,7 +98,7 @@ data: }, "targets": [ { - "expr": "postmark_outbound_bounce_rate{window=\"7d\"}", + "expr": "max(postmark_outbound_bounce_rate{window=\"7d\"})", "refId": "A" } ], @@ -167,7 +167,7 @@ data: }, "targets": [ { - "expr": "postmark_outbound_bounced{window=\"1d\"}", + "expr": "max(postmark_outbound_bounced{window=\"1d\"})", "refId": "A" } ], @@ -236,7 +236,7 @@ data: }, "targets": [ { - "expr": "postmark_outbound_bounced{window=\"7d\"}", + 
"expr": "max(postmark_outbound_bounced{window=\"7d\"})", "refId": "A" } ], @@ -305,7 +305,7 @@ data: }, "targets": [ { - "expr": "postmark_outbound_sent{window=\"1d\"}", + "expr": "max(postmark_outbound_sent{window=\"1d\"})", "refId": "A" } ], @@ -366,7 +366,7 @@ data: }, "targets": [ { - "expr": "postmark_outbound_sent{window=\"7d\"}", + "expr": "max(postmark_outbound_sent{window=\"7d\"})", "refId": "A" } ], @@ -414,7 +414,7 @@ data: { "id": 7, "type": "stat", - "title": "Postmark API Up", + "title": "Limit Used (30d)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -427,7 +427,137 @@ data: }, "targets": [ { - "expr": "postmark_api_up", + "expr": "max(postmark_sending_limit_used_percent)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 70 + }, + { + "color": "orange", + "value": 85 + }, + { + "color": "red", + "value": 95 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 8, + "type": "stat", + "title": "Send Limit (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 4 + }, + "targets": [ + { + "expr": "max(postmark_sending_limit)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + 
}, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 9, + "type": "stat", + "title": "Postmark API Up", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 8 + }, + "targets": [ + { + "expr": "max(postmark_api_up)", "refId": "A" } ], @@ -473,7 +603,7 @@ data: } }, { - "id": 8, + "id": 10, "type": "stat", "title": "Last Success", "datasource": { @@ -483,12 +613,12 @@ data: "gridPos": { "h": 4, "w": 6, - "x": 18, - "y": 4 + "x": 6, + "y": 8 }, "targets": [ { - "expr": "postmark_last_success_timestamp_seconds", + "expr": "max(postmark_last_success_timestamp_seconds)", "refId": "A" } ], @@ -534,7 +664,129 @@ data: } }, { - "id": 9, + "id": 11, + "type": "stat", + "title": "Exporter Errors", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 8 + }, + "targets": [ + { + "expr": "sum(postmark_request_errors_total)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 12, + "type": "stat", + "title": "Limit Used (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 8 + }, + "targets": [ + { + "expr": 
"max(postmark_sending_limit_used)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 13, "type": "timeseries", "title": "Bounce Rate (1d vs 7d)", "datasource": { @@ -545,11 +797,11 @@ data: "h": 8, "w": 12, "x": 0, - "y": 8 + "y": 12 }, "targets": [ { - "expr": "postmark_outbound_bounce_rate", + "expr": "max by (window) (postmark_outbound_bounce_rate)", "refId": "A", "legendFormat": "{{window}}" } @@ -571,7 +823,7 @@ data: } }, { - "id": 10, + "id": 14, "type": "timeseries", "title": "Bounced (1d vs 7d)", "datasource": { @@ -582,11 +834,11 @@ data: "h": 8, "w": 12, "x": 12, - "y": 8 + "y": 12 }, "targets": [ { - "expr": "postmark_outbound_bounced", + "expr": "max by (window) (postmark_outbound_bounced)", "refId": "A", "legendFormat": "{{window}}" } @@ -608,7 +860,7 @@ data: } }, { - "id": 11, + "id": 15, "type": "timeseries", "title": "Sent (1d vs 7d)", "datasource": { @@ -619,11 +871,11 @@ data: "h": 8, "w": 12, "x": 0, - "y": 16 + "y": 20 }, "targets": [ { - "expr": "postmark_outbound_sent", + "expr": "max by (window) (postmark_outbound_sent)", "refId": "A", "legendFormat": "{{window}}" } @@ -645,7 +897,7 @@ data: } }, { - "id": 12, + "id": 16, "type": "timeseries", "title": "Exporter Errors", "datasource": { @@ -656,11 +908,11 @@ data: "h": 8, "w": 12, "x": 12, - "y": 16 + "y": 20 }, "targets": [ { - "expr": "postmark_request_errors_total", + "expr": "sum(postmark_request_errors_total)", "refId": "A" } ], 
diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 7ebb687..65ae053 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -811,7 +811,7 @@ data: }, "targets": [ { - "expr": "postmark_outbound_bounce_rate{window=\"1d\"}", + "expr": "max(postmark_outbound_bounce_rate{window=\"1d\"})", "refId": "A" } ], @@ -887,7 +887,7 @@ data: }, "targets": [ { - "expr": "postmark_outbound_bounced{window=\"1d\"}", + "expr": "max(postmark_outbound_bounced{window=\"1d\"})", "refId": "A" } ], @@ -950,7 +950,7 @@ data: { "id": 32, "type": "stat", - "title": "Mail Sent (1d)", + "title": "Mail Limit Used (30d)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -963,7 +963,7 @@ data: }, "targets": [ { - "expr": "postmark_outbound_sent{window=\"1d\"}", + "expr": "max(postmark_sending_limit_used_percent)", "refId": "A" } ], @@ -977,20 +977,28 @@ data: "mode": "absolute", "steps": [ { - "color": "rgba(115, 115, 115, 1)", + "color": "green", "value": null }, { - "color": "green", - "value": 1 + "color": "yellow", + "value": 70 + }, + { + "color": "orange", + "value": 85 + }, + { + "color": "red", + "value": 95 } ] }, - "unit": "none", + "unit": "percent", "custom": { "displayMode": "auto" }, - "decimals": 0 + "decimals": 1 }, "overrides": [] }, @@ -1031,7 +1039,7 @@ data: }, "targets": [ { - "expr": "postmark_api_up", + "expr": "max(postmark_api_up)", "refId": "A" } ], diff --git a/services/monitoring/postmark-exporter-deployment.yaml b/services/monitoring/postmark-exporter-deployment.yaml index adadc4b..eb2877e 100644 --- a/services/monitoring/postmark-exporter-deployment.yaml +++ b/services/monitoring/postmark-exporter-deployment.yaml @@ -39,6 +39,14 @@ spec: secretKeyRef: name: postmark-exporter key: relay-password + - name: POSTMARK_SENDING_LIMIT + valueFrom: + secretKeyRef: + name: postmark-exporter + key: sending-limit + optional: 
true + - name: POSTMARK_SENDING_LIMIT_WINDOW + value: "30d" - name: POLL_INTERVAL_SECONDS value: "60" - name: LISTEN_PORT diff --git a/services/monitoring/postmark-exporter-script.yaml b/services/monitoring/postmark-exporter-script.yaml index 884d963..afe2221 100644 --- a/services/monitoring/postmark-exporter-script.yaml +++ b/services/monitoring/postmark-exporter-script.yaml @@ -26,6 +26,7 @@ data: Window("today", 0), Window("1d", 1), Window("7d", 7), + Window("30d", 30), ] API_BASE = os.environ.get("POSTMARK_API_BASE", "https://api.postmarkapp.com").rstrip("/") @@ -35,6 +36,12 @@ data: PRIMARY_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN", "").strip() FALLBACK_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN_FALLBACK", "").strip() + LIMIT_WINDOW = os.environ.get("POSTMARK_SENDING_LIMIT_WINDOW", "30d").strip() + LIMIT_RAW = os.environ.get("POSTMARK_SENDING_LIMIT", "").strip() + try: + SENDING_LIMIT = float(LIMIT_RAW) if LIMIT_RAW else 0.0 + except ValueError: + SENDING_LIMIT = 0.0 EXPORTER_INFO = Info("postmark_exporter", "Exporter build info") EXPORTER_INFO.info( @@ -69,6 +76,18 @@ data: "Outbound bounce rate percentage within the selected window", labelnames=("window",), ) + POSTMARK_SENDING_LIMIT_GAUGE = Gauge( + "postmark_sending_limit", + "Configured Postmark sending limit for the active account", + ) + POSTMARK_SENDING_LIMIT_USED = Gauge( + "postmark_sending_limit_used", + "Messages sent within the configured send limit window", + ) + POSTMARK_SENDING_LIMIT_USED_PERCENT = Gauge( + "postmark_sending_limit_used_percent", + "Percent of the configured send limit used within the limit window", + ) def fetch_outbound_stats(token: str, window: Window) -> dict: @@ -90,15 +109,25 @@ data: def update_metrics(token: str) -> None: + sent_by_window = {} for window in WINDOWS: data = fetch_outbound_stats(token, window) sent = int(data.get("Sent", 0) or 0) bounced = int(data.get("Bounced", 0) or 0) rate = (bounced / sent * 100.0) if sent else 0.0 + 
sent_by_window[window.label] = sent POSTMARK_OUTBOUND_SENT.labels(window=window.label).set(sent) POSTMARK_OUTBOUND_BOUNCED.labels(window=window.label).set(bounced) POSTMARK_OUTBOUND_BOUNCE_RATE.labels(window=window.label).set(rate) + POSTMARK_SENDING_LIMIT_GAUGE.set(SENDING_LIMIT) + limit_window_sent = sent_by_window.get(LIMIT_WINDOW, 0) + POSTMARK_SENDING_LIMIT_USED.set(limit_window_sent) + if SENDING_LIMIT: + POSTMARK_SENDING_LIMIT_USED_PERCENT.set(limit_window_sent / SENDING_LIMIT * 100.0) + else: + POSTMARK_SENDING_LIMIT_USED_PERCENT.set(0.0) + def main() -> None: if not PRIMARY_TOKEN and not FALLBACK_TOKEN: -- 2.47.2 From 5fcff4fc8aee149c532e11741bc77825e1f898ce Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 02:34:52 -0300 Subject: [PATCH 441/684] monitoring: refine mail overview panels --- scripts/dashboards_render_atlas.py | 129 ++++--- .../monitoring/dashboards/atlas-overview.json | 324 ++++++++++-------- .../grafana-dashboard-overview.yaml | 324 ++++++++++-------- 3 files changed, 432 insertions(+), 345 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 3cc9cb7..0dcd5dd 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -926,15 +926,6 @@ def build_overview(): {"color": "red", "value": 10}, ], } - mail_bounce_count_thresholds = { - "mode": "absolute", - "steps": [ - {"color": "green", "value": None}, - {"color": "yellow", "value": 1}, - {"color": "orange", "value": 10}, - {"color": "red", "value": 100}, - ], - } mail_limit_thresholds = { "mode": "absolute", "steps": [ @@ -944,56 +935,96 @@ def build_overview(): {"color": "red", "value": 95}, ], } - mail_api_thresholds = { + mail_success_thresholds = { "mode": "absolute", "steps": [ {"color": "red", "value": None}, - {"color": "green", "value": 1}, + {"color": "orange", "value": 90}, + {"color": "yellow", "value": 95}, + {"color": "green", "value": 98}, ], } - mail_overview_panels = [ - ( + 
panels.append( + stat_panel( 30, - "Mail Bounce Rate (1d)", - 'max(postmark_outbound_bounce_rate{window="1d"})', - "percent", - mail_bounce_rate_thresholds, - ), - ( - 31, - "Mail Bounced (1d)", - 'max(postmark_outbound_bounced{window="1d"})', - "none", - mail_bounce_count_thresholds, - ), - ( + "Mail Sent (1d)", + 'max(postmark_outbound_sent{window="1d"})', + {"h": 2, "w": 6, "x": 0, "y": 8}, + unit="none", + links=link_to("atlas-mail"), + ) + ) + panels.append( + { + "id": 31, + "type": "stat", + "title": "Mail Bounces (1d)", + "datasource": PROM_DS, + "gridPos": {"h": 2, "w": 6, "x": 6, "y": 8}, + "targets": [ + { + "expr": 'max(postmark_outbound_bounce_rate{window="1d"})', + "refId": "A", + "legendFormat": "Rate", + }, + { + "expr": 'max(postmark_outbound_bounced{window="1d"})', + "refId": "B", + "legendFormat": "Count", + }, + ], + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "custom": {"displayMode": "auto"}, + "thresholds": mail_bounce_rate_thresholds, + "unit": "none", + }, + "overrides": [ + { + "matcher": {"id": "byName", "options": "Rate"}, + "properties": [{"id": "unit", "value": "percent"}], + }, + { + "matcher": {"id": "byName", "options": "Count"}, + "properties": [{"id": "unit", "value": "none"}], + }, + ], + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False}, + "textMode": "name_and_value", + }, + "links": link_to("atlas-mail"), + } + ) + panels.append( + stat_panel( 32, + "Mail Success Rate (1d)", + 'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)', + {"h": 2, "w": 6, "x": 12, "y": 8}, + unit="percent", + thresholds=mail_success_thresholds, + decimals=1, + links=link_to("atlas-mail"), + ) + ) + panels.append( + stat_panel( + 33, "Mail Limit Used (30d)", "max(postmark_sending_limit_used_percent)", - "percent", - mail_limit_thresholds, - ), - ( - 33, - "Postmark API Up", - 
"max(postmark_api_up)", - "none", - mail_api_thresholds, - ), - ] - for idx, (panel_id, title, expr, unit, thresholds) in enumerate(mail_overview_panels): - panels.append( - stat_panel( - panel_id, - title, - expr, - {"h": 2, "w": 6, "x": 6 * idx, "y": 8}, - unit=unit, - thresholds=thresholds, - decimals=1 if unit == "percent" else 0, - links=link_to("atlas-mail"), - ) + {"h": 2, "w": 6, "x": 18, "y": 8}, + unit="percent", + thresholds=mail_limit_thresholds, + decimals=1, + links=link_to("atlas-mail"), ) + ) storage_panels = [ (23, "Astreae Usage", astreae_usage_expr("/mnt/astreae"), "percent"), diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 707cc30..957a6ed 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -789,7 +789,7 @@ { "id": 30, "type": "stat", - "title": "Mail Bounce Rate (1d)", + "title": "Mail Sent (1d)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -802,7 +802,7 @@ }, "targets": [ { - "expr": "max(postmark_outbound_bounce_rate{window=\"1d\"})", + "expr": "max(postmark_outbound_sent{window=\"1d\"})", "refId": "A" } ], @@ -812,6 +812,81 @@ "mode": "thresholds" }, "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "links": [ + { + "title": "Open atlas-mail dashboard", + "url": "/d/atlas-mail", + "targetBlank": true + } + ] + }, + { + "id": 31, + "type": "stat", + "title": "Mail Bounces (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + 
"h": 2, + "w": 6, + "x": 6, + "y": 8 + }, + "targets": [ + { + "expr": "max(postmark_outbound_bounce_rate{window=\"1d\"})", + "refId": "A", + "legendFormat": "Rate" + }, + { + "expr": "max(postmark_outbound_bounced{window=\"1d\"})", + "refId": "B", + "legendFormat": "Count" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "displayMode": "auto" + }, "thresholds": { "mode": "absolute", "steps": [ @@ -833,6 +908,103 @@ } ] }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Rate" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Count" + }, + "properties": [ + { + "id": "unit", + "value": "none" + } + ] + } + ] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "name_and_value" + }, + "links": [ + { + "title": "Open atlas-mail dashboard", + "url": "/d/atlas-mail", + "targetBlank": true + } + ] + }, + { + "id": 32, + "type": "stat", + "title": "Mail Success Rate (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 2, + "w": 6, + "x": 12, + "y": 8 + }, + "targets": [ + { + "expr": "clamp_min(100 - max(postmark_outbound_bounce_rate{window=\"1d\"}), 0)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 90 + }, + { + "color": "yellow", + "value": 95 + }, + { + "color": "green", + "value": 98 + } + ] + }, "unit": "percent", "custom": { "displayMode": "auto" @@ -863,83 +1035,7 @@ ] }, { - "id": 31, - "type": "stat", - "title": "Mail Bounced (1d)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - 
"gridPos": { - "h": 2, - "w": 6, - "x": 6, - "y": 8 - }, - "targets": [ - { - "expr": "max(postmark_outbound_bounced{window=\"1d\"})", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 1 - }, - { - "color": "orange", - "value": 10 - }, - { - "color": "red", - "value": 100 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - }, - "decimals": 0 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - }, - "links": [ - { - "title": "Open atlas-mail dashboard", - "url": "/d/atlas-mail", - "targetBlank": true - } - ] - }, - { - "id": 32, + "id": 33, "type": "stat", "title": "Mail Limit Used (30d)", "datasource": { @@ -949,7 +1045,7 @@ "gridPos": { "h": 2, "w": 6, - "x": 12, + "x": 18, "y": 8 }, "targets": [ @@ -1014,74 +1110,6 @@ } ] }, - { - "id": 33, - "type": "stat", - "title": "Postmark API Up", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 2, - "w": 6, - "x": 18, - "y": 8 - }, - "targets": [ - { - "expr": "max(postmark_api_up)", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - }, - "decimals": 0 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - }, - "links": [ - { - "title": "Open 
atlas-mail dashboard", - "url": "/d/atlas-mail", - "targetBlank": true - } - ] - }, { "id": 23, "type": "stat", diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 65ae053..cccde73 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -798,7 +798,7 @@ data: { "id": 30, "type": "stat", - "title": "Mail Bounce Rate (1d)", + "title": "Mail Sent (1d)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -811,7 +811,7 @@ data: }, "targets": [ { - "expr": "max(postmark_outbound_bounce_rate{window=\"1d\"})", + "expr": "max(postmark_outbound_sent{window=\"1d\"})", "refId": "A" } ], @@ -821,6 +821,81 @@ data: "mode": "thresholds" }, "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "links": [ + { + "title": "Open atlas-mail dashboard", + "url": "/d/atlas-mail", + "targetBlank": true + } + ] + }, + { + "id": 31, + "type": "stat", + "title": "Mail Bounces (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 2, + "w": 6, + "x": 6, + "y": 8 + }, + "targets": [ + { + "expr": "max(postmark_outbound_bounce_rate{window=\"1d\"})", + "refId": "A", + "legendFormat": "Rate" + }, + { + "expr": "max(postmark_outbound_bounced{window=\"1d\"})", + "refId": "B", + "legendFormat": "Count" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "displayMode": "auto" + }, "thresholds": { "mode": "absolute", "steps": [ @@ -842,6 +917,103 
@@ data: } ] }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Rate" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Count" + }, + "properties": [ + { + "id": "unit", + "value": "none" + } + ] + } + ] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "name_and_value" + }, + "links": [ + { + "title": "Open atlas-mail dashboard", + "url": "/d/atlas-mail", + "targetBlank": true + } + ] + }, + { + "id": 32, + "type": "stat", + "title": "Mail Success Rate (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 2, + "w": 6, + "x": 12, + "y": 8 + }, + "targets": [ + { + "expr": "clamp_min(100 - max(postmark_outbound_bounce_rate{window=\"1d\"}), 0)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 90 + }, + { + "color": "yellow", + "value": 95 + }, + { + "color": "green", + "value": 98 + } + ] + }, "unit": "percent", "custom": { "displayMode": "auto" @@ -872,83 +1044,7 @@ data: ] }, { - "id": 31, - "type": "stat", - "title": "Mail Bounced (1d)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 2, - "w": 6, - "x": 6, - "y": 8 - }, - "targets": [ - { - "expr": "max(postmark_outbound_bounced{window=\"1d\"})", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 1 - }, - { - "color": "orange", - "value": 10 - }, - { - "color": "red", - 
"value": 100 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - }, - "decimals": 0 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - }, - "links": [ - { - "title": "Open atlas-mail dashboard", - "url": "/d/atlas-mail", - "targetBlank": true - } - ] - }, - { - "id": 32, + "id": 33, "type": "stat", "title": "Mail Limit Used (30d)", "datasource": { @@ -958,7 +1054,7 @@ data: "gridPos": { "h": 2, "w": 6, - "x": 12, + "x": 18, "y": 8 }, "targets": [ @@ -1023,74 +1119,6 @@ data: } ] }, - { - "id": 33, - "type": "stat", - "title": "Postmark API Up", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 2, - "w": 6, - "x": 18, - "y": 8 - }, - "targets": [ - { - "expr": "max(postmark_api_up)", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - }, - "decimals": 0 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - }, - "links": [ - { - "title": "Open atlas-mail dashboard", - "url": "/d/atlas-mail", - "targetBlank": true - } - ] - }, { "id": 23, "type": "stat", -- 2.47.2 From 7d64f0d1d91b42615a848bd767decabafa467d57 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 09:03:28 -0300 Subject: [PATCH 442/684] mailu: harden relay restrictions --- services/mailu/helmrelease.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/services/mailu/helmrelease.yaml 
b/services/mailu/helmrelease.yaml index 32e3068..e591e64 100644 --- a/services/mailu/helmrelease.yaml +++ b/services/mailu/helmrelease.yaml @@ -219,6 +219,11 @@ spec: overrides: postfix.cf: | smtpd_relay_restrictions = permit_sasl_authenticated, reject_unauth_destination + smtpd_sender_restrictions = reject_non_fqdn_sender, reject_unknown_sender_domain, reject_sender_login_mismatch, reject_authenticated_sender_login_mismatch + smtpd_tls_auth_only = yes + smtpd_client_connection_rate_limit = 30 + smtpd_client_message_rate_limit = 100 + smtpd_client_recipient_rate_limit = 200 podAnnotations: bstein.dev/restarted-at: "2026-01-06T00:00:00Z" redis: -- 2.47.2 From a14726350cff30dddc2727c5fa68bc35669df4cb Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 09:50:40 -0300 Subject: [PATCH 443/684] monitoring: add titan-jh control plane node --- scripts/dashboards_render_atlas.py | 2 +- services/monitoring/dashboards/atlas-nodes.json | 4 ++-- services/monitoring/dashboards/atlas-overview.json | 4 ++-- services/monitoring/dashboards/atlas-pods.json | 2 +- services/monitoring/grafana-dashboard-nodes.yaml | 4 ++-- services/monitoring/grafana-dashboard-overview.yaml | 4 ++-- services/monitoring/grafana-dashboard-pods.yaml | 2 +- services/monitoring/helmrelease.yaml | 9 +++++++++ 8 files changed, 20 insertions(+), 11 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 0dcd5dd..fa8f609 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -53,7 +53,7 @@ NAMESPACE_CPU_WINDOW = "1m" # --------------------------------------------------------------------------- CONTROL_PLANE_NODES = ["titan-0a", "titan-0b", "titan-0c"] -CONTROL_DEPENDENCIES = ["titan-db"] +CONTROL_DEPENDENCIES = ["titan-db", "titan-jh"] CONTROL_ALL = CONTROL_PLANE_NODES + CONTROL_DEPENDENCIES WORKER_NODES = [ "titan-04", diff --git a/services/monitoring/dashboards/atlas-nodes.json 
b/services/monitoring/dashboards/atlas-nodes.json index ff69739..9be3adb 100644 --- a/services/monitoring/dashboards/atlas-nodes.json +++ b/services/monitoring/dashboards/atlas-nodes.json @@ -489,7 +489,7 @@ }, "targets": [ { - "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } @@ -526,7 +526,7 @@ }, "targets": [ { - "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } diff --git 
a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 957a6ed..7a9c73e 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -1711,7 +1711,7 @@ }, "targets": [ { - "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } @@ -1748,7 +1748,7 @@ }, "targets": [ { - "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", 
"refId": "A", "legendFormat": "{{node}}" } diff --git a/services/monitoring/dashboards/atlas-pods.json b/services/monitoring/dashboards/atlas-pods.json index b6d0be0..0d46f90 100644 --- a/services/monitoring/dashboards/atlas-pods.json +++ b/services/monitoring/dashboards/atlas-pods.json @@ -520,7 +520,7 @@ }, "targets": [ { - "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.018000000000000002) or (sum by (node) 
(kube_node_info{node=\"titan-18\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.022)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 
0.022)))))", + "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.023)) == bool 
on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.023)))))", "refId": "A", "instant": true, "format": "table" diff --git a/services/monitoring/grafana-dashboard-nodes.yaml b/services/monitoring/grafana-dashboard-nodes.yaml index 
854f68a..1b87c60 100644 --- a/services/monitoring/grafana-dashboard-nodes.yaml +++ b/services/monitoring/grafana-dashboard-nodes.yaml @@ -498,7 +498,7 @@ data: }, "targets": [ { - "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } @@ -535,7 +535,7 @@ data: }, "targets": [ { - "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } diff --git a/services/monitoring/grafana-dashboard-overview.yaml 
b/services/monitoring/grafana-dashboard-overview.yaml index cccde73..82ca78c 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -1720,7 +1720,7 @@ data: }, "targets": [ { - "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } @@ -1757,7 +1757,7 @@ data: }, "targets": [ { - "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } 
diff --git a/services/monitoring/grafana-dashboard-pods.yaml b/services/monitoring/grafana-dashboard-pods.yaml index 7d02e22..dda1a41 100644 --- a/services/monitoring/grafana-dashboard-pods.yaml +++ b/services/monitoring/grafana-dashboard-pods.yaml @@ -529,7 +529,7 @@ data: }, "targets": [ { - "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.019) 
or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.022)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.022)))))", + "expr": "(sum by (namespace,node) 
(kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.023)) == bool on(namespace) group_left() (max by (namespace) ((sum 
by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.023)))))", "refId": "A", "instant": true, "format": "table" diff --git a/services/monitoring/helmrelease.yaml b/services/monitoring/helmrelease.yaml index 40db9f4..ddd24e5 100644 --- a/services/monitoring/helmrelease.yaml +++ 
b/services/monitoring/helmrelease.yaml @@ -195,6 +195,15 @@ spec: target_label: instance replacement: titan-db + # --- titan-jh node_exporter (external control-plane host) --- + - job_name: "titan-jh" + static_configs: + - targets: ["192.168.22.8:9100"] + relabel_configs: + - source_labels: [__address__] + target_label: instance + replacement: titan-jh + # --- cert-manager (pods expose on 9402) --- - job_name: "cert-manager" kubernetes_sd_configs: [{ role: pod }] -- 2.47.2 From 109c17dd9519e6335aaedbc8346690a0237288ff Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 10:02:50 -0300 Subject: [PATCH 444/684] nextcloud: default mail html --- scripts/nextcloud-mail-sync.sh | 40 +++++++++++++++++++++++ services/nextcloud-mail-sync/cronjob.yaml | 17 ++++++++++ 2 files changed, 57 insertions(+) diff --git a/scripts/nextcloud-mail-sync.sh b/scripts/nextcloud-mail-sync.sh index a3ca3b6..6c883fc 100755 --- a/scripts/nextcloud-mail-sync.sh +++ b/scripts/nextcloud-mail-sync.sh @@ -7,11 +7,48 @@ KC_ADMIN_USER="${KC_ADMIN_USER:?}" KC_ADMIN_PASS="${KC_ADMIN_PASS:?}" MAILU_DOMAIN="${MAILU_DOMAIN:?}" ONLY_USERNAME="${ONLY_USERNAME:-}" +POSTGRES_HOST="${POSTGRES_HOST:-}" +POSTGRES_DB="${POSTGRES_DB:-}" +POSTGRES_USER="${POSTGRES_USER:-}" +POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-}" if ! 
command -v jq >/dev/null 2>&1; then apt-get update && apt-get install -y jq curl >/dev/null fi +ensure_psql() { + if command -v psql >/dev/null 2>&1; then + return 0 + fi + apt-get update && apt-get install -y postgresql-client >/dev/null +} + +set_editor_mode_richtext() { + local ids=("$@") + + if [[ ${#ids[@]} -eq 0 ]]; then + return 0 + fi + + if [[ -z "${POSTGRES_HOST}" || -z "${POSTGRES_DB}" || -z "${POSTGRES_USER}" || -z "${POSTGRES_PASSWORD}" ]]; then + echo "WARN: missing postgres env; cannot update mail editor_mode" >&2 + return 0 + fi + + ensure_psql + + local ids_csv + ids_csv=$(IFS=,; echo "${ids[*]}") + + PGPASSWORD="${POSTGRES_PASSWORD}" psql \ + -h "${POSTGRES_HOST}" \ + -U "${POSTGRES_USER}" \ + -d "${POSTGRES_DB}" \ + -v ON_ERROR_STOP=1 \ + -c "UPDATE oc_mail_accounts SET editor_mode='richtext' WHERE id IN (${ids_csv}) AND editor_mode <> 'richtext';" \ + >/dev/null +} + list_mail_accounts() { local user_id="${1}" local export_out @@ -184,8 +221,10 @@ while read -r user; do mailu_account_count="0" fi primary_email_after="" + editor_mode_ids=() if [[ -n "${mailu_accounts_after}" ]]; then while IFS=$'\t' read -r _account_id account_email; do + editor_mode_ids+=("${_account_id}") if [[ "${account_email,,}" == "${desired_email,,}" ]]; then primary_email_after="${account_email}" break @@ -195,6 +234,7 @@ while read -r user; do fi done <<<"${mailu_accounts_after}" fi + set_editor_mode_richtext "${editor_mode_ids[@]}" else mailu_account_count="0" primary_email_after="" diff --git a/services/nextcloud-mail-sync/cronjob.yaml b/services/nextcloud-mail-sync/cronjob.yaml index 55a593d..55ff608 100644 --- a/services/nextcloud-mail-sync/cronjob.yaml +++ b/services/nextcloud-mail-sync/cronjob.yaml @@ -41,6 +41,23 @@ spec: key: password - name: MAILU_DOMAIN value: bstein.dev + - name: POSTGRES_HOST + value: postgres-service.postgres.svc.cluster.local + - name: POSTGRES_DB + valueFrom: + secretKeyRef: + name: nextcloud-db + key: database + - name: POSTGRES_USER + 
valueFrom: + secretKeyRef: + name: nextcloud-db + key: db-username + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: nextcloud-db + key: db-password resources: requests: cpu: 100m -- 2.47.2 From 6a4b7f443147149420e4c2ee76bfcccd958908c8 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 12:47:34 -0300 Subject: [PATCH 445/684] titan-jh: enable node exporter --- hosts/roles/titan_jh/tasks/main.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/hosts/roles/titan_jh/tasks/main.yaml b/hosts/roles/titan_jh/tasks/main.yaml index 0f66730..ad899a3 100644 --- a/hosts/roles/titan_jh/tasks/main.yaml +++ b/hosts/roles/titan_jh/tasks/main.yaml @@ -1,5 +1,18 @@ # hosts/roles/titan_jh/tasks/main.yaml --- +- name: Install node exporter + ansible.builtin.package: + name: prometheus-node-exporter + state: present + tags: ['jumphost', 'monitoring'] + +- name: Enable node exporter + ansible.builtin.service: + name: prometheus-node-exporter + enabled: true + state: started + tags: ['jumphost', 'monitoring'] + - name: Placeholder for jumphost hardening ansible.builtin.debug: msg: "Harden SSH, manage bastion tooling, and configure audit logging here." 
-- 2.47.2 From c693e695b4d591b69dc55a2e7bdd1fe064d1e859 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 14:00:14 -0300 Subject: [PATCH 446/684] mailu: harden relay + fix postmark exporter --- scripts/dashboards_render_atlas.py | 4 ++-- services/mailu/helmrelease.yaml | 4 ++++ services/monitoring/dashboards/atlas-overview.json | 4 ++-- services/monitoring/grafana-dashboard-overview.yaml | 4 ++-- services/monitoring/postmark-exporter-deployment.yaml | 4 ++-- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index fa8f609..0965860 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -960,7 +960,7 @@ def build_overview(): "type": "stat", "title": "Mail Bounces (1d)", "datasource": PROM_DS, - "gridPos": {"h": 2, "w": 6, "x": 6, "y": 8}, + "gridPos": {"h": 2, "w": 6, "x": 12, "y": 8}, "targets": [ { "expr": 'max(postmark_outbound_bounce_rate{window="1d"})', @@ -1006,7 +1006,7 @@ def build_overview(): 32, "Mail Success Rate (1d)", 'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)', - {"h": 2, "w": 6, "x": 12, "y": 8}, + {"h": 2, "w": 6, "x": 6, "y": 8}, unit="percent", thresholds=mail_success_thresholds, decimals=1, diff --git a/services/mailu/helmrelease.yaml b/services/mailu/helmrelease.yaml index e591e64..c72c38f 100644 --- a/services/mailu/helmrelease.yaml +++ b/services/mailu/helmrelease.yaml @@ -218,6 +218,10 @@ spec: hardware: rpi4 overrides: postfix.cf: | + smtpd_helo_required = yes + smtpd_helo_restrictions = reject_invalid_helo_hostname, reject_non_fqdn_helo_hostname + smtpd_client_restrictions = permit_mynetworks, permit_sasl_authenticated, reject_unauth_pipelining + smtpd_recipient_restrictions = permit_mynetworks, permit_sasl_authenticated, reject_unauth_destination, reject_non_fqdn_recipient, reject_unknown_recipient_domain smtpd_relay_restrictions = permit_sasl_authenticated, reject_unauth_destination 
smtpd_sender_restrictions = reject_non_fqdn_sender, reject_unknown_sender_domain, reject_sender_login_mismatch, reject_authenticated_sender_login_mismatch smtpd_tls_auth_only = yes diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 7a9c73e..0382199 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -864,7 +864,7 @@ "gridPos": { "h": 2, "w": 6, - "x": 6, + "x": 12, "y": 8 }, "targets": [ @@ -969,7 +969,7 @@ "gridPos": { "h": 2, "w": 6, - "x": 12, + "x": 6, "y": 8 }, "targets": [ diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 82ca78c..af69a39 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -873,7 +873,7 @@ data: "gridPos": { "h": 2, "w": 6, - "x": 6, + "x": 12, "y": 8 }, "targets": [ @@ -978,7 +978,7 @@ data: "gridPos": { "h": 2, "w": 6, - "x": 12, + "x": 6, "y": 8 }, "targets": [ diff --git a/services/monitoring/postmark-exporter-deployment.yaml b/services/monitoring/postmark-exporter-deployment.yaml index eb2877e..646c455 100644 --- a/services/monitoring/postmark-exporter-deployment.yaml +++ b/services/monitoring/postmark-exporter-deployment.yaml @@ -33,12 +33,12 @@ spec: valueFrom: secretKeyRef: name: postmark-exporter - key: relay-username + key: server-token - name: POSTMARK_SERVER_TOKEN_FALLBACK valueFrom: secretKeyRef: name: postmark-exporter - key: relay-password + key: server-token-fallback - name: POSTMARK_SENDING_LIMIT valueFrom: secretKeyRef: -- 2.47.2 From 4d92263871e5f28eb9b0b723d3eb3836d4745fb8 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 14:06:55 -0300 Subject: [PATCH 447/684] mailu: enable smtpd sasl auth --- scripts/nextcloud-maintenance.sh | 29 +++++++++++++++++++++++++++++ services/mailu/helmrelease.yaml | 6 ++++++ 2 files changed, 
35 insertions(+) diff --git a/scripts/nextcloud-maintenance.sh b/scripts/nextcloud-maintenance.sh index af1694c..27c5270 100755 --- a/scripts/nextcloud-maintenance.sh +++ b/scripts/nextcloud-maintenance.sh @@ -22,6 +22,35 @@ run_occ theming:config url "https://cloud.bstein.dev" run_occ theming:config color "#0f172a" run_occ theming:config disable-user-theming yes +log "Applying Atlas Mail styling defaults" +run_occ app:install customcss >/dev/null 2>&1 || true +run_occ app:enable customcss >/dev/null 2>&1 || true +MAIL_CSS=$(cat <<'CSS' +.mail-message-body, .mail-message-body pre, .mail-message-body code, .mail-message-body table { + font-family: "Inter", "Source Sans 3", "Helvetica Neue", Arial, sans-serif; + font-size: 14px; + line-height: 1.6; + color: var(--color-main-text); +} +.mail-message-body pre { + background: rgba(15, 23, 42, 0.06); + padding: 12px; + border-radius: 8px; +} +.mail-message-body blockquote { + border-left: 3px solid var(--color-border); + padding-left: 12px; + margin: 8px 0; + color: var(--color-text-lighter); +} +.mail-message-body img { + max-width: 100%; + border-radius: 6px; +} +CSS +) +run_occ config:app:set customcss css --value "${MAIL_CSS}" >/dev/null + log "Setting default quota to 200 GB" run_occ config:app:set files default_quota --value "200 GB" diff --git a/services/mailu/helmrelease.yaml b/services/mailu/helmrelease.yaml index c72c38f..0344a2f 100644 --- a/services/mailu/helmrelease.yaml +++ b/services/mailu/helmrelease.yaml @@ -220,11 +220,17 @@ spec: postfix.cf: | smtpd_helo_required = yes smtpd_helo_restrictions = reject_invalid_helo_hostname, reject_non_fqdn_helo_hostname + smtpd_sasl_auth_enable = yes + smtpd_sasl_type = dovecot + smtpd_sasl_path = private/auth + smtpd_sasl_security_options = noanonymous + smtpd_sasl_tls_security_options = noanonymous smtpd_client_restrictions = permit_mynetworks, permit_sasl_authenticated, reject_unauth_pipelining smtpd_recipient_restrictions = permit_mynetworks, 
permit_sasl_authenticated, reject_unauth_destination, reject_non_fqdn_recipient, reject_unknown_recipient_domain smtpd_relay_restrictions = permit_sasl_authenticated, reject_unauth_destination smtpd_sender_restrictions = reject_non_fqdn_sender, reject_unknown_sender_domain, reject_sender_login_mismatch, reject_authenticated_sender_login_mismatch smtpd_tls_auth_only = yes + smtpd_forbid_unauth_pipelining = yes smtpd_client_connection_rate_limit = 30 smtpd_client_message_rate_limit = 100 smtpd_client_recipient_rate_limit = 200 -- 2.47.2 From e8cf4070b5e0ca0acb53f4a0465d21dfc0a8b1f8 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 14:11:24 -0300 Subject: [PATCH 448/684] nextcloud: set theming via app config --- scripts/nextcloud-maintenance.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/nextcloud-maintenance.sh b/scripts/nextcloud-maintenance.sh index 27c5270..67b92f1 100755 --- a/scripts/nextcloud-maintenance.sh +++ b/scripts/nextcloud-maintenance.sh @@ -16,11 +16,11 @@ run_occ() { log() { echo "[$(date -Is)] $*"; } log "Applying Atlas theming" -run_occ theming:config name "Atlas Cloud" -run_occ theming:config slogan "Unified access to Atlas services" -run_occ theming:config url "https://cloud.bstein.dev" -run_occ theming:config color "#0f172a" -run_occ theming:config disable-user-theming yes +run_occ config:app:set theming name --value "Atlas Cloud" +run_occ config:app:set theming slogan --value "Unified access to Atlas services" +run_occ config:app:set theming url --value "https://cloud.bstein.dev" +run_occ config:app:set theming color --value "#0f172a" +run_occ config:app:set theming disable-user-theming --value "yes" log "Applying Atlas Mail styling defaults" run_occ app:install customcss >/dev/null 2>&1 || true -- 2.47.2 From bf358bcdfddbe6476d478be1de71f954493c6a89 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 14:14:38 -0300 Subject: [PATCH 449/684] flux: track nextcloud app --- 
.../flux-system/applications/kustomization.yaml | 1 + .../applications/nextcloud/kustomization.yaml | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 clusters/atlas/flux-system/applications/nextcloud/kustomization.yaml diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index d944938..9fefc9c 100644 --- a/clusters/atlas/flux-system/applications/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/kustomization.yaml @@ -26,4 +26,5 @@ resources: - ci-demo/kustomization.yaml - ci-demo/image-automation.yaml - ai-llm/kustomization.yaml + - nextcloud/kustomization.yaml - nextcloud-mail-sync/kustomization.yaml diff --git a/clusters/atlas/flux-system/applications/nextcloud/kustomization.yaml b/clusters/atlas/flux-system/applications/nextcloud/kustomization.yaml new file mode 100644 index 0000000..9bc39c1 --- /dev/null +++ b/clusters/atlas/flux-system/applications/nextcloud/kustomization.yaml @@ -0,0 +1,16 @@ +# clusters/atlas/flux-system/applications/nextcloud/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: nextcloud + namespace: flux-system +spec: + interval: 10m + path: ./services/nextcloud + targetNamespace: nextcloud + prune: true + sourceRef: + kind: GitRepository + name: flux-system + namespace: flux-system + wait: true -- 2.47.2 From 8a12c8cdbd21930b1b7f9e3169bc852b9d6d229b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 14:16:47 -0300 Subject: [PATCH 450/684] nextcloud: call occ via absolute path --- scripts/nextcloud-maintenance.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/nextcloud-maintenance.sh b/scripts/nextcloud-maintenance.sh index 67b92f1..dc37c3d 100755 --- a/scripts/nextcloud-maintenance.sh +++ b/scripts/nextcloud-maintenance.sh @@ -10,7 +10,7 @@ apt-get update -qq apt-get install -y -qq curl jq >/dev/null run_occ() { - runuser -u 
www-data -- php occ "$@" + runuser -u www-data -- php /var/www/html/occ "$@" } log() { echo "[$(date -Is)] $*"; } -- 2.47.2 From 37e8e691e29dfe83a19e84291eed1c76b3c7e581 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 14:22:26 -0300 Subject: [PATCH 451/684] nextcloud: restore app files for maintenance job --- scripts/nextcloud-maintenance.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/nextcloud-maintenance.sh b/scripts/nextcloud-maintenance.sh index dc37c3d..d526674 100755 --- a/scripts/nextcloud-maintenance.sh +++ b/scripts/nextcloud-maintenance.sh @@ -15,6 +15,14 @@ run_occ() { log() { echo "[$(date -Is)] $*"; } +log "Ensuring Nextcloud app files are present" +if [[ ! -d /var/www/html/lib && -d /usr/src/nextcloud/lib ]]; then + rsync -a --delete \ + --exclude config \ + --exclude data \ + /usr/src/nextcloud/ /var/www/html/ +fi + log "Applying Atlas theming" run_occ config:app:set theming name --value "Atlas Cloud" run_occ config:app:set theming slogan --value "Unified access to Atlas services" -- 2.47.2 From 556b714e5097042a406f8c83f53f3bf1fbc3d4aa Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 14:38:10 -0300 Subject: [PATCH 452/684] nextcloud/monitoring: fix perms and mail panels --- scripts/dashboards_render_atlas.py | 190 +++--- scripts/nextcloud-maintenance.sh | 9 +- .../monitoring/dashboards/atlas-mail.json | 591 ++++++------------ .../monitoring/grafana-dashboard-mail.yaml | 591 ++++++------------ services/nextcloud/deployment.yaml | 3 + 5 files changed, 459 insertions(+), 925 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 0965860..4476773 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -1899,22 +1899,6 @@ def build_mail_dashboard(): {"color": "red", "value": 10}, ], } - bounce_count_thresholds = { - "mode": "absolute", - "steps": [ - {"color": "green", "value": None}, - {"color": "yellow", "value": 1}, 
- {"color": "orange", "value": 10}, - {"color": "red", "value": 100}, - ], - } - api_thresholds = { - "mode": "absolute", - "steps": [ - {"color": "red", "value": None}, - {"color": "green", "value": 1}, - ], - } limit_thresholds = { "mode": "absolute", "steps": [ @@ -1924,74 +1908,98 @@ def build_mail_dashboard(): {"color": "red", "value": 95}, ], } + success_thresholds = { + "mode": "absolute", + "steps": [ + {"color": "red", "value": None}, + {"color": "orange", "value": 90}, + {"color": "yellow", "value": 95}, + {"color": "green", "value": 98}, + ], + } - current_stats = [ - ( + panels.append( + stat_panel( 1, - "Bounce Rate (1d)", - 'max(postmark_outbound_bounce_rate{window="1d"})', - "percent", - bounce_rate_thresholds, - ), - ( - 2, - "Bounce Rate (7d)", - 'max(postmark_outbound_bounce_rate{window="7d"})', - "percent", - bounce_rate_thresholds, - ), - ( - 3, - "Bounced (1d)", - 'max(postmark_outbound_bounced{window="1d"})', - "none", - bounce_count_thresholds, - ), - ( - 4, - "Bounced (7d)", - 'max(postmark_outbound_bounced{window="7d"})', - "none", - bounce_count_thresholds, - ), - ] - for idx, (panel_id, title, expr, unit, thresholds) in enumerate(current_stats): - panels.append( - stat_panel( - panel_id, - title, - expr, - {"h": 4, "w": 6, "x": 6 * idx, "y": 0}, - unit=unit, - thresholds=thresholds, - decimals=1 if unit == "percent" else 0, - ) + "Sent (1d)", + 'max(postmark_outbound_sent{window="1d"})', + {"h": 4, "w": 6, "x": 0, "y": 0}, + decimals=0, ) + ) + panels.append( + stat_panel( + 2, + "Sent (7d)", + 'max(postmark_outbound_sent{window="7d"})', + {"h": 4, "w": 6, "x": 6, "y": 0}, + decimals=0, + ) + ) + panels.append( + { + "id": 3, + "type": "stat", + "title": "Mail Bounces (1d)", + "datasource": PROM_DS, + "gridPos": {"h": 4, "w": 6, "x": 12, "y": 0}, + "targets": [ + { + "expr": 'max(postmark_outbound_bounce_rate{window="1d"})', + "refId": "A", + "legendFormat": "Rate", + }, + { + "expr": 'max(postmark_outbound_bounced{window="1d"})', + 
"refId": "B", + "legendFormat": "Count", + }, + ], + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "custom": {"displayMode": "auto"}, + "thresholds": bounce_rate_thresholds, + "unit": "none", + }, + "overrides": [ + { + "matcher": {"id": "byName", "options": "Rate"}, + "properties": [{"id": "unit", "value": "percent"}], + }, + { + "matcher": {"id": "byName", "options": "Count"}, + "properties": [{"id": "unit", "value": "none"}], + }, + ], + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False}, + "textMode": "name_and_value", + }, + } + ) + panels.append( + stat_panel( + 4, + "Success Rate (1d)", + 'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)', + {"h": 4, "w": 6, "x": 18, "y": 0}, + unit="percent", + thresholds=success_thresholds, + decimals=1, + ) + ) panels.append( stat_panel( 5, - "Sent (1d)", - 'max(postmark_outbound_sent{window="1d"})', - {"h": 4, "w": 6, "x": 0, "y": 4}, - decimals=0, - ) - ) - panels.append( - stat_panel( - 6, - "Sent (7d)", - 'max(postmark_outbound_sent{window="7d"})', - {"h": 4, "w": 6, "x": 6, "y": 4}, - decimals=0, - ) - ) - panels.append( - stat_panel( - 7, "Limit Used (30d)", "max(postmark_sending_limit_used_percent)", - {"h": 4, "w": 6, "x": 12, "y": 4}, + {"h": 4, "w": 6, "x": 0, "y": 4}, thresholds=limit_thresholds, unit="percent", decimals=1, @@ -1999,49 +2007,29 @@ def build_mail_dashboard(): ) panels.append( stat_panel( - 8, + 6, "Send Limit (30d)", "max(postmark_sending_limit)", - {"h": 4, "w": 6, "x": 18, "y": 4}, - decimals=0, - ) - ) - - panels.append( - stat_panel( - 9, - "Postmark API Up", - "max(postmark_api_up)", - {"h": 4, "w": 6, "x": 0, "y": 8}, - thresholds=api_thresholds, + {"h": 4, "w": 6, "x": 6, "y": 4}, decimals=0, ) ) panels.append( stat_panel( - 10, + 7, "Last Success", "max(postmark_last_success_timestamp_seconds)", - {"h": 4, "w": 6, "x": 6, "y": 
8}, + {"h": 4, "w": 6, "x": 12, "y": 4}, unit="dateTimeAsIso", decimals=0, ) ) panels.append( stat_panel( - 11, + 8, "Exporter Errors", "sum(postmark_request_errors_total)", - {"h": 4, "w": 6, "x": 12, "y": 8}, - decimals=0, - ) - ) - panels.append( - stat_panel( - 12, - "Limit Used (30d)", - "max(postmark_sending_limit_used)", - {"h": 4, "w": 6, "x": 18, "y": 8}, + {"h": 4, "w": 6, "x": 18, "y": 4}, decimals=0, ) ) diff --git a/scripts/nextcloud-maintenance.sh b/scripts/nextcloud-maintenance.sh index d526674..eb5ccca 100755 --- a/scripts/nextcloud-maintenance.sh +++ b/scripts/nextcloud-maintenance.sh @@ -23,6 +23,11 @@ if [[ ! -d /var/www/html/lib && -d /usr/src/nextcloud/lib ]]; then /usr/src/nextcloud/ /var/www/html/ fi +log "Ensuring Nextcloud permissions" +chown 33:33 /var/www/html || true +chmod 775 /var/www/html || true +chown -R 33:33 /var/www/html/apps /var/www/html/custom_apps 2>/dev/null || true + log "Applying Atlas theming" run_occ config:app:set theming name --value "Atlas Cloud" run_occ config:app:set theming slogan --value "Unified access to Atlas services" @@ -59,8 +64,8 @@ CSS ) run_occ config:app:set customcss css --value "${MAIL_CSS}" >/dev/null -log "Setting default quota to 200 GB" -run_occ config:app:set files default_quota --value "200 GB" +log "Setting default quota to 250 GB" +run_occ config:app:set files default_quota --value "250 GB" API_BASE="${NC_URL}/ocs/v2.php/apps/external/api/v1" AUTH=(-u "${ADMIN_USER}:${ADMIN_PASS}" -H "OCS-APIRequest: true") diff --git a/services/monitoring/dashboards/atlas-mail.json b/services/monitoring/dashboards/atlas-mail.json index a0b733d..67c1766 100644 --- a/services/monitoring/dashboards/atlas-mail.json +++ b/services/monitoring/dashboards/atlas-mail.json @@ -7,282 +7,6 @@ { "id": 1, "type": "stat", - "title": "Bounce Rate (1d)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 0, - "y": 0 - }, - "targets": [ - { - "expr": 
"max(postmark_outbound_bounce_rate{window=\"1d\"})", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 5 - }, - { - "color": "orange", - "value": 8 - }, - { - "color": "red", - "value": 10 - } - ] - }, - "unit": "percent", - "custom": { - "displayMode": "auto" - }, - "decimals": 1 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 2, - "type": "stat", - "title": "Bounce Rate (7d)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 6, - "y": 0 - }, - "targets": [ - { - "expr": "max(postmark_outbound_bounce_rate{window=\"7d\"})", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 5 - }, - { - "color": "orange", - "value": 8 - }, - { - "color": "red", - "value": 10 - } - ] - }, - "unit": "percent", - "custom": { - "displayMode": "auto" - }, - "decimals": 1 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 3, - "type": "stat", - "title": "Bounced (1d)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 12, - "y": 0 - }, - "targets": [ - { - "expr": "max(postmark_outbound_bounced{window=\"1d\"})", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - 
"color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 1 - }, - { - "color": "orange", - "value": 10 - }, - { - "color": "red", - "value": 100 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - }, - "decimals": 0 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 4, - "type": "stat", - "title": "Bounced (7d)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 18, - "y": 0 - }, - "targets": [ - { - "expr": "max(postmark_outbound_bounced{window=\"7d\"})", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 1 - }, - { - "color": "orange", - "value": 10 - }, - { - "color": "red", - "value": 100 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - }, - "decimals": 0 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 5, - "type": "stat", "title": "Sent (1d)", "datasource": { "type": "prometheus", @@ -292,7 +16,7 @@ "h": 4, "w": 6, "x": 0, - "y": 4 + "y": 0 }, "targets": [ { @@ -342,7 +66,7 @@ } }, { - "id": 6, + "id": 2, "type": "stat", "title": "Sent (7d)", "datasource": { @@ -353,7 +77,7 @@ "h": 4, "w": 6, "x": 6, - "y": 4 + "y": 0 }, "targets": [ { @@ -403,7 +127,174 @@ } }, { - "id": 7, + "id": 3, + "type": "stat", + "title": "Mail Bounces 
(1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 0 + }, + "targets": [ + { + "expr": "max(postmark_outbound_bounce_rate{window=\"1d\"})", + "refId": "A", + "legendFormat": "Rate" + }, + { + "expr": "max(postmark_outbound_bounced{window=\"1d\"})", + "refId": "B", + "legendFormat": "Count" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "displayMode": "auto" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5 + }, + { + "color": "orange", + "value": 8 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Rate" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Count" + }, + "properties": [ + { + "id": "unit", + "value": "none" + } + ] + } + ] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "name_and_value" + } + }, + { + "id": 4, + "type": "stat", + "title": "Success Rate (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 0 + }, + "targets": [ + { + "expr": "clamp_min(100 - max(postmark_outbound_bounce_rate{window=\"1d\"}), 0)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 90 + }, + { + "color": "yellow", + "value": 95 + }, + { + "color": "green", + "value": 98 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 1 + }, + 
"overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 5, "type": "stat", "title": "Limit Used (30d)", "datasource": { @@ -413,7 +304,7 @@ "gridPos": { "h": 4, "w": 6, - "x": 12, + "x": 0, "y": 4 }, "targets": [ @@ -472,7 +363,7 @@ } }, { - "id": 8, + "id": 6, "type": "stat", "title": "Send Limit (30d)", "datasource": { @@ -482,7 +373,7 @@ "gridPos": { "h": 4, "w": 6, - "x": 18, + "x": 6, "y": 4 }, "targets": [ @@ -533,68 +424,7 @@ } }, { - "id": 9, - "type": "stat", - "title": "Postmark API Up", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 0, - "y": 8 - }, - "targets": [ - { - "expr": "max(postmark_api_up)", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - }, - "decimals": 0 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 10, + "id": 7, "type": "stat", "title": "Last Success", "datasource": { @@ -604,8 +434,8 @@ "gridPos": { "h": 4, "w": 6, - "x": 6, - "y": 8 + "x": 12, + "y": 4 }, "targets": [ { @@ -655,7 +485,7 @@ } }, { - "id": 11, + "id": 8, "type": "stat", "title": "Exporter Errors", "datasource": { @@ -665,8 +495,8 @@ "gridPos": { "h": 4, "w": 6, - "x": 12, - "y": 8 + "x": 18, + "y": 4 }, "targets": [ { @@ -715,67 +545,6 @@ "textMode": "value" } }, - { - "id": 12, - "type": "stat", - "title": "Limit Used (30d)", - "datasource": { - 
"type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 18, - "y": 8 - }, - "targets": [ - { - "expr": "max(postmark_sending_limit_used)", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(115, 115, 115, 1)", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - }, - "decimals": 0 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, { "id": 13, "type": "timeseries", diff --git a/services/monitoring/grafana-dashboard-mail.yaml b/services/monitoring/grafana-dashboard-mail.yaml index f97ce60..4c011a8 100644 --- a/services/monitoring/grafana-dashboard-mail.yaml +++ b/services/monitoring/grafana-dashboard-mail.yaml @@ -16,282 +16,6 @@ data: { "id": 1, "type": "stat", - "title": "Bounce Rate (1d)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 0, - "y": 0 - }, - "targets": [ - { - "expr": "max(postmark_outbound_bounce_rate{window=\"1d\"})", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 5 - }, - { - "color": "orange", - "value": 8 - }, - { - "color": "red", - "value": 10 - } - ] - }, - "unit": "percent", - "custom": { - "displayMode": "auto" - }, - "decimals": 1 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - 
"textMode": "value" - } - }, - { - "id": 2, - "type": "stat", - "title": "Bounce Rate (7d)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 6, - "y": 0 - }, - "targets": [ - { - "expr": "max(postmark_outbound_bounce_rate{window=\"7d\"})", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 5 - }, - { - "color": "orange", - "value": 8 - }, - { - "color": "red", - "value": 10 - } - ] - }, - "unit": "percent", - "custom": { - "displayMode": "auto" - }, - "decimals": 1 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 3, - "type": "stat", - "title": "Bounced (1d)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 12, - "y": 0 - }, - "targets": [ - { - "expr": "max(postmark_outbound_bounced{window=\"1d\"})", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 1 - }, - { - "color": "orange", - "value": 10 - }, - { - "color": "red", - "value": 100 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - }, - "decimals": 0 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 4, - "type": "stat", - "title": "Bounced (7d)", - "datasource": { - "type": 
"prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 18, - "y": 0 - }, - "targets": [ - { - "expr": "max(postmark_outbound_bounced{window=\"7d\"})", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 1 - }, - { - "color": "orange", - "value": 10 - }, - { - "color": "red", - "value": 100 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - }, - "decimals": 0 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 5, - "type": "stat", "title": "Sent (1d)", "datasource": { "type": "prometheus", @@ -301,7 +25,7 @@ data: "h": 4, "w": 6, "x": 0, - "y": 4 + "y": 0 }, "targets": [ { @@ -351,7 +75,7 @@ data: } }, { - "id": 6, + "id": 2, "type": "stat", "title": "Sent (7d)", "datasource": { @@ -362,7 +86,7 @@ data: "h": 4, "w": 6, "x": 6, - "y": 4 + "y": 0 }, "targets": [ { @@ -412,7 +136,174 @@ data: } }, { - "id": 7, + "id": 3, + "type": "stat", + "title": "Mail Bounces (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 0 + }, + "targets": [ + { + "expr": "max(postmark_outbound_bounce_rate{window=\"1d\"})", + "refId": "A", + "legendFormat": "Rate" + }, + { + "expr": "max(postmark_outbound_bounced{window=\"1d\"})", + "refId": "B", + "legendFormat": "Count" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "displayMode": "auto" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5 + }, + { + "color": "orange", + "value": 8 + }, 
+ { + "color": "red", + "value": 10 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Rate" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Count" + }, + "properties": [ + { + "id": "unit", + "value": "none" + } + ] + } + ] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "name_and_value" + } + }, + { + "id": 4, + "type": "stat", + "title": "Success Rate (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 0 + }, + "targets": [ + { + "expr": "clamp_min(100 - max(postmark_outbound_bounce_rate{window=\"1d\"}), 0)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 90 + }, + { + "color": "yellow", + "value": 95 + }, + { + "color": "green", + "value": 98 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 5, "type": "stat", "title": "Limit Used (30d)", "datasource": { @@ -422,7 +313,7 @@ data: "gridPos": { "h": 4, "w": 6, - "x": 12, + "x": 0, "y": 4 }, "targets": [ @@ -481,7 +372,7 @@ data: } }, { - "id": 8, + "id": 6, "type": "stat", "title": "Send Limit (30d)", "datasource": { @@ -491,7 +382,7 @@ data: "gridPos": { "h": 4, "w": 6, - "x": 18, + "x": 6, "y": 4 }, "targets": [ @@ -542,68 +433,7 @@ data: } }, { - "id": 9, - "type": 
"stat", - "title": "Postmark API Up", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 0, - "y": 8 - }, - "targets": [ - { - "expr": "max(postmark_api_up)", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - }, - "decimals": 0 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 10, + "id": 7, "type": "stat", "title": "Last Success", "datasource": { @@ -613,8 +443,8 @@ data: "gridPos": { "h": 4, "w": 6, - "x": 6, - "y": 8 + "x": 12, + "y": 4 }, "targets": [ { @@ -664,7 +494,7 @@ data: } }, { - "id": 11, + "id": 8, "type": "stat", "title": "Exporter Errors", "datasource": { @@ -674,8 +504,8 @@ data: "gridPos": { "h": 4, "w": 6, - "x": 12, - "y": 8 + "x": 18, + "y": 4 }, "targets": [ { @@ -724,67 +554,6 @@ data: "textMode": "value" } }, - { - "id": 12, - "type": "stat", - "title": "Limit Used (30d)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 18, - "y": 8 - }, - "targets": [ - { - "expr": "max(postmark_sending_limit_used)", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(115, 115, 115, 1)", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - }, - "decimals": 0 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - 
"justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, { "id": 13, "type": "timeseries", diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index b2c590f..731cfd2 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -28,8 +28,11 @@ spec: command: ["/bin/sh", "-c"] args: - | + chown 33:33 /var/www/html || true + chmod 775 /var/www/html || true chown -R 33:33 /var/www/html/config || true chown -R 33:33 /var/www/html/data || true + chown -R 33:33 /var/www/html/apps /var/www/html/custom_apps || true securityContext: runAsUser: 0 runAsGroup: 0 -- 2.47.2 From d4f1d01b9c4a32bf3b800eefe95daa3d3e4c8cce Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 14:40:29 -0300 Subject: [PATCH 453/684] nextcloud: reset empty config on boot --- services/nextcloud/deployment.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 731cfd2..36f8227 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -28,6 +28,9 @@ spec: command: ["/bin/sh", "-c"] args: - | + if [ ! 
-s /var/www/html/config/config.php ]; then + rm -f /var/www/html/config/config.php || true + fi chown 33:33 /var/www/html || true chmod 775 /var/www/html || true chown -R 33:33 /var/www/html/config || true -- 2.47.2 From 46884bdd0cb919bc9ef0ec7498fda366ebd32088 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 14:43:18 -0300 Subject: [PATCH 454/684] nextcloud: ensure data dir and perms --- scripts/nextcloud-maintenance.sh | 3 ++- services/nextcloud/deployment.yaml | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/nextcloud-maintenance.sh b/scripts/nextcloud-maintenance.sh index eb5ccca..ab38616 100755 --- a/scripts/nextcloud-maintenance.sh +++ b/scripts/nextcloud-maintenance.sh @@ -24,9 +24,10 @@ if [[ ! -d /var/www/html/lib && -d /usr/src/nextcloud/lib ]]; then fi log "Ensuring Nextcloud permissions" +mkdir -p /var/www/html/data chown 33:33 /var/www/html || true chmod 775 /var/www/html || true -chown -R 33:33 /var/www/html/apps /var/www/html/custom_apps 2>/dev/null || true +chown -R 33:33 /var/www/html/apps /var/www/html/custom_apps /var/www/html/data /var/www/html/config 2>/dev/null || true log "Applying Atlas theming" run_occ config:app:set theming name --value "Atlas Cloud" diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 36f8227..be43cf4 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -31,6 +31,7 @@ spec: if [ ! 
-s /var/www/html/config/config.php ]; then rm -f /var/www/html/config/config.php || true fi + mkdir -p /var/www/html/data || true chown 33:33 /var/www/html || true chmod 775 /var/www/html || true chown -R 33:33 /var/www/html/config || true -- 2.47.2 From 7283a740e6106e196243282c060c665fbc32ddd2 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 14:46:16 -0300 Subject: [PATCH 455/684] nextcloud: install when config missing --- services/nextcloud/deployment.yaml | 51 ++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index be43cf4..bc8c767 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -46,6 +46,57 @@ spec: - name: nextcloud-config mountPath: /var/www/html/config/extra.config.php subPath: extra.config.php + - name: install-nextcloud + image: nextcloud:29-apache + command: ["/bin/sh", "-c"] + args: + - | + if [ ! -s /var/www/html/config/config.php ]; then + runuser -u www-data -- \ + php /var/www/html/occ maintenance:install \ + --database pgsql \ + --database-host "${POSTGRES_HOST}" \ + --database-name "${POSTGRES_DB}" \ + --database-user "${POSTGRES_USER}" \ + --database-pass "${POSTGRES_PASSWORD}" \ + --admin-user "${NEXTCLOUD_ADMIN_USER}" \ + --admin-pass "${NEXTCLOUD_ADMIN_PASSWORD}" \ + --data-dir "/var/www/html/data" + fi + env: + - name: POSTGRES_HOST + value: postgres-service.postgres.svc.cluster.local + - name: POSTGRES_DB + valueFrom: + secretKeyRef: + name: nextcloud-db + key: database + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + name: nextcloud-db + key: db-username + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: nextcloud-db + key: db-password + - name: NEXTCLOUD_ADMIN_USER + valueFrom: + secretKeyRef: + name: nextcloud-admin + key: admin-user + - name: NEXTCLOUD_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: nextcloud-admin + key: admin-password + volumeMounts: + - 
name: nextcloud-data + mountPath: /var/www/html + - name: nextcloud-config + mountPath: /var/www/html/config/extra.config.php + subPath: extra.config.php containers: - name: nextcloud image: nextcloud:29-apache -- 2.47.2 From 6728b4f4aee1fdb963703dc11a0e1d969d5087b9 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 14:46:36 -0300 Subject: [PATCH 456/684] atlasbot: add KB + read-only tools --- knowledge/catalog/atlas-summary.json | 8 + knowledge/catalog/atlas.json | 2656 +++++++++++++++++ knowledge/catalog/atlas.yaml | 1726 +++++++++++ knowledge/catalog/runbooks.json | 73 + knowledge/diagrams/atlas-http.mmd | 176 ++ scripts/knowledge_render_atlas.py | 554 ++++ .../communication/atlasbot-configmap.yaml | 401 ++- .../communication/atlasbot-deployment.yaml | 24 +- services/communication/atlasbot-rbac.yaml | 47 + services/communication/kustomization.yaml | 10 + 10 files changed, 5654 insertions(+), 21 deletions(-) create mode 100644 knowledge/catalog/atlas-summary.json create mode 100644 knowledge/catalog/atlas.json create mode 100644 knowledge/catalog/atlas.yaml create mode 100644 knowledge/catalog/runbooks.json create mode 100644 knowledge/diagrams/atlas-http.mmd create mode 100644 scripts/knowledge_render_atlas.py create mode 100644 services/communication/atlasbot-rbac.yaml diff --git a/knowledge/catalog/atlas-summary.json b/knowledge/catalog/atlas-summary.json new file mode 100644 index 0000000..16e3019 --- /dev/null +++ b/knowledge/catalog/atlas-summary.json @@ -0,0 +1,8 @@ +{ + "counts": { + "helmrelease_host_hints": 7, + "http_endpoints": 32, + "services": 42, + "workloads": 47 + } +} diff --git a/knowledge/catalog/atlas.json b/knowledge/catalog/atlas.json new file mode 100644 index 0000000..359af22 --- /dev/null +++ b/knowledge/catalog/atlas.json @@ -0,0 +1,2656 @@ +{ + "cluster": "atlas", + "sources": [ + { + "name": "ai-llm", + "path": "services/ai-llm", + "targetNamespace": "ai" + }, + { + "name": "bstein-dev-home", + "path": 
"services/bstein-dev-home", + "targetNamespace": "bstein-dev-home" + }, + { + "name": "ci-demo", + "path": "services/ci-demo", + "targetNamespace": null + }, + { + "name": "comms", + "path": "services/comms", + "targetNamespace": "comms" + }, + { + "name": "communication", + "path": "services/communication", + "targetNamespace": "comms" + }, + { + "name": "core", + "path": "infrastructure/core", + "targetNamespace": null + }, + { + "name": "crypto", + "path": "services/crypto", + "targetNamespace": "crypto" + }, + { + "name": "flux-system", + "path": "clusters/atlas/flux-system", + "targetNamespace": null + }, + { + "name": "gitea", + "path": "services/gitea", + "targetNamespace": "gitea" + }, + { + "name": "gitops-ui", + "path": "services/gitops-ui", + "targetNamespace": "flux-system" + }, + { + "name": "harbor", + "path": "services/harbor", + "targetNamespace": "harbor" + }, + { + "name": "helm", + "path": "infrastructure/sources/helm", + "targetNamespace": "flux-system" + }, + { + "name": "jellyfin", + "path": "services/jellyfin", + "targetNamespace": "jellyfin" + }, + { + "name": "jenkins", + "path": "services/jenkins", + "targetNamespace": "jenkins" + }, + { + "name": "keycloak", + "path": "services/keycloak", + "targetNamespace": "sso" + }, + { + "name": "longhorn-ui", + "path": "infrastructure/longhorn/ui-ingress", + "targetNamespace": "longhorn-system" + }, + { + "name": "mailu", + "path": "services/mailu", + "targetNamespace": "mailu-mailserver" + }, + { + "name": "metallb", + "path": "infrastructure/metallb", + "targetNamespace": "metallb-system" + }, + { + "name": "monerod", + "path": "services/crypto/monerod", + "targetNamespace": "crypto" + }, + { + "name": "monitoring", + "path": "services/monitoring", + "targetNamespace": null + }, + { + "name": "nextcloud", + "path": "services/nextcloud", + "targetNamespace": "nextcloud" + }, + { + "name": "nextcloud-mail-sync", + "path": "services/nextcloud-mail-sync", + "targetNamespace": "nextcloud" + }, + { + 
"name": "oauth2-proxy", + "path": "services/oauth2-proxy", + "targetNamespace": "sso" + }, + { + "name": "openldap", + "path": "services/openldap", + "targetNamespace": "sso" + }, + { + "name": "pegasus", + "path": "services/pegasus", + "targetNamespace": "jellyfin" + }, + { + "name": "sui-metrics", + "path": "services/sui-metrics/overlays/atlas", + "targetNamespace": "sui-metrics" + }, + { + "name": "traefik", + "path": "infrastructure/traefik", + "targetNamespace": "traefik" + }, + { + "name": "vault", + "path": "services/vault", + "targetNamespace": "vault" + }, + { + "name": "vault-csi", + "path": "infrastructure/vault-csi", + "targetNamespace": "kube-system" + }, + { + "name": "vaultwarden", + "path": "services/vaultwarden", + "targetNamespace": "vaultwarden" + }, + { + "name": "xmr-miner", + "path": "services/crypto/xmr-miner", + "targetNamespace": "crypto" + } + ], + "workloads": [ + { + "kind": "Deployment", + "namespace": "ai", + "name": "ollama", + "labels": { + "app": "ollama" + }, + "serviceAccountName": null, + "nodeSelector": {}, + "images": [ + "ollama/ollama:latest" + ] + }, + { + "kind": "Deployment", + "namespace": "bstein-dev-home", + "name": "bstein-dev-home-backend", + "labels": { + "app": "bstein-dev-home-backend" + }, + "serviceAccountName": "bstein-dev-home", + "nodeSelector": { + "kubernetes.io/arch": "arm64", + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-84" + ] + }, + { + "kind": "Deployment", + "namespace": "bstein-dev-home", + "name": "bstein-dev-home-frontend", + "labels": { + "app": "bstein-dev-home-frontend" + }, + "serviceAccountName": null, + "nodeSelector": { + "kubernetes.io/arch": "arm64", + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-84" + ] + }, + { + "kind": "Deployment", + "namespace": "bstein-dev-home", + "name": "chat-ai-gateway", + "labels": { + "app": "chat-ai-gateway" + 
}, + "serviceAccountName": null, + "nodeSelector": { + "kubernetes.io/arch": "arm64", + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "python:3.11-slim" + ] + }, + { + "kind": "Deployment", + "namespace": "ci-demo", + "name": "ci-demo", + "labels": { + "app.kubernetes.io/name": "ci-demo" + }, + "serviceAccountName": null, + "nodeSelector": { + "hardware": "rpi4" + }, + "images": [ + "registry.bstein.dev/infra/ci-demo:v0.0.0-3" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "atlasbot", + "labels": { + "app": "atlasbot" + }, + "serviceAccountName": "atlasbot", + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "python:3.11-slim" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "coturn", + "labels": { + "app": "coturn" + }, + "serviceAccountName": null, + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "ghcr.io/coturn/coturn:4.6.2" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "element-call", + "labels": { + "app": "element-call" + }, + "serviceAccountName": null, + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "ghcr.io/element-hq/element-call:latest" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "livekit", + "labels": { + "app": "livekit" + }, + "serviceAccountName": null, + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "livekit/livekit-server:v1.9.0" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "livekit-token-service", + "labels": { + "app": "livekit-token-service" + }, + "serviceAccountName": null, + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "ghcr.io/element-hq/lk-jwt-service:0.3.0" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "matrix-authentication-service", + "labels": { + "app": "matrix-authentication-service" + }, + "serviceAccountName": null, + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + 
"ghcr.io/element-hq/matrix-authentication-service:1.8.0" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "matrix-wellknown", + "labels": { + "app": "matrix-wellknown" + }, + "serviceAccountName": null, + "nodeSelector": {}, + "images": [ + "nginx:1.27-alpine" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "othrys-element-element-web", + "labels": { + "app.kubernetes.io/instance": "othrys-element", + "app.kubernetes.io/name": "element-web" + }, + "serviceAccountName": "othrys-element-element-web", + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "ghcr.io/element-hq/element-web:v1.12.6" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "othrys-synapse-matrix-synapse", + "labels": { + "app.kubernetes.io/component": "synapse", + "app.kubernetes.io/instance": "othrys-synapse", + "app.kubernetes.io/name": "matrix-synapse" + }, + "serviceAccountName": "default", + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "ghcr.io/element-hq/synapse:v1.144.0" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "othrys-synapse-redis-master", + "labels": { + "app.kubernetes.io/component": "master", + "app.kubernetes.io/instance": "othrys-synapse", + "app.kubernetes.io/managed-by": "Helm", + "app.kubernetes.io/name": "redis", + "helm.sh/chart": "redis-17.17.1" + }, + "serviceAccountName": "othrys-synapse-redis", + "nodeSelector": {}, + "images": [ + "docker.io/bitnamilegacy/redis:7.0.12-debian-11-r34" + ] + }, + { + "kind": "DaemonSet", + "namespace": "crypto", + "name": "monero-xmrig", + "labels": { + "app": "monero-xmrig" + }, + "serviceAccountName": null, + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "ghcr.io/tari-project/xmrig:latest" + ] + }, + { + "kind": "Deployment", + "namespace": "crypto", + "name": "monero-p2pool", + "labels": { + "app": "monero-p2pool" + }, + "serviceAccountName": null, + "nodeSelector": { + 
"node-role.kubernetes.io/worker": "true" + }, + "images": [ + "debian:bookworm-slim" + ] + }, + { + "kind": "Deployment", + "namespace": "crypto", + "name": "monerod", + "labels": { + "app": "monerod" + }, + "serviceAccountName": null, + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "registry.bstein.dev/crypto/monerod:0.18.4.1" + ] + }, + { + "kind": "Deployment", + "namespace": "flux-system", + "name": "helm-controller", + "labels": { + "app": "helm-controller", + "app.kubernetes.io/component": "helm-controller", + "app.kubernetes.io/instance": "flux-system", + "app.kubernetes.io/part-of": "flux", + "app.kubernetes.io/version": "v2.7.5" + }, + "serviceAccountName": "helm-controller", + "nodeSelector": { + "kubernetes.io/os": "linux" + }, + "images": [ + "ghcr.io/fluxcd/helm-controller:v1.4.5" + ] + }, + { + "kind": "Deployment", + "namespace": "flux-system", + "name": "image-automation-controller", + "labels": { + "app": "image-automation-controller", + "app.kubernetes.io/component": "image-automation-controller", + "app.kubernetes.io/instance": "flux-system", + "app.kubernetes.io/part-of": "flux", + "app.kubernetes.io/version": "v2.7.5" + }, + "serviceAccountName": "image-automation-controller", + "nodeSelector": { + "kubernetes.io/os": "linux" + }, + "images": [ + "ghcr.io/fluxcd/image-automation-controller:v1.0.4" + ] + }, + { + "kind": "Deployment", + "namespace": "flux-system", + "name": "image-reflector-controller", + "labels": { + "app": "image-reflector-controller", + "app.kubernetes.io/component": "image-reflector-controller", + "app.kubernetes.io/instance": "flux-system", + "app.kubernetes.io/part-of": "flux", + "app.kubernetes.io/version": "v2.7.5" + }, + "serviceAccountName": "image-reflector-controller", + "nodeSelector": { + "kubernetes.io/os": "linux" + }, + "images": [ + "ghcr.io/fluxcd/image-reflector-controller:v1.0.4" + ] + }, + { + "kind": "Deployment", + "namespace": "flux-system", + "name": 
"kustomize-controller", + "labels": { + "app": "kustomize-controller", + "app.kubernetes.io/component": "kustomize-controller", + "app.kubernetes.io/instance": "flux-system", + "app.kubernetes.io/part-of": "flux", + "app.kubernetes.io/version": "v2.7.5" + }, + "serviceAccountName": "kustomize-controller", + "nodeSelector": { + "kubernetes.io/os": "linux" + }, + "images": [ + "ghcr.io/fluxcd/kustomize-controller:v1.7.3" + ] + }, + { + "kind": "Deployment", + "namespace": "flux-system", + "name": "notification-controller", + "labels": { + "app": "notification-controller", + "app.kubernetes.io/component": "notification-controller", + "app.kubernetes.io/instance": "flux-system", + "app.kubernetes.io/part-of": "flux", + "app.kubernetes.io/version": "v2.7.5" + }, + "serviceAccountName": "notification-controller", + "nodeSelector": { + "kubernetes.io/os": "linux" + }, + "images": [ + "ghcr.io/fluxcd/notification-controller:v1.7.5" + ] + }, + { + "kind": "Deployment", + "namespace": "flux-system", + "name": "source-controller", + "labels": { + "app": "source-controller", + "app.kubernetes.io/component": "source-controller", + "app.kubernetes.io/instance": "flux-system", + "app.kubernetes.io/part-of": "flux", + "app.kubernetes.io/version": "v2.7.5" + }, + "serviceAccountName": "source-controller", + "nodeSelector": { + "kubernetes.io/os": "linux" + }, + "images": [ + "ghcr.io/fluxcd/source-controller:v1.7.4" + ] + }, + { + "kind": "Deployment", + "namespace": "gitea", + "name": "gitea", + "labels": { + "app": "gitea" + }, + "serviceAccountName": null, + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "gitea/gitea:1.23" + ] + }, + { + "kind": "Deployment", + "namespace": "jellyfin", + "name": "jellyfin", + "labels": { + "app": "jellyfin" + }, + "serviceAccountName": null, + "nodeSelector": {}, + "images": [ + "docker.io/jellyfin/jellyfin:10.11.5" + ] + }, + { + "kind": "Deployment", + "namespace": "jellyfin", + "name": "pegasus", + "labels": 
{ + "app": "pegasus" + }, + "serviceAccountName": null, + "nodeSelector": { + "kubernetes.io/arch": "arm64", + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "alpine:3.20", + "registry.bstein.dev/streaming/pegasus:1.2.32" + ] + }, + { + "kind": "Deployment", + "namespace": "jenkins", + "name": "jenkins", + "labels": { + "app": "jenkins" + }, + "serviceAccountName": "jenkins", + "nodeSelector": { + "kubernetes.io/arch": "arm64", + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "jenkins/jenkins:2.528.3-jdk21" + ] + }, + { + "kind": "DaemonSet", + "namespace": "kube-system", + "name": "nvidia-device-plugin-jetson", + "labels": { + "app.kubernetes.io/instance": "jetson", + "app.kubernetes.io/name": "nvidia-device-plugin" + }, + "serviceAccountName": null, + "nodeSelector": { + "jetson": "true", + "kubernetes.io/arch": "arm64" + }, + "images": [ + "nvcr.io/nvidia/k8s-device-plugin:v0.16.2" + ] + }, + { + "kind": "DaemonSet", + "namespace": "kube-system", + "name": "nvidia-device-plugin-minipc", + "labels": { + "app.kubernetes.io/instance": "titan22", + "app.kubernetes.io/name": "nvidia-device-plugin" + }, + "serviceAccountName": null, + "nodeSelector": { + "kubernetes.io/arch": "amd64", + "kubernetes.io/hostname": "titan-22" + }, + "images": [ + "nvcr.io/nvidia/k8s-device-plugin:v0.16.2" + ] + }, + { + "kind": "DaemonSet", + "namespace": "kube-system", + "name": "nvidia-device-plugin-tethys", + "labels": { + "app.kubernetes.io/instance": "titan24", + "app.kubernetes.io/name": "nvidia-device-plugin" + }, + "serviceAccountName": null, + "nodeSelector": { + "kubernetes.io/arch": "amd64", + "kubernetes.io/hostname": "titan-24" + }, + "images": [ + "nvcr.io/nvidia/k8s-device-plugin:v0.16.2" + ] + }, + { + "kind": "DaemonSet", + "namespace": "kube-system", + "name": "vault-csi-provider", + "labels": { + "app.kubernetes.io/name": "vault-csi-provider" + }, + "serviceAccountName": "vault-csi-provider", + "nodeSelector": { + "kubernetes.io/os": 
"linux" + }, + "images": [ + "hashicorp/vault-csi-provider:1.7.0" + ] + }, + { + "kind": "Deployment", + "namespace": "longhorn-system", + "name": "oauth2-proxy-longhorn", + "labels": { + "app": "oauth2-proxy-longhorn" + }, + "serviceAccountName": null, + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "quay.io/oauth2-proxy/oauth2-proxy:v7.6.0" + ] + }, + { + "kind": "DaemonSet", + "namespace": "mailu-mailserver", + "name": "vip-controller", + "labels": { + "app": "vip-controller" + }, + "serviceAccountName": "vip-controller", + "nodeSelector": { + "mailu.bstein.dev/vip": "true" + }, + "images": [ + "lachlanevenson/k8s-kubectl:latest" + ] + }, + { + "kind": "Deployment", + "namespace": "mailu-mailserver", + "name": "mailu-sync-listener", + "labels": { + "app": "mailu-sync-listener" + }, + "serviceAccountName": null, + "nodeSelector": {}, + "images": [ + "python:3.11-alpine" + ] + }, + { + "kind": "DaemonSet", + "namespace": "metallb-system", + "name": "metallb-speaker", + "labels": { + "app.kubernetes.io/component": "speaker", + "app.kubernetes.io/instance": "metallb", + "app.kubernetes.io/name": "metallb" + }, + "serviceAccountName": "metallb-speaker", + "nodeSelector": { + "kubernetes.io/os": "linux" + }, + "images": [ + "quay.io/frrouting/frr:10.4.1", + "quay.io/metallb/speaker:v0.15.3" + ] + }, + { + "kind": "Deployment", + "namespace": "metallb-system", + "name": "metallb-controller", + "labels": { + "app.kubernetes.io/component": "controller", + "app.kubernetes.io/instance": "metallb", + "app.kubernetes.io/name": "metallb" + }, + "serviceAccountName": "metallb-controller", + "nodeSelector": { + "kubernetes.io/os": "linux" + }, + "images": [ + "quay.io/metallb/controller:v0.15.3" + ] + }, + { + "kind": "DaemonSet", + "namespace": "monitoring", + "name": "dcgm-exporter", + "labels": { + "app": "dcgm-exporter" + }, + "serviceAccountName": "default", + "nodeSelector": {}, + "images": [ + 
"registry.bstein.dev/monitoring/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04" + ] + }, + { + "kind": "Deployment", + "namespace": "monitoring", + "name": "postmark-exporter", + "labels": { + "app": "postmark-exporter" + }, + "serviceAccountName": null, + "nodeSelector": {}, + "images": [ + "python:3.12-alpine" + ] + }, + { + "kind": "Deployment", + "namespace": "nextcloud", + "name": "nextcloud", + "labels": { + "app": "nextcloud" + }, + "serviceAccountName": null, + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "nextcloud:29-apache" + ] + }, + { + "kind": "Deployment", + "namespace": "sso", + "name": "keycloak", + "labels": { + "app": "keycloak" + }, + "serviceAccountName": null, + "nodeSelector": {}, + "images": [ + "quay.io/keycloak/keycloak:26.0.7" + ] + }, + { + "kind": "Deployment", + "namespace": "sso", + "name": "oauth2-proxy", + "labels": { + "app": "oauth2-proxy" + }, + "serviceAccountName": null, + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "quay.io/oauth2-proxy/oauth2-proxy:v7.6.0" + ] + }, + { + "kind": "StatefulSet", + "namespace": "sso", + "name": "openldap", + "labels": { + "app": "openldap" + }, + "serviceAccountName": null, + "nodeSelector": { + "kubernetes.io/arch": "arm64", + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "docker.io/osixia/openldap:1.5.0" + ] + }, + { + "kind": "Deployment", + "namespace": "sui-metrics", + "name": "sui-metrics", + "labels": { + "app": "sui-metrics" + }, + "serviceAccountName": "sui-metrics", + "nodeSelector": { + "kubernetes.io/hostname": "titan-24" + }, + "images": [ + "victoriametrics/vmagent:v1.103.0" + ] + }, + { + "kind": "Deployment", + "namespace": "traefik", + "name": "traefik", + "labels": { + "app": "traefik" + }, + "serviceAccountName": "traefik-ingress-controller", + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "traefik:v3.3.3" + ] + }, + { + "kind": "StatefulSet", + "namespace": "vault", + "name": 
"vault", + "labels": { + "app": "vault" + }, + "serviceAccountName": "vault", + "nodeSelector": { + "kubernetes.io/arch": "arm64", + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "hashicorp/vault:1.17.6" + ] + }, + { + "kind": "Deployment", + "namespace": "vaultwarden", + "name": "vaultwarden", + "labels": { + "app": "vaultwarden" + }, + "serviceAccountName": null, + "nodeSelector": {}, + "images": [ + "vaultwarden/server:1.33.2" + ] + } + ], + "services": [ + { + "namespace": "ai", + "name": "ollama", + "type": "ClusterIP", + "selector": { + "app": "ollama" + }, + "ports": [ + { + "name": "http", + "port": 11434, + "targetPort": 11434, + "protocol": "TCP" + } + ] + }, + { + "namespace": "bstein-dev-home", + "name": "bstein-dev-home-backend", + "type": "ClusterIP", + "selector": { + "app": "bstein-dev-home-backend" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": 8080, + "protocol": "TCP" + } + ] + }, + { + "namespace": "bstein-dev-home", + "name": "bstein-dev-home-frontend", + "type": "ClusterIP", + "selector": { + "app": "bstein-dev-home-frontend" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": 80, + "protocol": "TCP" + } + ] + }, + { + "namespace": "bstein-dev-home", + "name": "chat-ai-gateway", + "type": "ClusterIP", + "selector": { + "app": "chat-ai-gateway" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": 8080, + "protocol": "TCP" + } + ] + }, + { + "namespace": "ci-demo", + "name": "ci-demo", + "type": "ClusterIP", + "selector": { + "app.kubernetes.io/name": "ci-demo" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "comms", + "name": "coturn", + "type": "LoadBalancer", + "selector": { + "app": "coturn" + }, + "ports": [ + { + "name": "turn-udp", + "port": 3478, + "targetPort": 3478, + "protocol": "UDP" + }, + { + "name": "turn-tcp", + "port": 3478, + "targetPort": 3478, + "protocol": "TCP" + }, + { 
+ "name": "turn-tls", + "port": 5349, + "targetPort": 5349, + "protocol": "TCP" + }, + { + "name": "relay-50000", + "port": 50000, + "targetPort": 50000, + "protocol": "UDP" + }, + { + "name": "relay-50001", + "port": 50001, + "targetPort": 50001, + "protocol": "UDP" + }, + { + "name": "relay-50002", + "port": 50002, + "targetPort": 50002, + "protocol": "UDP" + }, + { + "name": "relay-50003", + "port": 50003, + "targetPort": 50003, + "protocol": "UDP" + }, + { + "name": "relay-50004", + "port": 50004, + "targetPort": 50004, + "protocol": "UDP" + }, + { + "name": "relay-50005", + "port": 50005, + "targetPort": 50005, + "protocol": "UDP" + }, + { + "name": "relay-50006", + "port": 50006, + "targetPort": 50006, + "protocol": "UDP" + }, + { + "name": "relay-50007", + "port": 50007, + "targetPort": 50007, + "protocol": "UDP" + }, + { + "name": "relay-50008", + "port": 50008, + "targetPort": 50008, + "protocol": "UDP" + }, + { + "name": "relay-50009", + "port": 50009, + "targetPort": 50009, + "protocol": "UDP" + }, + { + "name": "relay-50010", + "port": 50010, + "targetPort": 50010, + "protocol": "UDP" + }, + { + "name": "relay-50011", + "port": 50011, + "targetPort": 50011, + "protocol": "UDP" + }, + { + "name": "relay-50012", + "port": 50012, + "targetPort": 50012, + "protocol": "UDP" + }, + { + "name": "relay-50013", + "port": 50013, + "targetPort": 50013, + "protocol": "UDP" + }, + { + "name": "relay-50014", + "port": 50014, + "targetPort": 50014, + "protocol": "UDP" + }, + { + "name": "relay-50015", + "port": 50015, + "targetPort": 50015, + "protocol": "UDP" + }, + { + "name": "relay-50016", + "port": 50016, + "targetPort": 50016, + "protocol": "UDP" + }, + { + "name": "relay-50017", + "port": 50017, + "targetPort": 50017, + "protocol": "UDP" + }, + { + "name": "relay-50018", + "port": 50018, + "targetPort": 50018, + "protocol": "UDP" + }, + { + "name": "relay-50019", + "port": 50019, + "targetPort": 50019, + "protocol": "UDP" + }, + { + "name": "relay-50020", + 
"port": 50020, + "targetPort": 50020, + "protocol": "UDP" + }, + { + "name": "relay-50021", + "port": 50021, + "targetPort": 50021, + "protocol": "UDP" + }, + { + "name": "relay-50022", + "port": 50022, + "targetPort": 50022, + "protocol": "UDP" + }, + { + "name": "relay-50023", + "port": 50023, + "targetPort": 50023, + "protocol": "UDP" + }, + { + "name": "relay-50024", + "port": 50024, + "targetPort": 50024, + "protocol": "UDP" + }, + { + "name": "relay-50025", + "port": 50025, + "targetPort": 50025, + "protocol": "UDP" + }, + { + "name": "relay-50026", + "port": 50026, + "targetPort": 50026, + "protocol": "UDP" + }, + { + "name": "relay-50027", + "port": 50027, + "targetPort": 50027, + "protocol": "UDP" + }, + { + "name": "relay-50028", + "port": 50028, + "targetPort": 50028, + "protocol": "UDP" + }, + { + "name": "relay-50029", + "port": 50029, + "targetPort": 50029, + "protocol": "UDP" + }, + { + "name": "relay-50030", + "port": 50030, + "targetPort": 50030, + "protocol": "UDP" + }, + { + "name": "relay-50031", + "port": 50031, + "targetPort": 50031, + "protocol": "UDP" + }, + { + "name": "relay-50032", + "port": 50032, + "targetPort": 50032, + "protocol": "UDP" + }, + { + "name": "relay-50033", + "port": 50033, + "targetPort": 50033, + "protocol": "UDP" + }, + { + "name": "relay-50034", + "port": 50034, + "targetPort": 50034, + "protocol": "UDP" + }, + { + "name": "relay-50035", + "port": 50035, + "targetPort": 50035, + "protocol": "UDP" + }, + { + "name": "relay-50036", + "port": 50036, + "targetPort": 50036, + "protocol": "UDP" + }, + { + "name": "relay-50037", + "port": 50037, + "targetPort": 50037, + "protocol": "UDP" + }, + { + "name": "relay-50038", + "port": 50038, + "targetPort": 50038, + "protocol": "UDP" + }, + { + "name": "relay-50039", + "port": 50039, + "targetPort": 50039, + "protocol": "UDP" + }, + { + "name": "relay-50040", + "port": 50040, + "targetPort": 50040, + "protocol": "UDP" + }, + { + "name": "relay-50041", + "port": 50041, + 
"targetPort": 50041, + "protocol": "UDP" + }, + { + "name": "relay-50042", + "port": 50042, + "targetPort": 50042, + "protocol": "UDP" + }, + { + "name": "relay-50043", + "port": 50043, + "targetPort": 50043, + "protocol": "UDP" + }, + { + "name": "relay-50044", + "port": 50044, + "targetPort": 50044, + "protocol": "UDP" + }, + { + "name": "relay-50045", + "port": 50045, + "targetPort": 50045, + "protocol": "UDP" + }, + { + "name": "relay-50046", + "port": 50046, + "targetPort": 50046, + "protocol": "UDP" + }, + { + "name": "relay-50047", + "port": 50047, + "targetPort": 50047, + "protocol": "UDP" + }, + { + "name": "relay-50048", + "port": 50048, + "targetPort": 50048, + "protocol": "UDP" + }, + { + "name": "relay-50049", + "port": 50049, + "targetPort": 50049, + "protocol": "UDP" + }, + { + "name": "relay-50050", + "port": 50050, + "targetPort": 50050, + "protocol": "UDP" + } + ] + }, + { + "namespace": "comms", + "name": "element-call", + "type": "ClusterIP", + "selector": { + "app": "element-call" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": 8080, + "protocol": "TCP" + } + ] + }, + { + "namespace": "comms", + "name": "livekit", + "type": "LoadBalancer", + "selector": { + "app": "livekit" + }, + "ports": [ + { + "name": "http", + "port": 7880, + "targetPort": 7880, + "protocol": "TCP" + }, + { + "name": "rtc-tcp", + "port": 7881, + "targetPort": 7881, + "protocol": "TCP" + }, + { + "name": "rtc-udp-7882", + "port": 7882, + "targetPort": 7882, + "protocol": "UDP" + }, + { + "name": "rtc-udp-7883", + "port": 7883, + "targetPort": 7883, + "protocol": "UDP" + } + ] + }, + { + "namespace": "comms", + "name": "livekit-token-service", + "type": "ClusterIP", + "selector": { + "app": "livekit-token-service" + }, + "ports": [ + { + "name": "http", + "port": 8080, + "targetPort": 8080, + "protocol": "TCP" + } + ] + }, + { + "namespace": "comms", + "name": "matrix-authentication-service", + "type": "ClusterIP", + "selector": { + "app": 
"matrix-authentication-service" + }, + "ports": [ + { + "name": "http", + "port": 8080, + "targetPort": "http", + "protocol": "TCP" + }, + { + "name": "internal", + "port": 8081, + "targetPort": "internal", + "protocol": "TCP" + } + ] + }, + { + "namespace": "comms", + "name": "matrix-wellknown", + "type": "ClusterIP", + "selector": { + "app": "matrix-wellknown" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": 80, + "protocol": "TCP" + } + ] + }, + { + "namespace": "comms", + "name": "othrys-element-element-web", + "type": "ClusterIP", + "selector": { + "app.kubernetes.io/instance": "othrys-element", + "app.kubernetes.io/name": "element-web" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "comms", + "name": "othrys-synapse-matrix-synapse", + "type": "ClusterIP", + "selector": { + "app.kubernetes.io/component": "synapse", + "app.kubernetes.io/instance": "othrys-synapse", + "app.kubernetes.io/name": "matrix-synapse" + }, + "ports": [ + { + "name": "http", + "port": 8008, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "comms", + "name": "othrys-synapse-redis-headless", + "type": "ClusterIP", + "selector": { + "app.kubernetes.io/instance": "othrys-synapse", + "app.kubernetes.io/name": "redis" + }, + "ports": [ + { + "name": "tcp-redis", + "port": 6379, + "targetPort": "redis", + "protocol": "TCP" + } + ] + }, + { + "namespace": "comms", + "name": "othrys-synapse-redis-master", + "type": "ClusterIP", + "selector": { + "app.kubernetes.io/component": "master", + "app.kubernetes.io/instance": "othrys-synapse", + "app.kubernetes.io/name": "redis" + }, + "ports": [ + { + "name": "tcp-redis", + "port": 6379, + "targetPort": "redis", + "protocol": "TCP" + } + ] + }, + { + "namespace": "comms", + "name": "othrys-synapse-replication", + "type": "ClusterIP", + "selector": { + "app.kubernetes.io/component": "synapse", + "app.kubernetes.io/instance": 
"othrys-synapse", + "app.kubernetes.io/name": "matrix-synapse" + }, + "ports": [ + { + "name": "replication", + "port": 9093, + "targetPort": "replication", + "protocol": "TCP" + } + ] + }, + { + "namespace": "crypto", + "name": "monerod", + "type": "ClusterIP", + "selector": { + "app": "monerod" + }, + "ports": [ + { + "name": "rpc", + "port": 18081, + "targetPort": 18081, + "protocol": "TCP" + }, + { + "name": "p2p", + "port": 18080, + "targetPort": 18080, + "protocol": "TCP" + }, + { + "name": "zmq", + "port": 18083, + "targetPort": 18083, + "protocol": "TCP" + } + ] + }, + { + "namespace": "crypto", + "name": "p2pool", + "type": "ClusterIP", + "selector": { + "app": "p2pool" + }, + "ports": [ + { + "name": "stratum", + "port": 3333, + "targetPort": 3333, + "protocol": "TCP" + } + ] + }, + { + "namespace": "flux-system", + "name": "notification-controller", + "type": "ClusterIP", + "selector": { + "app": "notification-controller" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "flux-system", + "name": "source-controller", + "type": "ClusterIP", + "selector": { + "app": "source-controller" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "flux-system", + "name": "webhook-receiver", + "type": "ClusterIP", + "selector": { + "app": "notification-controller" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http-webhook", + "protocol": "TCP" + } + ] + }, + { + "namespace": "gitea", + "name": "gitea", + "type": "ClusterIP", + "selector": { + "app": "gitea" + }, + "ports": [ + { + "name": "http", + "port": 3000, + "targetPort": 3000, + "protocol": "TCP" + } + ] + }, + { + "namespace": "gitea", + "name": "gitea-ssh", + "type": "NodePort", + "selector": { + "app": "gitea" + }, + "ports": [ + { + "name": "ssh", + "port": 2242, + "targetPort": 2242, + "protocol": "TCP" + } + ] + }, + { + 
"namespace": "jellyfin", + "name": "jellyfin", + "type": "ClusterIP", + "selector": { + "app": "jellyfin" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": 8096, + "protocol": "TCP" + } + ] + }, + { + "namespace": "jellyfin", + "name": "pegasus", + "type": "ClusterIP", + "selector": { + "app": "pegasus" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "jenkins", + "name": "jenkins", + "type": "ClusterIP", + "selector": { + "app": "jenkins" + }, + "ports": [ + { + "name": "http", + "port": 8080, + "targetPort": 8080, + "protocol": "TCP" + }, + { + "name": "agent-listener", + "port": 50000, + "targetPort": 50000, + "protocol": "TCP" + } + ] + }, + { + "namespace": "kube-system", + "name": "traefik", + "type": "LoadBalancer", + "selector": { + "app.kubernetes.io/instance": "traefik-kube-system", + "app.kubernetes.io/name": "traefik" + }, + "ports": [ + { + "name": "web", + "port": 80, + "targetPort": "web", + "protocol": "TCP" + }, + { + "name": "websecure", + "port": 443, + "targetPort": "websecure", + "protocol": "TCP" + } + ] + }, + { + "namespace": "longhorn-system", + "name": "oauth2-proxy-longhorn", + "type": "ClusterIP", + "selector": { + "app": "oauth2-proxy-longhorn" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": 4180, + "protocol": "TCP" + } + ] + }, + { + "namespace": "mailu-mailserver", + "name": "mailu-front-lb", + "type": "LoadBalancer", + "selector": { + "app.kubernetes.io/component": "front", + "app.kubernetes.io/instance": "mailu", + "app.kubernetes.io/name": "mailu" + }, + "ports": [ + { + "name": "smtp", + "port": 25, + "targetPort": 25, + "protocol": "TCP" + }, + { + "name": "smtps", + "port": 465, + "targetPort": 465, + "protocol": "TCP" + }, + { + "name": "submission", + "port": 587, + "targetPort": 587, + "protocol": "TCP" + }, + { + "name": "imaps", + "port": 993, + "targetPort": 993, + "protocol": "TCP" + }, + { + "name": 
"pop3s", + "port": 995, + "targetPort": 995, + "protocol": "TCP" + }, + { + "name": "sieve", + "port": 4190, + "targetPort": 4190, + "protocol": "TCP" + } + ] + }, + { + "namespace": "mailu-mailserver", + "name": "mailu-sync-listener", + "type": "ClusterIP", + "selector": { + "app": "mailu-sync-listener" + }, + "ports": [ + { + "name": "http", + "port": 8080, + "targetPort": 8080, + "protocol": "TCP" + } + ] + }, + { + "namespace": "metallb-system", + "name": "metallb-webhook-service", + "type": "ClusterIP", + "selector": { + "app.kubernetes.io/component": "controller", + "app.kubernetes.io/instance": "metallb", + "app.kubernetes.io/name": "metallb" + }, + "ports": [ + { + "name": null, + "port": 443, + "targetPort": 9443, + "protocol": "TCP" + } + ] + }, + { + "namespace": "monitoring", + "name": "dcgm-exporter", + "type": "ClusterIP", + "selector": { + "app": "dcgm-exporter" + }, + "ports": [ + { + "name": "metrics", + "port": 9400, + "targetPort": "metrics", + "protocol": "TCP" + } + ] + }, + { + "namespace": "monitoring", + "name": "postmark-exporter", + "type": "ClusterIP", + "selector": { + "app": "postmark-exporter" + }, + "ports": [ + { + "name": "http", + "port": 8000, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "nextcloud", + "name": "nextcloud", + "type": "ClusterIP", + "selector": { + "app": "nextcloud" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "sso", + "name": "keycloak", + "type": "ClusterIP", + "selector": { + "app": "keycloak" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "sso", + "name": "oauth2-proxy", + "type": "ClusterIP", + "selector": { + "app": "oauth2-proxy" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": 4180, + "protocol": "TCP" + } + ] + }, + { + "namespace": "sso", + "name": "openldap", + "type": "ClusterIP", + 
"selector": { + "app": "openldap" + }, + "ports": [ + { + "name": "ldap", + "port": 389, + "targetPort": "ldap", + "protocol": "TCP" + }, + { + "name": "ldaps", + "port": 636, + "targetPort": "ldaps", + "protocol": "TCP" + } + ] + }, + { + "namespace": "sui-metrics", + "name": "sui-metrics", + "type": "ClusterIP", + "selector": { + "app": "sui-metrics" + }, + "ports": [ + { + "name": "http", + "port": 8429, + "targetPort": 8429, + "protocol": "TCP" + } + ] + }, + { + "namespace": "traefik", + "name": "traefik-metrics", + "type": "ClusterIP", + "selector": { + "app": "traefik" + }, + "ports": [ + { + "name": "metrics", + "port": 9100, + "targetPort": "metrics", + "protocol": "TCP" + } + ] + }, + { + "namespace": "vault", + "name": "vault", + "type": "ClusterIP", + "selector": { + "app": "vault" + }, + "ports": [ + { + "name": "api", + "port": 8200, + "targetPort": 8200, + "protocol": "TCP" + }, + { + "name": "cluster", + "port": 8201, + "targetPort": 8201, + "protocol": "TCP" + } + ] + }, + { + "namespace": "vault", + "name": "vault-internal", + "type": "ClusterIP", + "selector": { + "app": "vault" + }, + "ports": [ + { + "name": "api", + "port": 8200, + "targetPort": 8200, + "protocol": "TCP" + }, + { + "name": "cluster", + "port": 8201, + "targetPort": 8201, + "protocol": "TCP" + } + ] + }, + { + "namespace": "vaultwarden", + "name": "vaultwarden-service", + "type": "ClusterIP", + "selector": { + "app": "vaultwarden" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + } + ], + "http_endpoints": [ + { + "host": "auth.bstein.dev", + "path": "/", + "backend": { + "namespace": "sso", + "service": "oauth2-proxy", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "oauth2-proxy" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "oauth2-proxy", + "source": "oauth2-proxy" + } + }, + { + "host": "bstein.dev", + "path": "/", + "backend": { + "namespace": "bstein-dev-home", + "service": 
"bstein-dev-home-frontend", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "bstein-dev-home-frontend" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "bstein-dev-home", + "source": "bstein-dev-home" + } + }, + { + "host": "bstein.dev", + "path": "/.well-known/matrix", + "backend": { + "namespace": "comms", + "service": "othrys-synapse-matrix-synapse", + "port": 8008, + "workloads": [ + { + "kind": "Deployment", + "name": "othrys-synapse-matrix-synapse" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "othrys-synapse-matrix-synapse", + "source": "communication" + } + }, + { + "host": "bstein.dev", + "path": "/api", + "backend": { + "namespace": "bstein-dev-home", + "service": "bstein-dev-home-backend", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "bstein-dev-home-backend" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "bstein-dev-home", + "source": "bstein-dev-home" + } + }, + { + "host": "call.live.bstein.dev", + "path": "/", + "backend": { + "namespace": "comms", + "service": "element-call", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "element-call" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "element-call", + "source": "communication" + } + }, + { + "host": "chat.ai.bstein.dev", + "path": "/", + "backend": { + "namespace": "bstein-dev-home", + "service": "chat-ai-gateway", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "chat-ai-gateway" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "bstein-dev-home", + "source": "bstein-dev-home" + } + }, + { + "host": "ci.bstein.dev", + "path": "/", + "backend": { + "namespace": "jenkins", + "service": "jenkins", + "port": "http", + "workloads": [ + { + "kind": "Deployment", + "name": "jenkins" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "jenkins", + "source": "jenkins" + } + }, + { + "host": "cloud.bstein.dev", + "path": "/", + "backend": { + "namespace": "nextcloud", + "service": 
"nextcloud", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "nextcloud" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "nextcloud", + "source": "nextcloud" + } + }, + { + "host": "kit.live.bstein.dev", + "path": "/livekit/jwt", + "backend": { + "namespace": "comms", + "service": "livekit-token-service", + "port": 8080, + "workloads": [ + { + "kind": "Deployment", + "name": "livekit-token-service" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "livekit-jwt-ingress", + "source": "communication" + } + }, + { + "host": "kit.live.bstein.dev", + "path": "/livekit/sfu", + "backend": { + "namespace": "comms", + "service": "livekit", + "port": 7880, + "workloads": [ + { + "kind": "Deployment", + "name": "livekit" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "livekit-ingress", + "source": "communication" + } + }, + { + "host": "live.bstein.dev", + "path": "/", + "backend": { + "namespace": "comms", + "service": "othrys-element-element-web", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "othrys-element-element-web" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "othrys-element-element-web", + "source": "communication" + } + }, + { + "host": "live.bstein.dev", + "path": "/.well-known/matrix", + "backend": { + "namespace": "comms", + "service": "othrys-synapse-matrix-synapse", + "port": 8008, + "workloads": [ + { + "kind": "Deployment", + "name": "othrys-synapse-matrix-synapse" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "othrys-synapse-matrix-synapse", + "source": "communication" + } + }, + { + "host": "live.bstein.dev", + "path": "/.well-known/matrix/client", + "backend": { + "namespace": "comms", + "service": "matrix-wellknown", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-wellknown" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-wellknown", + "source": "communication" + } + }, + { + "host": "live.bstein.dev", + "path": 
"/.well-known/matrix/server", + "backend": { + "namespace": "comms", + "service": "matrix-wellknown", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-wellknown" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-wellknown", + "source": "communication" + } + }, + { + "host": "live.bstein.dev", + "path": "/_matrix", + "backend": { + "namespace": "comms", + "service": "othrys-synapse-matrix-synapse", + "port": 8008, + "workloads": [ + { + "kind": "Deployment", + "name": "othrys-synapse-matrix-synapse" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "othrys-synapse-matrix-synapse", + "source": "communication" + } + }, + { + "host": "longhorn.bstein.dev", + "path": "/", + "backend": { + "namespace": "longhorn-system", + "service": "oauth2-proxy-longhorn", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "oauth2-proxy-longhorn" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "longhorn-ingress", + "source": "longhorn-ui" + } + }, + { + "host": "mail.bstein.dev", + "path": "/", + "backend": { + "namespace": "mailu-mailserver", + "service": "mailu-front", + "port": 443, + "workloads": [] + }, + "via": { + "kind": "IngressRoute", + "name": "mailu", + "source": "mailu" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": "/", + "backend": { + "namespace": "comms", + "service": "matrix-authentication-service", + "port": 8080, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-authentication-service" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-authentication-service", + "source": "communication" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": "/.well-known/matrix/client", + "backend": { + "namespace": "comms", + "service": "matrix-wellknown", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-wellknown" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-wellknown-matrix-live", + "source": "communication" + } + }, + { + 
"host": "matrix.live.bstein.dev", + "path": "/.well-known/matrix/server", + "backend": { + "namespace": "comms", + "service": "matrix-wellknown", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-wellknown" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-wellknown-matrix-live", + "source": "communication" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": "/_matrix", + "backend": { + "namespace": "comms", + "service": "othrys-synapse-matrix-synapse", + "port": 8008, + "workloads": [ + { + "kind": "Deployment", + "name": "othrys-synapse-matrix-synapse" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "othrys-synapse-matrix-synapse", + "source": "communication" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": "/_matrix/client/v3/login", + "backend": { + "namespace": "comms", + "service": "matrix-authentication-service", + "port": 8080, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-authentication-service" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-authentication-service-compat", + "source": "communication" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": "/_matrix/client/v3/logout", + "backend": { + "namespace": "comms", + "service": "matrix-authentication-service", + "port": 8080, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-authentication-service" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-authentication-service-compat", + "source": "communication" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": "/_matrix/client/v3/refresh", + "backend": { + "namespace": "comms", + "service": "matrix-authentication-service", + "port": 8080, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-authentication-service" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-authentication-service-compat", + "source": "communication" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": "/_synapse", + 
"backend": { + "namespace": "comms", + "service": "othrys-synapse-matrix-synapse", + "port": 8008, + "workloads": [ + { + "kind": "Deployment", + "name": "othrys-synapse-matrix-synapse" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "othrys-synapse-matrix-synapse", + "source": "communication" + } + }, + { + "host": "monero.bstein.dev", + "path": "/", + "backend": { + "namespace": "crypto", + "service": "monerod", + "port": 18081, + "workloads": [ + { + "kind": "Deployment", + "name": "monerod" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "monerod", + "source": "monerod" + } + }, + { + "host": "pegasus.bstein.dev", + "path": "/", + "backend": { + "namespace": "jellyfin", + "service": "pegasus", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "pegasus" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "pegasus", + "source": "pegasus" + } + }, + { + "host": "scm.bstein.dev", + "path": "/", + "backend": { + "namespace": "gitea", + "service": "gitea", + "port": 3000, + "workloads": [ + { + "kind": "Deployment", + "name": "gitea" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "gitea-ingress", + "source": "gitea" + } + }, + { + "host": "secret.bstein.dev", + "path": "/", + "backend": { + "namespace": "vault", + "service": "vault", + "port": 8200, + "workloads": [ + { + "kind": "StatefulSet", + "name": "vault" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "vault", + "source": "vault" + } + }, + { + "host": "sso.bstein.dev", + "path": "/", + "backend": { + "namespace": "sso", + "service": "keycloak", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "keycloak" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "keycloak", + "source": "keycloak" + } + }, + { + "host": "stream.bstein.dev", + "path": "/", + "backend": { + "namespace": "jellyfin", + "service": "jellyfin", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "jellyfin" + } + ] + }, + "via": { + "kind": "Ingress", 
+ "name": "jellyfin", + "source": "jellyfin" + } + }, + { + "host": "vault.bstein.dev", + "path": "/", + "backend": { + "namespace": "vaultwarden", + "service": "vaultwarden-service", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "vaultwarden" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "vaultwarden-ingress", + "source": "vaultwarden" + } + } + ], + "helmrelease_host_hints": { + "gitops-ui:flux-system/weave-gitops": [ + "cd.bstein.dev" + ], + "harbor:harbor/harbor": [ + "registry.bstein.dev" + ], + "mailu:mailu-mailserver/mailu": [ + "bstein.dev", + "mail.bstein.dev" + ], + "monitoring:monitoring/alertmanager": [ + "alerts.bstein.dev" + ], + "monitoring:monitoring/grafana": [ + "metrics.bstein.dev", + "sso.bstein.dev" + ] + } +} diff --git a/knowledge/catalog/atlas.yaml b/knowledge/catalog/atlas.yaml new file mode 100644 index 0000000..4b2e8bd --- /dev/null +++ b/knowledge/catalog/atlas.yaml @@ -0,0 +1,1726 @@ +# Generated by scripts/knowledge_render_atlas.py (do not edit by hand) +cluster: atlas +sources: +- name: ai-llm + path: services/ai-llm + targetNamespace: ai +- name: bstein-dev-home + path: services/bstein-dev-home + targetNamespace: bstein-dev-home +- name: ci-demo + path: services/ci-demo + targetNamespace: null +- name: comms + path: services/comms + targetNamespace: comms +- name: communication + path: services/communication + targetNamespace: comms +- name: core + path: infrastructure/core + targetNamespace: null +- name: crypto + path: services/crypto + targetNamespace: crypto +- name: flux-system + path: clusters/atlas/flux-system + targetNamespace: null +- name: gitea + path: services/gitea + targetNamespace: gitea +- name: gitops-ui + path: services/gitops-ui + targetNamespace: flux-system +- name: harbor + path: services/harbor + targetNamespace: harbor +- name: helm + path: infrastructure/sources/helm + targetNamespace: flux-system +- name: jellyfin + path: services/jellyfin + targetNamespace: jellyfin +- name: 
jenkins + path: services/jenkins + targetNamespace: jenkins +- name: keycloak + path: services/keycloak + targetNamespace: sso +- name: longhorn-ui + path: infrastructure/longhorn/ui-ingress + targetNamespace: longhorn-system +- name: mailu + path: services/mailu + targetNamespace: mailu-mailserver +- name: metallb + path: infrastructure/metallb + targetNamespace: metallb-system +- name: monerod + path: services/crypto/monerod + targetNamespace: crypto +- name: monitoring + path: services/monitoring + targetNamespace: null +- name: nextcloud + path: services/nextcloud + targetNamespace: nextcloud +- name: nextcloud-mail-sync + path: services/nextcloud-mail-sync + targetNamespace: nextcloud +- name: oauth2-proxy + path: services/oauth2-proxy + targetNamespace: sso +- name: openldap + path: services/openldap + targetNamespace: sso +- name: pegasus + path: services/pegasus + targetNamespace: jellyfin +- name: sui-metrics + path: services/sui-metrics/overlays/atlas + targetNamespace: sui-metrics +- name: traefik + path: infrastructure/traefik + targetNamespace: traefik +- name: vault + path: services/vault + targetNamespace: vault +- name: vault-csi + path: infrastructure/vault-csi + targetNamespace: kube-system +- name: vaultwarden + path: services/vaultwarden + targetNamespace: vaultwarden +- name: xmr-miner + path: services/crypto/xmr-miner + targetNamespace: crypto +workloads: +- kind: Deployment + namespace: ai + name: ollama + labels: + app: ollama + serviceAccountName: null + nodeSelector: {} + images: + - ollama/ollama:latest +- kind: Deployment + namespace: bstein-dev-home + name: bstein-dev-home-backend + labels: + app: bstein-dev-home-backend + serviceAccountName: bstein-dev-home + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: 'true' + images: + - registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-84 +- kind: Deployment + namespace: bstein-dev-home + name: bstein-dev-home-frontend + labels: + app: bstein-dev-home-frontend 
+ serviceAccountName: null + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: 'true' + images: + - registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-84 +- kind: Deployment + namespace: bstein-dev-home + name: chat-ai-gateway + labels: + app: chat-ai-gateway + serviceAccountName: null + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: 'true' + images: + - python:3.11-slim +- kind: Deployment + namespace: ci-demo + name: ci-demo + labels: + app.kubernetes.io/name: ci-demo + serviceAccountName: null + nodeSelector: + hardware: rpi4 + images: + - registry.bstein.dev/infra/ci-demo:v0.0.0-3 +- kind: Deployment + namespace: comms + name: atlasbot + labels: + app: atlasbot + serviceAccountName: atlasbot + nodeSelector: + hardware: rpi5 + images: + - python:3.11-slim +- kind: Deployment + namespace: comms + name: coturn + labels: + app: coturn + serviceAccountName: null + nodeSelector: + hardware: rpi5 + images: + - ghcr.io/coturn/coturn:4.6.2 +- kind: Deployment + namespace: comms + name: element-call + labels: + app: element-call + serviceAccountName: null + nodeSelector: + hardware: rpi5 + images: + - ghcr.io/element-hq/element-call:latest +- kind: Deployment + namespace: comms + name: livekit + labels: + app: livekit + serviceAccountName: null + nodeSelector: + hardware: rpi5 + images: + - livekit/livekit-server:v1.9.0 +- kind: Deployment + namespace: comms + name: livekit-token-service + labels: + app: livekit-token-service + serviceAccountName: null + nodeSelector: + hardware: rpi5 + images: + - ghcr.io/element-hq/lk-jwt-service:0.3.0 +- kind: Deployment + namespace: comms + name: matrix-authentication-service + labels: + app: matrix-authentication-service + serviceAccountName: null + nodeSelector: + hardware: rpi5 + images: + - ghcr.io/element-hq/matrix-authentication-service:1.8.0 +- kind: Deployment + namespace: comms + name: matrix-wellknown + labels: + app: matrix-wellknown + serviceAccountName: null + 
nodeSelector: {} + images: + - nginx:1.27-alpine +- kind: Deployment + namespace: comms + name: othrys-element-element-web + labels: + app.kubernetes.io/instance: othrys-element + app.kubernetes.io/name: element-web + serviceAccountName: othrys-element-element-web + nodeSelector: + hardware: rpi5 + images: + - ghcr.io/element-hq/element-web:v1.12.6 +- kind: Deployment + namespace: comms + name: othrys-synapse-matrix-synapse + labels: + app.kubernetes.io/component: synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/name: matrix-synapse + serviceAccountName: default + nodeSelector: + hardware: rpi5 + images: + - ghcr.io/element-hq/synapse:v1.144.0 +- kind: Deployment + namespace: comms + name: othrys-synapse-redis-master + labels: + app.kubernetes.io/component: master + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: redis + helm.sh/chart: redis-17.17.1 + serviceAccountName: othrys-synapse-redis + nodeSelector: {} + images: + - docker.io/bitnamilegacy/redis:7.0.12-debian-11-r34 +- kind: DaemonSet + namespace: crypto + name: monero-xmrig + labels: + app: monero-xmrig + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - ghcr.io/tari-project/xmrig:latest +- kind: Deployment + namespace: crypto + name: monero-p2pool + labels: + app: monero-p2pool + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - debian:bookworm-slim +- kind: Deployment + namespace: crypto + name: monerod + labels: + app: monerod + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - registry.bstein.dev/crypto/monerod:0.18.4.1 +- kind: Deployment + namespace: flux-system + name: helm-controller + labels: + app: helm-controller + app.kubernetes.io/component: helm-controller + app.kubernetes.io/instance: flux-system + app.kubernetes.io/part-of: flux + app.kubernetes.io/version: v2.7.5 + 
serviceAccountName: helm-controller + nodeSelector: + kubernetes.io/os: linux + images: + - ghcr.io/fluxcd/helm-controller:v1.4.5 +- kind: Deployment + namespace: flux-system + name: image-automation-controller + labels: + app: image-automation-controller + app.kubernetes.io/component: image-automation-controller + app.kubernetes.io/instance: flux-system + app.kubernetes.io/part-of: flux + app.kubernetes.io/version: v2.7.5 + serviceAccountName: image-automation-controller + nodeSelector: + kubernetes.io/os: linux + images: + - ghcr.io/fluxcd/image-automation-controller:v1.0.4 +- kind: Deployment + namespace: flux-system + name: image-reflector-controller + labels: + app: image-reflector-controller + app.kubernetes.io/component: image-reflector-controller + app.kubernetes.io/instance: flux-system + app.kubernetes.io/part-of: flux + app.kubernetes.io/version: v2.7.5 + serviceAccountName: image-reflector-controller + nodeSelector: + kubernetes.io/os: linux + images: + - ghcr.io/fluxcd/image-reflector-controller:v1.0.4 +- kind: Deployment + namespace: flux-system + name: kustomize-controller + labels: + app: kustomize-controller + app.kubernetes.io/component: kustomize-controller + app.kubernetes.io/instance: flux-system + app.kubernetes.io/part-of: flux + app.kubernetes.io/version: v2.7.5 + serviceAccountName: kustomize-controller + nodeSelector: + kubernetes.io/os: linux + images: + - ghcr.io/fluxcd/kustomize-controller:v1.7.3 +- kind: Deployment + namespace: flux-system + name: notification-controller + labels: + app: notification-controller + app.kubernetes.io/component: notification-controller + app.kubernetes.io/instance: flux-system + app.kubernetes.io/part-of: flux + app.kubernetes.io/version: v2.7.5 + serviceAccountName: notification-controller + nodeSelector: + kubernetes.io/os: linux + images: + - ghcr.io/fluxcd/notification-controller:v1.7.5 +- kind: Deployment + namespace: flux-system + name: source-controller + labels: + app: source-controller + 
app.kubernetes.io/component: source-controller + app.kubernetes.io/instance: flux-system + app.kubernetes.io/part-of: flux + app.kubernetes.io/version: v2.7.5 + serviceAccountName: source-controller + nodeSelector: + kubernetes.io/os: linux + images: + - ghcr.io/fluxcd/source-controller:v1.7.4 +- kind: Deployment + namespace: gitea + name: gitea + labels: + app: gitea + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - gitea/gitea:1.23 +- kind: Deployment + namespace: jellyfin + name: jellyfin + labels: + app: jellyfin + serviceAccountName: null + nodeSelector: {} + images: + - docker.io/jellyfin/jellyfin:10.11.5 +- kind: Deployment + namespace: jellyfin + name: pegasus + labels: + app: pegasus + serviceAccountName: null + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: 'true' + images: + - alpine:3.20 + - registry.bstein.dev/streaming/pegasus:1.2.32 +- kind: Deployment + namespace: jenkins + name: jenkins + labels: + app: jenkins + serviceAccountName: jenkins + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: 'true' + images: + - jenkins/jenkins:2.528.3-jdk21 +- kind: DaemonSet + namespace: kube-system + name: nvidia-device-plugin-jetson + labels: + app.kubernetes.io/instance: jetson + app.kubernetes.io/name: nvidia-device-plugin + serviceAccountName: null + nodeSelector: + jetson: 'true' + kubernetes.io/arch: arm64 + images: + - nvcr.io/nvidia/k8s-device-plugin:v0.16.2 +- kind: DaemonSet + namespace: kube-system + name: nvidia-device-plugin-minipc + labels: + app.kubernetes.io/instance: titan22 + app.kubernetes.io/name: nvidia-device-plugin + serviceAccountName: null + nodeSelector: + kubernetes.io/arch: amd64 + kubernetes.io/hostname: titan-22 + images: + - nvcr.io/nvidia/k8s-device-plugin:v0.16.2 +- kind: DaemonSet + namespace: kube-system + name: nvidia-device-plugin-tethys + labels: + app.kubernetes.io/instance: titan24 + app.kubernetes.io/name: 
nvidia-device-plugin + serviceAccountName: null + nodeSelector: + kubernetes.io/arch: amd64 + kubernetes.io/hostname: titan-24 + images: + - nvcr.io/nvidia/k8s-device-plugin:v0.16.2 +- kind: DaemonSet + namespace: kube-system + name: vault-csi-provider + labels: + app.kubernetes.io/name: vault-csi-provider + serviceAccountName: vault-csi-provider + nodeSelector: + kubernetes.io/os: linux + images: + - hashicorp/vault-csi-provider:1.7.0 +- kind: Deployment + namespace: longhorn-system + name: oauth2-proxy-longhorn + labels: + app: oauth2-proxy-longhorn + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 +- kind: DaemonSet + namespace: mailu-mailserver + name: vip-controller + labels: + app: vip-controller + serviceAccountName: vip-controller + nodeSelector: + mailu.bstein.dev/vip: 'true' + images: + - lachlanevenson/k8s-kubectl:latest +- kind: Deployment + namespace: mailu-mailserver + name: mailu-sync-listener + labels: + app: mailu-sync-listener + serviceAccountName: null + nodeSelector: {} + images: + - python:3.11-alpine +- kind: DaemonSet + namespace: metallb-system + name: metallb-speaker + labels: + app.kubernetes.io/component: speaker + app.kubernetes.io/instance: metallb + app.kubernetes.io/name: metallb + serviceAccountName: metallb-speaker + nodeSelector: + kubernetes.io/os: linux + images: + - quay.io/frrouting/frr:10.4.1 + - quay.io/metallb/speaker:v0.15.3 +- kind: Deployment + namespace: metallb-system + name: metallb-controller + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/instance: metallb + app.kubernetes.io/name: metallb + serviceAccountName: metallb-controller + nodeSelector: + kubernetes.io/os: linux + images: + - quay.io/metallb/controller:v0.15.3 +- kind: DaemonSet + namespace: monitoring + name: dcgm-exporter + labels: + app: dcgm-exporter + serviceAccountName: default + nodeSelector: {} + images: + - 
registry.bstein.dev/monitoring/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04 +- kind: Deployment + namespace: monitoring + name: postmark-exporter + labels: + app: postmark-exporter + serviceAccountName: null + nodeSelector: {} + images: + - python:3.12-alpine +- kind: Deployment + namespace: nextcloud + name: nextcloud + labels: + app: nextcloud + serviceAccountName: null + nodeSelector: + hardware: rpi5 + images: + - nextcloud:29-apache +- kind: Deployment + namespace: sso + name: keycloak + labels: + app: keycloak + serviceAccountName: null + nodeSelector: {} + images: + - quay.io/keycloak/keycloak:26.0.7 +- kind: Deployment + namespace: sso + name: oauth2-proxy + labels: + app: oauth2-proxy + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 +- kind: StatefulSet + namespace: sso + name: openldap + labels: + app: openldap + serviceAccountName: null + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: 'true' + images: + - docker.io/osixia/openldap:1.5.0 +- kind: Deployment + namespace: sui-metrics + name: sui-metrics + labels: + app: sui-metrics + serviceAccountName: sui-metrics + nodeSelector: + kubernetes.io/hostname: titan-24 + images: + - victoriametrics/vmagent:v1.103.0 +- kind: Deployment + namespace: traefik + name: traefik + labels: + app: traefik + serviceAccountName: traefik-ingress-controller + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - traefik:v3.3.3 +- kind: StatefulSet + namespace: vault + name: vault + labels: + app: vault + serviceAccountName: vault + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: 'true' + images: + - hashicorp/vault:1.17.6 +- kind: Deployment + namespace: vaultwarden + name: vaultwarden + labels: + app: vaultwarden + serviceAccountName: null + nodeSelector: {} + images: + - vaultwarden/server:1.33.2 +services: +- namespace: ai + name: ollama + type: ClusterIP + selector: + 
app: ollama + ports: + - name: http + port: 11434 + targetPort: 11434 + protocol: TCP +- namespace: bstein-dev-home + name: bstein-dev-home-backend + type: ClusterIP + selector: + app: bstein-dev-home-backend + ports: + - name: http + port: 80 + targetPort: 8080 + protocol: TCP +- namespace: bstein-dev-home + name: bstein-dev-home-frontend + type: ClusterIP + selector: + app: bstein-dev-home-frontend + ports: + - name: http + port: 80 + targetPort: 80 + protocol: TCP +- namespace: bstein-dev-home + name: chat-ai-gateway + type: ClusterIP + selector: + app: chat-ai-gateway + ports: + - name: http + port: 80 + targetPort: 8080 + protocol: TCP +- namespace: ci-demo + name: ci-demo + type: ClusterIP + selector: + app.kubernetes.io/name: ci-demo + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +- namespace: comms + name: coturn + type: LoadBalancer + selector: + app: coturn + ports: + - name: turn-udp + port: 3478 + targetPort: 3478 + protocol: UDP + - name: turn-tcp + port: 3478 + targetPort: 3478 + protocol: TCP + - name: turn-tls + port: 5349 + targetPort: 5349 + protocol: TCP + - name: relay-50000 + port: 50000 + targetPort: 50000 + protocol: UDP + - name: relay-50001 + port: 50001 + targetPort: 50001 + protocol: UDP + - name: relay-50002 + port: 50002 + targetPort: 50002 + protocol: UDP + - name: relay-50003 + port: 50003 + targetPort: 50003 + protocol: UDP + - name: relay-50004 + port: 50004 + targetPort: 50004 + protocol: UDP + - name: relay-50005 + port: 50005 + targetPort: 50005 + protocol: UDP + - name: relay-50006 + port: 50006 + targetPort: 50006 + protocol: UDP + - name: relay-50007 + port: 50007 + targetPort: 50007 + protocol: UDP + - name: relay-50008 + port: 50008 + targetPort: 50008 + protocol: UDP + - name: relay-50009 + port: 50009 + targetPort: 50009 + protocol: UDP + - name: relay-50010 + port: 50010 + targetPort: 50010 + protocol: UDP + - name: relay-50011 + port: 50011 + targetPort: 50011 + protocol: UDP + - name: relay-50012 
+ port: 50012 + targetPort: 50012 + protocol: UDP + - name: relay-50013 + port: 50013 + targetPort: 50013 + protocol: UDP + - name: relay-50014 + port: 50014 + targetPort: 50014 + protocol: UDP + - name: relay-50015 + port: 50015 + targetPort: 50015 + protocol: UDP + - name: relay-50016 + port: 50016 + targetPort: 50016 + protocol: UDP + - name: relay-50017 + port: 50017 + targetPort: 50017 + protocol: UDP + - name: relay-50018 + port: 50018 + targetPort: 50018 + protocol: UDP + - name: relay-50019 + port: 50019 + targetPort: 50019 + protocol: UDP + - name: relay-50020 + port: 50020 + targetPort: 50020 + protocol: UDP + - name: relay-50021 + port: 50021 + targetPort: 50021 + protocol: UDP + - name: relay-50022 + port: 50022 + targetPort: 50022 + protocol: UDP + - name: relay-50023 + port: 50023 + targetPort: 50023 + protocol: UDP + - name: relay-50024 + port: 50024 + targetPort: 50024 + protocol: UDP + - name: relay-50025 + port: 50025 + targetPort: 50025 + protocol: UDP + - name: relay-50026 + port: 50026 + targetPort: 50026 + protocol: UDP + - name: relay-50027 + port: 50027 + targetPort: 50027 + protocol: UDP + - name: relay-50028 + port: 50028 + targetPort: 50028 + protocol: UDP + - name: relay-50029 + port: 50029 + targetPort: 50029 + protocol: UDP + - name: relay-50030 + port: 50030 + targetPort: 50030 + protocol: UDP + - name: relay-50031 + port: 50031 + targetPort: 50031 + protocol: UDP + - name: relay-50032 + port: 50032 + targetPort: 50032 + protocol: UDP + - name: relay-50033 + port: 50033 + targetPort: 50033 + protocol: UDP + - name: relay-50034 + port: 50034 + targetPort: 50034 + protocol: UDP + - name: relay-50035 + port: 50035 + targetPort: 50035 + protocol: UDP + - name: relay-50036 + port: 50036 + targetPort: 50036 + protocol: UDP + - name: relay-50037 + port: 50037 + targetPort: 50037 + protocol: UDP + - name: relay-50038 + port: 50038 + targetPort: 50038 + protocol: UDP + - name: relay-50039 + port: 50039 + targetPort: 50039 + protocol: UDP + - 
name: relay-50040 + port: 50040 + targetPort: 50040 + protocol: UDP + - name: relay-50041 + port: 50041 + targetPort: 50041 + protocol: UDP + - name: relay-50042 + port: 50042 + targetPort: 50042 + protocol: UDP + - name: relay-50043 + port: 50043 + targetPort: 50043 + protocol: UDP + - name: relay-50044 + port: 50044 + targetPort: 50044 + protocol: UDP + - name: relay-50045 + port: 50045 + targetPort: 50045 + protocol: UDP + - name: relay-50046 + port: 50046 + targetPort: 50046 + protocol: UDP + - name: relay-50047 + port: 50047 + targetPort: 50047 + protocol: UDP + - name: relay-50048 + port: 50048 + targetPort: 50048 + protocol: UDP + - name: relay-50049 + port: 50049 + targetPort: 50049 + protocol: UDP + - name: relay-50050 + port: 50050 + targetPort: 50050 + protocol: UDP +- namespace: comms + name: element-call + type: ClusterIP + selector: + app: element-call + ports: + - name: http + port: 80 + targetPort: 8080 + protocol: TCP +- namespace: comms + name: livekit + type: LoadBalancer + selector: + app: livekit + ports: + - name: http + port: 7880 + targetPort: 7880 + protocol: TCP + - name: rtc-tcp + port: 7881 + targetPort: 7881 + protocol: TCP + - name: rtc-udp-7882 + port: 7882 + targetPort: 7882 + protocol: UDP + - name: rtc-udp-7883 + port: 7883 + targetPort: 7883 + protocol: UDP +- namespace: comms + name: livekit-token-service + type: ClusterIP + selector: + app: livekit-token-service + ports: + - name: http + port: 8080 + targetPort: 8080 + protocol: TCP +- namespace: comms + name: matrix-authentication-service + type: ClusterIP + selector: + app: matrix-authentication-service + ports: + - name: http + port: 8080 + targetPort: http + protocol: TCP + - name: internal + port: 8081 + targetPort: internal + protocol: TCP +- namespace: comms + name: matrix-wellknown + type: ClusterIP + selector: + app: matrix-wellknown + ports: + - name: http + port: 80 + targetPort: 80 + protocol: TCP +- namespace: comms + name: othrys-element-element-web + type: 
ClusterIP + selector: + app.kubernetes.io/instance: othrys-element + app.kubernetes.io/name: element-web + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +- namespace: comms + name: othrys-synapse-matrix-synapse + type: ClusterIP + selector: + app.kubernetes.io/component: synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/name: matrix-synapse + ports: + - name: http + port: 8008 + targetPort: http + protocol: TCP +- namespace: comms + name: othrys-synapse-redis-headless + type: ClusterIP + selector: + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/name: redis + ports: + - name: tcp-redis + port: 6379 + targetPort: redis + protocol: TCP +- namespace: comms + name: othrys-synapse-redis-master + type: ClusterIP + selector: + app.kubernetes.io/component: master + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/name: redis + ports: + - name: tcp-redis + port: 6379 + targetPort: redis + protocol: TCP +- namespace: comms + name: othrys-synapse-replication + type: ClusterIP + selector: + app.kubernetes.io/component: synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/name: matrix-synapse + ports: + - name: replication + port: 9093 + targetPort: replication + protocol: TCP +- namespace: crypto + name: monerod + type: ClusterIP + selector: + app: monerod + ports: + - name: rpc + port: 18081 + targetPort: 18081 + protocol: TCP + - name: p2p + port: 18080 + targetPort: 18080 + protocol: TCP + - name: zmq + port: 18083 + targetPort: 18083 + protocol: TCP +- namespace: crypto + name: p2pool + type: ClusterIP + selector: + app: p2pool + ports: + - name: stratum + port: 3333 + targetPort: 3333 + protocol: TCP +- namespace: flux-system + name: notification-controller + type: ClusterIP + selector: + app: notification-controller + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +- namespace: flux-system + name: source-controller + type: ClusterIP + selector: + app: 
source-controller + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +- namespace: flux-system + name: webhook-receiver + type: ClusterIP + selector: + app: notification-controller + ports: + - name: http + port: 80 + targetPort: http-webhook + protocol: TCP +- namespace: gitea + name: gitea + type: ClusterIP + selector: + app: gitea + ports: + - name: http + port: 3000 + targetPort: 3000 + protocol: TCP +- namespace: gitea + name: gitea-ssh + type: NodePort + selector: + app: gitea + ports: + - name: ssh + port: 2242 + targetPort: 2242 + protocol: TCP +- namespace: jellyfin + name: jellyfin + type: ClusterIP + selector: + app: jellyfin + ports: + - name: http + port: 80 + targetPort: 8096 + protocol: TCP +- namespace: jellyfin + name: pegasus + type: ClusterIP + selector: + app: pegasus + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +- namespace: jenkins + name: jenkins + type: ClusterIP + selector: + app: jenkins + ports: + - name: http + port: 8080 + targetPort: 8080 + protocol: TCP + - name: agent-listener + port: 50000 + targetPort: 50000 + protocol: TCP +- namespace: kube-system + name: traefik + type: LoadBalancer + selector: + app.kubernetes.io/instance: traefik-kube-system + app.kubernetes.io/name: traefik + ports: + - name: web + port: 80 + targetPort: web + protocol: TCP + - name: websecure + port: 443 + targetPort: websecure + protocol: TCP +- namespace: longhorn-system + name: oauth2-proxy-longhorn + type: ClusterIP + selector: + app: oauth2-proxy-longhorn + ports: + - name: http + port: 80 + targetPort: 4180 + protocol: TCP +- namespace: mailu-mailserver + name: mailu-front-lb + type: LoadBalancer + selector: + app.kubernetes.io/component: front + app.kubernetes.io/instance: mailu + app.kubernetes.io/name: mailu + ports: + - name: smtp + port: 25 + targetPort: 25 + protocol: TCP + - name: smtps + port: 465 + targetPort: 465 + protocol: TCP + - name: submission + port: 587 + targetPort: 587 + protocol: TCP + - 
name: imaps + port: 993 + targetPort: 993 + protocol: TCP + - name: pop3s + port: 995 + targetPort: 995 + protocol: TCP + - name: sieve + port: 4190 + targetPort: 4190 + protocol: TCP +- namespace: mailu-mailserver + name: mailu-sync-listener + type: ClusterIP + selector: + app: mailu-sync-listener + ports: + - name: http + port: 8080 + targetPort: 8080 + protocol: TCP +- namespace: metallb-system + name: metallb-webhook-service + type: ClusterIP + selector: + app.kubernetes.io/component: controller + app.kubernetes.io/instance: metallb + app.kubernetes.io/name: metallb + ports: + - name: null + port: 443 + targetPort: 9443 + protocol: TCP +- namespace: monitoring + name: dcgm-exporter + type: ClusterIP + selector: + app: dcgm-exporter + ports: + - name: metrics + port: 9400 + targetPort: metrics + protocol: TCP +- namespace: monitoring + name: postmark-exporter + type: ClusterIP + selector: + app: postmark-exporter + ports: + - name: http + port: 8000 + targetPort: http + protocol: TCP +- namespace: nextcloud + name: nextcloud + type: ClusterIP + selector: + app: nextcloud + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +- namespace: sso + name: keycloak + type: ClusterIP + selector: + app: keycloak + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +- namespace: sso + name: oauth2-proxy + type: ClusterIP + selector: + app: oauth2-proxy + ports: + - name: http + port: 80 + targetPort: 4180 + protocol: TCP +- namespace: sso + name: openldap + type: ClusterIP + selector: + app: openldap + ports: + - name: ldap + port: 389 + targetPort: ldap + protocol: TCP + - name: ldaps + port: 636 + targetPort: ldaps + protocol: TCP +- namespace: sui-metrics + name: sui-metrics + type: ClusterIP + selector: + app: sui-metrics + ports: + - name: http + port: 8429 + targetPort: 8429 + protocol: TCP +- namespace: traefik + name: traefik-metrics + type: ClusterIP + selector: + app: traefik + ports: + - name: metrics + port: 9100 + targetPort: 
metrics + protocol: TCP +- namespace: vault + name: vault + type: ClusterIP + selector: + app: vault + ports: + - name: api + port: 8200 + targetPort: 8200 + protocol: TCP + - name: cluster + port: 8201 + targetPort: 8201 + protocol: TCP +- namespace: vault + name: vault-internal + type: ClusterIP + selector: + app: vault + ports: + - name: api + port: 8200 + targetPort: 8200 + protocol: TCP + - name: cluster + port: 8201 + targetPort: 8201 + protocol: TCP +- namespace: vaultwarden + name: vaultwarden-service + type: ClusterIP + selector: + app: vaultwarden + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +http_endpoints: +- host: auth.bstein.dev + path: / + backend: + namespace: sso + service: oauth2-proxy + port: 80 + workloads: + - kind: Deployment + name: oauth2-proxy + via: + kind: Ingress + name: oauth2-proxy + source: oauth2-proxy +- host: bstein.dev + path: / + backend: + namespace: bstein-dev-home + service: bstein-dev-home-frontend + port: 80 + workloads: + - kind: Deployment + name: bstein-dev-home-frontend + via: + kind: Ingress + name: bstein-dev-home + source: bstein-dev-home +- host: bstein.dev + path: /.well-known/matrix + backend: + namespace: comms + service: othrys-synapse-matrix-synapse + port: 8008 + workloads: &id001 + - kind: Deployment + name: othrys-synapse-matrix-synapse + via: + kind: Ingress + name: othrys-synapse-matrix-synapse + source: communication +- host: bstein.dev + path: /api + backend: + namespace: bstein-dev-home + service: bstein-dev-home-backend + port: 80 + workloads: + - kind: Deployment + name: bstein-dev-home-backend + via: + kind: Ingress + name: bstein-dev-home + source: bstein-dev-home +- host: call.live.bstein.dev + path: / + backend: + namespace: comms + service: element-call + port: 80 + workloads: + - kind: Deployment + name: element-call + via: + kind: Ingress + name: element-call + source: communication +- host: chat.ai.bstein.dev + path: / + backend: + namespace: bstein-dev-home + service: 
chat-ai-gateway + port: 80 + workloads: + - kind: Deployment + name: chat-ai-gateway + via: + kind: Ingress + name: bstein-dev-home + source: bstein-dev-home +- host: ci.bstein.dev + path: / + backend: + namespace: jenkins + service: jenkins + port: http + workloads: + - kind: Deployment + name: jenkins + via: + kind: Ingress + name: jenkins + source: jenkins +- host: cloud.bstein.dev + path: / + backend: + namespace: nextcloud + service: nextcloud + port: 80 + workloads: + - kind: Deployment + name: nextcloud + via: + kind: Ingress + name: nextcloud + source: nextcloud +- host: kit.live.bstein.dev + path: /livekit/jwt + backend: + namespace: comms + service: livekit-token-service + port: 8080 + workloads: + - kind: Deployment + name: livekit-token-service + via: + kind: Ingress + name: livekit-jwt-ingress + source: communication +- host: kit.live.bstein.dev + path: /livekit/sfu + backend: + namespace: comms + service: livekit + port: 7880 + workloads: + - kind: Deployment + name: livekit + via: + kind: Ingress + name: livekit-ingress + source: communication +- host: live.bstein.dev + path: / + backend: + namespace: comms + service: othrys-element-element-web + port: 80 + workloads: + - kind: Deployment + name: othrys-element-element-web + via: + kind: Ingress + name: othrys-element-element-web + source: communication +- host: live.bstein.dev + path: /.well-known/matrix + backend: + namespace: comms + service: othrys-synapse-matrix-synapse + port: 8008 + workloads: *id001 + via: + kind: Ingress + name: othrys-synapse-matrix-synapse + source: communication +- host: live.bstein.dev + path: /.well-known/matrix/client + backend: + namespace: comms + service: matrix-wellknown + port: 80 + workloads: &id002 + - kind: Deployment + name: matrix-wellknown + via: + kind: Ingress + name: matrix-wellknown + source: communication +- host: live.bstein.dev + path: /.well-known/matrix/server + backend: + namespace: comms + service: matrix-wellknown + port: 80 + workloads: *id002 + 
via: + kind: Ingress + name: matrix-wellknown + source: communication +- host: live.bstein.dev + path: /_matrix + backend: + namespace: comms + service: othrys-synapse-matrix-synapse + port: 8008 + workloads: *id001 + via: + kind: Ingress + name: othrys-synapse-matrix-synapse + source: communication +- host: longhorn.bstein.dev + path: / + backend: + namespace: longhorn-system + service: oauth2-proxy-longhorn + port: 80 + workloads: + - kind: Deployment + name: oauth2-proxy-longhorn + via: + kind: Ingress + name: longhorn-ingress + source: longhorn-ui +- host: mail.bstein.dev + path: / + backend: + namespace: mailu-mailserver + service: mailu-front + port: 443 + workloads: [] + via: + kind: IngressRoute + name: mailu + source: mailu +- host: matrix.live.bstein.dev + path: / + backend: + namespace: comms + service: matrix-authentication-service + port: 8080 + workloads: &id003 + - kind: Deployment + name: matrix-authentication-service + via: + kind: Ingress + name: matrix-authentication-service + source: communication +- host: matrix.live.bstein.dev + path: /.well-known/matrix/client + backend: + namespace: comms + service: matrix-wellknown + port: 80 + workloads: *id002 + via: + kind: Ingress + name: matrix-wellknown-matrix-live + source: communication +- host: matrix.live.bstein.dev + path: /.well-known/matrix/server + backend: + namespace: comms + service: matrix-wellknown + port: 80 + workloads: *id002 + via: + kind: Ingress + name: matrix-wellknown-matrix-live + source: communication +- host: matrix.live.bstein.dev + path: /_matrix + backend: + namespace: comms + service: othrys-synapse-matrix-synapse + port: 8008 + workloads: *id001 + via: + kind: Ingress + name: othrys-synapse-matrix-synapse + source: communication +- host: matrix.live.bstein.dev + path: /_matrix/client/v3/login + backend: + namespace: comms + service: matrix-authentication-service + port: 8080 + workloads: *id003 + via: + kind: Ingress + name: matrix-authentication-service-compat + source: 
communication +- host: matrix.live.bstein.dev + path: /_matrix/client/v3/logout + backend: + namespace: comms + service: matrix-authentication-service + port: 8080 + workloads: *id003 + via: + kind: Ingress + name: matrix-authentication-service-compat + source: communication +- host: matrix.live.bstein.dev + path: /_matrix/client/v3/refresh + backend: + namespace: comms + service: matrix-authentication-service + port: 8080 + workloads: *id003 + via: + kind: Ingress + name: matrix-authentication-service-compat + source: communication +- host: matrix.live.bstein.dev + path: /_synapse + backend: + namespace: comms + service: othrys-synapse-matrix-synapse + port: 8008 + workloads: *id001 + via: + kind: Ingress + name: othrys-synapse-matrix-synapse + source: communication +- host: monero.bstein.dev + path: / + backend: + namespace: crypto + service: monerod + port: 18081 + workloads: + - kind: Deployment + name: monerod + via: + kind: Ingress + name: monerod + source: monerod +- host: pegasus.bstein.dev + path: / + backend: + namespace: jellyfin + service: pegasus + port: 80 + workloads: + - kind: Deployment + name: pegasus + via: + kind: Ingress + name: pegasus + source: pegasus +- host: scm.bstein.dev + path: / + backend: + namespace: gitea + service: gitea + port: 3000 + workloads: + - kind: Deployment + name: gitea + via: + kind: Ingress + name: gitea-ingress + source: gitea +- host: secret.bstein.dev + path: / + backend: + namespace: vault + service: vault + port: 8200 + workloads: + - kind: StatefulSet + name: vault + via: + kind: Ingress + name: vault + source: vault +- host: sso.bstein.dev + path: / + backend: + namespace: sso + service: keycloak + port: 80 + workloads: + - kind: Deployment + name: keycloak + via: + kind: Ingress + name: keycloak + source: keycloak +- host: stream.bstein.dev + path: / + backend: + namespace: jellyfin + service: jellyfin + port: 80 + workloads: + - kind: Deployment + name: jellyfin + via: + kind: Ingress + name: jellyfin + 
source: jellyfin +- host: vault.bstein.dev + path: / + backend: + namespace: vaultwarden + service: vaultwarden-service + port: 80 + workloads: + - kind: Deployment + name: vaultwarden + via: + kind: Ingress + name: vaultwarden-ingress + source: vaultwarden +helmrelease_host_hints: + gitops-ui:flux-system/weave-gitops: + - cd.bstein.dev + harbor:harbor/harbor: + - registry.bstein.dev + mailu:mailu-mailserver/mailu: + - bstein.dev + - mail.bstein.dev + monitoring:monitoring/alertmanager: + - alerts.bstein.dev + monitoring:monitoring/grafana: + - metrics.bstein.dev + - sso.bstein.dev diff --git a/knowledge/catalog/runbooks.json b/knowledge/catalog/runbooks.json new file mode 100644 index 0000000..d7356ca --- /dev/null +++ b/knowledge/catalog/runbooks.json @@ -0,0 +1,73 @@ +[ + { + "path": "runbooks/ci-gitea-jenkins.md", + "title": "CI: Gitea \u2192 Jenkins pipeline", + "tags": [ + "atlas", + "ci", + "gitea", + "jenkins" + ], + "entrypoints": [ + "scm.bstein.dev", + "ci.bstein.dev" + ], + "source_paths": [ + "services/gitea", + "services/jenkins", + "scripts/jenkins_cred_sync.sh", + "scripts/gitea_cred_sync.sh" + ], + "body": "# CI: Gitea \u2192 Jenkins pipeline\n\n## What this is\nAtlas uses Gitea for source control and Jenkins for CI. Authentication is via Keycloak (SSO).\n\n## Where it is configured\n- Gitea manifests: `services/gitea/`\n- Jenkins manifests: `services/jenkins/`\n- Credential sync helpers: `scripts/gitea_cred_sync.sh`, `scripts/jenkins_cred_sync.sh`\n\n## What users do (typical flow)\n- Create a repo in Gitea.\n- Create/update a Jenkins job/pipeline that can fetch the repo.\n- Configure a webhook (or SCM polling) so pushes trigger builds.\n\n## Troubleshooting (common)\n- \u201cWebhook not firing\u201d: confirm ingress host, webhook URL, and Jenkins job is reachable.\n- \u201cAuth denied cloning\u201d: confirm Keycloak group membership and that Jenkins has a valid token/credential configured." 
+ }, + { + "path": "runbooks/kb-authoring.md", + "title": "KB authoring: what to write (and what not to)", + "tags": [ + "atlas", + "kb", + "runbooks" + ], + "entrypoints": [], + "source_paths": [ + "knowledge/runbooks", + "scripts/knowledge_render_atlas.py" + ], + "body": "# KB authoring: what to write (and what not to)\n\n## The goal\nGive Atlas assistants enough grounded, Atlas-specific context to answer \u201chow do I\u2026?\u201d questions without guessing.\n\n## What to capture (high value)\n- User workflows: \u201cclick here, set X, expected result\u201d\n- Operator workflows: \u201cedit these files, reconcile this kustomization, verify with these commands\u201d\n- Wiring: \u201cthis host routes to this service; this service depends on Postgres/Vault/etc\u201d\n- Failure modes: exact error messages + the 2\u20135 checks that usually resolve them\n- Permissions: Keycloak groups/roles and what they unlock\n\n## What to avoid (low value / fluff)\n- Generic Kubernetes explanations (link to upstream docs instead)\n- Copy-pasting large manifests (prefer file paths + small snippets)\n- Anything that will drift quickly (render it from GitOps instead)\n- Any secret values (reference Secret/Vault locations by name only)\n\n## Document pattern (recommended)\nEach runbook should answer:\n- \u201cWhat is this?\u201d\n- \u201cWhat do users do?\u201d\n- \u201cWhat do operators change (where in Git)?\u201d\n- \u201cHow do we verify it works?\u201d\n- \u201cWhat breaks and how to debug it?\u201d" + }, + { + "path": "runbooks/observability.md", + "title": "Observability: Grafana + VictoriaMetrics (how to query safely)", + "tags": [ + "atlas", + "monitoring", + "grafana", + "victoriametrics" + ], + "entrypoints": [ + "metrics.bstein.dev", + "alerts.bstein.dev" + ], + "source_paths": [ + "services/monitoring" + ], + "body": "# Observability: Grafana + VictoriaMetrics (how to query safely)\n\n## Where it is configured\n- `services/monitoring/helmrelease.yaml` (Grafana + 
Alertmanager + VM values)\n- `services/monitoring/grafana-dashboard-*.yaml` (dashboards and their PromQL)\n\n## Using metrics as a \u201ctool\u201d for Atlas assistants\nThe safest pattern is: map a small set of intents \u2192 fixed PromQL queries, then summarize results.\n\nExamples (intents)\n- \u201cIs the cluster healthy?\u201d \u2192 node readiness + pod restart rate\n- \u201cWhy is Element Call failing?\u201d \u2192 LiveKit/coturn pod restarts + synapse errors + ingress 5xx\n- \u201cIs Jenkins slow?\u201d \u2192 pod CPU/memory + HTTP latency metrics (if exported)\n\n## Why dashboards are not the KB\nDashboards are great references, but the assistant should query VictoriaMetrics directly for live answers and keep the\nKB focused on wiring, runbooks, and stable conventions." + }, + { + "path": "runbooks/template.md", + "title": "", + "tags": [ + "atlas", + "", + "" + ], + "entrypoints": [ + "" + ], + "source_paths": [ + "services/", + "clusters/atlas/<...>" + ], + "body": "# \n\n## What this is\n\n## For users (how to)\n\n## For operators (where configured)\n\n## Troubleshooting (symptoms \u2192 checks)" + } +] diff --git a/knowledge/diagrams/atlas-http.mmd b/knowledge/diagrams/atlas-http.mmd new file mode 100644 index 0000000..a6fc2b5 --- /dev/null +++ b/knowledge/diagrams/atlas-http.mmd @@ -0,0 +1,176 @@ +flowchart LR + host_auth_bstein_dev["auth.bstein.dev"] + svc_sso_oauth2_proxy["sso/oauth2-proxy (Service)"] + host_auth_bstein_dev --> svc_sso_oauth2_proxy + wl_sso_oauth2_proxy["sso/oauth2-proxy (Deployment)"] + svc_sso_oauth2_proxy --> wl_sso_oauth2_proxy + host_bstein_dev["bstein.dev"] + svc_bstein_dev_home_bstein_dev_home_frontend["bstein-dev-home/bstein-dev-home-frontend (Service)"] + host_bstein_dev --> svc_bstein_dev_home_bstein_dev_home_frontend + wl_bstein_dev_home_bstein_dev_home_frontend["bstein-dev-home/bstein-dev-home-frontend (Deployment)"] + svc_bstein_dev_home_bstein_dev_home_frontend --> wl_bstein_dev_home_bstein_dev_home_frontend + 
svc_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Service)"] + host_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse + wl_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Deployment)"] + svc_comms_othrys_synapse_matrix_synapse --> wl_comms_othrys_synapse_matrix_synapse + svc_bstein_dev_home_bstein_dev_home_backend["bstein-dev-home/bstein-dev-home-backend (Service)"] + host_bstein_dev --> svc_bstein_dev_home_bstein_dev_home_backend + wl_bstein_dev_home_bstein_dev_home_backend["bstein-dev-home/bstein-dev-home-backend (Deployment)"] + svc_bstein_dev_home_bstein_dev_home_backend --> wl_bstein_dev_home_bstein_dev_home_backend + host_call_live_bstein_dev["call.live.bstein.dev"] + svc_comms_element_call["comms/element-call (Service)"] + host_call_live_bstein_dev --> svc_comms_element_call + wl_comms_element_call["comms/element-call (Deployment)"] + svc_comms_element_call --> wl_comms_element_call + host_chat_ai_bstein_dev["chat.ai.bstein.dev"] + svc_bstein_dev_home_chat_ai_gateway["bstein-dev-home/chat-ai-gateway (Service)"] + host_chat_ai_bstein_dev --> svc_bstein_dev_home_chat_ai_gateway + wl_bstein_dev_home_chat_ai_gateway["bstein-dev-home/chat-ai-gateway (Deployment)"] + svc_bstein_dev_home_chat_ai_gateway --> wl_bstein_dev_home_chat_ai_gateway + host_ci_bstein_dev["ci.bstein.dev"] + svc_jenkins_jenkins["jenkins/jenkins (Service)"] + host_ci_bstein_dev --> svc_jenkins_jenkins + wl_jenkins_jenkins["jenkins/jenkins (Deployment)"] + svc_jenkins_jenkins --> wl_jenkins_jenkins + host_cloud_bstein_dev["cloud.bstein.dev"] + svc_nextcloud_nextcloud["nextcloud/nextcloud (Service)"] + host_cloud_bstein_dev --> svc_nextcloud_nextcloud + wl_nextcloud_nextcloud["nextcloud/nextcloud (Deployment)"] + svc_nextcloud_nextcloud --> wl_nextcloud_nextcloud + host_kit_live_bstein_dev["kit.live.bstein.dev"] + svc_comms_livekit_token_service["comms/livekit-token-service (Service)"] + host_kit_live_bstein_dev --> 
svc_comms_livekit_token_service + wl_comms_livekit_token_service["comms/livekit-token-service (Deployment)"] + svc_comms_livekit_token_service --> wl_comms_livekit_token_service + svc_comms_livekit["comms/livekit (Service)"] + host_kit_live_bstein_dev --> svc_comms_livekit + wl_comms_livekit["comms/livekit (Deployment)"] + svc_comms_livekit --> wl_comms_livekit + host_live_bstein_dev["live.bstein.dev"] + svc_comms_othrys_element_element_web["comms/othrys-element-element-web (Service)"] + host_live_bstein_dev --> svc_comms_othrys_element_element_web + wl_comms_othrys_element_element_web["comms/othrys-element-element-web (Deployment)"] + svc_comms_othrys_element_element_web --> wl_comms_othrys_element_element_web + host_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse + svc_comms_matrix_wellknown["comms/matrix-wellknown (Service)"] + host_live_bstein_dev --> svc_comms_matrix_wellknown + wl_comms_matrix_wellknown["comms/matrix-wellknown (Deployment)"] + svc_comms_matrix_wellknown --> wl_comms_matrix_wellknown + host_longhorn_bstein_dev["longhorn.bstein.dev"] + svc_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Service)"] + host_longhorn_bstein_dev --> svc_longhorn_system_oauth2_proxy_longhorn + wl_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Deployment)"] + svc_longhorn_system_oauth2_proxy_longhorn --> wl_longhorn_system_oauth2_proxy_longhorn + host_mail_bstein_dev["mail.bstein.dev"] + svc_mailu_mailserver_mailu_front["mailu-mailserver/mailu-front (Service)"] + host_mail_bstein_dev --> svc_mailu_mailserver_mailu_front + host_matrix_live_bstein_dev["matrix.live.bstein.dev"] + svc_comms_matrix_authentication_service["comms/matrix-authentication-service (Service)"] + host_matrix_live_bstein_dev --> svc_comms_matrix_authentication_service + wl_comms_matrix_authentication_service["comms/matrix-authentication-service (Deployment)"] + svc_comms_matrix_authentication_service --> 
wl_comms_matrix_authentication_service + host_matrix_live_bstein_dev --> svc_comms_matrix_wellknown + host_matrix_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse + host_monero_bstein_dev["monero.bstein.dev"] + svc_crypto_monerod["crypto/monerod (Service)"] + host_monero_bstein_dev --> svc_crypto_monerod + wl_crypto_monerod["crypto/monerod (Deployment)"] + svc_crypto_monerod --> wl_crypto_monerod + host_pegasus_bstein_dev["pegasus.bstein.dev"] + svc_jellyfin_pegasus["jellyfin/pegasus (Service)"] + host_pegasus_bstein_dev --> svc_jellyfin_pegasus + wl_jellyfin_pegasus["jellyfin/pegasus (Deployment)"] + svc_jellyfin_pegasus --> wl_jellyfin_pegasus + host_scm_bstein_dev["scm.bstein.dev"] + svc_gitea_gitea["gitea/gitea (Service)"] + host_scm_bstein_dev --> svc_gitea_gitea + wl_gitea_gitea["gitea/gitea (Deployment)"] + svc_gitea_gitea --> wl_gitea_gitea + host_secret_bstein_dev["secret.bstein.dev"] + svc_vault_vault["vault/vault (Service)"] + host_secret_bstein_dev --> svc_vault_vault + wl_vault_vault["vault/vault (StatefulSet)"] + svc_vault_vault --> wl_vault_vault + host_sso_bstein_dev["sso.bstein.dev"] + svc_sso_keycloak["sso/keycloak (Service)"] + host_sso_bstein_dev --> svc_sso_keycloak + wl_sso_keycloak["sso/keycloak (Deployment)"] + svc_sso_keycloak --> wl_sso_keycloak + host_stream_bstein_dev["stream.bstein.dev"] + svc_jellyfin_jellyfin["jellyfin/jellyfin (Service)"] + host_stream_bstein_dev --> svc_jellyfin_jellyfin + wl_jellyfin_jellyfin["jellyfin/jellyfin (Deployment)"] + svc_jellyfin_jellyfin --> wl_jellyfin_jellyfin + host_vault_bstein_dev["vault.bstein.dev"] + svc_vaultwarden_vaultwarden_service["vaultwarden/vaultwarden-service (Service)"] + host_vault_bstein_dev --> svc_vaultwarden_vaultwarden_service + wl_vaultwarden_vaultwarden["vaultwarden/vaultwarden (Deployment)"] + svc_vaultwarden_vaultwarden_service --> wl_vaultwarden_vaultwarden + + subgraph bstein_dev_home[bstein-dev-home] + svc_bstein_dev_home_bstein_dev_home_frontend + 
wl_bstein_dev_home_bstein_dev_home_frontend + svc_bstein_dev_home_bstein_dev_home_backend + wl_bstein_dev_home_bstein_dev_home_backend + svc_bstein_dev_home_chat_ai_gateway + wl_bstein_dev_home_chat_ai_gateway + end + subgraph comms[comms] + svc_comms_othrys_synapse_matrix_synapse + wl_comms_othrys_synapse_matrix_synapse + svc_comms_element_call + wl_comms_element_call + svc_comms_livekit_token_service + wl_comms_livekit_token_service + svc_comms_livekit + wl_comms_livekit + svc_comms_othrys_element_element_web + wl_comms_othrys_element_element_web + svc_comms_matrix_wellknown + wl_comms_matrix_wellknown + svc_comms_matrix_authentication_service + wl_comms_matrix_authentication_service + end + subgraph crypto[crypto] + svc_crypto_monerod + wl_crypto_monerod + end + subgraph gitea[gitea] + svc_gitea_gitea + wl_gitea_gitea + end + subgraph jellyfin[jellyfin] + svc_jellyfin_pegasus + wl_jellyfin_pegasus + svc_jellyfin_jellyfin + wl_jellyfin_jellyfin + end + subgraph jenkins[jenkins] + svc_jenkins_jenkins + wl_jenkins_jenkins + end + subgraph longhorn_system[longhorn-system] + svc_longhorn_system_oauth2_proxy_longhorn + wl_longhorn_system_oauth2_proxy_longhorn + end + subgraph mailu_mailserver[mailu-mailserver] + svc_mailu_mailserver_mailu_front + end + subgraph nextcloud[nextcloud] + svc_nextcloud_nextcloud + wl_nextcloud_nextcloud + end + subgraph sso[sso] + svc_sso_oauth2_proxy + wl_sso_oauth2_proxy + svc_sso_keycloak + wl_sso_keycloak + end + subgraph vault[vault] + svc_vault_vault + wl_vault_vault + end + subgraph vaultwarden[vaultwarden] + svc_vaultwarden_vaultwarden_service + wl_vaultwarden_vaultwarden + end diff --git a/scripts/knowledge_render_atlas.py b/scripts/knowledge_render_atlas.py new file mode 100644 index 0000000..50ac84c --- /dev/null +++ b/scripts/knowledge_render_atlas.py @@ -0,0 +1,554 @@ +#!/usr/bin/env python3 +"""Render Atlas knowledge artifacts from Flux + kustomize manifests. 
+ +Outputs (committed to git for stable diffs + RAG): +- knowledge/catalog/*.yaml +- knowledge/diagrams/*.mmd + +This is intentionally conservative: +- never includes Secret objects +- never includes secret values +- keeps output deterministic (sorted) +""" + +from __future__ import annotations + +import argparse +import json +import re +import subprocess +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Iterable + +import yaml + +REPO_ROOT = Path(__file__).resolve().parents[1] + +CLUSTER_SCOPED_KINDS = { + "Namespace", + "Node", + "CustomResourceDefinition", + "ClusterRole", + "ClusterRoleBinding", + "StorageClass", + "PersistentVolume", + "MutatingWebhookConfiguration", + "ValidatingWebhookConfiguration", + "APIService", +} + +INCLUDED_KINDS = { + "Namespace", + "Deployment", + "StatefulSet", + "DaemonSet", + "Service", + "Ingress", + "IngressRoute", # traefik + "HelmRelease", # only to harvest ingress hostnames from values +} + + +def _run(cmd: list[str], *, cwd: Path) -> str: + res = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True, check=False) + if res.returncode != 0: + raise RuntimeError( + f"Command failed ({res.returncode}): {' '.join(cmd)}\n{res.stderr.strip()}" + ) + return res.stdout + + +def kustomize_build(path: Path) -> str: + rel = path.relative_to(REPO_ROOT) + try: + return _run(["kubectl", "kustomize", str(rel)], cwd=REPO_ROOT) + except Exception as e: + msg = str(e) + if "is not in or below" in msg: + # Repo uses configMapGenerators that reference ../../scripts/*.py. + # Kustomize load restriction must be disabled for a full render. 
+ try: + return _run( + ["kubectl", "kustomize", "--load-restrictor=LoadRestrictionsNone", str(rel)], + cwd=REPO_ROOT, + ) + except Exception: + pass + return _run(["kustomize", "build", "--load-restrictor=LoadRestrictionsNone", str(rel)], cwd=REPO_ROOT) + + +def _iter_docs(raw_yaml: str) -> Iterable[dict[str, Any]]: + for doc in yaml.safe_load_all(raw_yaml): + if not isinstance(doc, dict): + continue + kind = doc.get("kind") + if kind == "List" and isinstance(doc.get("items"), list): + for item in doc["items"]: + if isinstance(item, dict): + yield item + continue + if kind: + yield doc + + +def _meta(doc: dict[str, Any]) -> tuple[str, str | None]: + md = doc.get("metadata") or {} + name = md.get("name") or "" + namespace = md.get("namespace") + return name, namespace + + +def _is_namespaced(doc: dict[str, Any]) -> bool: + kind = doc.get("kind") or "" + return kind not in CLUSTER_SCOPED_KINDS + + +@dataclass(frozen=True) +class FluxKustomization: + name: str + path: str + target_namespace: str | None + + +def find_flux_kustomizations() -> list[FluxKustomization]: + """Find Flux Kustomization CRs under clusters/atlas/flux-system.""" + root = REPO_ROOT / "clusters" / "atlas" / "flux-system" + items: list[FluxKustomization] = [] + for file in sorted(root.rglob("*.yaml")): + raw = file.read_text() + for doc in _iter_docs(raw): + if doc.get("kind") != "Kustomization": + continue + api = str(doc.get("apiVersion") or "") + if not api.startswith("kustomize.toolkit.fluxcd.io/"): + continue + name, _ = _meta(doc) + spec = doc.get("spec") or {} + path = spec.get("path") + if not isinstance(path, str) or not path.strip(): + continue + items.append( + FluxKustomization( + name=name, + path=path.strip().lstrip("./"), + target_namespace=spec.get("targetNamespace"), + ) + ) + return sorted(items, key=lambda k: k.name) + + +def _safe_string_scan_for_hosts(value: Any) -> set[str]: + """Best-effort host scan from HelmRelease values without chart rendering.""" + hosts: set[str] = 
set() + if isinstance(value, str): + for m in re.finditer(r"(?i)([a-z0-9-]+(?:\.[a-z0-9-]+)+)", value): + host = m.group(1).lower() + if host.endswith("bstein.dev"): + hosts.add(host) + return hosts + if isinstance(value, list): + for item in value: + hosts |= _safe_string_scan_for_hosts(item) + return hosts + if isinstance(value, dict): + for item in value.values(): + hosts |= _safe_string_scan_for_hosts(item) + return hosts + return hosts + + +def _service_ports(svc: dict[str, Any]) -> list[dict[str, Any]]: + spec = svc.get("spec") or {} + out: list[dict[str, Any]] = [] + for p in spec.get("ports") or []: + if not isinstance(p, dict): + continue + out.append( + { + "name": p.get("name"), + "port": p.get("port"), + "targetPort": p.get("targetPort"), + "protocol": p.get("protocol", "TCP"), + } + ) + return out + + +def _workload_labels(doc: dict[str, Any]) -> dict[str, str]: + tpl = (doc.get("spec") or {}).get("template") or {} + md = tpl.get("metadata") or {} + labels = md.get("labels") or {} + return {str(k): str(v) for k, v in labels.items()} if isinstance(labels, dict) else {} + + +def _service_selector(doc: dict[str, Any]) -> dict[str, str]: + spec = doc.get("spec") or {} + sel = spec.get("selector") or {} + return {str(k): str(v) for k, v in sel.items()} if isinstance(sel, dict) else {} + + +def _selector_matches(selector: dict[str, str], labels: dict[str, str]) -> bool: + if not selector: + return False + return all(labels.get(k) == v for k, v in selector.items()) + + +def _sanitize_node_id(text: str) -> str: + return re.sub(r"[^a-zA-Z0-9_]", "_", text) + + +def extract_catalog( + rendered: list[tuple[FluxKustomization, list[dict[str, Any]]]], +) -> tuple[dict[str, Any], dict[str, Any], str]: + """Build knowledge catalog + mermaid diagram from rendered docs.""" + # Index workloads and services for mapping. 
+ workloads: dict[tuple[str, str], dict[str, Any]] = {} + services: dict[tuple[str, str], dict[str, Any]] = {} + ingresses: list[dict[str, Any]] = [] + ingressroutes: list[dict[str, Any]] = [] + helmrelease_hosts: dict[str, list[str]] = {} + + for src, docs in rendered: + for doc in docs: + kind = doc.get("kind") + if kind not in INCLUDED_KINDS: + continue + if kind == "Secret": + continue + + name, namespace = _meta(doc) + if _is_namespaced(doc) and not namespace and src.target_namespace: + namespace = src.target_namespace + doc = dict(doc) + doc.setdefault("metadata", {})["namespace"] = namespace + + if kind in ("Deployment", "StatefulSet", "DaemonSet"): + workloads[(namespace or "", name)] = { + "kind": kind, + "namespace": namespace or "", + "name": name, + "labels": _workload_labels(doc), + "serviceAccountName": ((doc.get("spec") or {}).get("template") or {}) + .get("spec", {}) + .get("serviceAccountName"), + "nodeSelector": ((doc.get("spec") or {}).get("template") or {}) + .get("spec", {}) + .get("nodeSelector", {}), + "images": sorted( + { + c.get("image") + for c in ( + (((doc.get("spec") or {}).get("template") or {}).get("spec") or {}).get( + "containers" + ) + or [] + ) + if isinstance(c, dict) and c.get("image") + } + ), + } + elif kind == "Service": + services[(namespace or "", name)] = { + "namespace": namespace or "", + "name": name, + "type": (doc.get("spec") or {}).get("type", "ClusterIP"), + "selector": _service_selector(doc), + "ports": _service_ports(doc), + } + elif kind == "Ingress": + ingresses.append({"source": src.name, "doc": doc}) + elif kind == "IngressRoute": + ingressroutes.append({"source": src.name, "doc": doc}) + elif kind == "HelmRelease": + spec = doc.get("spec") or {} + vals = spec.get("values") or {} + hosts = sorted(_safe_string_scan_for_hosts(vals)) + if hosts: + helmrelease_hosts[f"{src.name}:{namespace or ''}/{name}"] = hosts + + # Map services to workloads. 
+ service_to_workloads: dict[tuple[str, str], list[dict[str, str]]] = {} + for (ns, svc_name), svc in services.items(): + selector = svc.get("selector") or {} + matches: list[dict[str, str]] = [] + for (w_ns, w_name), w in workloads.items(): + if w_ns != ns: + continue + if _selector_matches(selector, w.get("labels") or {}): + matches.append({"kind": w["kind"], "name": w_name}) + service_to_workloads[(ns, svc_name)] = sorted(matches, key=lambda m: (m["kind"], m["name"])) + + # Extract HTTP endpoints. + endpoints: list[dict[str, Any]] = [] + + def add_endpoint( + *, + host: str, + path: str, + namespace: str, + service: str, + port: Any, + source: str, + kind: str, + obj_name: str, + ): + wk = service_to_workloads.get((namespace, service), []) + endpoints.append( + { + "host": host, + "path": path, + "backend": { + "namespace": namespace, + "service": service, + "port": port, + "workloads": wk, + }, + "via": {"kind": kind, "name": obj_name, "source": source}, + } + ) + + for item in ingresses: + doc = item["doc"] + source = item["source"] + name, namespace = _meta(doc) + namespace = namespace or "" + spec = doc.get("spec") or {} + for rule in spec.get("rules") or []: + if not isinstance(rule, dict): + continue + host = (rule.get("host") or "").strip() + http = rule.get("http") or {} + for p in http.get("paths") or []: + if not isinstance(p, dict): + continue + backend = (p.get("backend") or {}).get("service") or {} + svc_name = backend.get("name") + svc_port = (backend.get("port") or {}).get("number") or (backend.get("port") or {}).get("name") + if not host or not svc_name: + continue + add_endpoint( + host=host, + path=p.get("path") or "/", + namespace=namespace, + service=svc_name, + port=svc_port, + source=source, + kind="Ingress", + obj_name=name, + ) + + host_re = re.compile(r"Host\(`([^`]+)`\)") + pathprefix_re = re.compile(r"PathPrefix\(`([^`]+)`\)") + for item in ingressroutes: + doc = item["doc"] + source = item["source"] + name, namespace = _meta(doc) + 
namespace = namespace or "" + spec = doc.get("spec") or {} + for route in spec.get("routes") or []: + if not isinstance(route, dict): + continue + match = route.get("match") or "" + hosts = host_re.findall(match) + pathprefixes = pathprefix_re.findall(match) or ["/"] + for svc in route.get("services") or []: + if not isinstance(svc, dict): + continue + svc_name = svc.get("name") + svc_port = svc.get("port") + if not svc_name: + continue + for host in hosts: + for pp in pathprefixes: + add_endpoint( + host=host, + path=pp, + namespace=namespace, + service=svc_name, + port=svc_port, + source=source, + kind="IngressRoute", + obj_name=name, + ) + + endpoints = sorted( + endpoints, + key=lambda e: ( + e["host"], + e["path"], + e["backend"]["namespace"], + e["backend"]["service"], + ), + ) + + catalog = { + "cluster": "atlas", + "sources": [ + {"name": k.name, "path": k.path, "targetNamespace": k.target_namespace} + for k, _ in rendered + ], + "workloads": sorted( + list(workloads.values()), + key=lambda w: (w["namespace"], w["kind"], w["name"]), + ), + "services": sorted( + list(services.values()), + key=lambda s: (s["namespace"], s["name"]), + ), + "http_endpoints": endpoints, + "helmrelease_host_hints": {k: v for k, v in sorted(helmrelease_hosts.items())}, + } + + # Mermaid diagram: host -> service -> workload (grouped by namespace). 
+ ns_nodes: dict[str, list[str]] = {} + lines: list[str] = ["flowchart LR"] + edges: set[tuple[str, str]] = set() + + def ensure_ns_node(ns: str, node_id: str): + ns_nodes.setdefault(ns, []) + if node_id not in ns_nodes[ns]: + ns_nodes[ns].append(node_id) + + host_nodes: dict[str, str] = {} + + for ep in endpoints: + host = ep["host"] + host_id = host_nodes.get(host) + if not host_id: + host_id = f"host_{_sanitize_node_id(host)}" + host_nodes[host] = host_id + lines.append(f' {host_id}["{host}"]') + + ns = ep["backend"]["namespace"] + svc = ep["backend"]["service"] + svc_id = f"svc_{_sanitize_node_id(ns)}_{_sanitize_node_id(svc)}" + if svc_id not in ns_nodes.get(ns, []): + lines.append(f' {svc_id}["{ns}/{svc} (Service)"]') + ensure_ns_node(ns, svc_id) + + if (host_id, svc_id) not in edges: + edges.add((host_id, svc_id)) + lines.append(f" {host_id} --> {svc_id}") + + for w in ep["backend"]["workloads"]: + w_id = f"wl_{_sanitize_node_id(ns)}_{_sanitize_node_id(w['name'])}" + if w_id not in ns_nodes.get(ns, []): + lines.append(f' {w_id}["{ns}/{w["name"]} ({w["kind"]})"]') + ensure_ns_node(ns, w_id) + if (svc_id, w_id) not in edges: + edges.add((svc_id, w_id)) + lines.append(f" {svc_id} --> {w_id}") + + # Wrap namespace subgraphs at the end for stability (sorted namespaces). 
+ if ns_nodes: + lines.append("") + for ns in sorted(ns_nodes.keys()): + lines.append(f" subgraph { _sanitize_node_id(ns) }[{ns}]") + for node_id in ns_nodes[ns]: + lines.append(f" {node_id}") + lines.append(" end") + + diagram = "\n".join(lines).rstrip() + "\n" + + summary = { + "counts": { + "workloads": len(workloads), + "services": len(services), + "http_endpoints": len(endpoints), + "helmrelease_host_hints": sum(len(v) for v in helmrelease_hosts.values()), + } + } + + return catalog, summary, diagram + + +def main() -> int: + ap = argparse.ArgumentParser() + ap.add_argument("--out", default="knowledge", help="Output base directory (default: knowledge/)") + ap.add_argument( + "--write", + action="store_true", + help="Write generated files (otherwise just print a summary).", + ) + args = ap.parse_args() + + out_dir = REPO_ROOT / args.out + flux = find_flux_kustomizations() + if not flux: + print("No Flux Kustomizations found under clusters/atlas/flux-system.", file=sys.stderr) + return 2 + + rendered: list[tuple[FluxKustomization, list[dict[str, Any]]]] = [] + for k in flux: + path = REPO_ROOT / k.path + if not path.exists(): + continue + raw = kustomize_build(path) + docs = [d for d in _iter_docs(raw) if d.get("kind") != "Secret"] + rendered.append((k, docs)) + + rendered = sorted(rendered, key=lambda item: item[0].name) + catalog, summary, diagram = extract_catalog(rendered) + + if not args.write: + print(json.dumps(summary, indent=2, sort_keys=True)) + return 0 + + (out_dir / "catalog").mkdir(parents=True, exist_ok=True) + (out_dir / "diagrams").mkdir(parents=True, exist_ok=True) + + catalog_path = out_dir / "catalog" / "atlas.yaml" + catalog_json_path = out_dir / "catalog" / "atlas.json" + summary_path = out_dir / "catalog" / "atlas-summary.json" + diagram_path = out_dir / "diagrams" / "atlas-http.mmd" + runbooks_json_path = out_dir / "catalog" / "runbooks.json" + + catalog_path.write_text( + "# Generated by scripts/knowledge_render_atlas.py (do not edit by 
hand)\n" + + yaml.safe_dump(catalog, sort_keys=False), + encoding="utf-8", + ) + catalog_json_path.write_text(json.dumps(catalog, indent=2, sort_keys=False) + "\n", encoding="utf-8") + summary_path.write_text(json.dumps(summary, indent=2, sort_keys=True) + "\n", encoding="utf-8") + diagram_path.write_text(diagram, encoding="utf-8") + + # Render runbooks into JSON for lightweight, dependency-free consumption in-cluster. + runbooks_dir = out_dir / "runbooks" + runbooks: list[dict[str, Any]] = [] + if runbooks_dir.exists(): + for md_file in sorted(runbooks_dir.glob("*.md")): + raw = md_file.read_text(encoding="utf-8") + fm: dict[str, Any] = {} + body = raw + if raw.startswith("---\n"): + try: + _, rest = raw.split("---\n", 1) + fm_raw, body = rest.split("\n---\n", 1) + fm = yaml.safe_load(fm_raw) or {} + except Exception: + fm = {} + body = raw + runbooks.append( + { + "path": str(md_file.relative_to(out_dir)), + "title": fm.get("title") or md_file.stem, + "tags": fm.get("tags") or [], + "entrypoints": fm.get("entrypoints") or [], + "source_paths": fm.get("source_paths") or [], + "body": body.strip(), + } + ) + runbooks_json_path.write_text(json.dumps(runbooks, indent=2, sort_keys=False) + "\n", encoding="utf-8") + + print(f"Wrote {catalog_path.relative_to(REPO_ROOT)}") + print(f"Wrote {catalog_json_path.relative_to(REPO_ROOT)}") + print(f"Wrote {summary_path.relative_to(REPO_ROOT)}") + print(f"Wrote {diagram_path.relative_to(REPO_ROOT)}") + print(f"Wrote {runbooks_json_path.relative_to(REPO_ROOT)}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/services/communication/atlasbot-configmap.yaml b/services/communication/atlasbot-configmap.yaml index 034d55e..0b4e38e 100644 --- a/services/communication/atlasbot-configmap.yaml +++ b/services/communication/atlasbot-configmap.yaml @@ -5,22 +5,74 @@ metadata: name: atlasbot data: bot.py: | - import json, os, time, collections, re - from urllib import request, parse, error + import 
collections + import json + import os + import re + import ssl + import time + from urllib import error, parse, request BASE = os.environ.get("MATRIX_BASE", "http://othrys-synapse-matrix-synapse:8008") AUTH_BASE = os.environ.get("AUTH_BASE", "http://matrix-authentication-service:8080") USER = os.environ["BOT_USER"] PASSWORD = os.environ["BOT_PASS"] ROOM_ALIAS = "#othrys:live.bstein.dev" + OLLAMA_URL = os.environ.get("OLLAMA_URL", "https://chat.ai.bstein.dev/") MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5-coder:7b-instruct-q4_0") API_KEY = os.environ.get("CHAT_API_KEY", "") + + KB_DIR = os.environ.get("KB_DIR", "") + VM_URL = os.environ.get("VM_URL", "http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428") + BOT_MENTIONS = os.environ.get("BOT_MENTIONS", f"{USER},atlas") SERVER_NAME = os.environ.get("MATRIX_SERVER_NAME", "live.bstein.dev") + + MAX_KB_CHARS = int(os.environ.get("ATLASBOT_MAX_KB_CHARS", "2500")) + MAX_TOOL_CHARS = int(os.environ.get("ATLASBOT_MAX_TOOL_CHARS", "2500")) + + TOKEN_RE = re.compile(r"[a-z0-9][a-z0-9_.-]{1,}", re.IGNORECASE) + HOST_RE = re.compile(r"(?i)([a-z0-9-]+(?:\\.[a-z0-9-]+)+)") + STOPWORDS = { + "the", + "and", + "for", + "with", + "this", + "that", + "from", + "into", + "what", + "how", + "why", + "when", + "where", + "which", + "who", + "can", + "could", + "should", + "would", + "please", + "help", + "atlas", + "othrys", + } + + def _tokens(text: str) -> list[str]: + toks = [t.lower() for t in TOKEN_RE.findall(text or "")] + return [t for t in toks if t not in STOPWORDS and len(t) >= 2] + + + # Mention detection (Matrix rich mentions + plain @atlas). MENTION_TOKENS = [m.strip() for m in BOT_MENTIONS.split(",") if m.strip()] MENTION_LOCALPARTS = [m.lstrip("@").split(":", 1)[0] for m in MENTION_TOKENS] - MENTION_RE = re.compile(r"(? 
str: t = token.strip() if not t: @@ -43,6 +95,8 @@ data: return False return any(isinstance(uid, str) and uid.lower() in MENTION_USER_IDS for uid in user_ids) + + # Matrix HTTP helper. def req(method: str, path: str, token: str | None = None, body=None, timeout=60, base: str | None = None): url = (base or BASE) + path data = None @@ -78,31 +132,317 @@ data: path = f"/_matrix/client/v3/rooms/{parse.quote(room)}/send/m.room.message" req("POST", path, token, body={"msgtype": "m.text", "body": text}) - history = collections.defaultdict(list) # (room_id, sender|None) -> list of str (short transcript) + + # Atlas KB loader (no external deps; files are pre-rendered JSON via scripts/knowledge_render_atlas.py). + KB = {"catalog": {}, "runbooks": []} + _HOST_INDEX: dict[str, list[dict]] = {} + _NAME_INDEX: set[str] = set() + + def _load_json_file(path: str) -> Any | None: + try: + with open(path, "rb") as f: + return json.loads(f.read().decode("utf-8")) + except Exception: + return None + + def load_kb(): + global KB, _HOST_INDEX, _NAME_INDEX + if not KB_DIR: + return + catalog = _load_json_file(os.path.join(KB_DIR, "catalog", "atlas.json")) or {} + runbooks = _load_json_file(os.path.join(KB_DIR, "catalog", "runbooks.json")) or [] + KB = {"catalog": catalog, "runbooks": runbooks} + + host_index: dict[str, list[dict]] = collections.defaultdict(list) + for ep in catalog.get("http_endpoints", []) if isinstance(catalog, dict) else []: + host = (ep.get("host") or "").lower() + if host: + host_index[host].append(ep) + _HOST_INDEX = {k: host_index[k] for k in sorted(host_index.keys())} + + names: set[str] = set() + for s in catalog.get("services", []) if isinstance(catalog, dict) else []: + if isinstance(s, dict) and s.get("name"): + names.add(str(s["name"]).lower()) + for w in catalog.get("workloads", []) if isinstance(catalog, dict) else []: + if isinstance(w, dict) and w.get("name"): + names.add(str(w["name"]).lower()) + _NAME_INDEX = names + + def kb_retrieve(query: str, *, 
limit: int = 3) -> str: + q = (query or "").strip() + if not q or not KB.get("runbooks"): + return "" + ql = q.lower() + q_tokens = _tokens(q) + if not q_tokens: + return "" + + scored: list[tuple[int, dict]] = [] + for doc in KB.get("runbooks", []): + if not isinstance(doc, dict): + continue + title = str(doc.get("title") or "") + body = str(doc.get("body") or "") + tags = doc.get("tags") or [] + entrypoints = doc.get("entrypoints") or [] + hay = (title + "\n" + " ".join(tags) + "\n" + " ".join(entrypoints) + "\n" + body).lower() + score = 0 + for t in set(q_tokens): + if t in hay: + score += 3 if t in title.lower() else 1 + for h in entrypoints: + if isinstance(h, str) and h.lower() in ql: + score += 4 + if score: + scored.append((score, doc)) + + scored.sort(key=lambda x: x[0], reverse=True) + picked = [d for _, d in scored[:limit]] + if not picked: + return "" + + parts: list[str] = ["Atlas KB (retrieved):"] + used = 0 + for d in picked: + path = d.get("path") or "" + title = d.get("title") or path + body = (d.get("body") or "").strip() + snippet = body[:900].strip() + chunk = f"- {title} ({path})\n{snippet}" + if used + len(chunk) > MAX_KB_CHARS: + break + parts.append(chunk) + used += len(chunk) + return "\n".join(parts).strip() + + def catalog_hints(query: str) -> tuple[str, list[tuple[str, str]]]: + q = (query or "").strip() + if not q or not KB.get("catalog"): + return "", [] + ql = q.lower() + hosts = {m.group(1).lower() for m in HOST_RE.finditer(ql) if m.group(1).lower().endswith("bstein.dev")} + + # Also match by known workload/service names. 
+ for t in _tokens(ql): + if t in _NAME_INDEX: + hosts |= {ep["host"].lower() for ep in KB["catalog"].get("http_endpoints", []) if isinstance(ep, dict) and ep.get("backend", {}).get("service") == t} + + edges: list[tuple[str, str]] = [] + lines: list[str] = [] + for host in sorted(hosts): + for ep in _HOST_INDEX.get(host, []): + backend = ep.get("backend") or {} + ns = backend.get("namespace") or "" + svc = backend.get("service") or "" + path = ep.get("path") or "/" + if not svc: + continue + wk = backend.get("workloads") or [] + wk_str = ", ".join(f"{w.get('kind')}:{w.get('name')}" for w in wk if isinstance(w, dict) and w.get("name")) or "unknown" + lines.append(f"- {host}{path} → {ns}/{svc} → {wk_str}") + for w in wk: + if isinstance(w, dict) and w.get("name"): + edges.append((ns, str(w["name"]))) + if not lines: + return "", [] + return "Atlas endpoints (from GitOps):\n" + "\n".join(lines[:20]), edges + + + # Kubernetes API (read-only). RBAC is provided via ServiceAccount atlasbot. + _K8S_TOKEN: str | None = None + _K8S_CTX: ssl.SSLContext | None = None + + def _k8s_context() -> ssl.SSLContext: + global _K8S_CTX + if _K8S_CTX is not None: + return _K8S_CTX + ca_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + ctx = ssl.create_default_context(cafile=ca_path) + _K8S_CTX = ctx + return ctx + + def _k8s_token() -> str: + global _K8S_TOKEN + if _K8S_TOKEN: + return _K8S_TOKEN + token_path = "/var/run/secrets/kubernetes.io/serviceaccount/token" + with open(token_path, "r", encoding="utf-8") as f: + _K8S_TOKEN = f.read().strip() + return _K8S_TOKEN + + def k8s_get(path: str, timeout: int = 8) -> dict: + host = os.environ.get("KUBERNETES_SERVICE_HOST") + port = os.environ.get("KUBERNETES_SERVICE_PORT_HTTPS") or os.environ.get("KUBERNETES_SERVICE_PORT") or "443" + if not host: + raise RuntimeError("k8s host missing") + url = f"https://{host}:{port}{path}" + headers = {"Authorization": f"Bearer {_k8s_token()}"} + r = request.Request(url, headers=headers, 
method="GET") + with request.urlopen(r, timeout=timeout, context=_k8s_context()) as resp: + raw = resp.read() + return json.loads(raw.decode()) if raw else {} + + def k8s_pods(namespace: str) -> list[dict]: + data = k8s_get(f"/api/v1/namespaces/{parse.quote(namespace)}/pods?limit=500") + items = data.get("items") or [] + return items if isinstance(items, list) else [] + + def summarize_pods(namespace: str, prefixes: set[str] | None = None) -> str: + try: + pods = k8s_pods(namespace) + except Exception: + return "" + out: list[str] = [] + for p in pods: + md = p.get("metadata") or {} + st = p.get("status") or {} + name = md.get("name") or "" + if prefixes and not any(name.startswith(pref + "-") or name == pref or name.startswith(pref) for pref in prefixes): + continue + phase = st.get("phase") or "?" + cs = st.get("containerStatuses") or [] + restarts = 0 + ready = 0 + total = 0 + reason = st.get("reason") or "" + for c in cs if isinstance(cs, list) else []: + if not isinstance(c, dict): + continue + total += 1 + restarts += int(c.get("restartCount") or 0) + if c.get("ready"): + ready += 1 + state = c.get("state") or {} + if not reason and isinstance(state, dict): + waiting = state.get("waiting") or {} + if isinstance(waiting, dict) and waiting.get("reason"): + reason = waiting.get("reason") + extra = f" ({reason})" if reason else "" + out.append(f"- {namespace}/{name}: {phase} {ready}/{total} restarts={restarts}{extra}") + return "\n".join(out[:20]) + + def flux_not_ready() -> str: + try: + data = k8s_get( + "/apis/kustomize.toolkit.fluxcd.io/v1/namespaces/flux-system/kustomizations?limit=200" + ) + except Exception: + return "" + items = data.get("items") or [] + bad: list[str] = [] + for it in items if isinstance(items, list) else []: + md = it.get("metadata") or {} + st = it.get("status") or {} + name = md.get("name") or "" + conds = st.get("conditions") or [] + ready = None + msg = "" + for c in conds if isinstance(conds, list) else []: + if isinstance(c, dict) 
and c.get("type") == "Ready": + ready = c.get("status") + msg = c.get("message") or "" + if ready not in ("True", True): + bad.append(f"- flux kustomization/{name}: Ready={ready} {msg}".strip()) + return "\n".join(bad[:10]) + + + # VictoriaMetrics (PromQL) helpers. + def vm_query(query: str, timeout: int = 8) -> dict | None: + try: + url = VM_URL.rstrip("/") + "/api/v1/query?" + parse.urlencode({"query": query}) + with request.urlopen(url, timeout=timeout) as resp: + return json.loads(resp.read().decode()) + except Exception: + return None + + def vm_top_restarts(hours: int = 1) -> str: + q = f"topk(5, sum by (namespace,pod) (increase(kube_pod_container_status_restarts_total[{hours}h])))" + res = vm_query(q) + if not res or (res.get("status") != "success"): + return "" + out: list[str] = [] + for r in (res.get("data") or {}).get("result") or []: + if not isinstance(r, dict): + continue + m = r.get("metric") or {} + v = r.get("value") or [] + ns = (m.get("namespace") or "").strip() + pod = (m.get("pod") or "").strip() + val = v[1] if isinstance(v, list) and len(v) > 1 else "" + if pod: + out.append(f"- restarts({hours}h): {ns}/{pod} = {val}") + return "\n".join(out) + + + # Conversation state. + history = collections.defaultdict(list) # (room_id, sender|None) -> list[str] (short transcript) def key_for(room_id: str, sender: str, is_dm: bool): return (room_id, None) if is_dm else (room_id, sender) - def ollama_reply(hist_key, prompt: str) -> str: + def build_context(prompt: str, *, allow_tools: bool, targets: list[tuple[str, str]]) -> str: + parts: list[str] = [] + + kb = kb_retrieve(prompt) + if kb: + parts.append(kb) + + endpoints, edges = catalog_hints(prompt) + if endpoints: + parts.append(endpoints) + + if allow_tools: + # Scope pod summaries to relevant namespaces/workloads when possible. 
+ prefixes_by_ns: dict[str, set[str]] = collections.defaultdict(set) + for ns, name in (targets or []) + (edges or []): + if ns and name: + prefixes_by_ns[ns].add(name) + pod_lines: list[str] = [] + for ns in sorted(prefixes_by_ns.keys()): + summary = summarize_pods(ns, prefixes_by_ns[ns]) + if summary: + pod_lines.append(f"Pods (live):\n{summary}") + if pod_lines: + parts.append("\n".join(pod_lines)[:MAX_TOOL_CHARS]) + + flux_bad = flux_not_ready() + if flux_bad: + parts.append("Flux (not ready):\n" + flux_bad) + + restarts = vm_top_restarts(1) + if restarts: + parts.append("VictoriaMetrics (top restarts 1h):\n" + restarts) + + return "\n\n".join([p for p in parts if p]).strip() + + def ollama_reply(hist_key, prompt: str, *, context: str) -> str: try: - # Keep short context as plain text transcript - transcript = "\n".join( - ["System: You are Atlas, the Titan lab assistant for Othrys. Be helpful, direct, and concise."] - + history[hist_key][-24:] - + [f"User: {prompt}"] + system = ( + "System: You are Atlas, the Titan lab assistant for Atlas/Othrys. " + "Be helpful, direct, and concise. " + "Prefer answering with exact repo paths and Kubernetes resource names. " + "Never include or request secret values." ) + transcript_parts = [system] + if context: + transcript_parts.append("Context (grounded):\n" + context[:MAX_KB_CHARS]) + transcript_parts.extend(history[hist_key][-24:]) + transcript_parts.append(f"User: {prompt}") + transcript = "\n".join(transcript_parts) + payload = {"model": MODEL, "message": transcript} headers = {"Content-Type": "application/json"} if API_KEY: headers["x-api-key"] = API_KEY r = request.Request(OLLAMA_URL, data=json.dumps(payload).encode(), headers=headers) - with request.urlopen(r, timeout=15) as resp: + with request.urlopen(r, timeout=20) as resp: data = json.loads(resp.read().decode()) reply = data.get("message") or data.get("response") or data.get("reply") or "I'm here to help." 
history[hist_key].append(f"Atlas: {reply}") return reply except Exception: - return "Hi! I'm Atlas." + return "I’m here — but I couldn’t reach the model backend." def sync_loop(token: str, room_id: str): since = None @@ -111,6 +451,7 @@ data: since = res.get("next_batch") except Exception: pass + while True: params = {"timeout": 30000} if since: @@ -133,28 +474,48 @@ data: # messages for rid, data in res.get("rooms", {}).get("join", {}).items(): timeline = data.get("timeline", {}).get("events", []) + joined_count = data.get("summary", {}).get("m.joined_member_count") + is_dm = joined_count is not None and joined_count <= 2 + for ev in timeline: if ev.get("type") != "m.room.message": continue content = ev.get("content", {}) - body = content.get("body", "") - if not body.strip(): + body = (content.get("body", "") or "").strip() + if not body: continue sender = ev.get("sender", "") if sender == f"@{USER}:live.bstein.dev": continue - # Only respond if bot is mentioned or in a DM - joined_count = data.get("summary", {}).get("m.joined_member_count") - is_dm = joined_count is not None and joined_count <= 2 + mentioned = is_mentioned(content, body) hist_key = key_for(rid, sender, is_dm) history[hist_key].append(f"{sender}: {body}") history[hist_key] = history[hist_key][-80:] - if is_dm or mentioned: - reply = ollama_reply(hist_key, body) - send_msg(token, rid, reply) + + if not (is_dm or mentioned): + continue + + # Only do live cluster/metrics introspection in DMs. + allow_tools = is_dm + + # Attempt to scope tools to the most likely workloads when hostnames are mentioned. 
+ targets: list[tuple[str, str]] = [] + for m in HOST_RE.finditer(body.lower()): + host = m.group(1).lower() + for ep in _HOST_INDEX.get(host, []): + backend = ep.get("backend") or {} + ns = backend.get("namespace") or "" + for w in backend.get("workloads") or []: + if isinstance(w, dict) and w.get("name"): + targets.append((ns, str(w["name"]))) + + context = build_context(body, allow_tools=allow_tools, targets=targets) + reply = ollama_reply(hist_key, body, context=context) + send_msg(token, rid, reply) def main(): + load_kb() token = login() try: room_id = resolve_alias(token, ROOM_ALIAS) diff --git a/services/communication/atlasbot-deployment.yaml b/services/communication/atlasbot-deployment.yaml index fbb9b3d..febd675 100644 --- a/services/communication/atlasbot-deployment.yaml +++ b/services/communication/atlasbot-deployment.yaml @@ -16,8 +16,9 @@ spec: labels: app: atlasbot annotations: - checksum/atlasbot-configmap: c57538d33dc02db7aaf7b2f4681f50620c2cbcde8ddc1c51ccb5fa693247b00a + checksum/atlasbot-configmap: b9796738bbbc50fd5c70db0bd4fbffe986fd2728a7487186e39ff7ecabefbd1e spec: + serviceAccountName: atlasbot nodeSelector: hardware: rpi5 containers: @@ -32,6 +33,10 @@ spec: value: http://othrys-synapse-matrix-synapse:8008 - name: AUTH_BASE value: http://matrix-authentication-service:8080 + - name: KB_DIR + value: /kb + - name: VM_URL + value: http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428 - name: BOT_USER value: atlasbot - name: BOT_PASS @@ -59,7 +64,24 @@ spec: - name: code mountPath: /app/bot.py subPath: bot.py + - name: kb + mountPath: /kb + readOnly: true volumes: - name: code configMap: name: atlasbot + - name: kb + configMap: + name: atlas-kb + items: + - key: INDEX.md + path: INDEX.md + - key: atlas.json + path: catalog/atlas.json + - key: atlas-summary.json + path: catalog/atlas-summary.json + - key: runbooks.json + path: catalog/runbooks.json + - key: atlas-http.mmd + path: diagrams/atlas-http.mmd diff --git 
a/services/communication/atlasbot-rbac.yaml b/services/communication/atlasbot-rbac.yaml new file mode 100644 index 0000000..59685d0 --- /dev/null +++ b/services/communication/atlasbot-rbac.yaml @@ -0,0 +1,47 @@ +# services/communication/atlasbot-rbac.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: atlasbot + namespace: comms +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: atlasbot-readonly +rules: + - apiGroups: [""] + resources: ["namespaces", "nodes", "pods", "services", "endpoints", "events"] + verbs: ["get", "list", "watch"] + - apiGroups: ["apps"] + resources: ["deployments", "statefulsets", "daemonsets", "replicasets"] + verbs: ["get", "list", "watch"] + - apiGroups: ["networking.k8s.io"] + resources: ["ingresses"] + verbs: ["get", "list", "watch"] + - apiGroups: ["traefik.io"] + resources: ["ingressroutes", "middlewares", "serverstransports"] + verbs: ["get", "list", "watch"] + - apiGroups: ["kustomize.toolkit.fluxcd.io"] + resources: ["kustomizations"] + verbs: ["get", "list", "watch"] + - apiGroups: ["helm.toolkit.fluxcd.io"] + resources: ["helmreleases"] + verbs: ["get", "list", "watch"] + - apiGroups: ["source.toolkit.fluxcd.io"] + resources: ["gitrepositories", "helmrepositories", "buckets"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: atlasbot-readonly +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: atlasbot-readonly +subjects: + - kind: ServiceAccount + name: atlasbot + namespace: comms + diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index 2cf8b4f..5b71f75 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -3,6 +3,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization namespace: comms resources: + - atlasbot-rbac.yaml - synapse-rendered.yaml - 
synapse-signingkey-ensure-job.yaml - mas-configmap.yaml @@ -29,3 +30,12 @@ resources: patchesStrategicMerge: - synapse-deployment-strategy-patch.yaml + +configMapGenerator: + - name: atlas-kb + files: + - INDEX.md=../../knowledge/INDEX.md + - atlas.json=../../knowledge/catalog/atlas.json + - atlas-summary.json=../../knowledge/catalog/atlas-summary.json + - runbooks.json=../../knowledge/catalog/runbooks.json + - atlas-http.mmd=../../knowledge/diagrams/atlas-http.mmd -- 2.47.2 From 91d4ecf451f0c2e2c0af5f057d80ce77f9cda238 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 14:52:25 -0300 Subject: [PATCH 457/684] nextcloud: run install init as root --- services/nextcloud/deployment.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index bc8c767..8b51bd1 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -48,6 +48,9 @@ spec: subPath: extra.config.php - name: install-nextcloud image: nextcloud:29-apache + securityContext: + runAsUser: 0 + runAsGroup: 0 command: ["/bin/sh", "-c"] args: - | -- 2.47.2 From 0a8e8e27daf8643cb7b093d100661ffa45ddbf4d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 14:53:19 -0300 Subject: [PATCH 458/684] knowledge: add runbooks skeleton --- .gitignore | 1 + knowledge/INDEX.md | 22 +++++++++++++++++ knowledge/runbooks/ci-gitea-jenkins.md | 27 ++++++++++++++++++++ knowledge/runbooks/kb-authoring.md | 34 ++++++++++++++++++++++++++ knowledge/runbooks/observability.md | 26 ++++++++++++++++++++ knowledge/runbooks/template.md | 18 ++++++++++++++ 6 files changed, 128 insertions(+) create mode 100644 knowledge/INDEX.md create mode 100644 knowledge/runbooks/ci-gitea-jenkins.md create mode 100644 knowledge/runbooks/kb-authoring.md create mode 100644 knowledge/runbooks/observability.md create mode 100644 knowledge/runbooks/template.md diff --git a/.gitignore b/.gitignore index 7bf3646..1d2e516 100644 --- 
a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.md !README.md +!knowledge/**/*.md __pycache__/ *.py[cod] diff --git a/knowledge/INDEX.md b/knowledge/INDEX.md new file mode 100644 index 0000000..fac9153 --- /dev/null +++ b/knowledge/INDEX.md @@ -0,0 +1,22 @@ +Atlas Knowledge Base (KB) + +This folder is the source-of-truth “memory” for Atlas/Titan assistants (and for humans). It is designed to be: +- Accurate (grounded in GitOps + read-only cluster tools) +- Maintainable (small docs + deterministic generators) +- Safe (no secrets; refer to Secret/Vault paths by name only) + +Layout +- `knowledge/runbooks/`: human-written docs (short, chunkable Markdown). +- `knowledge/catalog/`: generated machine-readable facts (YAML/JSON). +- `knowledge/diagrams/`: generated Mermaid diagrams (`.mmd`) derived from the catalog. + +Regeneration +- Update manifests/docs, then regenerate generated artifacts: + - `python scripts/knowledge_render_atlas.py --write` + +Authoring rules +- Never include secret values. Prefer `secretRef` names or Vault paths like `kv/atlas/...`. +- Prefer stable identifiers: Kubernetes `namespace/name`, DNS hostnames, Flux kustomization paths. +- Keep each runbook small; one topic per file; use headings. +- When in doubt, link to the exact file path in this repo that configures the behavior. + diff --git a/knowledge/runbooks/ci-gitea-jenkins.md b/knowledge/runbooks/ci-gitea-jenkins.md new file mode 100644 index 0000000..48dc91f --- /dev/null +++ b/knowledge/runbooks/ci-gitea-jenkins.md @@ -0,0 +1,27 @@ +--- +title: "CI: Gitea → Jenkins pipeline" +tags: ["atlas", "ci", "gitea", "jenkins"] +owners: ["brad"] +entrypoints: ["scm.bstein.dev", "ci.bstein.dev"] +source_paths: ["services/gitea", "services/jenkins", "scripts/jenkins_cred_sync.sh", "scripts/gitea_cred_sync.sh"] +--- + +# CI: Gitea → Jenkins pipeline + +## What this is +Atlas uses Gitea for source control and Jenkins for CI. Authentication is via Keycloak (SSO). 
+
+## Where it is configured
+- Gitea manifests: `services/gitea/`
+- Jenkins manifests: `services/jenkins/`
+- Credential sync helpers: `scripts/gitea_cred_sync.sh`, `scripts/jenkins_cred_sync.sh`
+
+## What users do (typical flow)
+- Create a repo in Gitea.
+- Create/update a Jenkins job/pipeline that can fetch the repo.
+- Configure a webhook (or SCM polling) so pushes trigger builds.
+
+## Troubleshooting (common)
+- “Webhook not firing”: confirm the ingress host and the webhook URL, and that the Jenkins job is reachable.
+- “Auth denied cloning”: confirm Keycloak group membership and that Jenkins has a valid token/credential configured.
+
diff --git a/knowledge/runbooks/kb-authoring.md b/knowledge/runbooks/kb-authoring.md
new file mode 100644
index 0000000..9378d1d
--- /dev/null
+++ b/knowledge/runbooks/kb-authoring.md
@@ -0,0 +1,34 @@
+---
+title: "KB authoring: what to write (and what not to)"
+tags: ["atlas", "kb", "runbooks"]
+owners: ["brad"]
+entrypoints: []
+source_paths: ["knowledge/runbooks", "scripts/knowledge_render_atlas.py"]
+---
+
+# KB authoring: what to write (and what not to)
+
+## The goal
+Give Atlas assistants enough grounded, Atlas-specific context to answer “how do I…?” questions without guessing. 
+ +## What to capture (high value) +- User workflows: “click here, set X, expected result” +- Operator workflows: “edit these files, reconcile this kustomization, verify with these commands” +- Wiring: “this host routes to this service; this service depends on Postgres/Vault/etc” +- Failure modes: exact error messages + the 2–5 checks that usually resolve them +- Permissions: Keycloak groups/roles and what they unlock + +## What to avoid (low value / fluff) +- Generic Kubernetes explanations (link to upstream docs instead) +- Copy-pasting large manifests (prefer file paths + small snippets) +- Anything that will drift quickly (render it from GitOps instead) +- Any secret values (reference Secret/Vault locations by name only) + +## Document pattern (recommended) +Each runbook should answer: +- “What is this?” +- “What do users do?” +- “What do operators change (where in Git)?” +- “How do we verify it works?” +- “What breaks and how to debug it?” + diff --git a/knowledge/runbooks/observability.md b/knowledge/runbooks/observability.md new file mode 100644 index 0000000..4c5be6e --- /dev/null +++ b/knowledge/runbooks/observability.md @@ -0,0 +1,26 @@ +--- +title: "Observability: Grafana + VictoriaMetrics (how to query safely)" +tags: ["atlas", "monitoring", "grafana", "victoriametrics"] +owners: ["brad"] +entrypoints: ["metrics.bstein.dev", "alerts.bstein.dev"] +source_paths: ["services/monitoring"] +--- + +# Observability: Grafana + VictoriaMetrics (how to query safely) + +## Where it is configured +- `services/monitoring/helmrelease.yaml` (Grafana + Alertmanager + VM values) +- `services/monitoring/grafana-dashboard-*.yaml` (dashboards and their PromQL) + +## Using metrics as a “tool” for Atlas assistants +The safest pattern is: map a small set of intents → fixed PromQL queries, then summarize results. 
+ +Examples (intents) +- “Is the cluster healthy?” → node readiness + pod restart rate +- “Why is Element Call failing?” → LiveKit/coturn pod restarts + synapse errors + ingress 5xx +- “Is Jenkins slow?” → pod CPU/memory + HTTP latency metrics (if exported) + +## Why dashboards are not the KB +Dashboards are great references, but the assistant should query VictoriaMetrics directly for live answers and keep the +KB focused on wiring, runbooks, and stable conventions. + diff --git a/knowledge/runbooks/template.md b/knowledge/runbooks/template.md new file mode 100644 index 0000000..086c65f --- /dev/null +++ b/knowledge/runbooks/template.md @@ -0,0 +1,18 @@ +--- +title: "" +tags: ["atlas", "", ""] +owners: ["brad"] +entrypoints: [""] +source_paths: ["services/", "clusters/atlas/<...>"] +--- + +# + +## What this is + +## For users (how to) + +## For operators (where configured) + +## Troubleshooting (symptoms → checks) + -- 2.47.2 From 4a5f3d4c92788f6df9860ce4525c6ca549908b67 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 14:53:58 -0300 Subject: [PATCH 459/684] nextcloud: install without runuser --- services/nextcloud/deployment.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 8b51bd1..55a5687 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -55,8 +55,7 @@ spec: args: - | if [ ! 
-s /var/www/html/config/config.php ]; then - runuser -u www-data -- \ - php /var/www/html/occ maintenance:install \ + php /var/www/html/occ maintenance:install \ --database pgsql \ --database-host "${POSTGRES_HOST}" \ --database-name "${POSTGRES_DB}" \ @@ -65,6 +64,7 @@ spec: --admin-user "${NEXTCLOUD_ADMIN_USER}" \ --admin-pass "${NEXTCLOUD_ADMIN_PASSWORD}" \ --data-dir "/var/www/html/data" + chown -R 33:33 /var/www/html/config /var/www/html/data || true fi env: - name: POSTGRES_HOST -- 2.47.2 From b313569e2f825da2544e5e74ee2f656b8163a3b5 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 14:55:19 -0300 Subject: [PATCH 460/684] atlasbot: fix kb loader import --- services/communication/atlasbot-configmap.yaml | 1 + services/communication/atlasbot-deployment.yaml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/services/communication/atlasbot-configmap.yaml b/services/communication/atlasbot-configmap.yaml index 0b4e38e..dfbdd2c 100644 --- a/services/communication/atlasbot-configmap.yaml +++ b/services/communication/atlasbot-configmap.yaml @@ -11,6 +11,7 @@ data: import re import ssl import time + from typing import Any from urllib import error, parse, request BASE = os.environ.get("MATRIX_BASE", "http://othrys-synapse-matrix-synapse:8008") diff --git a/services/communication/atlasbot-deployment.yaml b/services/communication/atlasbot-deployment.yaml index febd675..5c6c87e 100644 --- a/services/communication/atlasbot-deployment.yaml +++ b/services/communication/atlasbot-deployment.yaml @@ -16,7 +16,7 @@ spec: labels: app: atlasbot annotations: - checksum/atlasbot-configmap: b9796738bbbc50fd5c70db0bd4fbffe986fd2728a7487186e39ff7ecabefbd1e + checksum/atlasbot-configmap: edd1d61d8010197b948343dff3d7a8913017e79a0a0098008213452f50361b44 spec: serviceAccountName: atlasbot nodeSelector: -- 2.47.2 From 221fda50a6de56a9f4c65f9d0630783b9b4505e5 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 14:58:29 -0300 Subject: [PATCH 461/684] 
atlasbot: add PromQL + cluster snapshot --- .../communication/atlasbot-configmap.yaml | 95 ++++++++++++++++++- .../communication/atlasbot-deployment.yaml | 2 +- 2 files changed, 93 insertions(+), 4 deletions(-) diff --git a/services/communication/atlasbot-configmap.yaml b/services/communication/atlasbot-configmap.yaml index dfbdd2c..672c4f4 100644 --- a/services/communication/atlasbot-configmap.yaml +++ b/services/communication/atlasbot-configmap.yaml @@ -61,6 +61,23 @@ data: "othrys", } + METRIC_HINT_WORDS = { + "health", + "status", + "down", + "slow", + "error", + "unknown_error", + "timeout", + "crash", + "crashloop", + "restart", + "restarts", + "pending", + "unreachable", + "latency", + } + def _tokens(text: str) -> list[str]: toks = [t.lower() for t in TOKEN_RE.findall(text or "")] return [t for t in toks if t not in STOPWORDS and len(t) >= 2] @@ -357,6 +374,42 @@ data: except Exception: return None + def _vm_value_series(res: dict) -> list[dict]: + if not res or (res.get("status") != "success"): + return [] + data = res.get("data") or {} + result = data.get("result") or [] + return result if isinstance(result, list) else [] + + def vm_render_result(res: dict | None, limit: int = 12) -> str: + if not res: + return "" + series = _vm_value_series(res) + if not series: + return "" + out: list[str] = [] + for r in series[:limit]: + if not isinstance(r, dict): + continue + metric = r.get("metric") or {} + value = r.get("value") or [] + val = value[1] if isinstance(value, list) and len(value) > 1 else "" + # Prefer common labels if present. 
+ label_parts = [] + for k in ("namespace", "pod", "container", "node", "instance", "job", "phase"): + if isinstance(metric, dict) and metric.get(k): + label_parts.append(f"{k}={metric.get(k)}") + if not label_parts and isinstance(metric, dict): + for k in sorted(metric.keys()): + if k.startswith("__"): + continue + label_parts.append(f"{k}={metric.get(k)}") + if len(label_parts) >= 4: + break + labels = ", ".join(label_parts) if label_parts else "series" + out.append(f"- {labels}: {val}") + return "\n".join(out) + def vm_top_restarts(hours: int = 1) -> str: q = f"topk(5, sum by (namespace,pod) (increase(kube_pod_container_status_restarts_total[{hours}h])))" res = vm_query(q) @@ -375,6 +428,26 @@ data: out.append(f"- restarts({hours}h): {ns}/{pod} = {val}") return "\n".join(out) + def vm_cluster_snapshot() -> str: + parts: list[str] = [] + # Node readiness (kube-state-metrics). + ready = vm_query('sum(kube_node_status_condition{condition="Ready",status="true"})') + not_ready = vm_query('sum(kube_node_status_condition{condition="Ready",status="false"})') + if ready and not_ready: + try: + r = _vm_value_series(ready)[0]["value"][1] + nr = _vm_value_series(not_ready)[0]["value"][1] + parts.append(f"- nodes ready: {r} (not ready: {nr})") + except Exception: + pass + + phases = vm_query("sum by (phase) (kube_pod_status_phase)") + pr = vm_render_result(phases, limit=8) + if pr: + parts.append("Pod phases:") + parts.append(pr) + return "\n".join(parts).strip() + # Conversation state. 
history = collections.defaultdict(list) # (room_id, sender|None) -> list[str] (short transcript) @@ -411,9 +484,14 @@ data: if flux_bad: parts.append("Flux (not ready):\n" + flux_bad) - restarts = vm_top_restarts(1) - if restarts: - parts.append("VictoriaMetrics (top restarts 1h):\n" + restarts) + p_l = (prompt or "").lower() + if any(w in p_l for w in METRIC_HINT_WORDS): + restarts = vm_top_restarts(1) + if restarts: + parts.append("VictoriaMetrics (top restarts 1h):\n" + restarts) + snap = vm_cluster_snapshot() + if snap: + parts.append("VictoriaMetrics (cluster snapshot):\n" + snap) return "\n\n".join([p for p in parts if p]).strip() @@ -500,6 +578,12 @@ data: # Only do live cluster/metrics introspection in DMs. allow_tools = is_dm + promql = "" + if allow_tools: + m = re.match(r"(?is)^\\s*promql\\s*(?:\\:|\\s)\\s*(.+?)\\s*$", body) + if m: + promql = m.group(1).strip() + # Attempt to scope tools to the most likely workloads when hostnames are mentioned. targets: list[tuple[str, str]] = [] for m in HOST_RE.finditer(body.lower()): @@ -512,6 +596,11 @@ data: targets.append((ns, str(w["name"]))) context = build_context(body, allow_tools=allow_tools, targets=targets) + if allow_tools and promql: + res = vm_query(promql, timeout=20) + rendered = vm_render_result(res, limit=15) or "(no results)" + extra = "VictoriaMetrics (PromQL result):\n" + rendered + context = (context + "\n\n" + extra).strip() if context else extra reply = ollama_reply(hist_key, body, context=context) send_msg(token, rid, reply) diff --git a/services/communication/atlasbot-deployment.yaml b/services/communication/atlasbot-deployment.yaml index 5c6c87e..528d4b2 100644 --- a/services/communication/atlasbot-deployment.yaml +++ b/services/communication/atlasbot-deployment.yaml @@ -16,7 +16,7 @@ spec: labels: app: atlasbot annotations: - checksum/atlasbot-configmap: edd1d61d8010197b948343dff3d7a8913017e79a0a0098008213452f50361b44 + checksum/atlasbot-configmap: 
80fa4d62ccafbfbcdeb63f0976cbea36aada12649f15f8570932296db5d48949 spec: serviceAccountName: atlasbot nodeSelector: -- 2.47.2 From 45563f74b3551cf939f25fb5c30f397af84484c5 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 21:07:33 -0300 Subject: [PATCH 462/684] nextcloud: run install occ as www-data --- services/nextcloud/deployment.yaml | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 55a5687..8674874 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -34,7 +34,8 @@ spec: mkdir -p /var/www/html/data || true chown 33:33 /var/www/html || true chmod 775 /var/www/html || true - chown -R 33:33 /var/www/html/config || true + chown 33:33 /var/www/html/config || true + chown 33:33 /var/www/html/config/config.php || true chown -R 33:33 /var/www/html/data || true chown -R 33:33 /var/www/html/apps /var/www/html/custom_apps || true securityContext: @@ -55,16 +56,17 @@ spec: args: - | if [ ! 
-s /var/www/html/config/config.php ]; then - php /var/www/html/occ maintenance:install \ + su -s /bin/sh www-data -c "php /var/www/html/occ maintenance:install \ --database pgsql \ - --database-host "${POSTGRES_HOST}" \ - --database-name "${POSTGRES_DB}" \ - --database-user "${POSTGRES_USER}" \ - --database-pass "${POSTGRES_PASSWORD}" \ - --admin-user "${NEXTCLOUD_ADMIN_USER}" \ - --admin-pass "${NEXTCLOUD_ADMIN_PASSWORD}" \ - --data-dir "/var/www/html/data" - chown -R 33:33 /var/www/html/config /var/www/html/data || true + --database-host \"${POSTGRES_HOST}\" \ + --database-name \"${POSTGRES_DB}\" \ + --database-user \"${POSTGRES_USER}\" \ + --database-pass \"${POSTGRES_PASSWORD}\" \ + --admin-user \"${NEXTCLOUD_ADMIN_USER}\" \ + --admin-pass \"${NEXTCLOUD_ADMIN_PASSWORD}\" \ + --data-dir \"/var/www/html/data\"" + chown 33:33 /var/www/html/config/config.php || true + chown -R 33:33 /var/www/html/data || true fi env: - name: POSTGRES_HOST -- 2.47.2 From c954fb75461ff8b78847181e95edee8c9914053c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 21:15:52 -0300 Subject: [PATCH 463/684] nextcloud: add one-time db reset job --- services/nextcloud/db-reset-job.yaml | 38 +++++++++++++++++++++++++++ services/nextcloud/kustomization.yaml | 5 ++-- 2 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 services/nextcloud/db-reset-job.yaml diff --git a/services/nextcloud/db-reset-job.yaml b/services/nextcloud/db-reset-job.yaml new file mode 100644 index 0000000..923c14a --- /dev/null +++ b/services/nextcloud/db-reset-job.yaml @@ -0,0 +1,38 @@ +# services/nextcloud/db-reset-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: nextcloud-db-reset + namespace: nextcloud +spec: + backoffLimit: 0 + ttlSecondsAfterFinished: 600 + template: + spec: + restartPolicy: Never + containers: + - name: psql + image: postgres:16 + env: + - name: PGHOST + value: postgres-service.postgres.svc.cluster.local + - name: PGUSER + valueFrom: + secretKeyRef: + name: 
nextcloud-db + key: db-username + - name: PGPASSWORD + valueFrom: + secretKeyRef: + name: nextcloud-db + key: db-password + - name: NC_DB + valueFrom: + secretKeyRef: + name: nextcloud-db + key: database + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + psql -d "${NC_DB}" -v ON_ERROR_STOP=1 -c "DROP SCHEMA public CASCADE; CREATE SCHEMA public;" diff --git a/services/nextcloud/kustomization.yaml b/services/nextcloud/kustomization.yaml index 66b00d4..65a07bc 100644 --- a/services/nextcloud/kustomization.yaml +++ b/services/nextcloud/kustomization.yaml @@ -6,11 +6,12 @@ resources: - namespace.yaml - configmap.yaml - pvc.yaml + - db-reset-job.yaml - deployment.yaml - - service.yaml - - ingress.yaml - cronjob.yaml - maintenance-cronjob.yaml + - service.yaml + - ingress.yaml configMapGenerator: - name: nextcloud-maintenance-script files: -- 2.47.2 From b9d75d279cfb0893ee7ed3e0320861aaa15236b3 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 21:18:16 -0300 Subject: [PATCH 464/684] nextcloud: reinstall when config not installed --- services/nextcloud/deployment.yaml | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 8674874..62f2d14 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -55,16 +55,19 @@ spec: command: ["/bin/sh", "-c"] args: - | - if [ ! -s /var/www/html/config/config.php ]; then - su -s /bin/sh www-data -c "php /var/www/html/occ maintenance:install \ + installed="$(su -s /bin/sh www-data -c \"php /var/www/html/occ status\" 2>/dev/null | awk '/installed:/{print $3}' || true)" + if [ ! 
-s /var/www/html/config/config.php ] || [ \"${installed}\" != \"true\" ]; then + rm -f /var/www/html/config/config.php || true + rm -rf /var/www/html/data/* || true + su -s /bin/sh www-data -c \"php /var/www/html/occ maintenance:install \ --database pgsql \ - --database-host \"${POSTGRES_HOST}\" \ - --database-name \"${POSTGRES_DB}\" \ - --database-user \"${POSTGRES_USER}\" \ - --database-pass \"${POSTGRES_PASSWORD}\" \ - --admin-user \"${NEXTCLOUD_ADMIN_USER}\" \ - --admin-pass \"${NEXTCLOUD_ADMIN_PASSWORD}\" \ - --data-dir \"/var/www/html/data\"" + --database-host \\\"${POSTGRES_HOST}\\\" \ + --database-name \\\"${POSTGRES_DB}\\\" \ + --database-user \\\"${POSTGRES_USER}\\\" \ + --database-pass \\\"${POSTGRES_PASSWORD}\\\" \ + --admin-user \\\"${NEXTCLOUD_ADMIN_USER}\\\" \ + --admin-pass \\\"${NEXTCLOUD_ADMIN_PASSWORD}\\\" \ + --data-dir \\\"/var/www/html/data\\\"\" chown 33:33 /var/www/html/config/config.php || true chown -R 33:33 /var/www/html/data || true fi -- 2.47.2 From e5cb4571d8c01882b4af226e5065ca981661fa1f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 21:22:12 -0300 Subject: [PATCH 465/684] nextcloud: fix install command quoting --- services/nextcloud/deployment.yaml | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 62f2d14..a8eb6f9 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -59,15 +59,7 @@ spec: if [ ! 
-s /var/www/html/config/config.php ] || [ \"${installed}\" != \"true\" ]; then rm -f /var/www/html/config/config.php || true rm -rf /var/www/html/data/* || true - su -s /bin/sh www-data -c \"php /var/www/html/occ maintenance:install \ - --database pgsql \ - --database-host \\\"${POSTGRES_HOST}\\\" \ - --database-name \\\"${POSTGRES_DB}\\\" \ - --database-user \\\"${POSTGRES_USER}\\\" \ - --database-pass \\\"${POSTGRES_PASSWORD}\\\" \ - --admin-user \\\"${NEXTCLOUD_ADMIN_USER}\\\" \ - --admin-pass \\\"${NEXTCLOUD_ADMIN_PASSWORD}\\\" \ - --data-dir \\\"/var/www/html/data\\\"\" + su -s /bin/sh www-data -c \"php /var/www/html/occ maintenance:install --database pgsql --database-host \\\"${POSTGRES_HOST}\\\" --database-name \\\"${POSTGRES_DB}\\\" --database-user \\\"${POSTGRES_USER}\\\" --database-pass \\\"${POSTGRES_PASSWORD}\\\" --admin-user \\\"${NEXTCLOUD_ADMIN_USER}\\\" --admin-pass \\\"${NEXTCLOUD_ADMIN_PASSWORD}\\\" --data-dir /var/www/html/data\" chown 33:33 /var/www/html/config/config.php || true chown -R 33:33 /var/www/html/data || true fi -- 2.47.2 From 36552e425ff9c164acd61c7343d67248fd50eebb Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 21:24:36 -0300 Subject: [PATCH 466/684] nextcloud: fix su command quoting --- services/nextcloud/deployment.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index a8eb6f9..20b2d2f 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -55,11 +55,11 @@ spec: command: ["/bin/sh", "-c"] args: - | - installed="$(su -s /bin/sh www-data -c \"php /var/www/html/occ status\" 2>/dev/null | awk '/installed:/{print $3}' || true)" - if [ ! -s /var/www/html/config/config.php ] || [ \"${installed}\" != \"true\" ]; then + installed="$(su -s /bin/sh www-data -c "php /var/www/html/occ status" 2>/dev/null | awk '/installed:/{print $3}' || true)" + if [ ! 
-s /var/www/html/config/config.php ] || [ "${installed}" != "true" ]; then rm -f /var/www/html/config/config.php || true rm -rf /var/www/html/data/* || true - su -s /bin/sh www-data -c \"php /var/www/html/occ maintenance:install --database pgsql --database-host \\\"${POSTGRES_HOST}\\\" --database-name \\\"${POSTGRES_DB}\\\" --database-user \\\"${POSTGRES_USER}\\\" --database-pass \\\"${POSTGRES_PASSWORD}\\\" --admin-user \\\"${NEXTCLOUD_ADMIN_USER}\\\" --admin-pass \\\"${NEXTCLOUD_ADMIN_PASSWORD}\\\" --data-dir /var/www/html/data\" + su -s /bin/sh www-data -c "php /var/www/html/occ maintenance:install --database pgsql --database-host \"${POSTGRES_HOST}\" --database-name \"${POSTGRES_DB}\" --database-user \"${POSTGRES_USER}\" --database-pass \"${POSTGRES_PASSWORD}\" --admin-user \"${NEXTCLOUD_ADMIN_USER}\" --admin-pass \"${NEXTCLOUD_ADMIN_PASSWORD}\" --data-dir /var/www/html/data" chown 33:33 /var/www/html/config/config.php || true chown -R 33:33 /var/www/html/data || true fi -- 2.47.2 From 39d57613db25d022b2381fa348a545c7e6cba2c6 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 21:27:06 -0300 Subject: [PATCH 467/684] nextcloud: remove db reset job --- services/nextcloud/db-reset-job.yaml | 38 --------------------------- services/nextcloud/kustomization.yaml | 1 - 2 files changed, 39 deletions(-) delete mode 100644 services/nextcloud/db-reset-job.yaml diff --git a/services/nextcloud/db-reset-job.yaml b/services/nextcloud/db-reset-job.yaml deleted file mode 100644 index 923c14a..0000000 --- a/services/nextcloud/db-reset-job.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# services/nextcloud/db-reset-job.yaml -apiVersion: batch/v1 -kind: Job -metadata: - name: nextcloud-db-reset - namespace: nextcloud -spec: - backoffLimit: 0 - ttlSecondsAfterFinished: 600 - template: - spec: - restartPolicy: Never - containers: - - name: psql - image: postgres:16 - env: - - name: PGHOST - value: postgres-service.postgres.svc.cluster.local - - name: PGUSER - valueFrom: - 
secretKeyRef: - name: nextcloud-db - key: db-username - - name: PGPASSWORD - valueFrom: - secretKeyRef: - name: nextcloud-db - key: db-password - - name: NC_DB - valueFrom: - secretKeyRef: - name: nextcloud-db - key: database - command: ["/bin/sh", "-c"] - args: - - | - set -euo pipefail - psql -d "${NC_DB}" -v ON_ERROR_STOP=1 -c "DROP SCHEMA public CASCADE; CREATE SCHEMA public;" diff --git a/services/nextcloud/kustomization.yaml b/services/nextcloud/kustomization.yaml index 65a07bc..271b854 100644 --- a/services/nextcloud/kustomization.yaml +++ b/services/nextcloud/kustomization.yaml @@ -6,7 +6,6 @@ resources: - namespace.yaml - configmap.yaml - pvc.yaml - - db-reset-job.yaml - deployment.yaml - cronjob.yaml - maintenance-cronjob.yaml -- 2.47.2 From 5fe584cc5fa7e0e8064abafa358e478a20dee595 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 21:35:31 -0300 Subject: [PATCH 468/684] nextcloud: ensure oidc/mail/external apps installed --- services/nextcloud/deployment.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 20b2d2f..e148707 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -63,6 +63,13 @@ spec: chown 33:33 /var/www/html/config/config.php || true chown -R 33:33 /var/www/html/data || true fi + installed="$(su -s /bin/sh www-data -c "php /var/www/html/occ status" 2>/dev/null | awk '/installed:/{print $3}' || true)" + if [ "${installed}" = "true" ]; then + for app in oidc_login external mail; do + su -s /bin/sh www-data -c "php /var/www/html/occ app:install ${app}" || true + su -s /bin/sh www-data -c "php /var/www/html/occ app:enable ${app}" || true + done + fi env: - name: POSTGRES_HOST value: postgres-service.postgres.svc.cluster.local -- 2.47.2 From 7a7433f8248afd70915c29c9dfdf1cf866334e46 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 21:39:55 -0300 Subject: [PATCH 469/684] nextcloud: install 
oidc/mail/external apps from releases --- services/nextcloud/deployment.yaml | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index e148707..53678a4 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -65,10 +65,22 @@ spec: fi installed="$(su -s /bin/sh www-data -c "php /var/www/html/occ status" 2>/dev/null | awk '/installed:/{print $3}' || true)" if [ "${installed}" = "true" ]; then - for app in oidc_login external mail; do - su -s /bin/sh www-data -c "php /var/www/html/occ app:install ${app}" || true + install_app() { + app="$1" + url="$2" + target="/var/www/html/custom_apps/${app}" + if [ ! -d "${target}" ]; then + mkdir -p /tmp/nextcloud-apps + curl -fsSL "${url}" -o "/tmp/nextcloud-apps/${app}.tar.gz" + tar -xzf "/tmp/nextcloud-apps/${app}.tar.gz" -C /var/www/html/custom_apps + rm -f "/tmp/nextcloud-apps/${app}.tar.gz" + chown -R 33:33 "${target}" + fi su -s /bin/sh www-data -c "php /var/www/html/occ app:enable ${app}" || true - done + } + install_app oidc_login https://github.com/pulsejet/nextcloud-oidc-login/releases/latest/download/oidc_login.tar.gz + install_app external https://github.com/nextcloud/external/releases/latest/download/external.tar.gz + install_app mail https://github.com/nextcloud/mail/releases/latest/download/mail.tar.gz fi env: - name: POSTGRES_HOST -- 2.47.2 From 93b219e5715cc9e5cfc50b4dbac2d3886514cf78 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 21:43:36 -0300 Subject: [PATCH 470/684] nextcloud: pin app download URLs --- services/nextcloud/deployment.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 53678a4..af4cb39 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -78,9 +78,9 @@ spec: fi su -s /bin/sh www-data -c "php 
/var/www/html/occ app:enable ${app}" || true } - install_app oidc_login https://github.com/pulsejet/nextcloud-oidc-login/releases/latest/download/oidc_login.tar.gz - install_app external https://github.com/nextcloud/external/releases/latest/download/external.tar.gz - install_app mail https://github.com/nextcloud/mail/releases/latest/download/mail.tar.gz + install_app oidc_login https://github.com/pulsejet/nextcloud-oidc-login/releases/download/v3.2.2/oidc_login.tar.gz + install_app external https://github.com/nextcloud-releases/external/releases/download/v7.0.0/external-v7.0.0.tar.gz + install_app mail https://github.com/nextcloud-releases/mail/releases/download/v5.6.5/mail-v5.6.5.tar.gz fi env: - name: POSTGRES_HOST -- 2.47.2 From 99e56fe1b44b519000e245756149016feda74a71 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 21:51:19 -0300 Subject: [PATCH 471/684] nextcloud: register custom apps path --- services/nextcloud/configmap.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/services/nextcloud/configmap.yaml b/services/nextcloud/configmap.yaml index a6e917c..febcac8 100644 --- a/services/nextcloud/configmap.yaml +++ b/services/nextcloud/configmap.yaml @@ -25,6 +25,21 @@ data: 'mail_smtpauthtype' => 'LOGIN', 'mail_domain' => 'bstein.dev', 'mail_from_address' => 'no-reply', + 'apps_paths' => + array ( + 0 => + array ( + 'path' => '/var/www/html/apps', + 'url' => '/apps', + 'writable' => false, + ), + 1 => + array ( + 'path' => '/var/www/html/custom_apps', + 'url' => '/custom_apps', + 'writable' => true, + ), + ), 'oidc_login_provider_url' => 'https://sso.bstein.dev/realms/atlas', 'oidc_login_client_id' => getenv('OIDC_CLIENT_ID'), 'oidc_login_client_secret' => getenv('OIDC_CLIENT_SECRET'), -- 2.47.2 From f1e94717ed59ec0f2096c5b003835f555982c29b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 22:03:01 -0300 Subject: [PATCH 472/684] nextcloud: pin mail/external app versions for 29 --- 
services/nextcloud/deployment.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index af4cb39..a5b9ef1 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -79,8 +79,8 @@ spec: su -s /bin/sh www-data -c "php /var/www/html/occ app:enable ${app}" || true } install_app oidc_login https://github.com/pulsejet/nextcloud-oidc-login/releases/download/v3.2.2/oidc_login.tar.gz - install_app external https://github.com/nextcloud-releases/external/releases/download/v7.0.0/external-v7.0.0.tar.gz - install_app mail https://github.com/nextcloud-releases/mail/releases/download/v5.6.5/mail-v5.6.5.tar.gz + install_app external https://github.com/nextcloud-releases/external/releases/download/v5.4.1/external-v5.4.1.tar.gz + install_app mail https://github.com/nextcloud/mail/releases/download/v3.7.24/mail-3.7.24.tar.gz fi env: - name: POSTGRES_HOST -- 2.47.2 From a15a2ce923b2f7c4930dbeaa627e4c3ecf31db44 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 22:09:16 -0300 Subject: [PATCH 473/684] nextcloud: reinstall custom apps with compatible mail --- services/nextcloud/deployment.yaml | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index a5b9ef1..1f6f89f 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -69,18 +69,17 @@ spec: app="$1" url="$2" target="/var/www/html/custom_apps/${app}" - if [ ! 
-d "${target}" ]; then - mkdir -p /tmp/nextcloud-apps - curl -fsSL "${url}" -o "/tmp/nextcloud-apps/${app}.tar.gz" - tar -xzf "/tmp/nextcloud-apps/${app}.tar.gz" -C /var/www/html/custom_apps - rm -f "/tmp/nextcloud-apps/${app}.tar.gz" - chown -R 33:33 "${target}" - fi - su -s /bin/sh www-data -c "php /var/www/html/occ app:enable ${app}" || true + rm -rf "${target}" + mkdir -p /tmp/nextcloud-apps + curl -fsSL "${url}" -o "/tmp/nextcloud-apps/${app}.tar.gz" + tar -xzf "/tmp/nextcloud-apps/${app}.tar.gz" -C /var/www/html/custom_apps + rm -f "/tmp/nextcloud-apps/${app}.tar.gz" + chown -R 33:33 "${target}" + su -s /bin/sh www-data -c "php /var/www/html/occ app:enable --force ${app}" || true } install_app oidc_login https://github.com/pulsejet/nextcloud-oidc-login/releases/download/v3.2.2/oidc_login.tar.gz install_app external https://github.com/nextcloud-releases/external/releases/download/v5.4.1/external-v5.4.1.tar.gz - install_app mail https://github.com/nextcloud/mail/releases/download/v3.7.24/mail-3.7.24.tar.gz + install_app mail https://github.com/nextcloud-releases/mail/releases/download/v3.7.24/mail-stable3.7.tar.gz fi env: - name: POSTGRES_HOST -- 2.47.2 From 2d6883eb671b83e7fdcb6279af28f3e713340b09 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 22:16:51 -0300 Subject: [PATCH 474/684] nextcloud: restore mimetype defaults for external app --- services/nextcloud/deployment.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 1f6f89f..8cad32f 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -65,6 +65,17 @@ spec: fi installed="$(su -s /bin/sh www-data -c "php /var/www/html/occ status" 2>/dev/null | awk '/installed:/{print $3}' || true)" if [ "${installed}" = "true" ]; then + ensure_mime_defaults() { + cfg_dir="/var/www/html/resources/config" + mkdir -p "${cfg_dir}" + if [ ! 
-s "${cfg_dir}/mimetypemapping.dist.json" ]; then + curl -fsSL https://raw.githubusercontent.com/nextcloud/server/v29.0.16/resources/config/mimetypemapping.dist.json -o "${cfg_dir}/mimetypemapping.dist.json" || true + fi + if [ ! -s "${cfg_dir}/mimetypealiases.dist.json" ]; then + curl -fsSL https://raw.githubusercontent.com/nextcloud/server/v29.0.16/resources/config/mimetypealiases.dist.json -o "${cfg_dir}/mimetypealiases.dist.json" || true + fi + chown -R 33:33 "${cfg_dir}" || true + } install_app() { app="$1" url="$2" @@ -77,6 +88,7 @@ spec: chown -R 33:33 "${target}" su -s /bin/sh www-data -c "php /var/www/html/occ app:enable --force ${app}" || true } + ensure_mime_defaults install_app oidc_login https://github.com/pulsejet/nextcloud-oidc-login/releases/download/v3.2.2/oidc_login.tar.gz install_app external https://github.com/nextcloud-releases/external/releases/download/v5.4.1/external-v5.4.1.tar.gz install_app mail https://github.com/nextcloud-releases/mail/releases/download/v3.7.24/mail-stable3.7.tar.gz -- 2.47.2 From 9d9aa5b64b1c48b6feed01d203fe66af0755c902 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 23:54:33 -0300 Subject: [PATCH 475/684] nextcloud: force OIDC login --- services/nextcloud/configmap.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/nextcloud/configmap.yaml b/services/nextcloud/configmap.yaml index febcac8..45f5665 100644 --- a/services/nextcloud/configmap.yaml +++ b/services/nextcloud/configmap.yaml @@ -43,10 +43,10 @@ data: 'oidc_login_provider_url' => 'https://sso.bstein.dev/realms/atlas', 'oidc_login_client_id' => getenv('OIDC_CLIENT_ID'), 'oidc_login_client_secret' => getenv('OIDC_CLIENT_SECRET'), - 'oidc_login_auto_redirect' => false, + 'oidc_login_auto_redirect' => true, 'oidc_login_end_session_redirect' => true, 'oidc_login_button_text' => 'Login with Keycloak', - 'oidc_login_hide_password_form' => false, + 'oidc_login_hide_password_form' => true, 'oidc_login_attributes' => 
array ( 'id' => 'preferred_username', @@ -56,7 +56,7 @@ data: 'oidc_login_scope' => 'openid profile email', 'oidc_login_unique_id' => 'preferred_username', 'oidc_login_use_pkce' => true, - 'oidc_login_disable_registration' => false, + 'oidc_login_disable_registration' => true, 'oidc_login_create_groups' => false, # External storage for user data should be configured to Asteria via the External Storage app (admin UI), # keeping the astreae PVC for app internals only. -- 2.47.2 From 16dc0e16f16b530713b3cae997871a62771ac245 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 00:03:57 -0300 Subject: [PATCH 476/684] nextcloud: enforce OIDC-only config --- services/nextcloud/deployment.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 8cad32f..bbdd824 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -65,6 +65,14 @@ spec: fi installed="$(su -s /bin/sh www-data -c "php /var/www/html/occ status" 2>/dev/null | awk '/installed:/{print $3}' || true)" if [ "${installed}" = "true" ]; then + configure_oidc() { + su -s /bin/sh www-data -c "php /var/www/html/occ config:system:set oidc_login_provider_url --value='https://sso.bstein.dev/realms/atlas'" + su -s /bin/sh www-data -c "php /var/www/html/occ config:system:set oidc_login_client_id --value='${OIDC_CLIENT_ID}'" + su -s /bin/sh www-data -c "php /var/www/html/occ config:system:set oidc_login_client_secret --value='${OIDC_CLIENT_SECRET}'" + su -s /bin/sh www-data -c "php /var/www/html/occ config:system:set oidc_login_auto_redirect --type=boolean --value=true" + su -s /bin/sh www-data -c "php /var/www/html/occ config:system:set oidc_login_hide_password_form --type=boolean --value=true" + su -s /bin/sh www-data -c "php /var/www/html/occ config:system:set oidc_login_disable_registration --type=boolean --value=true" + } ensure_mime_defaults() { 
cfg_dir="/var/www/html/resources/config" mkdir -p "${cfg_dir}" @@ -92,6 +100,7 @@ spec: install_app oidc_login https://github.com/pulsejet/nextcloud-oidc-login/releases/download/v3.2.2/oidc_login.tar.gz install_app external https://github.com/nextcloud-releases/external/releases/download/v5.4.1/external-v5.4.1.tar.gz install_app mail https://github.com/nextcloud-releases/mail/releases/download/v3.7.24/mail-stable3.7.tar.gz + configure_oidc fi env: - name: POSTGRES_HOST @@ -121,6 +130,16 @@ spec: secretKeyRef: name: nextcloud-admin key: admin-password + - name: OIDC_CLIENT_ID + valueFrom: + secretKeyRef: + name: nextcloud-oidc + key: client-id + - name: OIDC_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: nextcloud-oidc + key: client-secret volumeMounts: - name: nextcloud-data mountPath: /var/www/html -- 2.47.2 From 58bc646621ecd3549192815c191687707f60d400 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 00:12:21 -0300 Subject: [PATCH 477/684] nextcloud: allow OIDC auto user creation --- services/nextcloud/configmap.yaml | 2 +- services/nextcloud/deployment.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/nextcloud/configmap.yaml b/services/nextcloud/configmap.yaml index 45f5665..c49a7b4 100644 --- a/services/nextcloud/configmap.yaml +++ b/services/nextcloud/configmap.yaml @@ -56,7 +56,7 @@ data: 'oidc_login_scope' => 'openid profile email', 'oidc_login_unique_id' => 'preferred_username', 'oidc_login_use_pkce' => true, - 'oidc_login_disable_registration' => true, + 'oidc_login_disable_registration' => false, 'oidc_login_create_groups' => false, # External storage for user data should be configured to Asteria via the External Storage app (admin UI), # keeping the astreae PVC for app internals only. 
diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index bbdd824..c28f23a 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -71,7 +71,7 @@ spec: su -s /bin/sh www-data -c "php /var/www/html/occ config:system:set oidc_login_client_secret --value='${OIDC_CLIENT_SECRET}'" su -s /bin/sh www-data -c "php /var/www/html/occ config:system:set oidc_login_auto_redirect --type=boolean --value=true" su -s /bin/sh www-data -c "php /var/www/html/occ config:system:set oidc_login_hide_password_form --type=boolean --value=true" - su -s /bin/sh www-data -c "php /var/www/html/occ config:system:set oidc_login_disable_registration --type=boolean --value=true" + su -s /bin/sh www-data -c "php /var/www/html/occ config:system:set oidc_login_disable_registration --type=boolean --value=false" } ensure_mime_defaults() { cfg_dir="/var/www/html/resources/config" -- 2.47.2 From 8749d8a884bb544b7f47628306615ba7a134eeec Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 01:10:24 -0300 Subject: [PATCH 478/684] nextcloud: rebind data pvc to prior volume --- services/nextcloud/pvc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/nextcloud/pvc.yaml b/services/nextcloud/pvc.yaml index dd929b6..3b855cc 100644 --- a/services/nextcloud/pvc.yaml +++ b/services/nextcloud/pvc.yaml @@ -11,3 +11,4 @@ spec: requests: storage: 200Gi storageClassName: astreae + volumeName: pvc-061a70fd-1dc5-4c37-8f3e-2c7156c26ae6 -- 2.47.2 From ef0dfab20cda26c580a8022f8f8265ad6cc926fb Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 02:47:12 -0300 Subject: [PATCH 479/684] mailu: harden postfix relay restrictions --- services/mailu/helmrelease.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/services/mailu/helmrelease.yaml b/services/mailu/helmrelease.yaml index 0344a2f..63b76bc 100644 --- a/services/mailu/helmrelease.yaml +++ b/services/mailu/helmrelease.yaml @@ -218,22 +218,26 @@ 
spec: hardware: rpi4 overrides: postfix.cf: | + mynetworks = 127.0.0.0/8 [::1]/128 10.42.0.0/16 10.43.0.0/16 192.168.22.0/24 + smtpd_delay_reject = yes smtpd_helo_required = yes - smtpd_helo_restrictions = reject_invalid_helo_hostname, reject_non_fqdn_helo_hostname + smtpd_helo_restrictions = reject_invalid_helo_hostname, reject_non_fqdn_helo_hostname, reject_unknown_helo_hostname smtpd_sasl_auth_enable = yes smtpd_sasl_type = dovecot smtpd_sasl_path = private/auth smtpd_sasl_security_options = noanonymous smtpd_sasl_tls_security_options = noanonymous - smtpd_client_restrictions = permit_mynetworks, permit_sasl_authenticated, reject_unauth_pipelining + smtpd_client_restrictions = permit_mynetworks, permit_sasl_authenticated, reject_unauth_pipelining, reject_unknown_client_hostname smtpd_recipient_restrictions = permit_mynetworks, permit_sasl_authenticated, reject_unauth_destination, reject_non_fqdn_recipient, reject_unknown_recipient_domain smtpd_relay_restrictions = permit_sasl_authenticated, reject_unauth_destination smtpd_sender_restrictions = reject_non_fqdn_sender, reject_unknown_sender_domain, reject_sender_login_mismatch, reject_authenticated_sender_login_mismatch smtpd_tls_auth_only = yes smtpd_forbid_unauth_pipelining = yes + smtpd_client_connection_count_limit = 20 smtpd_client_connection_rate_limit = 30 smtpd_client_message_rate_limit = 100 smtpd_client_recipient_rate_limit = 200 + smtpd_recipient_limit = 100 podAnnotations: bstein.dev/restarted-at: "2026-01-06T00:00:00Z" redis: -- 2.47.2 From 428c2b54355999dd11ba3ebb654e16dfee1e723a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 03:39:59 -0300 Subject: [PATCH 480/684] nextcloud: restore app and user-data volumes --- services/nextcloud/cronjob.yaml | 11 +++++--- services/nextcloud/deployment.yaml | 19 ++++++++++---- services/nextcloud/maintenance-cronjob.yaml | 11 +++++--- services/nextcloud/pvc.yaml | 28 +++++++++++++++++++++ 4 files changed, 58 insertions(+), 11 deletions(-) diff --git 
a/services/nextcloud/cronjob.yaml b/services/nextcloud/cronjob.yaml index 86c55e1..7900bb1 100644 --- a/services/nextcloud/cronjob.yaml +++ b/services/nextcloud/cronjob.yaml @@ -24,9 +24,14 @@ spec: args: - "cd /var/www/html && php -f cron.php" volumeMounts: - - name: nextcloud-data + - name: nextcloud-app mountPath: /var/www/html + - name: nextcloud-user-data + mountPath: /var/www/html/data volumes: - - name: nextcloud-data + - name: nextcloud-app persistentVolumeClaim: - claimName: nextcloud-data + claimName: nextcloud-app + - name: nextcloud-user-data + persistentVolumeClaim: + claimName: nextcloud-user-data diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index c28f23a..883d2f1 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -42,8 +42,10 @@ spec: runAsUser: 0 runAsGroup: 0 volumeMounts: - - name: nextcloud-data + - name: nextcloud-app mountPath: /var/www/html + - name: nextcloud-user-data + mountPath: /var/www/html/data - name: nextcloud-config mountPath: /var/www/html/config/extra.config.php subPath: extra.config.php @@ -141,8 +143,10 @@ spec: name: nextcloud-oidc key: client-secret volumeMounts: - - name: nextcloud-data + - name: nextcloud-app mountPath: /var/www/html + - name: nextcloud-user-data + mountPath: /var/www/html/data - name: nextcloud-config mountPath: /var/www/html/config/extra.config.php subPath: extra.config.php @@ -228,8 +232,10 @@ spec: - containerPort: 80 name: http volumeMounts: - - name: nextcloud-data + - name: nextcloud-app mountPath: /var/www/html + - name: nextcloud-user-data + mountPath: /var/www/html/data - name: nextcloud-config mountPath: /var/www/html/config/extra.config.php subPath: extra.config.php @@ -241,9 +247,12 @@ spec: cpu: 1 memory: 3Gi volumes: - - name: nextcloud-data + - name: nextcloud-app persistentVolumeClaim: - claimName: nextcloud-data + claimName: nextcloud-app + - name: nextcloud-user-data + persistentVolumeClaim: + claimName: 
nextcloud-user-data - name: nextcloud-config configMap: name: nextcloud-config diff --git a/services/nextcloud/maintenance-cronjob.yaml b/services/nextcloud/maintenance-cronjob.yaml index 55fcbd1..c9421dd 100644 --- a/services/nextcloud/maintenance-cronjob.yaml +++ b/services/nextcloud/maintenance-cronjob.yaml @@ -34,8 +34,10 @@ spec: name: nextcloud-admin key: admin-password volumeMounts: - - name: nextcloud-data + - name: nextcloud-app mountPath: /var/www/html + - name: nextcloud-user-data + mountPath: /var/www/html/data - name: maintenance-script mountPath: /maintenance/maintenance.sh subPath: maintenance.sh @@ -47,9 +49,12 @@ spec: cpu: 500m memory: 512Mi volumes: - - name: nextcloud-data + - name: nextcloud-app persistentVolumeClaim: - claimName: nextcloud-data + claimName: nextcloud-app + - name: nextcloud-user-data + persistentVolumeClaim: + claimName: nextcloud-user-data - name: maintenance-script configMap: name: nextcloud-maintenance-script diff --git a/services/nextcloud/pvc.yaml b/services/nextcloud/pvc.yaml index 3b855cc..1a660a5 100644 --- a/services/nextcloud/pvc.yaml +++ b/services/nextcloud/pvc.yaml @@ -1,6 +1,34 @@ # services/nextcloud/pvc.yaml apiVersion: v1 kind: PersistentVolumeClaim +metadata: + name: nextcloud-app + namespace: nextcloud +spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 20Gi + storageClassName: astreae + volumeName: pvc-9cf910d9-ae30-48e6-8d90-a6cbbf3cd2cf +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: nextcloud-user-data + namespace: nextcloud +spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 2Ti + storageClassName: asteria + volumeName: pvc-ee0fac85-d81b-48ad-aa0e-837a4be409b7 +--- +apiVersion: v1 +kind: PersistentVolumeClaim metadata: name: nextcloud-data namespace: nextcloud -- 2.47.2 From da81946771ca7d1706783c130744c0aa3fadea10 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 03:43:57 -0300 Subject: [PATCH 481/684] nextcloud: rebind 
user data pvc to restore data --- services/nextcloud/pvc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/nextcloud/pvc.yaml b/services/nextcloud/pvc.yaml index 1a660a5..d5223cb 100644 --- a/services/nextcloud/pvc.yaml +++ b/services/nextcloud/pvc.yaml @@ -25,7 +25,7 @@ spec: requests: storage: 2Ti storageClassName: asteria - volumeName: pvc-ee0fac85-d81b-48ad-aa0e-837a4be409b7 + volumeName: pvc-d918158d-422f-4928-beaa-27862611fbe5 --- apiVersion: v1 kind: PersistentVolumeClaim -- 2.47.2 From 46c0a4e290121c008802262bd1e89650ce5ea973 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 03:52:14 -0300 Subject: [PATCH 482/684] nextcloud: restore single data volume mount --- services/nextcloud/cronjob.yaml | 11 +++------ services/nextcloud/deployment.yaml | 25 ++++++--------------- services/nextcloud/maintenance-cronjob.yaml | 11 +++------ 3 files changed, 13 insertions(+), 34 deletions(-) diff --git a/services/nextcloud/cronjob.yaml b/services/nextcloud/cronjob.yaml index 7900bb1..86c55e1 100644 --- a/services/nextcloud/cronjob.yaml +++ b/services/nextcloud/cronjob.yaml @@ -24,14 +24,9 @@ spec: args: - "cd /var/www/html && php -f cron.php" volumeMounts: - - name: nextcloud-app + - name: nextcloud-data mountPath: /var/www/html - - name: nextcloud-user-data - mountPath: /var/www/html/data volumes: - - name: nextcloud-app + - name: nextcloud-data persistentVolumeClaim: - claimName: nextcloud-app - - name: nextcloud-user-data - persistentVolumeClaim: - claimName: nextcloud-user-data + claimName: nextcloud-data diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 883d2f1..d2f7429 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -42,10 +42,8 @@ spec: runAsUser: 0 runAsGroup: 0 volumeMounts: - - name: nextcloud-app + - name: nextcloud-data mountPath: /var/www/html - - name: nextcloud-user-data - mountPath: /var/www/html/data - name: nextcloud-config 
mountPath: /var/www/html/config/extra.config.php subPath: extra.config.php @@ -58,9 +56,7 @@ spec: args: - | installed="$(su -s /bin/sh www-data -c "php /var/www/html/occ status" 2>/dev/null | awk '/installed:/{print $3}' || true)" - if [ ! -s /var/www/html/config/config.php ] || [ "${installed}" != "true" ]; then - rm -f /var/www/html/config/config.php || true - rm -rf /var/www/html/data/* || true + if [ ! -s /var/www/html/config/config.php ] && [ ! -f /var/www/html/data/.ocdata ]; then su -s /bin/sh www-data -c "php /var/www/html/occ maintenance:install --database pgsql --database-host \"${POSTGRES_HOST}\" --database-name \"${POSTGRES_DB}\" --database-user \"${POSTGRES_USER}\" --database-pass \"${POSTGRES_PASSWORD}\" --admin-user \"${NEXTCLOUD_ADMIN_USER}\" --admin-pass \"${NEXTCLOUD_ADMIN_PASSWORD}\" --data-dir /var/www/html/data" chown 33:33 /var/www/html/config/config.php || true chown -R 33:33 /var/www/html/data || true @@ -143,10 +139,8 @@ spec: name: nextcloud-oidc key: client-secret volumeMounts: - - name: nextcloud-app + - name: nextcloud-data mountPath: /var/www/html - - name: nextcloud-user-data - mountPath: /var/www/html/data - name: nextcloud-config mountPath: /var/www/html/config/extra.config.php subPath: extra.config.php @@ -232,14 +226,12 @@ spec: - containerPort: 80 name: http volumeMounts: - - name: nextcloud-app + - name: nextcloud-data mountPath: /var/www/html - - name: nextcloud-user-data - mountPath: /var/www/html/data - name: nextcloud-config mountPath: /var/www/html/config/extra.config.php subPath: extra.config.php - resources: + resources: requests: cpu: 250m memory: 1Gi @@ -247,12 +239,9 @@ spec: cpu: 1 memory: 3Gi volumes: - - name: nextcloud-app + - name: nextcloud-data persistentVolumeClaim: - claimName: nextcloud-app - - name: nextcloud-user-data - persistentVolumeClaim: - claimName: nextcloud-user-data + claimName: nextcloud-data - name: nextcloud-config configMap: name: nextcloud-config diff --git 
a/services/nextcloud/maintenance-cronjob.yaml b/services/nextcloud/maintenance-cronjob.yaml index c9421dd..55fcbd1 100644 --- a/services/nextcloud/maintenance-cronjob.yaml +++ b/services/nextcloud/maintenance-cronjob.yaml @@ -34,10 +34,8 @@ spec: name: nextcloud-admin key: admin-password volumeMounts: - - name: nextcloud-app + - name: nextcloud-data mountPath: /var/www/html - - name: nextcloud-user-data - mountPath: /var/www/html/data - name: maintenance-script mountPath: /maintenance/maintenance.sh subPath: maintenance.sh @@ -49,12 +47,9 @@ spec: cpu: 500m memory: 512Mi volumes: - - name: nextcloud-app + - name: nextcloud-data persistentVolumeClaim: - claimName: nextcloud-app - - name: nextcloud-user-data - persistentVolumeClaim: - claimName: nextcloud-user-data + claimName: nextcloud-data - name: maintenance-script configMap: name: nextcloud-maintenance-script -- 2.47.2 From 75069193948e235733ca857c202f2d2d4f484440 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 04:41:00 -0300 Subject: [PATCH 483/684] nextcloud: align app/data mounts --- services/nextcloud/configmap.yaml | 1 + services/nextcloud/cronjob.yaml | 11 ++++++++--- services/nextcloud/deployment.yaml | 21 +++++++++++++++------ services/nextcloud/maintenance-cronjob.yaml | 11 ++++++++--- 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/services/nextcloud/configmap.yaml b/services/nextcloud/configmap.yaml index c49a7b4..20d2273 100644 --- a/services/nextcloud/configmap.yaml +++ b/services/nextcloud/configmap.yaml @@ -25,6 +25,7 @@ data: 'mail_smtpauthtype' => 'LOGIN', 'mail_domain' => 'bstein.dev', 'mail_from_address' => 'no-reply', + 'datadirectory' => '/var/www/html/data', 'apps_paths' => array ( 0 => diff --git a/services/nextcloud/cronjob.yaml b/services/nextcloud/cronjob.yaml index 86c55e1..7900bb1 100644 --- a/services/nextcloud/cronjob.yaml +++ b/services/nextcloud/cronjob.yaml @@ -24,9 +24,14 @@ spec: args: - "cd /var/www/html && php -f cron.php" volumeMounts: - - 
name: nextcloud-data + - name: nextcloud-app mountPath: /var/www/html + - name: nextcloud-user-data + mountPath: /var/www/html/data volumes: - - name: nextcloud-data + - name: nextcloud-app persistentVolumeClaim: - claimName: nextcloud-data + claimName: nextcloud-app + - name: nextcloud-user-data + persistentVolumeClaim: + claimName: nextcloud-user-data diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index d2f7429..c88e882 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -42,8 +42,10 @@ spec: runAsUser: 0 runAsGroup: 0 volumeMounts: - - name: nextcloud-data + - name: nextcloud-app mountPath: /var/www/html + - name: nextcloud-user-data + mountPath: /var/www/html/data - name: nextcloud-config mountPath: /var/www/html/config/extra.config.php subPath: extra.config.php @@ -139,8 +141,10 @@ spec: name: nextcloud-oidc key: client-secret volumeMounts: - - name: nextcloud-data + - name: nextcloud-app mountPath: /var/www/html + - name: nextcloud-user-data + mountPath: /var/www/html/data - name: nextcloud-config mountPath: /var/www/html/config/extra.config.php subPath: extra.config.php @@ -226,12 +230,14 @@ spec: - containerPort: 80 name: http volumeMounts: - - name: nextcloud-data + - name: nextcloud-app mountPath: /var/www/html + - name: nextcloud-user-data + mountPath: /var/www/html/data - name: nextcloud-config mountPath: /var/www/html/config/extra.config.php subPath: extra.config.php - resources: + resources: requests: cpu: 250m memory: 1Gi @@ -239,9 +245,12 @@ spec: cpu: 1 memory: 3Gi volumes: - - name: nextcloud-data + - name: nextcloud-app persistentVolumeClaim: - claimName: nextcloud-data + claimName: nextcloud-app + - name: nextcloud-user-data + persistentVolumeClaim: + claimName: nextcloud-user-data - name: nextcloud-config configMap: name: nextcloud-config diff --git a/services/nextcloud/maintenance-cronjob.yaml b/services/nextcloud/maintenance-cronjob.yaml index 55fcbd1..c9421dd 
100644 --- a/services/nextcloud/maintenance-cronjob.yaml +++ b/services/nextcloud/maintenance-cronjob.yaml @@ -34,8 +34,10 @@ spec: name: nextcloud-admin key: admin-password volumeMounts: - - name: nextcloud-data + - name: nextcloud-app mountPath: /var/www/html + - name: nextcloud-user-data + mountPath: /var/www/html/data - name: maintenance-script mountPath: /maintenance/maintenance.sh subPath: maintenance.sh @@ -47,9 +49,12 @@ spec: cpu: 500m memory: 512Mi volumes: - - name: nextcloud-data + - name: nextcloud-app persistentVolumeClaim: - claimName: nextcloud-data + claimName: nextcloud-app + - name: nextcloud-user-data + persistentVolumeClaim: + claimName: nextcloud-user-data - name: maintenance-script configMap: name: nextcloud-maintenance-script -- 2.47.2 From 5a92e99c8d9d96ccaad229728ac96919683eea4c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 04:43:13 -0300 Subject: [PATCH 484/684] nextcloud-mail-sync: align data mount --- services/nextcloud-mail-sync/cronjob.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/nextcloud-mail-sync/cronjob.yaml b/services/nextcloud-mail-sync/cronjob.yaml index 55ff608..efce1ad 100644 --- a/services/nextcloud-mail-sync/cronjob.yaml +++ b/services/nextcloud-mail-sync/cronjob.yaml @@ -69,7 +69,7 @@ spec: - name: nextcloud-app mountPath: /var/www/html - name: nextcloud-user-data - mountPath: /data/userdata + mountPath: /var/www/html/data - name: sync-script mountPath: /sync/sync.sh subPath: sync.sh -- 2.47.2 From cb7429a6a19eb3460dbd798b296a9c36cac6e3d6 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 04:49:55 -0300 Subject: [PATCH 485/684] nextcloud: stabilize install guardrails --- services/nextcloud/deployment.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index c88e882..8a05102 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -32,6 +32,12 
@@ spec: rm -f /var/www/html/config/config.php || true fi mkdir -p /var/www/html/data || true + if [ ! -f /var/www/html/data/.ocdata ]; then + touch /var/www/html/data/.ocdata + fi + if [ -s /var/www/html/config/config.php ] && ! grep -q "'installed'" /var/www/html/config/config.php; then + sed -i "/^);/i\\ 'installed' => true," /var/www/html/config/config.php + fi chown 33:33 /var/www/html || true chmod 775 /var/www/html || true chown 33:33 /var/www/html/config || true -- 2.47.2 From 3db0661a48ee54c2184fd7229ee8b4d5dbbdb081 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 08:43:45 -0300 Subject: [PATCH 486/684] nextcloud: reset storage mounts and restore office --- services/nextcloud-mail-sync/cronjob.yaml | 13 ++- services/nextcloud/collabora.yaml | 79 +++++++++++++++++ services/nextcloud/cronjob.yaml | 13 ++- services/nextcloud/deployment.yaml | 98 ++++++++++++++++----- services/nextcloud/kustomization.yaml | 1 + services/nextcloud/maintenance-cronjob.yaml | 13 ++- services/nextcloud/pvc.yaml | 32 ++++--- 7 files changed, 199 insertions(+), 50 deletions(-) create mode 100644 services/nextcloud/collabora.yaml diff --git a/services/nextcloud-mail-sync/cronjob.yaml b/services/nextcloud-mail-sync/cronjob.yaml index efce1ad..9973ab0 100644 --- a/services/nextcloud-mail-sync/cronjob.yaml +++ b/services/nextcloud-mail-sync/cronjob.yaml @@ -66,17 +66,22 @@ spec: cpu: 500m memory: 512Mi volumeMounts: - - name: nextcloud-app - mountPath: /var/www/html + - name: nextcloud-config-pvc + mountPath: /var/www/html/config + - name: nextcloud-custom-apps + mountPath: /var/www/html/custom_apps - name: nextcloud-user-data mountPath: /var/www/html/data - name: sync-script mountPath: /sync/sync.sh subPath: sync.sh volumes: - - name: nextcloud-app + - name: nextcloud-config-pvc persistentVolumeClaim: - claimName: nextcloud-app + claimName: nextcloud-config + - name: nextcloud-custom-apps + persistentVolumeClaim: + claimName: nextcloud-custom-apps - name: 
nextcloud-user-data persistentVolumeClaim: claimName: nextcloud-user-data diff --git a/services/nextcloud/collabora.yaml b/services/nextcloud/collabora.yaml new file mode 100644 index 0000000..1cda2ea --- /dev/null +++ b/services/nextcloud/collabora.yaml @@ -0,0 +1,79 @@ +# services/nextcloud/collabora.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: collabora + namespace: nextcloud + labels: + app: collabora +spec: + replicas: 1 + selector: + matchLabels: + app: collabora + template: + metadata: + labels: + app: collabora + spec: + nodeSelector: + hardware: rpi5 + containers: + - name: collabora + image: collabora/code:latest + imagePullPolicy: IfNotPresent + env: + - name: domain + value: 'cloud\.bstein\.dev' + - name: DONT_GEN_SSL_CERT + value: "true" + - name: extra_params + value: --o:ssl.enable=false --o:ssl.termination=true + ports: + - containerPort: 9980 + name: http + resources: + requests: + cpu: 250m + memory: 512Mi + limits: + cpu: 1 + memory: 2Gi +--- +apiVersion: v1 +kind: Service +metadata: + name: collabora + namespace: nextcloud +spec: + selector: + app: collabora + ports: + - name: http + port: 9980 + targetPort: http +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: collabora + namespace: nextcloud + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + traefik.ingress.kubernetes.io/router.entrypoints: websecure +spec: + tls: + - hosts: + - office.bstein.dev + secretName: collabora-tls + rules: + - host: office.bstein.dev + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: collabora + port: + number: 9980 diff --git a/services/nextcloud/cronjob.yaml b/services/nextcloud/cronjob.yaml index 7900bb1..53772a5 100644 --- a/services/nextcloud/cronjob.yaml +++ b/services/nextcloud/cronjob.yaml @@ -24,14 +24,19 @@ spec: args: - "cd /var/www/html && php -f cron.php" volumeMounts: - - name: nextcloud-app - mountPath:
/var/www/html/config + - name: nextcloud-custom-apps + mountPath: /var/www/html/custom_apps - name: nextcloud-user-data mountPath: /var/www/html/data volumes: - - name: nextcloud-app + - name: nextcloud-config-pvc persistentVolumeClaim: - claimName: nextcloud-app + claimName: nextcloud-config + - name: nextcloud-custom-apps + persistentVolumeClaim: + claimName: nextcloud-custom-apps - name: nextcloud-user-data persistentVolumeClaim: claimName: nextcloud-user-data diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 8a05102..85cf561 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -23,6 +23,44 @@ spec: runAsUser: 33 runAsGroup: 33 initContainers: + - name: db-reset + image: postgres:16-alpine + command: ["/bin/sh", "-c"] + args: + - | + set -e + mkdir -p /var/www/html/config + if [ ! -f /var/www/html/config/.db_initialized ]; then + rm -f /var/www/html/config/config.php || true + psql "host=${POSTGRES_HOST} user=${POSTGRES_USER} password=${POSTGRES_PASSWORD} dbname=${POSTGRES_DB}" -v ON_ERROR_STOP=1 <<'SQL' + DROP SCHEMA IF EXISTS public CASCADE; + CREATE SCHEMA public; + GRANT ALL ON SCHEMA public TO PUBLIC; +SQL + touch /var/www/html/config/.db_initialized + chown 33:33 /var/www/html/config/.db_initialized || true + fi + env: + - name: POSTGRES_HOST + value: postgres-service.postgres.svc.cluster.local + - name: POSTGRES_DB + valueFrom: + secretKeyRef: + name: nextcloud-db + key: database + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + name: nextcloud-db + key: db-username + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: nextcloud-db + key: db-password + volumeMounts: + - name: nextcloud-config-pvc + mountPath: /var/www/html/config - name: fix-perms image: alpine:3.20 command: ["/bin/sh", "-c"] @@ -31,28 +69,28 @@ spec: if [ ! -s /var/www/html/config/config.php ]; then rm -f /var/www/html/config/config.php || true fi - mkdir -p /var/www/html/data || true - if [ ! 
-f /var/www/html/data/.ocdata ]; then + mkdir -p /var/www/html/config /var/www/html/data /var/www/html/custom_apps || true + if [ ! -s /var/www/html/config/config.php ]; then + rm -f /var/www/html/data/.ocdata || true + fi + if [ -s /var/www/html/config/config.php ] && [ ! -f /var/www/html/data/.ocdata ]; then touch /var/www/html/data/.ocdata fi if [ -s /var/www/html/config/config.php ] && ! grep -q "'installed'" /var/www/html/config/config.php; then sed -i "/^);/i\\ 'installed' => true," /var/www/html/config/config.php fi - chown 33:33 /var/www/html || true - chmod 775 /var/www/html || true - chown 33:33 /var/www/html/config || true - chown 33:33 /var/www/html/config/config.php || true - chown -R 33:33 /var/www/html/data || true - chown -R 33:33 /var/www/html/apps /var/www/html/custom_apps || true + chown -R 33:33 /var/www/html/config /var/www/html/data /var/www/html/custom_apps || true securityContext: runAsUser: 0 runAsGroup: 0 volumeMounts: - - name: nextcloud-app - mountPath: /var/www/html + - name: nextcloud-config-pvc + mountPath: /var/www/html/config + - name: nextcloud-custom-apps + mountPath: /var/www/html/custom_apps - name: nextcloud-user-data mountPath: /var/www/html/data - - name: nextcloud-config + - name: nextcloud-config-extra mountPath: /var/www/html/config/extra.config.php subPath: extra.config.php - name: install-nextcloud @@ -64,7 +102,7 @@ spec: args: - | installed="$(su -s /bin/sh www-data -c "php /var/www/html/occ status" 2>/dev/null | awk '/installed:/{print $3}' || true)" - if [ ! -s /var/www/html/config/config.php ] && [ ! -f /var/www/html/data/.ocdata ]; then + if [ ! 
-s /var/www/html/config/config.php ]; then su -s /bin/sh www-data -c "php /var/www/html/occ maintenance:install --database pgsql --database-host \"${POSTGRES_HOST}\" --database-name \"${POSTGRES_DB}\" --database-user \"${POSTGRES_USER}\" --database-pass \"${POSTGRES_PASSWORD}\" --admin-user \"${NEXTCLOUD_ADMIN_USER}\" --admin-pass \"${NEXTCLOUD_ADMIN_PASSWORD}\" --data-dir /var/www/html/data" chown 33:33 /var/www/html/config/config.php || true chown -R 33:33 /var/www/html/data || true @@ -79,6 +117,10 @@ spec: su -s /bin/sh www-data -c "php /var/www/html/occ config:system:set oidc_login_hide_password_form --type=boolean --value=true" su -s /bin/sh www-data -c "php /var/www/html/occ config:system:set oidc_login_disable_registration --type=boolean --value=false" } + configure_office() { + su -s /bin/sh www-data -c "php /var/www/html/occ config:app:set richdocuments wopi_url --value='https://office.bstein.dev'" + su -s /bin/sh www-data -c "php /var/www/html/occ config:app:set richdocuments public_wopi_url --value='https://office.bstein.dev'" + } ensure_mime_defaults() { cfg_dir="/var/www/html/resources/config" mkdir -p "${cfg_dir}" @@ -102,10 +144,17 @@ spec: chown -R 33:33 "${target}" su -s /bin/sh www-data -c "php /var/www/html/occ app:enable --force ${app}" || true } + ensure_app() { + app="$1" + su -s /bin/sh www-data -c "php /var/www/html/occ app:install --force ${app}" || true + su -s /bin/sh www-data -c "php /var/www/html/occ app:enable --force ${app}" || true + } ensure_mime_defaults install_app oidc_login https://github.com/pulsejet/nextcloud-oidc-login/releases/download/v3.2.2/oidc_login.tar.gz install_app external https://github.com/nextcloud-releases/external/releases/download/v5.4.1/external-v5.4.1.tar.gz install_app mail https://github.com/nextcloud-releases/mail/releases/download/v3.7.24/mail-stable3.7.tar.gz + ensure_app richdocuments + configure_office configure_oidc fi env: @@ -147,11 +196,13 @@ spec: name: nextcloud-oidc key: client-secret 
volumeMounts: - - name: nextcloud-app - mountPath: /var/www/html + - name: nextcloud-config-pvc + mountPath: /var/www/html/config + - name: nextcloud-custom-apps + mountPath: /var/www/html/custom_apps - name: nextcloud-user-data mountPath: /var/www/html/data - - name: nextcloud-config + - name: nextcloud-config-extra mountPath: /var/www/html/config/extra.config.php subPath: extra.config.php containers: @@ -236,11 +287,13 @@ spec: - containerPort: 80 name: http volumeMounts: - - name: nextcloud-app - mountPath: /var/www/html + - name: nextcloud-config-pvc + mountPath: /var/www/html/config + - name: nextcloud-custom-apps + mountPath: /var/www/html/custom_apps - name: nextcloud-user-data mountPath: /var/www/html/data - - name: nextcloud-config + - name: nextcloud-config-extra mountPath: /var/www/html/config/extra.config.php subPath: extra.config.php resources: @@ -251,13 +304,16 @@ spec: cpu: 1 memory: 3Gi volumes: - - name: nextcloud-app + - name: nextcloud-config-pvc persistentVolumeClaim: - claimName: nextcloud-app + claimName: nextcloud-config + - name: nextcloud-custom-apps + persistentVolumeClaim: + claimName: nextcloud-custom-apps - name: nextcloud-user-data persistentVolumeClaim: claimName: nextcloud-user-data - - name: nextcloud-config + - name: nextcloud-config-extra configMap: name: nextcloud-config defaultMode: 0444 diff --git a/services/nextcloud/kustomization.yaml b/services/nextcloud/kustomization.yaml index 271b854..8192690 100644 --- a/services/nextcloud/kustomization.yaml +++ b/services/nextcloud/kustomization.yaml @@ -7,6 +7,7 @@ resources: - configmap.yaml - pvc.yaml - deployment.yaml + - collabora.yaml - cronjob.yaml - maintenance-cronjob.yaml - service.yaml diff --git a/services/nextcloud/maintenance-cronjob.yaml b/services/nextcloud/maintenance-cronjob.yaml index c9421dd..a3d0bb6 100644 --- a/services/nextcloud/maintenance-cronjob.yaml +++ b/services/nextcloud/maintenance-cronjob.yaml @@ -34,8 +34,10 @@ spec: name: nextcloud-admin key: 
admin-password volumeMounts: - - name: nextcloud-app - mountPath: /var/www/html + - name: nextcloud-config-pvc + mountPath: /var/www/html/config + - name: nextcloud-custom-apps + mountPath: /var/www/html/custom_apps - name: nextcloud-user-data mountPath: /var/www/html/data - name: maintenance-script @@ -49,9 +51,12 @@ spec: cpu: 500m memory: 512Mi volumes: - - name: nextcloud-app + - name: nextcloud-config-pvc persistentVolumeClaim: - claimName: nextcloud-app + claimName: nextcloud-config + - name: nextcloud-custom-apps + persistentVolumeClaim: + claimName: nextcloud-custom-apps - name: nextcloud-user-data persistentVolumeClaim: claimName: nextcloud-user-data diff --git a/services/nextcloud/pvc.yaml b/services/nextcloud/pvc.yaml index d5223cb..d34e151 100644 --- a/services/nextcloud/pvc.yaml +++ b/services/nextcloud/pvc.yaml @@ -2,16 +2,28 @@ apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: nextcloud-app + name: nextcloud-config namespace: nextcloud spec: accessModes: - ReadWriteMany resources: requests: - storage: 20Gi + storage: 5Gi + storageClassName: astreae +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: nextcloud-custom-apps + namespace: nextcloud +spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 5Gi storageClassName: astreae - volumeName: pvc-9cf910d9-ae30-48e6-8d90-a6cbbf3cd2cf --- apiVersion: v1 kind: PersistentVolumeClaim @@ -26,17 +38,3 @@ spec: storage: 2Ti storageClassName: asteria volumeName: pvc-d918158d-422f-4928-beaa-27862611fbe5 ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: nextcloud-data - namespace: nextcloud -spec: - accessModes: - - ReadWriteMany - resources: - requests: - storage: 200Gi - storageClassName: astreae - volumeName: pvc-061a70fd-1dc5-4c37-8f3e-2c7156c26ae6 -- 2.47.2 From 52295538a01797de2a53e60b8b6d4d30fde2105d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 08:46:57 -0300 Subject: [PATCH 487/684] nextcloud: fix db reset command --- 
services/nextcloud/deployment.yaml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 85cf561..95ce0dc 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -32,11 +32,7 @@ spec: mkdir -p /var/www/html/config if [ ! -f /var/www/html/config/.db_initialized ]; then rm -f /var/www/html/config/config.php || true - psql "host=${POSTGRES_HOST} user=${POSTGRES_USER} password=${POSTGRES_PASSWORD} dbname=${POSTGRES_DB}" -v ON_ERROR_STOP=1 <<'SQL' - DROP SCHEMA IF EXISTS public CASCADE; - CREATE SCHEMA public; - GRANT ALL ON SCHEMA public TO PUBLIC; -SQL + psql "host=${POSTGRES_HOST} user=${POSTGRES_USER} password=${POSTGRES_PASSWORD} dbname=${POSTGRES_DB}" -v ON_ERROR_STOP=1 -c "DROP SCHEMA IF EXISTS public CASCADE; CREATE SCHEMA public; GRANT ALL ON SCHEMA public TO PUBLIC;" touch /var/www/html/config/.db_initialized chown 33:33 /var/www/html/config/.db_initialized || true fi -- 2.47.2 From 59b719da54094419b3529daf41e7ce1c656c7ccf Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 08:51:07 -0300 Subject: [PATCH 488/684] nextcloud: install oidc login via app store --- services/nextcloud/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 95ce0dc..3d677fe 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -146,9 +146,9 @@ spec: su -s /bin/sh www-data -c "php /var/www/html/occ app:enable --force ${app}" || true } ensure_mime_defaults - install_app oidc_login https://github.com/pulsejet/nextcloud-oidc-login/releases/download/v3.2.2/oidc_login.tar.gz install_app external https://github.com/nextcloud-releases/external/releases/download/v5.4.1/external-v5.4.1.tar.gz install_app mail https://github.com/nextcloud-releases/mail/releases/download/v3.7.24/mail-stable3.7.tar.gz + ensure_app 
oidc_login ensure_app richdocuments configure_office configure_oidc -- 2.47.2 From 77ce04c562b6a1dfa67b37892b066f0e671ab85a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 08:58:50 -0300 Subject: [PATCH 489/684] nextcloud: reset external app config and force reinstall --- services/nextcloud/deployment.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 3d677fe..5380b3d 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -140,12 +140,23 @@ spec: chown -R 33:33 "${target}" su -s /bin/sh www-data -c "php /var/www/html/occ app:enable --force ${app}" || true } + reset_external_config() { + su -s /bin/sh www-data -c "php /var/www/html/occ app:remove external" || true + su -s /bin/sh www-data -c "php /var/www/html/occ config:app:delete external jwt_token_privkey_es256" || true + su -s /bin/sh www-data -c "php /var/www/html/occ config:app:delete external jwt_token_pubkey_es256" || true + su -s /bin/sh www-data -c "php /var/www/html/occ config:app:delete external jwt_token_privkey_ed25519" || true + su -s /bin/sh www-data -c "php /var/www/html/occ config:app:delete external jwt_token_pubkey_ed25519" || true + } ensure_app() { app="$1" + target="/var/www/html/custom_apps/${app}" + rm -rf "${target}" + su -s /bin/sh www-data -c "php /var/www/html/occ app:remove ${app}" || true su -s /bin/sh www-data -c "php /var/www/html/occ app:install --force ${app}" || true su -s /bin/sh www-data -c "php /var/www/html/occ app:enable --force ${app}" || true } ensure_mime_defaults + reset_external_config install_app external https://github.com/nextcloud-releases/external/releases/download/v5.4.1/external-v5.4.1.tar.gz install_app mail https://github.com/nextcloud-releases/mail/releases/download/v3.7.24/mail-stable3.7.tar.gz ensure_app oidc_login -- 2.47.2 From c928b7805cb30ad44806dff583a3e2950dccb2b1 Mon Sep 17 00:00:00 2001 From: Brad Stein 
Date: Wed, 7 Jan 2026 09:02:22 -0300 Subject: [PATCH 490/684] nextcloud: install oidc app from release tarball --- services/nextcloud/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 5380b3d..46455c5 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -159,7 +159,7 @@ spec: reset_external_config install_app external https://github.com/nextcloud-releases/external/releases/download/v5.4.1/external-v5.4.1.tar.gz install_app mail https://github.com/nextcloud-releases/mail/releases/download/v3.7.24/mail-stable3.7.tar.gz - ensure_app oidc_login + install_app oidc_login https://github.com/pulsejet/nextcloud-oidc-login/releases/download/v3.2.2/oidc_login.tar.gz ensure_app richdocuments configure_office configure_oidc -- 2.47.2 From 6850f7b2fc60b554531306e6132c970b9ca0a426 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 09:14:20 -0300 Subject: [PATCH 491/684] nextcloud: avoid forcing installed flag --- services/nextcloud/deployment.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 46455c5..ba98422 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -72,9 +72,6 @@ spec: if [ -s /var/www/html/config/config.php ] && [ ! -f /var/www/html/data/.ocdata ]; then touch /var/www/html/data/.ocdata fi - if [ -s /var/www/html/config/config.php ] && ! 
grep -q "'installed'" /var/www/html/config/config.php; then - sed -i "/^);/i\\ 'installed' => true," /var/www/html/config/config.php - fi chown -R 33:33 /var/www/html/config /var/www/html/data /var/www/html/custom_apps || true securityContext: runAsUser: 0 -- 2.47.2 From ff395f7cf2eb5c6f15f9d7d543ad920959905472 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 09:17:45 -0300 Subject: [PATCH 492/684] comms: restore Matrix guest join --- services/communication/element-rendered.yaml | 2 +- .../guest-register-configmap.yaml | 195 ++++++++++++++++++ .../guest-register-deployment.yaml | 95 +++++++++ .../communication/guest-register-ingress.yaml | 34 +++ .../communication/guest-register-service.yaml | 16 ++ services/communication/kustomization.yaml | 4 + 6 files changed, 345 insertions(+), 1 deletion(-) create mode 100644 services/communication/guest-register-configmap.yaml create mode 100644 services/communication/guest-register-deployment.yaml create mode 100644 services/communication/guest-register-ingress.yaml create mode 100644 services/communication/guest-register-service.yaml diff --git a/services/communication/element-rendered.yaml b/services/communication/element-rendered.yaml index f04dda2..0d3200e 100644 --- a/services/communication/element-rendered.yaml +++ b/services/communication/element-rendered.yaml @@ -60,7 +60,7 @@ metadata: app.kubernetes.io/managed-by: Helm data: config.json: | - 
{"brand":"Othrys","default_server_config":{"m.homeserver":{"base_url":"https://matrix.live.bstein.dev","server_name":"live.bstein.dev"},"m.identity_server":{"base_url":"https://vector.im"}},"default_theme":"dark","disable_custom_urls":true,"disable_login_language_selector":true,"disable_guests":false,"show_labs_settings":true,"features":{"feature_group_calls":true,"feature_video_rooms":true,"feature_element_call_video_rooms":true},"room_directory":{"servers":["live.bstein.dev"]},"jitsi":{},"element_call":{"url":"https://call.live.bstein.dev","participant_limit":16,"brand":"Othrys Call"}} + {"brand":"Othrys","default_server_config":{"m.homeserver":{"base_url":"https://matrix.live.bstein.dev","server_name":"live.bstein.dev"},"m.identity_server":{"base_url":"https://vector.im"}},"default_theme":"dark","disable_custom_urls":true,"disable_login_language_selector":true,"disable_guests":false,"registration_url":"https://bstein.dev/request-access","show_labs_settings":true,"features":{"feature_group_calls":true,"feature_video_rooms":true,"feature_element_call_video_rooms":true},"room_directory":{"servers":["live.bstein.dev"]},"jitsi":{},"element_call":{"url":"https://call.live.bstein.dev","participant_limit":16,"brand":"Othrys Call"}} --- # Source: element-web/templates/service.yaml apiVersion: v1 diff --git a/services/communication/guest-register-configmap.yaml b/services/communication/guest-register-configmap.yaml new file mode 100644 index 0000000..5d6e2a2 --- /dev/null +++ b/services/communication/guest-register-configmap.yaml @@ -0,0 +1,195 @@ +# services/communication/guest-register-configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: matrix-guest-register +data: + server.py: | + import base64 + import json + import os + import random + import secrets + from http.server import BaseHTTPRequestHandler, HTTPServer + from urllib import error, parse, request + + SYNAPSE_BASE = os.environ.get("SYNAPSE_BASE", 
"http://othrys-synapse-matrix-synapse:8008").rstrip("/") + AUTH_BASE = os.environ.get("AUTH_BASE", "http://matrix-authentication-service:8080").rstrip("/") + SERVER_NAME = os.environ.get("MATRIX_SERVER_NAME", "live.bstein.dev") + + SEEDER_USER = os.environ["SEEDER_USER"] + SEEDER_PASS = os.environ["SEEDER_PASS"] + + # Basic rate limiting (best-effort) to avoid accidental abuse. + # Count requests per client IP over a short window. + RATE_WINDOW_SEC = int(os.environ.get("RATE_WINDOW_SEC", "60")) + RATE_MAX = int(os.environ.get("RATE_MAX", "30")) + _rate = {} # ip -> [window_start, count] + + ADJ = ["brisk", "calm", "eager", "gentle", "merry", "nifty", "rapid", "sunny", "witty", "zesty"] + NOUN = ["otter", "falcon", "comet", "ember", "grove", "harbor", "meadow", "raven", "river", "summit"] + + def _json(method, url, *, token=None, body=None, timeout=20): + headers = {"Content-Type": "application/json"} + if token: + headers["Authorization"] = f"Bearer {token}" + data = None + if body is not None: + data = json.dumps(body).encode() + req = request.Request(url, data=data, headers=headers, method=method) + try: + with request.urlopen(req, timeout=timeout) as resp: + raw = resp.read() + payload = json.loads(raw.decode()) if raw else {} + return resp.status, payload + except error.HTTPError as e: + raw = e.read() + try: + payload = json.loads(raw.decode()) if raw else {} + except Exception: + payload = {} + return e.code, payload + + _seeder_token = None + _seeder_token_at = 0.0 + + def _login(user, password): + status, payload = _json( + "POST", + f"{AUTH_BASE}/_matrix/client/v3/login", + body={ + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": user}, + "password": password, + }, + timeout=20, + ) + if status != 200 or "access_token" not in payload: + raise RuntimeError("login_failed") + return payload + + def _seeder_access_token(now): + global _seeder_token, _seeder_token_at + if _seeder_token and (now - _seeder_token_at) < 300: + return 
_seeder_token + payload = _login(SEEDER_USER, SEEDER_PASS) + _seeder_token = payload["access_token"] + _seeder_token_at = now + return _seeder_token + + def _create_user(admin_token, localpart, password, displayname): + user_id = f"@{localpart}:{SERVER_NAME}" + status, payload = _json( + "PUT", + f"{SYNAPSE_BASE}/_synapse/admin/v2/users/{parse.quote(user_id)}", + token=admin_token, + body={ + "password": password, + "admin": False, + "deactivated": False, + "displayname": displayname, + }, + timeout=25, + ) + if status not in (200, 201): + raise RuntimeError("user_create_failed") + return user_id + + def _generate_localpart(): + return "guest-" + secrets.token_hex(6) + + def _generate_displayname(): + return f"{random.choice(ADJ)}-{random.choice(NOUN)}" + + def _rate_check(ip, now): + win, cnt = _rate.get(ip, (now, 0)) + if now - win > RATE_WINDOW_SEC: + _rate[ip] = (now, 1) + return True + if cnt >= RATE_MAX: + return False + _rate[ip] = (win, cnt + 1) + return True + + class Handler(BaseHTTPRequestHandler): + server_version = "matrix-guest-register" + + def _send_json(self, code, payload): + body = json.dumps(payload).encode() + self.send_response(code) + self.send_header("Content-Type", "application/json") + self.send_header("Access-Control-Allow-Origin", "*") + self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS") + self.send_header("Access-Control-Allow-Headers", "Content-Type, Authorization, X-Requested-With") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def do_OPTIONS(self): # noqa: N802 + self.send_response(204) + self.send_header("Access-Control-Allow-Origin", "*") + self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS") + self.send_header("Access-Control-Allow-Headers", "Content-Type, Authorization, X-Requested-With") + self.end_headers() + + def do_GET(self): # noqa: N802 + if self.path in ("/healthz", "/"): + return self._send_json(200, {"ok": True}) + return 
self._send_json(404, {"errcode": "M_NOT_FOUND", "error": "not_found"}) + + def do_POST(self): # noqa: N802 + # We only implement guest registration (used by Element Web "Join as guest"). + parsed = parse.urlparse(self.path) + if parsed.path not in ("/_matrix/client/v3/register", "/_matrix/client/r0/register"): + return self._send_json(404, {"errcode": "M_NOT_FOUND", "error": "not_found"}) + + qs = parse.parse_qs(parsed.query) + kind = (qs.get("kind") or ["user"])[0] + if kind != "guest": + return self._send_json( + 403, + { + "errcode": "M_FORBIDDEN", + "error": "Registration is disabled; use https://bstein.dev/request-access for accounts.", + }, + ) + + # Best-effort client IP from X-Forwarded-For (Traefik). + xfwd = self.headers.get("x-forwarded-for", "") + ip = (xfwd.split(",")[0].strip() if xfwd else "") or self.client_address[0] + now = __import__("time").time() + if not _rate_check(ip, now): + return self._send_json(429, {"errcode": "M_LIMIT_EXCEEDED", "error": "rate_limited"}) + + # Consume request body (Element may send fields; we ignore). + length = int(self.headers.get("content-length", "0") or "0") + _ = self.rfile.read(length) if length else b"{}" + + # Create a short-lived "guest" account by provisioning a normal user with a random password. + # This keeps MAS/OIDC intact while restoring a no-signup guest UX. 
+ try: + admin_token = _seeder_access_token(now) + displayname = _generate_displayname() + localpart = _generate_localpart() + password = base64.urlsafe_b64encode(secrets.token_bytes(24)).decode().rstrip("=") + user_id = _create_user(admin_token, localpart, password, displayname) + login_payload = _login(localpart, password) + except Exception: + return self._send_json(502, {"errcode": "M_UNKNOWN", "error": "guest_provision_failed"}) + + resp = { + "user_id": login_payload.get("user_id") or user_id, + "access_token": login_payload.get("access_token"), + "device_id": login_payload.get("device_id"), + "home_server": SERVER_NAME, + } + # Do not expose refresh tokens for guests. + return self._send_json(200, resp) + + def main(): + port = int(os.environ.get("PORT", "8080")) + HTTPServer(("0.0.0.0", port), Handler).serve_forever() + + if __name__ == "__main__": + main() + diff --git a/services/communication/guest-register-deployment.yaml b/services/communication/guest-register-deployment.yaml new file mode 100644 index 0000000..720fe76 --- /dev/null +++ b/services/communication/guest-register-deployment.yaml @@ -0,0 +1,95 @@ +# services/communication/guest-register-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: matrix-guest-register + labels: + app.kubernetes.io/name: matrix-guest-register +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: matrix-guest-register + template: + metadata: + labels: + app.kubernetes.io/name: matrix-guest-register + spec: + securityContext: + runAsNonRoot: true + runAsUser: 10001 + runAsGroup: 10001 + containers: + - name: guest-register + image: python:3.11-slim + imagePullPolicy: IfNotPresent + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + env: + - name: PYTHONDONTWRITEBYTECODE + value: "1" + - name: PYTHONUNBUFFERED + value: "1" + - name: PORT + value: "8080" + - name: SYNAPSE_BASE + value: http://othrys-synapse-matrix-synapse:8008 + - name: AUTH_BASE 
+ value: http://matrix-authentication-service:8080 + - name: MATRIX_SERVER_NAME + value: live.bstein.dev + - name: SEEDER_USER + value: othrys-seeder + - name: SEEDER_PASS + valueFrom: + secretKeyRef: + name: atlasbot-credentials-runtime + key: seeder-password + - name: RATE_WINDOW_SEC + value: "60" + - name: RATE_MAX + value: "30" + ports: + - name: http + containerPort: 8080 + protocol: TCP + readinessProbe: + httpGet: + path: /healthz + port: http + initialDelaySeconds: 2 + periodSeconds: 10 + timeoutSeconds: 2 + livenessProbe: + httpGet: + path: /healthz + port: http + initialDelaySeconds: 10 + periodSeconds: 20 + timeoutSeconds: 2 + resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 250m + memory: 256Mi + volumeMounts: + - name: app + mountPath: /app/server.py + subPath: server.py + readOnly: true + command: + - python + - /app/server.py + volumes: + - name: app + configMap: + name: matrix-guest-register + items: + - key: server.py + path: server.py + diff --git a/services/communication/guest-register-ingress.yaml b/services/communication/guest-register-ingress.yaml new file mode 100644 index 0000000..c3f38c1 --- /dev/null +++ b/services/communication/guest-register-ingress.yaml @@ -0,0 +1,34 @@ +# services/communication/guest-register-ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: matrix-guest-register + annotations: + kubernetes.io/ingress.class: traefik + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" + cert-manager.io/cluster-issuer: letsencrypt +spec: + tls: + - hosts: + - matrix.live.bstein.dev + secretName: matrix-live-tls + rules: + - host: matrix.live.bstein.dev + http: + paths: + - path: /_matrix/client/v3/register + pathType: Prefix + backend: + service: + name: matrix-guest-register + port: + number: 8080 + - path: /_matrix/client/r0/register + pathType: Prefix + backend: + service: + name: matrix-guest-register + port: + number: 8080 + 
diff --git a/services/communication/guest-register-service.yaml b/services/communication/guest-register-service.yaml new file mode 100644 index 0000000..776e3ab --- /dev/null +++ b/services/communication/guest-register-service.yaml @@ -0,0 +1,16 @@ +# services/communication/guest-register-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: matrix-guest-register + labels: + app.kubernetes.io/name: matrix-guest-register +spec: + selector: + app.kubernetes.io/name: matrix-guest-register + ports: + - name: http + port: 8080 + targetPort: http + protocol: TCP + diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index 5b71f75..e651976 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -23,6 +23,10 @@ resources: - bstein-force-leave-job.yaml - pin-othrys-job.yaml - guest-name-job.yaml + - guest-register-configmap.yaml + - guest-register-deployment.yaml + - guest-register-service.yaml + - guest-register-ingress.yaml - atlasbot-configmap.yaml - atlasbot-deployment.yaml - seed-othrys-room.yaml -- 2.47.2 From 9d5ba6adfe46b85f1503f76a70591e70d56396f2 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 09:20:22 -0300 Subject: [PATCH 493/684] nextcloud: preserve config merge and stop db reset --- services/nextcloud/configmap.yaml | 4 ++-- services/nextcloud/deployment.yaml | 34 ------------------------------ 2 files changed, 2 insertions(+), 36 deletions(-) diff --git a/services/nextcloud/configmap.yaml b/services/nextcloud/configmap.yaml index 20d2273..21098a2 100644 --- a/services/nextcloud/configmap.yaml +++ b/services/nextcloud/configmap.yaml @@ -7,7 +7,7 @@ metadata: data: extra.config.php: | array ( 0 => 'cloud.bstein.dev', @@ -61,4 +61,4 @@ data: 'oidc_login_create_groups' => false, # External storage for user data should be configured to Asteria via the External Storage app (admin UI), # keeping the astreae PVC for app internals only. 
- ); + )); diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index ba98422..9c63191 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -23,40 +23,6 @@ spec: runAsUser: 33 runAsGroup: 33 initContainers: - - name: db-reset - image: postgres:16-alpine - command: ["/bin/sh", "-c"] - args: - - | - set -e - mkdir -p /var/www/html/config - if [ ! -f /var/www/html/config/.db_initialized ]; then - rm -f /var/www/html/config/config.php || true - psql "host=${POSTGRES_HOST} user=${POSTGRES_USER} password=${POSTGRES_PASSWORD} dbname=${POSTGRES_DB}" -v ON_ERROR_STOP=1 -c "DROP SCHEMA IF EXISTS public CASCADE; CREATE SCHEMA public; GRANT ALL ON SCHEMA public TO PUBLIC;" - touch /var/www/html/config/.db_initialized - chown 33:33 /var/www/html/config/.db_initialized || true - fi - env: - - name: POSTGRES_HOST - value: postgres-service.postgres.svc.cluster.local - - name: POSTGRES_DB - valueFrom: - secretKeyRef: - name: nextcloud-db - key: database - - name: POSTGRES_USER - valueFrom: - secretKeyRef: - name: nextcloud-db - key: db-username - - name: POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - name: nextcloud-db - key: db-password - volumeMounts: - - name: nextcloud-config-pvc - mountPath: /var/www/html/config - name: fix-perms image: alpine:3.20 command: ["/bin/sh", "-c"] -- 2.47.2 From 1bcb9baba25ef589acc549b1c959a3a6d8d918d5 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 09:31:46 -0300 Subject: [PATCH 494/684] comms: ensure seeder is Synapse admin --- services/communication/kustomization.yaml | 1 + .../synapse-seeder-admin-ensure-job.yaml | 37 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 services/communication/synapse-seeder-admin-ensure-job.yaml diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index e651976..42aa7ea 100644 --- a/services/communication/kustomization.yaml +++ 
b/services/communication/kustomization.yaml @@ -6,6 +6,7 @@ resources: - atlasbot-rbac.yaml - synapse-rendered.yaml - synapse-signingkey-ensure-job.yaml + - synapse-seeder-admin-ensure-job.yaml - mas-configmap.yaml - mas-admin-client-secret-ensure-job.yaml - mas-deployment.yaml diff --git a/services/communication/synapse-seeder-admin-ensure-job.yaml b/services/communication/synapse-seeder-admin-ensure-job.yaml new file mode 100644 index 0000000..8d92ccd --- /dev/null +++ b/services/communication/synapse-seeder-admin-ensure-job.yaml @@ -0,0 +1,37 @@ +# services/communication/synapse-seeder-admin-ensure-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: synapse-seeder-admin-ensure-1 + namespace: comms +spec: + backoffLimit: 2 + template: + spec: + restartPolicy: OnFailure + containers: + - name: psql + image: postgres:16-alpine + env: + - name: PGHOST + value: postgres-service.postgres.svc.cluster.local + - name: PGPORT + value: "5432" + - name: PGDATABASE + value: synapse + - name: PGUSER + value: synapse + - name: PGPASSWORD + valueFrom: + secretKeyRef: + name: synapse-db + key: POSTGRES_PASSWORD + command: + - /bin/sh + - -c + - | + set -euo pipefail + psql -v ON_ERROR_STOP=1 <<'SQL' + UPDATE users SET admin = 1 WHERE name = '@othrys-seeder:live.bstein.dev'; + SQL + -- 2.47.2 From a711c450d3d9b6364f90949b8033f56fa89bf0b9 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 09:36:45 -0300 Subject: [PATCH 495/684] comms: implement MAS-backed guest register --- .../guest-register-configmap.yaml | 116 +++++++++++++----- .../guest-register-deployment.yaml | 25 ++-- 2 files changed, 101 insertions(+), 40 deletions(-) diff --git a/services/communication/guest-register-configmap.yaml b/services/communication/guest-register-configmap.yaml index 5d6e2a2..296c291 100644 --- a/services/communication/guest-register-configmap.yaml +++ b/services/communication/guest-register-configmap.yaml @@ -13,12 +13,13 @@ data: from http.server import 
BaseHTTPRequestHandler, HTTPServer from urllib import error, parse, request - SYNAPSE_BASE = os.environ.get("SYNAPSE_BASE", "http://othrys-synapse-matrix-synapse:8008").rstrip("/") + MATRIX_BASE = os.environ.get("MATRIX_BASE", "http://othrys-synapse-matrix-synapse:8008").rstrip("/") AUTH_BASE = os.environ.get("AUTH_BASE", "http://matrix-authentication-service:8080").rstrip("/") + MAS_ADMIN_BASE = os.environ.get("MAS_ADMIN_BASE", "http://matrix-authentication-service:8081").rstrip("/") SERVER_NAME = os.environ.get("MATRIX_SERVER_NAME", "live.bstein.dev") - SEEDER_USER = os.environ["SEEDER_USER"] - SEEDER_PASS = os.environ["SEEDER_PASS"] + MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"] + MAS_ADMIN_CLIENT_SECRET_FILE = os.environ.get("MAS_ADMIN_CLIENT_SECRET_FILE", "/etc/mas/admin-client/client_secret") # Basic rate limiting (best-effort) to avoid accidental abuse. # Count requests per client IP over a short window. @@ -50,8 +51,24 @@ data: payload = {} return e.code, payload - _seeder_token = None - _seeder_token_at = 0.0 + def _form(method, url, *, headers=None, fields=None, timeout=20): + hdrs = {"Content-Type": "application/x-www-form-urlencoded"} + if headers: + hdrs.update(headers) + data = parse.urlencode(fields or {}).encode() + req = request.Request(url, data=data, headers=hdrs, method=method) + try: + with request.urlopen(req, timeout=timeout) as resp: + raw = resp.read() + payload = json.loads(raw.decode()) if raw else {} + return resp.status, payload + except error.HTTPError as e: + raw = e.read() + try: + payload = json.loads(raw.decode()) if raw else {} + except Exception: + payload = {} + return e.code, payload def _login(user, password): status, payload = _json( @@ -68,32 +85,46 @@ data: raise RuntimeError("login_failed") return payload - def _seeder_access_token(now): - global _seeder_token, _seeder_token_at - if _seeder_token and (now - _seeder_token_at) < 300: - return _seeder_token - payload = _login(SEEDER_USER, SEEDER_PASS) - 
_seeder_token = payload["access_token"] - _seeder_token_at = now - return _seeder_token + _mas_admin_token = None + _mas_admin_token_at = 0.0 - def _create_user(admin_token, localpart, password, displayname): - user_id = f"@{localpart}:{SERVER_NAME}" + def _mas_admin_access_token(now): + global _mas_admin_token, _mas_admin_token_at + if _mas_admin_token and (now - _mas_admin_token_at) < 300: + return _mas_admin_token + + with open(MAS_ADMIN_CLIENT_SECRET_FILE, encoding="utf-8") as fh: + client_secret = fh.read().strip() + creds = f"{MAS_ADMIN_CLIENT_ID}:{client_secret}".encode() + basic = base64.b64encode(creds).decode() + + status, payload = _form( + "POST", + f"{AUTH_BASE}/oauth2/token", + headers={"Authorization": f"Basic {basic}"}, + fields={"grant_type": "client_credentials"}, + timeout=20, + ) + if status != 200 or "access_token" not in payload: + raise RuntimeError("mas_admin_token_failed") + + _mas_admin_token = payload["access_token"] + _mas_admin_token_at = now + return _mas_admin_token + + def _mas_create_user(admin_token, username, password): status, payload = _json( - "PUT", - f"{SYNAPSE_BASE}/_synapse/admin/v2/users/{parse.quote(user_id)}", + "POST", + f"{MAS_ADMIN_BASE}/api/admin/v1/users", token=admin_token, - body={ - "password": password, - "admin": False, - "deactivated": False, - "displayname": displayname, - }, + body={"username": username, "password": password}, timeout=25, ) - if status not in (200, 201): - raise RuntimeError("user_create_failed") - return user_id + if status in (200, 201): + return + if status == 409 or payload.get("errcode") == "M_ALREADY_EXISTS": + raise RuntimeError("username_taken") + raise RuntimeError("user_create_failed") def _generate_localpart(): return "guest-" + secrets.token_hex(6) @@ -101,6 +132,18 @@ data: def _generate_displayname(): return f"{random.choice(ADJ)}-{random.choice(NOUN)}" + def _set_displayname(access_token, user_id, displayname): + try: + _json( + "PUT", + 
f"{MATRIX_BASE}/_matrix/client/v3/profile/{parse.quote(user_id)}/displayname", + token=access_token, + body={"displayname": displayname}, + timeout=15, + ) + except Exception: + return + def _rate_check(ip, now): win, cnt = _rate.get(ip, (now, 0)) if now - win > RATE_WINDOW_SEC: @@ -168,17 +211,29 @@ data: # Create a short-lived "guest" account by provisioning a normal user with a random password. # This keeps MAS/OIDC intact while restoring a no-signup guest UX. try: - admin_token = _seeder_access_token(now) displayname = _generate_displayname() - localpart = _generate_localpart() password = base64.urlsafe_b64encode(secrets.token_bytes(24)).decode().rstrip("=") - user_id = _create_user(admin_token, localpart, password, displayname) + admin_token = _mas_admin_access_token(now) + last = None + for _ in range(3): + localpart = _generate_localpart() + try: + _mas_create_user(admin_token, localpart, password) + break + except RuntimeError as e: + last = str(e) + if last != "username_taken": + raise + else: + raise RuntimeError(last or "user_create_failed") + login_payload = _login(localpart, password) + _set_displayname(login_payload.get("access_token"), login_payload.get("user_id"), displayname) except Exception: return self._send_json(502, {"errcode": "M_UNKNOWN", "error": "guest_provision_failed"}) resp = { - "user_id": login_payload.get("user_id") or user_id, + "user_id": login_payload.get("user_id") or f"@{localpart}:{SERVER_NAME}", "access_token": login_payload.get("access_token"), "device_id": login_payload.get("device_id"), "home_server": SERVER_NAME, @@ -192,4 +247,3 @@ data: if __name__ == "__main__": main() - diff --git a/services/communication/guest-register-deployment.yaml b/services/communication/guest-register-deployment.yaml index 720fe76..fe52862 100644 --- a/services/communication/guest-register-deployment.yaml +++ b/services/communication/guest-register-deployment.yaml @@ -35,19 +35,18 @@ spec: value: "1" - name: PORT value: "8080" - - name: 
SYNAPSE_BASE + - name: MATRIX_BASE value: http://othrys-synapse-matrix-synapse:8008 - name: AUTH_BASE value: http://matrix-authentication-service:8080 + - name: MAS_ADMIN_BASE + value: http://matrix-authentication-service:8081 + - name: MAS_ADMIN_CLIENT_ID + value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM + - name: MAS_ADMIN_CLIENT_SECRET_FILE + value: /etc/mas/admin-client/client_secret - name: MATRIX_SERVER_NAME value: live.bstein.dev - - name: SEEDER_USER - value: othrys-seeder - - name: SEEDER_PASS - valueFrom: - secretKeyRef: - name: atlasbot-credentials-runtime - key: seeder-password - name: RATE_WINDOW_SEC value: "60" - name: RATE_MAX @@ -82,6 +81,9 @@ spec: mountPath: /app/server.py subPath: server.py readOnly: true + - name: mas-admin-client + mountPath: /etc/mas/admin-client + readOnly: true command: - python - /app/server.py @@ -92,4 +94,9 @@ spec: items: - key: server.py path: server.py - + - name: mas-admin-client + secret: + secretName: mas-admin-client-runtime + items: + - key: client_secret + path: client_secret -- 2.47.2 From 9bb90053a10ec8433fbba2983cca6bd5c1c4fa5f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 09:40:25 -0300 Subject: [PATCH 496/684] nextcloud: persist web root in pvc --- services/nextcloud-mail-sync/cronjob.yaml | 5 ++++ services/nextcloud/cronjob.yaml | 5 ++++ services/nextcloud/deployment.yaml | 27 +++++++++++++++++++++ services/nextcloud/maintenance-cronjob.yaml | 5 ++++ services/nextcloud/pvc.yaml | 13 ++++++++++ 5 files changed, 55 insertions(+) diff --git a/services/nextcloud-mail-sync/cronjob.yaml b/services/nextcloud-mail-sync/cronjob.yaml index 9973ab0..e0705c5 100644 --- a/services/nextcloud-mail-sync/cronjob.yaml +++ b/services/nextcloud-mail-sync/cronjob.yaml @@ -66,6 +66,8 @@ spec: cpu: 500m memory: 512Mi volumeMounts: + - name: nextcloud-web + mountPath: /var/www/html - name: nextcloud-config-pvc mountPath: /var/www/html/config - name: nextcloud-custom-apps @@ -85,6 +87,9 @@ spec: - name: nextcloud-user-data 
persistentVolumeClaim: claimName: nextcloud-user-data + - name: nextcloud-web + persistentVolumeClaim: + claimName: nextcloud-web - name: sync-script configMap: name: nextcloud-mail-sync-script diff --git a/services/nextcloud/cronjob.yaml b/services/nextcloud/cronjob.yaml index 53772a5..721a494 100644 --- a/services/nextcloud/cronjob.yaml +++ b/services/nextcloud/cronjob.yaml @@ -24,6 +24,8 @@ spec: args: - "cd /var/www/html && php -f cron.php" volumeMounts: + - name: nextcloud-web + mountPath: /var/www/html - name: nextcloud-config-pvc mountPath: /var/www/html/config - name: nextcloud-custom-apps @@ -40,3 +42,6 @@ spec: - name: nextcloud-user-data persistentVolumeClaim: claimName: nextcloud-user-data + - name: nextcloud-web + persistentVolumeClaim: + claimName: nextcloud-web diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 9c63191..0d34e45 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -23,6 +23,26 @@ spec: runAsUser: 33 runAsGroup: 33 initContainers: + - name: seed-nextcloud-web + image: nextcloud:29-apache + command: ["/bin/sh", "-c"] + args: + - | + if [ ! -f /var/www/html/version.php ]; then + echo "Seeding Nextcloud webroot..." + tar -C /usr/src/nextcloud -cf - \ + --exclude=./config \ + --exclude=./data \ + --exclude=./custom_apps \ + . 
| tar -C /var/www/html -xf - + chown -R 33:33 /var/www/html || true + fi + securityContext: + runAsUser: 0 + runAsGroup: 0 + volumeMounts: + - name: nextcloud-web + mountPath: /var/www/html - name: fix-perms image: alpine:3.20 command: ["/bin/sh", "-c"] @@ -166,6 +186,8 @@ spec: name: nextcloud-oidc key: client-secret volumeMounts: + - name: nextcloud-web + mountPath: /var/www/html - name: nextcloud-config-pvc mountPath: /var/www/html/config - name: nextcloud-custom-apps @@ -257,6 +279,8 @@ spec: - containerPort: 80 name: http volumeMounts: + - name: nextcloud-web + mountPath: /var/www/html - name: nextcloud-config-pvc mountPath: /var/www/html/config - name: nextcloud-custom-apps @@ -274,6 +298,9 @@ spec: cpu: 1 memory: 3Gi volumes: + - name: nextcloud-web + persistentVolumeClaim: + claimName: nextcloud-web - name: nextcloud-config-pvc persistentVolumeClaim: claimName: nextcloud-config diff --git a/services/nextcloud/maintenance-cronjob.yaml b/services/nextcloud/maintenance-cronjob.yaml index a3d0bb6..a6fad0a 100644 --- a/services/nextcloud/maintenance-cronjob.yaml +++ b/services/nextcloud/maintenance-cronjob.yaml @@ -34,6 +34,8 @@ spec: name: nextcloud-admin key: admin-password volumeMounts: + - name: nextcloud-web + mountPath: /var/www/html - name: nextcloud-config-pvc mountPath: /var/www/html/config - name: nextcloud-custom-apps @@ -60,6 +62,9 @@ spec: - name: nextcloud-user-data persistentVolumeClaim: claimName: nextcloud-user-data + - name: nextcloud-web + persistentVolumeClaim: + claimName: nextcloud-web - name: maintenance-script configMap: name: nextcloud-maintenance-script diff --git a/services/nextcloud/pvc.yaml b/services/nextcloud/pvc.yaml index d34e151..a934732 100644 --- a/services/nextcloud/pvc.yaml +++ b/services/nextcloud/pvc.yaml @@ -27,6 +27,19 @@ spec: --- apiVersion: v1 kind: PersistentVolumeClaim +metadata: + name: nextcloud-web + namespace: nextcloud +spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 5Gi + 
storageClassName: astreae +--- +apiVersion: v1 +kind: PersistentVolumeClaim metadata: name: nextcloud-user-data namespace: nextcloud -- 2.47.2 From 4a55b39b0dd43118ece25c4c042cb7eac88bf5d8 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 09:49:08 -0300 Subject: [PATCH 497/684] comms: add Synapse guest appservice secret job --- services/communication/kustomization.yaml | 1 + ...se-guest-appservice-secret-ensure-job.yaml | 111 ++++++++++++++++++ 2 files changed, 112 insertions(+) create mode 100644 services/communication/synapse-guest-appservice-secret-ensure-job.yaml diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index 42aa7ea..f161e0e 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -7,6 +7,7 @@ resources: - synapse-rendered.yaml - synapse-signingkey-ensure-job.yaml - synapse-seeder-admin-ensure-job.yaml + - synapse-guest-appservice-secret-ensure-job.yaml - mas-configmap.yaml - mas-admin-client-secret-ensure-job.yaml - mas-deployment.yaml diff --git a/services/communication/synapse-guest-appservice-secret-ensure-job.yaml b/services/communication/synapse-guest-appservice-secret-ensure-job.yaml new file mode 100644 index 0000000..6dd8564 --- /dev/null +++ b/services/communication/synapse-guest-appservice-secret-ensure-job.yaml @@ -0,0 +1,111 @@ +# services/communication/synapse-guest-appservice-secret-ensure-job.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: synapse-guest-appservice-secret-writer + namespace: comms +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: synapse-guest-appservice-secret-writer + namespace: comms +rules: + - apiGroups: [""] + resources: ["secrets"] + resourceNames: ["synapse-guest-appservice-runtime"] + verbs: ["get", "patch", "update"] + - apiGroups: [""] + resources: ["secrets"] + verbs: ["create"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + 
name: synapse-guest-appservice-secret-writer + namespace: comms +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: synapse-guest-appservice-secret-writer +subjects: + - kind: ServiceAccount + name: synapse-guest-appservice-secret-writer + namespace: comms +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: synapse-guest-appservice-secret-ensure-1 + namespace: comms +spec: + backoffLimit: 2 + template: + spec: + serviceAccountName: synapse-guest-appservice-secret-writer + restartPolicy: OnFailure + volumes: + - name: work + emptyDir: {} + initContainers: + - name: generate + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + umask 077 + AS_TOKEN="$(dd if=/dev/urandom bs=32 count=1 2>/dev/null | od -An -tx1 | tr -d ' \n')" + HS_TOKEN="$(dd if=/dev/urandom bs=32 count=1 2>/dev/null | od -An -tx1 | tr -d ' \n')" + + printf '%s' "${AS_TOKEN}" > /work/as_token + printf '%s' "${HS_TOKEN}" > /work/hs_token + + cat > /work/registration.yaml </dev/null 2>&1; then + if kubectl -n comms get secret synapse-guest-appservice-runtime -o jsonpath='{.data.registration\.yaml}' 2>/dev/null | grep -q .; then + exit 0 + fi + else + kubectl -n comms create secret generic synapse-guest-appservice-runtime \ + --from-file=registration.yaml=/work/registration.yaml \ + --from-file=as_token=/work/as_token \ + --from-file=hs_token=/work/hs_token >/dev/null + exit 0 + fi + + reg_b64="$(base64 /work/registration.yaml | tr -d '\n')" + as_b64="$(base64 /work/as_token | tr -d '\n')" + hs_b64="$(base64 /work/hs_token | tr -d '\n')" + + payload="$(printf '{\"data\":{\"registration.yaml\":\"%s\",\"as_token\":\"%s\",\"hs_token\":\"%s\"}}' \"${reg_b64}\" \"${as_b64}\" \"${hs_b64}\")" + kubectl -n comms patch secret synapse-guest-appservice-runtime --type=merge -p \"${payload}\" >/dev/null + volumeMounts: + - name: work + mountPath: /work + -- 2.47.2 From 7ba578ed2141d5f0dd05fb93a1548ffcccf7ffef Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 
Jan 2026 09:54:41 -0300 Subject: [PATCH 498/684] comms: restore Synapse guest join --- .../guest-register-configmap.yaml | 157 ++++++------------ .../guest-register-deployment.yaml | 27 ++- services/communication/synapse-rendered.yaml | 11 ++ 3 files changed, 73 insertions(+), 122 deletions(-) diff --git a/services/communication/guest-register-configmap.yaml b/services/communication/guest-register-configmap.yaml index 296c291..68ea2db 100644 --- a/services/communication/guest-register-configmap.yaml +++ b/services/communication/guest-register-configmap.yaml @@ -5,7 +5,6 @@ metadata: name: matrix-guest-register data: server.py: | - import base64 import json import os import random @@ -14,15 +13,11 @@ data: from urllib import error, parse, request MATRIX_BASE = os.environ.get("MATRIX_BASE", "http://othrys-synapse-matrix-synapse:8008").rstrip("/") - AUTH_BASE = os.environ.get("AUTH_BASE", "http://matrix-authentication-service:8080").rstrip("/") - MAS_ADMIN_BASE = os.environ.get("MAS_ADMIN_BASE", "http://matrix-authentication-service:8081").rstrip("/") SERVER_NAME = os.environ.get("MATRIX_SERVER_NAME", "live.bstein.dev") - MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"] - MAS_ADMIN_CLIENT_SECRET_FILE = os.environ.get("MAS_ADMIN_CLIENT_SECRET_FILE", "/etc/mas/admin-client/client_secret") + AS_TOKEN = os.environ["AS_TOKEN"] + HS_TOKEN = os.environ["HS_TOKEN"] - # Basic rate limiting (best-effort) to avoid accidental abuse. - # Count requests per client IP over a short window. 
RATE_WINDOW_SEC = int(os.environ.get("RATE_WINDOW_SEC", "60")) RATE_MAX = int(os.environ.get("RATE_MAX", "30")) _rate = {} # ip -> [window_start, count] @@ -51,81 +46,6 @@ data: payload = {} return e.code, payload - def _form(method, url, *, headers=None, fields=None, timeout=20): - hdrs = {"Content-Type": "application/x-www-form-urlencoded"} - if headers: - hdrs.update(headers) - data = parse.urlencode(fields or {}).encode() - req = request.Request(url, data=data, headers=hdrs, method=method) - try: - with request.urlopen(req, timeout=timeout) as resp: - raw = resp.read() - payload = json.loads(raw.decode()) if raw else {} - return resp.status, payload - except error.HTTPError as e: - raw = e.read() - try: - payload = json.loads(raw.decode()) if raw else {} - except Exception: - payload = {} - return e.code, payload - - def _login(user, password): - status, payload = _json( - "POST", - f"{AUTH_BASE}/_matrix/client/v3/login", - body={ - "type": "m.login.password", - "identifier": {"type": "m.id.user", "user": user}, - "password": password, - }, - timeout=20, - ) - if status != 200 or "access_token" not in payload: - raise RuntimeError("login_failed") - return payload - - _mas_admin_token = None - _mas_admin_token_at = 0.0 - - def _mas_admin_access_token(now): - global _mas_admin_token, _mas_admin_token_at - if _mas_admin_token and (now - _mas_admin_token_at) < 300: - return _mas_admin_token - - with open(MAS_ADMIN_CLIENT_SECRET_FILE, encoding="utf-8") as fh: - client_secret = fh.read().strip() - creds = f"{MAS_ADMIN_CLIENT_ID}:{client_secret}".encode() - basic = base64.b64encode(creds).decode() - - status, payload = _form( - "POST", - f"{AUTH_BASE}/oauth2/token", - headers={"Authorization": f"Basic {basic}"}, - fields={"grant_type": "client_credentials"}, - timeout=20, - ) - if status != 200 or "access_token" not in payload: - raise RuntimeError("mas_admin_token_failed") - - _mas_admin_token = payload["access_token"] - _mas_admin_token_at = now - return 
_mas_admin_token - - def _mas_create_user(admin_token, username, password): - status, payload = _json( - "POST", - f"{MAS_ADMIN_BASE}/api/admin/v1/users", - token=admin_token, - body={"username": username, "password": password}, - timeout=25, - ) - if status in (200, 201): - return - if status == 409 or payload.get("errcode") == "M_ALREADY_EXISTS": - raise RuntimeError("username_taken") - raise RuntimeError("user_create_failed") - def _generate_localpart(): return "guest-" + secrets.token_hex(6) @@ -144,6 +64,23 @@ data: except Exception: return + def _register_user(localpart): + url = f"{MATRIX_BASE}/_matrix/client/v3/register?access_token={parse.quote(AS_TOKEN)}" + status, payload = _json( + "POST", + url, + body={ + "type": "m.login.application_service", + "username": localpart, + "inhibit_login": False, + "initial_device_display_name": "Guest session", + }, + timeout=25, + ) + if status != 200 or "access_token" not in payload: + raise RuntimeError("register_failed") + return payload + def _rate_check(ip, now): win, cnt = _rate.get(ip, (now, 0)) if now - win > RATE_WINDOW_SEC: @@ -154,6 +91,17 @@ data: _rate[ip] = (win, cnt + 1) return True + def _is_appservice_auth(auth_header): + if not auth_header: + return False + parts = auth_header.split(" ", 1) + if len(parts) != 2: + return False + scheme, token = parts + if scheme.lower() != "bearer": + return False + return secrets.compare_digest(token, HS_TOKEN) + class Handler(BaseHTTPRequestHandler): server_version = "matrix-guest-register" @@ -176,13 +124,31 @@ data: self.end_headers() def do_GET(self): # noqa: N802 - if self.path in ("/healthz", "/"): + parsed = parse.urlparse(self.path) + + if parsed.path in ("/healthz", "/"): return self._send_json(200, {"ok": True}) + + if parsed.path.startswith("/_matrix/app/v1/users/"): + if not _is_appservice_auth(self.headers.get("authorization", "")): + return self._send_json(401, {"errcode": "M_UNAUTHORIZED", "error": "unauthorized"}) + return self._send_json(200, {}) + + 
if parsed.path.startswith("/_matrix/app/v1/rooms/"): + if not _is_appservice_auth(self.headers.get("authorization", "")): + return self._send_json(401, {"errcode": "M_UNAUTHORIZED", "error": "unauthorized"}) + return self._send_json(200, {}) + return self._send_json(404, {"errcode": "M_NOT_FOUND", "error": "not_found"}) def do_POST(self): # noqa: N802 - # We only implement guest registration (used by Element Web "Join as guest"). parsed = parse.urlparse(self.path) + + if parsed.path.startswith("/_matrix/app/v1/transactions/"): + if not _is_appservice_auth(self.headers.get("authorization", "")): + return self._send_json(401, {"errcode": "M_UNAUTHORIZED", "error": "unauthorized"}) + return self._send_json(200, {}) + if parsed.path not in ("/_matrix/client/v3/register", "/_matrix/client/r0/register"): return self._send_json(404, {"errcode": "M_NOT_FOUND", "error": "not_found"}) @@ -197,37 +163,19 @@ data: }, ) - # Best-effort client IP from X-Forwarded-For (Traefik). xfwd = self.headers.get("x-forwarded-for", "") ip = (xfwd.split(",")[0].strip() if xfwd else "") or self.client_address[0] now = __import__("time").time() if not _rate_check(ip, now): return self._send_json(429, {"errcode": "M_LIMIT_EXCEEDED", "error": "rate_limited"}) - # Consume request body (Element may send fields; we ignore). length = int(self.headers.get("content-length", "0") or "0") _ = self.rfile.read(length) if length else b"{}" - # Create a short-lived "guest" account by provisioning a normal user with a random password. - # This keeps MAS/OIDC intact while restoring a no-signup guest UX. 
try: displayname = _generate_displayname() - password = base64.urlsafe_b64encode(secrets.token_bytes(24)).decode().rstrip("=") - admin_token = _mas_admin_access_token(now) - last = None - for _ in range(3): - localpart = _generate_localpart() - try: - _mas_create_user(admin_token, localpart, password) - break - except RuntimeError as e: - last = str(e) - if last != "username_taken": - raise - else: - raise RuntimeError(last or "user_create_failed") - - login_payload = _login(localpart, password) + localpart = _generate_localpart() + login_payload = _register_user(localpart) _set_displayname(login_payload.get("access_token"), login_payload.get("user_id"), displayname) except Exception: return self._send_json(502, {"errcode": "M_UNKNOWN", "error": "guest_provision_failed"}) @@ -238,7 +186,6 @@ data: "device_id": login_payload.get("device_id"), "home_server": SERVER_NAME, } - # Do not expose refresh tokens for guests. return self._send_json(200, resp) def main(): diff --git a/services/communication/guest-register-deployment.yaml b/services/communication/guest-register-deployment.yaml index fe52862..985e1eb 100644 --- a/services/communication/guest-register-deployment.yaml +++ b/services/communication/guest-register-deployment.yaml @@ -37,16 +37,18 @@ spec: value: "8080" - name: MATRIX_BASE value: http://othrys-synapse-matrix-synapse:8008 - - name: AUTH_BASE - value: http://matrix-authentication-service:8080 - - name: MAS_ADMIN_BASE - value: http://matrix-authentication-service:8081 - - name: MAS_ADMIN_CLIENT_ID - value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM - - name: MAS_ADMIN_CLIENT_SECRET_FILE - value: /etc/mas/admin-client/client_secret - name: MATRIX_SERVER_NAME value: live.bstein.dev + - name: AS_TOKEN + valueFrom: + secretKeyRef: + name: synapse-guest-appservice-runtime + key: as_token + - name: HS_TOKEN + valueFrom: + secretKeyRef: + name: synapse-guest-appservice-runtime + key: hs_token - name: RATE_WINDOW_SEC value: "60" - name: RATE_MAX @@ -81,9 +83,6 @@ spec: 
mountPath: /app/server.py subPath: server.py readOnly: true - - name: mas-admin-client - mountPath: /etc/mas/admin-client - readOnly: true command: - python - /app/server.py @@ -94,9 +93,3 @@ spec: items: - key: server.py path: server.py - - name: mas-admin-client - secret: - secretName: mas-admin-client-runtime - items: - - key: client_secret - path: client_secret diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml index 22b68cf..be6084e 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/communication/synapse-rendered.yaml @@ -313,6 +313,8 @@ data: ## Registration ## enable_registration: false + app_service_config_files: + - /synapse/appservices/guest-register.yaml ## Metrics ### @@ -793,6 +795,9 @@ spec: mountPath: /synapse/secrets - name: signingkey mountPath: /synapse/keys + - name: appservices + mountPath: /synapse/appservices + readOnly: true - name: media mountPath: /synapse/data - name: tmpdir @@ -817,6 +822,12 @@ spec: items: - key: "signing.key" path: signing.key + - name: appservices + secret: + secretName: synapse-guest-appservice-runtime + items: + - key: registration.yaml + path: guest-register.yaml - name: tmpconf emptyDir: {} - name: tmpdir -- 2.47.2 From 376cbf6d705b7ad26507d262a1fac316026052f2 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 10:12:37 -0300 Subject: [PATCH 499/684] comms: mint guest sessions via MAS --- .../guest-register-configmap.yaml | 190 +++++++++++------- .../guest-register-deployment.yaml | 27 +-- services/communication/synapse-rendered.yaml | 11 - 3 files changed, 133 insertions(+), 95 deletions(-) diff --git a/services/communication/guest-register-configmap.yaml b/services/communication/guest-register-configmap.yaml index 68ea2db..057b0fe 100644 --- a/services/communication/guest-register-configmap.yaml +++ b/services/communication/guest-register-configmap.yaml @@ -5,6 +5,7 @@ metadata: name: matrix-guest-register data: server.py: | + 
import base64 import json import os import random @@ -12,11 +13,12 @@ data: from http.server import BaseHTTPRequestHandler, HTTPServer from urllib import error, parse, request - MATRIX_BASE = os.environ.get("MATRIX_BASE", "http://othrys-synapse-matrix-synapse:8008").rstrip("/") + MAS_BASE = os.environ.get("MAS_BASE", "http://matrix-authentication-service:8080").rstrip("/") SERVER_NAME = os.environ.get("MATRIX_SERVER_NAME", "live.bstein.dev") - AS_TOKEN = os.environ["AS_TOKEN"] - HS_TOKEN = os.environ["HS_TOKEN"] + MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"] + MAS_ADMIN_CLIENT_SECRET_FILE = os.environ.get("MAS_ADMIN_CLIENT_SECRET_FILE", "/etc/mas/admin-client/client_secret") + MAS_ADMIN_SCOPE = os.environ.get("MAS_ADMIN_SCOPE", "urn:mas:admin") RATE_WINDOW_SEC = int(os.environ.get("RATE_WINDOW_SEC", "60")) RATE_MAX = int(os.environ.get("RATE_MAX", "30")) @@ -25,14 +27,14 @@ data: ADJ = ["brisk", "calm", "eager", "gentle", "merry", "nifty", "rapid", "sunny", "witty", "zesty"] NOUN = ["otter", "falcon", "comet", "ember", "grove", "harbor", "meadow", "raven", "river", "summit"] - def _json(method, url, *, token=None, body=None, timeout=20): - headers = {"Content-Type": "application/json"} - if token: - headers["Authorization"] = f"Bearer {token}" + def _json(method, url, *, headers=None, body=None, timeout=20): + hdrs = {"Content-Type": "application/json"} + if headers: + hdrs.update(headers) data = None if body is not None: data = json.dumps(body).encode() - req = request.Request(url, data=data, headers=headers, method=method) + req = request.Request(url, data=data, headers=hdrs, method=method) try: with request.urlopen(req, timeout=timeout) as resp: raw = resp.read() @@ -46,40 +48,96 @@ data: payload = {} return e.code, payload + def _form(method, url, *, headers=None, fields=None, timeout=20): + hdrs = {"Content-Type": "application/x-www-form-urlencoded"} + if headers: + hdrs.update(headers) + data = parse.urlencode(fields or {}).encode() + req = 
request.Request(url, data=data, headers=hdrs, method=method) + try: + with request.urlopen(req, timeout=timeout) as resp: + raw = resp.read() + payload = json.loads(raw.decode()) if raw else {} + return resp.status, payload + except error.HTTPError as e: + raw = e.read() + try: + payload = json.loads(raw.decode()) if raw else {} + except Exception: + payload = {} + return e.code, payload + + _admin_token = None + _admin_token_at = 0.0 + + def _mas_admin_access_token(now): + global _admin_token, _admin_token_at + if _admin_token and (now - _admin_token_at) < 300: + return _admin_token + + with open(MAS_ADMIN_CLIENT_SECRET_FILE, encoding="utf-8") as fh: + client_secret = fh.read().strip() + basic = base64.b64encode(f"{MAS_ADMIN_CLIENT_ID}:{client_secret}".encode()).decode() + + status, payload = _form( + "POST", + f"{MAS_BASE}/oauth2/token", + headers={"Authorization": f"Basic {basic}"}, + fields={"grant_type": "client_credentials", "scope": MAS_ADMIN_SCOPE}, + timeout=20, + ) + if status != 200 or "access_token" not in payload: + raise RuntimeError("mas_admin_token_failed") + + _admin_token = payload["access_token"] + _admin_token_at = now + return _admin_token + + def _gql(admin_token, query, variables): + status, payload = _json( + "POST", + f"{MAS_BASE}/graphql", + headers={"Authorization": f"Bearer {admin_token}"}, + body={"query": query, "variables": variables}, + timeout=20, + ) + if status != 200: + raise RuntimeError("gql_http_failed") + if payload.get("errors"): + raise RuntimeError("gql_error") + return payload.get("data") or {} + def _generate_localpart(): return "guest-" + secrets.token_hex(6) def _generate_displayname(): return f"{random.choice(ADJ)}-{random.choice(NOUN)}" - def _set_displayname(access_token, user_id, displayname): - try: - _json( - "PUT", - f"{MATRIX_BASE}/_matrix/client/v3/profile/{parse.quote(user_id)}/displayname", - token=access_token, - body={"displayname": displayname}, - timeout=15, - ) - except Exception: - return - - def 
_register_user(localpart): - url = f"{MATRIX_BASE}/_matrix/client/v3/register?access_token={parse.quote(AS_TOKEN)}" - status, payload = _json( - "POST", - url, - body={ - "type": "m.login.application_service", - "username": localpart, - "inhibit_login": False, - "initial_device_display_name": "Guest session", - }, - timeout=25, + def _add_user(admin_token, username): + data = _gql( + admin_token, + "mutation($input:AddUserInput!){addUser(input:$input){status user{id}}}", + {"input": {"username": username, "skipHomeserverCheck": True}}, ) - if status != 200 or "access_token" not in payload: - raise RuntimeError("register_failed") - return payload + res = data.get("addUser") or {} + status = res.get("status") + user_id = (res.get("user") or {}).get("id") + return status, user_id + + def _set_display_name(admin_token, user_id, displayname): + _gql( + admin_token, + "mutation($input:SetDisplayNameInput!){setDisplayName(input:$input){status}}", + {"input": {"userId": user_id, "displayName": displayname}}, + ) + + def _create_oauth2_session(admin_token, user_id, scope): + data = _gql( + admin_token, + "mutation($input:CreateOAuth2SessionInput!){createOauth2Session(input:$input){accessToken}}", + {"input": {"userId": user_id, "scope": scope, "permanent": False}}, + ) + return (data.get("createOauth2Session") or {}).get("accessToken") def _rate_check(ip, now): win, cnt = _rate.get(ip, (now, 0)) @@ -91,17 +149,6 @@ data: _rate[ip] = (win, cnt + 1) return True - def _is_appservice_auth(auth_header): - if not auth_header: - return False - parts = auth_header.split(" ", 1) - if len(parts) != 2: - return False - scheme, token = parts - if scheme.lower() != "bearer": - return False - return secrets.compare_digest(token, HS_TOKEN) - class Handler(BaseHTTPRequestHandler): server_version = "matrix-guest-register" @@ -124,31 +171,12 @@ data: self.end_headers() def do_GET(self): # noqa: N802 - parsed = parse.urlparse(self.path) - - if parsed.path in ("/healthz", "/"): + if self.path 
in ("/healthz", "/"): return self._send_json(200, {"ok": True}) - - if parsed.path.startswith("/_matrix/app/v1/users/"): - if not _is_appservice_auth(self.headers.get("authorization", "")): - return self._send_json(401, {"errcode": "M_UNAUTHORIZED", "error": "unauthorized"}) - return self._send_json(200, {}) - - if parsed.path.startswith("/_matrix/app/v1/rooms/"): - if not _is_appservice_auth(self.headers.get("authorization", "")): - return self._send_json(401, {"errcode": "M_UNAUTHORIZED", "error": "unauthorized"}) - return self._send_json(200, {}) - return self._send_json(404, {"errcode": "M_NOT_FOUND", "error": "not_found"}) def do_POST(self): # noqa: N802 parsed = parse.urlparse(self.path) - - if parsed.path.startswith("/_matrix/app/v1/transactions/"): - if not _is_appservice_auth(self.headers.get("authorization", "")): - return self._send_json(401, {"errcode": "M_UNAUTHORIZED", "error": "unauthorized"}) - return self._send_json(200, {}) - if parsed.path not in ("/_matrix/client/v3/register", "/_matrix/client/r0/register"): return self._send_json(404, {"errcode": "M_NOT_FOUND", "error": "not_found"}) @@ -173,17 +201,35 @@ data: _ = self.rfile.read(length) if length else b"{}" try: + admin_token = _mas_admin_access_token(now) displayname = _generate_displayname() - localpart = _generate_localpart() - login_payload = _register_user(localpart) - _set_displayname(login_payload.get("access_token"), login_payload.get("user_id"), displayname) + + localpart = None + mas_user_id = None + for _ in range(5): + localpart = _generate_localpart() + status, mas_user_id = _add_user(admin_token, localpart) + if status == "ADDED": + break + mas_user_id = None + if not mas_user_id or not localpart: + raise RuntimeError("add_user_failed") + + try: + _set_display_name(admin_token, mas_user_id, displayname) + except Exception: + pass + + access_token = _create_oauth2_session(admin_token, mas_user_id, "openid email") + if not access_token: + raise RuntimeError("session_failed") 
except Exception: return self._send_json(502, {"errcode": "M_UNKNOWN", "error": "guest_provision_failed"}) resp = { - "user_id": login_payload.get("user_id") or f"@{localpart}:{SERVER_NAME}", - "access_token": login_payload.get("access_token"), - "device_id": login_payload.get("device_id"), + "user_id": f"@{localpart}:{SERVER_NAME}", + "access_token": access_token, + "device_id": "g-" + secrets.token_hex(6), "home_server": SERVER_NAME, } return self._send_json(200, resp) diff --git a/services/communication/guest-register-deployment.yaml b/services/communication/guest-register-deployment.yaml index 985e1eb..790cda9 100644 --- a/services/communication/guest-register-deployment.yaml +++ b/services/communication/guest-register-deployment.yaml @@ -35,20 +35,14 @@ spec: value: "1" - name: PORT value: "8080" - - name: MATRIX_BASE - value: http://othrys-synapse-matrix-synapse:8008 + - name: MAS_BASE + value: http://matrix-authentication-service:8080 + - name: MAS_ADMIN_CLIENT_ID + value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM + - name: MAS_ADMIN_CLIENT_SECRET_FILE + value: /etc/mas/admin-client/client_secret - name: MATRIX_SERVER_NAME value: live.bstein.dev - - name: AS_TOKEN - valueFrom: - secretKeyRef: - name: synapse-guest-appservice-runtime - key: as_token - - name: HS_TOKEN - valueFrom: - secretKeyRef: - name: synapse-guest-appservice-runtime - key: hs_token - name: RATE_WINDOW_SEC value: "60" - name: RATE_MAX @@ -83,6 +77,9 @@ spec: mountPath: /app/server.py subPath: server.py readOnly: true + - name: mas-admin-client + mountPath: /etc/mas/admin-client + readOnly: true command: - python - /app/server.py @@ -93,3 +90,9 @@ spec: items: - key: server.py path: server.py + - name: mas-admin-client + secret: + secretName: mas-admin-client-runtime + items: + - key: client_secret + path: client_secret diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml index be6084e..22b68cf 100644 --- a/services/communication/synapse-rendered.yaml 
+++ b/services/communication/synapse-rendered.yaml @@ -313,8 +313,6 @@ data: ## Registration ## enable_registration: false - app_service_config_files: - - /synapse/appservices/guest-register.yaml ## Metrics ### @@ -795,9 +793,6 @@ spec: mountPath: /synapse/secrets - name: signingkey mountPath: /synapse/keys - - name: appservices - mountPath: /synapse/appservices - readOnly: true - name: media mountPath: /synapse/data - name: tmpdir @@ -822,12 +817,6 @@ spec: items: - key: "signing.key" path: signing.key - - name: appservices - secret: - secretName: synapse-guest-appservice-runtime - items: - - key: registration.yaml - path: guest-register.yaml - name: tmpconf emptyDir: {} - name: tmpdir -- 2.47.2 From c111f773b73a55ca727c32ef96b6841ad0e80135 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 10:13:09 -0300 Subject: [PATCH 500/684] nextcloud: reset storage claims --- services/nextcloud-mail-sync/cronjob.yaml | 8 ++++---- services/nextcloud/cronjob.yaml | 8 ++++---- services/nextcloud/deployment.yaml | 8 ++++---- services/nextcloud/maintenance-cronjob.yaml | 8 ++++---- services/nextcloud/pvc.yaml | 9 ++++----- 5 files changed, 20 insertions(+), 21 deletions(-) diff --git a/services/nextcloud-mail-sync/cronjob.yaml b/services/nextcloud-mail-sync/cronjob.yaml index e0705c5..9976d8e 100644 --- a/services/nextcloud-mail-sync/cronjob.yaml +++ b/services/nextcloud-mail-sync/cronjob.yaml @@ -80,16 +80,16 @@ spec: volumes: - name: nextcloud-config-pvc persistentVolumeClaim: - claimName: nextcloud-config + claimName: nextcloud-config-v2 - name: nextcloud-custom-apps persistentVolumeClaim: - claimName: nextcloud-custom-apps + claimName: nextcloud-custom-apps-v2 - name: nextcloud-user-data persistentVolumeClaim: - claimName: nextcloud-user-data + claimName: nextcloud-user-data-v2 - name: nextcloud-web persistentVolumeClaim: - claimName: nextcloud-web + claimName: nextcloud-web-v2 - name: sync-script configMap: name: nextcloud-mail-sync-script diff --git 
a/services/nextcloud/cronjob.yaml b/services/nextcloud/cronjob.yaml index 721a494..cc0091b 100644 --- a/services/nextcloud/cronjob.yaml +++ b/services/nextcloud/cronjob.yaml @@ -35,13 +35,13 @@ spec: volumes: - name: nextcloud-config-pvc persistentVolumeClaim: - claimName: nextcloud-config + claimName: nextcloud-config-v2 - name: nextcloud-custom-apps persistentVolumeClaim: - claimName: nextcloud-custom-apps + claimName: nextcloud-custom-apps-v2 - name: nextcloud-user-data persistentVolumeClaim: - claimName: nextcloud-user-data + claimName: nextcloud-user-data-v2 - name: nextcloud-web persistentVolumeClaim: - claimName: nextcloud-web + claimName: nextcloud-web-v2 diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 0d34e45..295435e 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -300,16 +300,16 @@ spec: volumes: - name: nextcloud-web persistentVolumeClaim: - claimName: nextcloud-web + claimName: nextcloud-web-v2 - name: nextcloud-config-pvc persistentVolumeClaim: - claimName: nextcloud-config + claimName: nextcloud-config-v2 - name: nextcloud-custom-apps persistentVolumeClaim: - claimName: nextcloud-custom-apps + claimName: nextcloud-custom-apps-v2 - name: nextcloud-user-data persistentVolumeClaim: - claimName: nextcloud-user-data + claimName: nextcloud-user-data-v2 - name: nextcloud-config-extra configMap: name: nextcloud-config diff --git a/services/nextcloud/maintenance-cronjob.yaml b/services/nextcloud/maintenance-cronjob.yaml index a6fad0a..618f548 100644 --- a/services/nextcloud/maintenance-cronjob.yaml +++ b/services/nextcloud/maintenance-cronjob.yaml @@ -55,16 +55,16 @@ spec: volumes: - name: nextcloud-config-pvc persistentVolumeClaim: - claimName: nextcloud-config + claimName: nextcloud-config-v2 - name: nextcloud-custom-apps persistentVolumeClaim: - claimName: nextcloud-custom-apps + claimName: nextcloud-custom-apps-v2 - name: nextcloud-user-data persistentVolumeClaim: - 
claimName: nextcloud-user-data + claimName: nextcloud-user-data-v2 - name: nextcloud-web persistentVolumeClaim: - claimName: nextcloud-web + claimName: nextcloud-web-v2 - name: maintenance-script configMap: name: nextcloud-maintenance-script diff --git a/services/nextcloud/pvc.yaml b/services/nextcloud/pvc.yaml index a934732..c0779ef 100644 --- a/services/nextcloud/pvc.yaml +++ b/services/nextcloud/pvc.yaml @@ -2,7 +2,7 @@ apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: nextcloud-config + name: nextcloud-config-v2 namespace: nextcloud spec: accessModes: @@ -15,7 +15,7 @@ spec: apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: nextcloud-custom-apps + name: nextcloud-custom-apps-v2 namespace: nextcloud spec: accessModes: @@ -28,7 +28,7 @@ spec: apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: nextcloud-web + name: nextcloud-web-v2 namespace: nextcloud spec: accessModes: @@ -41,7 +41,7 @@ spec: apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: nextcloud-user-data + name: nextcloud-user-data-v2 namespace: nextcloud spec: accessModes: @@ -50,4 +50,3 @@ spec: requests: storage: 2Ti storageClassName: asteria - volumeName: pvc-d918158d-422f-4928-beaa-27862611fbe5 -- 2.47.2 From 949995a8a0159e9ff8600e6f0e090efb3a4c006d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 10:25:10 -0300 Subject: [PATCH 501/684] comms: add guest register module scaffolding --- ...est-register-shared-secret-ensure-job.yaml | 86 +++++++++++++++++++ services/communication/kustomization.yaml | 2 + ...napse-guest-register-module-configmap.yaml | 70 +++++++++++++++ 3 files changed, 158 insertions(+) create mode 100644 services/communication/guest-register-shared-secret-ensure-job.yaml create mode 100644 services/communication/synapse-guest-register-module-configmap.yaml diff --git a/services/communication/guest-register-shared-secret-ensure-job.yaml b/services/communication/guest-register-shared-secret-ensure-job.yaml new file mode 100644 
index 0000000..06f2440 --- /dev/null +++ b/services/communication/guest-register-shared-secret-ensure-job.yaml @@ -0,0 +1,86 @@ +# services/communication/guest-register-shared-secret-ensure-job.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: guest-register-secret-writer + namespace: comms +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: guest-register-secret-writer + namespace: comms +rules: + - apiGroups: [""] + resources: ["secrets"] + resourceNames: ["guest-register-shared-secret-runtime"] + verbs: ["get", "patch", "update"] + - apiGroups: [""] + resources: ["secrets"] + verbs: ["create"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: guest-register-secret-writer + namespace: comms +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: guest-register-secret-writer +subjects: + - kind: ServiceAccount + name: guest-register-secret-writer + namespace: comms +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: guest-register-shared-secret-ensure-1 + namespace: comms +spec: + backoffLimit: 2 + template: + spec: + serviceAccountName: guest-register-secret-writer + restartPolicy: OnFailure + volumes: + - name: work + emptyDir: {} + initContainers: + - name: generate + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + umask 077 + dd if=/dev/urandom bs=32 count=1 2>/dev/null | od -An -tx1 | tr -d ' \n' > /work/secret + chmod 0644 /work/secret + volumeMounts: + - name: work + mountPath: /work + containers: + - name: write + image: bitnami/kubectl:latest + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + if kubectl -n comms get secret guest-register-shared-secret-runtime >/dev/null 2>&1; then + if kubectl -n comms get secret guest-register-shared-secret-runtime -o jsonpath='{.data.secret}' 2>/dev/null | grep -q .; then + exit 0 + fi + else + kubectl -n comms create secret generic guest-register-shared-secret-runtime \ + 
--from-file=secret=/work/secret >/dev/null + exit 0 + fi + + secret_b64="$(base64 /work/secret | tr -d '\n')" + payload="$(printf '{\"data\":{\"secret\":\"%s\"}}' \"${secret_b64}\")" + kubectl -n comms patch secret guest-register-shared-secret-runtime --type=merge -p \"${payload}\" >/dev/null + volumeMounts: + - name: work + mountPath: /work + diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml index f161e0e..d2352b8 100644 --- a/services/communication/kustomization.yaml +++ b/services/communication/kustomization.yaml @@ -8,6 +8,8 @@ resources: - synapse-signingkey-ensure-job.yaml - synapse-seeder-admin-ensure-job.yaml - synapse-guest-appservice-secret-ensure-job.yaml + - guest-register-shared-secret-ensure-job.yaml + - synapse-guest-register-module-configmap.yaml - mas-configmap.yaml - mas-admin-client-secret-ensure-job.yaml - mas-deployment.yaml diff --git a/services/communication/synapse-guest-register-module-configmap.yaml b/services/communication/synapse-guest-register-module-configmap.yaml new file mode 100644 index 0000000..37da25d --- /dev/null +++ b/services/communication/synapse-guest-register-module-configmap.yaml @@ -0,0 +1,70 @@ +# services/communication/synapse-guest-register-module-configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: synapse-guest-register-module +data: + guest_register.py: | + import secrets + + import synapse.api.auth + from synapse.api.errors import Codes, SynapseError + from synapse.http.server import DirectServeJsonResource + from synapse.http.servlet import parse_json_object_from_request + + + class GuestRegisterResource(DirectServeJsonResource): + def __init__(self, hs, shared_secret: str, header_name: str): + super().__init__(clock=hs.get_clock()) + self._hs = hs + self._shared_secret = shared_secret + self._header_name = header_name + + async def _async_render_POST(self, request): # noqa: N802 + provided = request.requestHeaders.getRawHeaders(self._header_name) + 
if not provided or not secrets.compare_digest(provided[0], self._shared_secret): + raise SynapseError(403, "Forbidden", errcode=Codes.FORBIDDEN) + + body = parse_json_object_from_request(request) + initial_device_display_name = body.get("initial_device_display_name") + if not isinstance(initial_device_display_name, str): + initial_device_display_name = None + + reg = self._hs.get_registration_handler() + address = request.getClientAddress().host + + user_id = await reg.register_user(make_guest=True, address=address) + + device_id = synapse.api.auth.GUEST_DEVICE_ID + device_id, access_token, valid_until_ms, refresh_token = await reg.register_device( + user_id, + device_id, + initial_device_display_name, + is_guest=True, + ) + + result = { + "user_id": user_id, + "device_id": device_id, + "access_token": access_token, + "home_server": self._hs.hostname, + } + + if valid_until_ms is not None: + result["expires_in_ms"] = valid_until_ms - self._hs.get_clock().time_msec() + + if refresh_token is not None: + result["refresh_token"] = refresh_token + + return 200, result + + + class GuestRegisterModule: + def __init__(self, config, api): + shared_secret = config["shared_secret"] + header_name = config.get("header_name", "x-guest-register-secret") + path = config.get("path", "/_matrix/client/v3/_guest_register") + + hs = api._hs # noqa: SLF001 + api.register_web_resource(path, GuestRegisterResource(hs, shared_secret, header_name)) + -- 2.47.2 From 44404aa2f2aa4534df1444365a10642dcc4d6e5e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 10:34:52 -0300 Subject: [PATCH 502/684] comms: restore Element guest registration --- .../guest-register-configmap.yaml | 154 +++--------------- .../guest-register-deployment.yaml | 22 +-- services/communication/synapse-rendered.yaml | 26 +++ 3 files changed, 52 insertions(+), 150 deletions(-) diff --git a/services/communication/guest-register-configmap.yaml b/services/communication/guest-register-configmap.yaml index 
057b0fe..22c27e8 100644 --- a/services/communication/guest-register-configmap.yaml +++ b/services/communication/guest-register-configmap.yaml @@ -5,28 +5,20 @@ metadata: name: matrix-guest-register data: server.py: | - import base64 import json import os - import random - import secrets from http.server import BaseHTTPRequestHandler, HTTPServer from urllib import error, parse, request - MAS_BASE = os.environ.get("MAS_BASE", "http://matrix-authentication-service:8080").rstrip("/") - SERVER_NAME = os.environ.get("MATRIX_SERVER_NAME", "live.bstein.dev") - - MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"] - MAS_ADMIN_CLIENT_SECRET_FILE = os.environ.get("MAS_ADMIN_CLIENT_SECRET_FILE", "/etc/mas/admin-client/client_secret") - MAS_ADMIN_SCOPE = os.environ.get("MAS_ADMIN_SCOPE", "urn:mas:admin") + SYNAPSE_BASE = os.environ.get("SYNAPSE_BASE", "http://othrys-synapse-matrix-synapse:8008").rstrip("/") + GUEST_REGISTER_SHARED_SECRET = os.environ["GUEST_REGISTER_SHARED_SECRET"] + GUEST_REGISTER_HEADER = os.environ.get("GUEST_REGISTER_HEADER", "x-guest-register-secret") + GUEST_REGISTER_PATH = os.environ.get("GUEST_REGISTER_PATH", "/_matrix/client/v3/_guest_register") RATE_WINDOW_SEC = int(os.environ.get("RATE_WINDOW_SEC", "60")) RATE_MAX = int(os.environ.get("RATE_MAX", "30")) _rate = {} # ip -> [window_start, count] - ADJ = ["brisk", "calm", "eager", "gentle", "merry", "nifty", "rapid", "sunny", "witty", "zesty"] - NOUN = ["otter", "falcon", "comet", "ember", "grove", "harbor", "meadow", "raven", "river", "summit"] - def _json(method, url, *, headers=None, body=None, timeout=20): hdrs = {"Content-Type": "application/json"} if headers: @@ -48,97 +40,6 @@ data: payload = {} return e.code, payload - def _form(method, url, *, headers=None, fields=None, timeout=20): - hdrs = {"Content-Type": "application/x-www-form-urlencoded"} - if headers: - hdrs.update(headers) - data = parse.urlencode(fields or {}).encode() - req = request.Request(url, data=data, headers=hdrs, 
method=method) - try: - with request.urlopen(req, timeout=timeout) as resp: - raw = resp.read() - payload = json.loads(raw.decode()) if raw else {} - return resp.status, payload - except error.HTTPError as e: - raw = e.read() - try: - payload = json.loads(raw.decode()) if raw else {} - except Exception: - payload = {} - return e.code, payload - - _admin_token = None - _admin_token_at = 0.0 - - def _mas_admin_access_token(now): - global _admin_token, _admin_token_at - if _admin_token and (now - _admin_token_at) < 300: - return _admin_token - - with open(MAS_ADMIN_CLIENT_SECRET_FILE, encoding="utf-8") as fh: - client_secret = fh.read().strip() - basic = base64.b64encode(f"{MAS_ADMIN_CLIENT_ID}:{client_secret}".encode()).decode() - - status, payload = _form( - "POST", - f"{MAS_BASE}/oauth2/token", - headers={"Authorization": f"Basic {basic}"}, - fields={"grant_type": "client_credentials", "scope": MAS_ADMIN_SCOPE}, - timeout=20, - ) - if status != 200 or "access_token" not in payload: - raise RuntimeError("mas_admin_token_failed") - - _admin_token = payload["access_token"] - _admin_token_at = now - return _admin_token - - def _gql(admin_token, query, variables): - status, payload = _json( - "POST", - f"{MAS_BASE}/graphql", - headers={"Authorization": f"Bearer {admin_token}"}, - body={"query": query, "variables": variables}, - timeout=20, - ) - if status != 200: - raise RuntimeError("gql_http_failed") - if payload.get("errors"): - raise RuntimeError("gql_error") - return payload.get("data") or {} - - def _generate_localpart(): - return "guest-" + secrets.token_hex(6) - - def _generate_displayname(): - return f"{random.choice(ADJ)}-{random.choice(NOUN)}" - - def _add_user(admin_token, username): - data = _gql( - admin_token, - "mutation($input:AddUserInput!){addUser(input:$input){status user{id}}}", - {"input": {"username": username, "skipHomeserverCheck": True}}, - ) - res = data.get("addUser") or {} - status = res.get("status") - user_id = (res.get("user") or 
{}).get("id") - return status, user_id - - def _set_display_name(admin_token, user_id, displayname): - _gql( - admin_token, - "mutation($input:SetDisplayNameInput!){setDisplayName(input:$input){status}}", - {"input": {"userId": user_id, "displayName": displayname}}, - ) - - def _create_oauth2_session(admin_token, user_id, scope): - data = _gql( - admin_token, - "mutation($input:CreateOAuth2SessionInput!){createOauth2Session(input:$input){accessToken}}", - {"input": {"userId": user_id, "scope": scope, "permanent": False}}, - ) - return (data.get("createOauth2Session") or {}).get("accessToken") - def _rate_check(ip, now): win, cnt = _rate.get(ip, (now, 0)) if now - win > RATE_WINDOW_SEC: @@ -198,41 +99,24 @@ data: return self._send_json(429, {"errcode": "M_LIMIT_EXCEEDED", "error": "rate_limited"}) length = int(self.headers.get("content-length", "0") or "0") - _ = self.rfile.read(length) if length else b"{}" - + raw = self.rfile.read(length) if length else b"{}" try: - admin_token = _mas_admin_access_token(now) - displayname = _generate_displayname() - - localpart = None - mas_user_id = None - for _ in range(5): - localpart = _generate_localpart() - status, mas_user_id = _add_user(admin_token, localpart) - if status == "ADDED": - break - mas_user_id = None - if not mas_user_id or not localpart: - raise RuntimeError("add_user_failed") - - try: - _set_display_name(admin_token, mas_user_id, displayname) - except Exception: - pass - - access_token = _create_oauth2_session(admin_token, mas_user_id, "openid email") - if not access_token: - raise RuntimeError("session_failed") + body = json.loads(raw.decode()) if raw else {} + if not isinstance(body, dict): + body = {} except Exception: - return self._send_json(502, {"errcode": "M_UNKNOWN", "error": "guest_provision_failed"}) + body = {} - resp = { - "user_id": f"@{localpart}:{SERVER_NAME}", - "access_token": access_token, - "device_id": "g-" + secrets.token_hex(6), - "home_server": SERVER_NAME, - } - return 
self._send_json(200, resp) + status, payload = _json( + "POST", + f"{SYNAPSE_BASE}{GUEST_REGISTER_PATH}", + headers={GUEST_REGISTER_HEADER: GUEST_REGISTER_SHARED_SECRET}, + body=body, + timeout=20, + ) + if "refresh_token" in payload: + payload.pop("refresh_token", None) + return self._send_json(status, payload) def main(): port = int(os.environ.get("PORT", "8080")) diff --git a/services/communication/guest-register-deployment.yaml b/services/communication/guest-register-deployment.yaml index 790cda9..5818e64 100644 --- a/services/communication/guest-register-deployment.yaml +++ b/services/communication/guest-register-deployment.yaml @@ -35,12 +35,13 @@ spec: value: "1" - name: PORT value: "8080" - - name: MAS_BASE - value: http://matrix-authentication-service:8080 - - name: MAS_ADMIN_CLIENT_ID - value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM - - name: MAS_ADMIN_CLIENT_SECRET_FILE - value: /etc/mas/admin-client/client_secret + - name: SYNAPSE_BASE + value: http://othrys-synapse-matrix-synapse:8008 + - name: GUEST_REGISTER_SHARED_SECRET + valueFrom: + secretKeyRef: + name: guest-register-shared-secret-runtime + key: secret - name: MATRIX_SERVER_NAME value: live.bstein.dev - name: RATE_WINDOW_SEC @@ -77,9 +78,6 @@ spec: mountPath: /app/server.py subPath: server.py readOnly: true - - name: mas-admin-client - mountPath: /etc/mas/admin-client - readOnly: true command: - python - /app/server.py @@ -90,9 +88,3 @@ spec: items: - key: server.py path: server.py - - name: mas-admin-client - secret: - secretName: mas-admin-client-runtime - items: - - key: client_secret - path: client_secret diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml index 22b68cf..749ae41 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/communication/synapse-rendered.yaml @@ -313,6 +313,12 @@ data: ## Registration ## enable_registration: false + modules: + - module: guest_register.GuestRegisterModule + config: + shared_secret: 
"@@GUEST_REGISTER_SECRET@@" + header_name: x-guest-register-secret + path: /_matrix/client/v3/_guest_register ## Metrics ### @@ -702,6 +708,7 @@ spec: export OIDC_CLIENT_SECRET_ESCAPED=$(echo "${OIDC_CLIENT_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ export TURN_SECRET_ESCAPED=$(echo "${TURN_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ export MAS_SHARED_SECRET_ESCAPED=$(echo "${MAS_SHARED_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ + export GUEST_REGISTER_SECRET_ESCAPED=$(echo "${GUEST_REGISTER_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ export MACAROON_SECRET_KEY_ESCAPED=$(echo "${MACAROON_SECRET_KEY:-}" | sed 's/[\\/&]/\\&/g') && \ cat /synapse/secrets/*.yaml | \ sed -e "s/@@POSTGRES_PASSWORD@@/${POSTGRES_PASSWORD:-}/" \ @@ -718,6 +725,9 @@ spec: if [ -n "${MAS_SHARED_SECRET_ESCAPED}" ]; then \ sed -i "s/@@MAS_SHARED_SECRET@@/${MAS_SHARED_SECRET_ESCAPED}/g" /synapse/runtime-config/homeserver.yaml; \ fi; \ + if [ -n "${GUEST_REGISTER_SECRET_ESCAPED}" ]; then \ + sed -i "s/@@GUEST_REGISTER_SECRET@@/${GUEST_REGISTER_SECRET_ESCAPED}/g" /synapse/runtime-config/homeserver.yaml; \ + fi; \ if [ -n "${MACAROON_SECRET_KEY_ESCAPED}" ]; then \ sed -i "s/@@MACAROON_SECRET_KEY@@/${MACAROON_SECRET_KEY_ESCAPED}/g" /synapse/runtime-config/homeserver.yaml; \ fi @@ -750,11 +760,18 @@ spec: secretKeyRef: name: mas-secrets-runtime key: matrix_shared_secret + - name: GUEST_REGISTER_SECRET + valueFrom: + secretKeyRef: + name: guest-register-shared-secret-runtime + key: secret - name: MACAROON_SECRET_KEY valueFrom: secretKeyRef: name: synapse-macaroon key: macaroon_secret_key + - name: PYTHONPATH + value: /synapse/modules image: "ghcr.io/element-hq/synapse:v1.144.0" imagePullPolicy: IfNotPresent securityContext: @@ -791,6 +808,9 @@ spec: mountPath: /synapse/config/conf.d - name: secrets mountPath: /synapse/secrets + - name: modules + mountPath: /synapse/modules + readOnly: true - name: signingkey mountPath: /synapse/keys - name: media @@ -811,6 +831,12 @@ spec: - name: secrets secret: secretName: 
othrys-synapse-matrix-synapse + - name: modules + configMap: + name: synapse-guest-register-module + items: + - key: guest_register.py + path: guest_register.py - name: signingkey secret: secretName: "othrys-synapse-signingkey" -- 2.47.2 From eb1cb8cb0093e7a5e9da8d7e0a8d02965a16c838 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 10:42:11 -0300 Subject: [PATCH 503/684] comms: move guest register module endpoint --- services/communication/guest-register-configmap.yaml | 2 +- .../communication/synapse-guest-register-module-configmap.yaml | 3 +-- services/communication/synapse-rendered.yaml | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/services/communication/guest-register-configmap.yaml b/services/communication/guest-register-configmap.yaml index 22c27e8..ef34a7c 100644 --- a/services/communication/guest-register-configmap.yaml +++ b/services/communication/guest-register-configmap.yaml @@ -13,7 +13,7 @@ data: SYNAPSE_BASE = os.environ.get("SYNAPSE_BASE", "http://othrys-synapse-matrix-synapse:8008").rstrip("/") GUEST_REGISTER_SHARED_SECRET = os.environ["GUEST_REGISTER_SHARED_SECRET"] GUEST_REGISTER_HEADER = os.environ.get("GUEST_REGISTER_HEADER", "x-guest-register-secret") - GUEST_REGISTER_PATH = os.environ.get("GUEST_REGISTER_PATH", "/_matrix/client/v3/_guest_register") + GUEST_REGISTER_PATH = os.environ.get("GUEST_REGISTER_PATH", "/_matrix/_guest_register") RATE_WINDOW_SEC = int(os.environ.get("RATE_WINDOW_SEC", "60")) RATE_MAX = int(os.environ.get("RATE_MAX", "30")) diff --git a/services/communication/synapse-guest-register-module-configmap.yaml b/services/communication/synapse-guest-register-module-configmap.yaml index 37da25d..8295f56 100644 --- a/services/communication/synapse-guest-register-module-configmap.yaml +++ b/services/communication/synapse-guest-register-module-configmap.yaml @@ -63,8 +63,7 @@ data: def __init__(self, config, api): shared_secret = config["shared_secret"] header_name = config.get("header_name", 
"x-guest-register-secret") - path = config.get("path", "/_matrix/client/v3/_guest_register") + path = config.get("path", "/_matrix/_guest_register") hs = api._hs # noqa: SLF001 api.register_web_resource(path, GuestRegisterResource(hs, shared_secret, header_name)) - diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml index 749ae41..535e0e0 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/communication/synapse-rendered.yaml @@ -318,7 +318,7 @@ data: config: shared_secret: "@@GUEST_REGISTER_SECRET@@" header_name: x-guest-register-secret - path: /_matrix/client/v3/_guest_register + path: /_matrix/_guest_register ## Metrics ### -- 2.47.2 From 49ec3d1be80e347a723604ce3d52e6c42717f21f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 10:46:33 -0300 Subject: [PATCH 504/684] comms: restart synapse + guest proxy --- services/communication/guest-register-deployment.yaml | 2 ++ services/communication/synapse-rendered.yaml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/services/communication/guest-register-deployment.yaml b/services/communication/guest-register-deployment.yaml index 5818e64..3a44bc1 100644 --- a/services/communication/guest-register-deployment.yaml +++ b/services/communication/guest-register-deployment.yaml @@ -12,6 +12,8 @@ spec: app.kubernetes.io/name: matrix-guest-register template: metadata: + annotations: + checksum/config: guest-register-proxy-1 labels: app.kubernetes.io/name: matrix-guest-register spec: diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml index 535e0e0..e06f351 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/communication/synapse-rendered.yaml @@ -684,7 +684,7 @@ spec: template: metadata: annotations: - checksum/config: manual-rtc-enable-6 + checksum/config: manual-rtc-enable-7 checksum/secrets: 
ec9f3b254a562a0f0709461eb74a8cc91b8c1a2fb06be2594a131776c2541773 labels: app.kubernetes.io/name: matrix-synapse -- 2.47.2 From 658e434e65d7823886b24fca7e18799687fb17b3 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 11:14:28 -0300 Subject: [PATCH 505/684] comms: return 405 for GET /register --- services/communication/guest-register-configmap.yaml | 5 ++++- services/communication/guest-register-deployment.yaml | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/services/communication/guest-register-configmap.yaml b/services/communication/guest-register-configmap.yaml index ef34a7c..55a2d7f 100644 --- a/services/communication/guest-register-configmap.yaml +++ b/services/communication/guest-register-configmap.yaml @@ -72,8 +72,11 @@ data: self.end_headers() def do_GET(self): # noqa: N802 - if self.path in ("/healthz", "/"): + parsed = parse.urlparse(self.path) + if parsed.path in ("/healthz", "/"): return self._send_json(200, {"ok": True}) + if parsed.path in ("/_matrix/client/v3/register", "/_matrix/client/r0/register"): + return self._send_json(405, {"errcode": "M_UNRECOGNIZED", "error": "Unrecognized request"}) return self._send_json(404, {"errcode": "M_NOT_FOUND", "error": "not_found"}) def do_POST(self): # noqa: N802 diff --git a/services/communication/guest-register-deployment.yaml b/services/communication/guest-register-deployment.yaml index 3a44bc1..2eb625e 100644 --- a/services/communication/guest-register-deployment.yaml +++ b/services/communication/guest-register-deployment.yaml @@ -13,7 +13,7 @@ spec: template: metadata: annotations: - checksum/config: guest-register-proxy-1 + checksum/config: guest-register-proxy-2 labels: app.kubernetes.io/name: matrix-guest-register spec: -- 2.47.2 From c950c32e93e56b5c73afd2780e95120ebc79ba98 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 11:17:33 -0300 Subject: [PATCH 506/684] comms: re-enable guest name randomizer --- services/communication/guest-name-job.yaml | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/communication/guest-name-job.yaml b/services/communication/guest-name-job.yaml index 6bd0761..cd5b9aa 100644 --- a/services/communication/guest-name-job.yaml +++ b/services/communication/guest-name-job.yaml @@ -6,7 +6,7 @@ metadata: namespace: comms spec: schedule: "*/1 * * * *" - suspend: true + suspend: false jobTemplate: spec: backoffLimit: 0 -- 2.47.2 From 695e1ec322cc48e4239524de83ff071723aa259a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 11:23:53 -0300 Subject: [PATCH 507/684] comms: set guest displayname at registration --- services/communication/guest-name-job.yaml | 2 +- ...napse-guest-register-module-configmap.yaml | 20 +++++++++++++++++++ services/communication/synapse-rendered.yaml | 2 +- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/services/communication/guest-name-job.yaml b/services/communication/guest-name-job.yaml index cd5b9aa..6bd0761 100644 --- a/services/communication/guest-name-job.yaml +++ b/services/communication/guest-name-job.yaml @@ -6,7 +6,7 @@ metadata: namespace: comms spec: schedule: "*/1 * * * *" - suspend: false + suspend: true jobTemplate: spec: backoffLimit: 0 diff --git a/services/communication/synapse-guest-register-module-configmap.yaml b/services/communication/synapse-guest-register-module-configmap.yaml index 8295f56..3afb3d9 100644 --- a/services/communication/synapse-guest-register-module-configmap.yaml +++ b/services/communication/synapse-guest-register-module-configmap.yaml @@ -6,11 +6,13 @@ metadata: data: guest_register.py: | import secrets + import random import synapse.api.auth from synapse.api.errors import Codes, SynapseError from synapse.http.server import DirectServeJsonResource from synapse.http.servlet import parse_json_object_from_request + from synapse.types import UserID, create_requester class GuestRegisterResource(DirectServeJsonResource): @@ -20,6 +22,9 @@ data: self._shared_secret = shared_secret 
self._header_name = header_name + self._adj = ["brisk", "calm", "eager", "gentle", "merry", "nifty", "rapid", "sunny", "witty", "zesty"] + self._noun = ["otter", "falcon", "comet", "ember", "grove", "harbor", "meadow", "raven", "river", "summit"] + async def _async_render_POST(self, request): # noqa: N802 provided = request.requestHeaders.getRawHeaders(self._header_name) if not provided or not secrets.compare_digest(provided[0], self._shared_secret): @@ -43,6 +48,21 @@ data: is_guest=True, ) + displayname = body.get("displayname") + if not isinstance(displayname, str) or not displayname.strip(): + displayname = f"{random.choice(self._adj)}-{random.choice(self._noun)}" + + try: + requester = create_requester(user_id, is_guest=True, device_id=device_id) + await self._hs.get_profile_handler().set_displayname( + UserID.from_string(user_id), + requester, + displayname, + propagate=False, + ) + except Exception: + pass + result = { "user_id": user_id, "device_id": device_id, diff --git a/services/communication/synapse-rendered.yaml b/services/communication/synapse-rendered.yaml index e06f351..9155044 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/communication/synapse-rendered.yaml @@ -684,7 +684,7 @@ spec: template: metadata: annotations: - checksum/config: manual-rtc-enable-7 + checksum/config: manual-rtc-enable-8 checksum/secrets: ec9f3b254a562a0f0709461eb74a8cc91b8c1a2fb06be2594a131776c2541773 labels: app.kubernetes.io/name: matrix-synapse -- 2.47.2 From cd4b963db41e8a9e22382f70be7d73181edd32c2 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 19:09:13 -0300 Subject: [PATCH 508/684] comms: serve register flows for guest UI --- services/communication/guest-register-configmap.yaml | 2 +- services/communication/guest-register-deployment.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/communication/guest-register-configmap.yaml b/services/communication/guest-register-configmap.yaml index 
55a2d7f..78cc789 100644 --- a/services/communication/guest-register-configmap.yaml +++ b/services/communication/guest-register-configmap.yaml @@ -76,7 +76,7 @@ data: if parsed.path in ("/healthz", "/"): return self._send_json(200, {"ok": True}) if parsed.path in ("/_matrix/client/v3/register", "/_matrix/client/r0/register"): - return self._send_json(405, {"errcode": "M_UNRECOGNIZED", "error": "Unrecognized request"}) + return self._send_json(200, {"flows": [{"stages": []}]}) return self._send_json(404, {"errcode": "M_NOT_FOUND", "error": "not_found"}) def do_POST(self): # noqa: N802 diff --git a/services/communication/guest-register-deployment.yaml b/services/communication/guest-register-deployment.yaml index 2eb625e..cd13f63 100644 --- a/services/communication/guest-register-deployment.yaml +++ b/services/communication/guest-register-deployment.yaml @@ -13,7 +13,7 @@ spec: template: metadata: annotations: - checksum/config: guest-register-proxy-2 + checksum/config: guest-register-proxy-3 labels: app.kubernetes.io/name: matrix-guest-register spec: -- 2.47.2 From 70e40b281f4c91ec6cdc937fd48698380ebe2f08 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 19:51:33 -0300 Subject: [PATCH 509/684] comms: issue guest tokens via MAS --- .../guest-register-configmap.yaml | 149 ++++++++++++++++-- .../guest-register-deployment.yaml | 24 ++- 2 files changed, 151 insertions(+), 22 deletions(-) diff --git a/services/communication/guest-register-configmap.yaml b/services/communication/guest-register-configmap.yaml index 78cc789..b051a10 100644 --- a/services/communication/guest-register-configmap.yaml +++ b/services/communication/guest-register-configmap.yaml @@ -5,20 +5,28 @@ metadata: name: matrix-guest-register data: server.py: | + import base64 import json import os + import random + import secrets from http.server import BaseHTTPRequestHandler, HTTPServer from urllib import error, parse, request - SYNAPSE_BASE = os.environ.get("SYNAPSE_BASE", 
"http://othrys-synapse-matrix-synapse:8008").rstrip("/") - GUEST_REGISTER_SHARED_SECRET = os.environ["GUEST_REGISTER_SHARED_SECRET"] - GUEST_REGISTER_HEADER = os.environ.get("GUEST_REGISTER_HEADER", "x-guest-register-secret") - GUEST_REGISTER_PATH = os.environ.get("GUEST_REGISTER_PATH", "/_matrix/_guest_register") + MAS_BASE = os.environ.get("MAS_BASE", "http://matrix-authentication-service:8080").rstrip("/") + SERVER_NAME = os.environ.get("MATRIX_SERVER_NAME", "live.bstein.dev") + + MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"] + MAS_ADMIN_CLIENT_SECRET_FILE = os.environ.get("MAS_ADMIN_CLIENT_SECRET_FILE", "/etc/mas/admin-client/client_secret") + MAS_ADMIN_SCOPE = os.environ.get("MAS_ADMIN_SCOPE", "urn:mas:admin") RATE_WINDOW_SEC = int(os.environ.get("RATE_WINDOW_SEC", "60")) RATE_MAX = int(os.environ.get("RATE_MAX", "30")) _rate = {} # ip -> [window_start, count] + ADJ = ["brisk", "calm", "eager", "gentle", "merry", "nifty", "rapid", "sunny", "witty", "zesty"] + NOUN = ["otter", "falcon", "comet", "ember", "grove", "harbor", "meadow", "raven", "river", "summit"] + def _json(method, url, *, headers=None, body=None, timeout=20): hdrs = {"Content-Type": "application/json"} if headers: @@ -40,6 +48,97 @@ data: payload = {} return e.code, payload + def _form(method, url, *, headers=None, fields=None, timeout=20): + hdrs = {"Content-Type": "application/x-www-form-urlencoded"} + if headers: + hdrs.update(headers) + data = parse.urlencode(fields or {}).encode() + req = request.Request(url, data=data, headers=hdrs, method=method) + try: + with request.urlopen(req, timeout=timeout) as resp: + raw = resp.read() + payload = json.loads(raw.decode()) if raw else {} + return resp.status, payload + except error.HTTPError as e: + raw = e.read() + try: + payload = json.loads(raw.decode()) if raw else {} + except Exception: + payload = {} + return e.code, payload + + _admin_token = None + _admin_token_at = 0.0 + + def _mas_admin_access_token(now): + global _admin_token, 
_admin_token_at + if _admin_token and (now - _admin_token_at) < 300: + return _admin_token + + with open(MAS_ADMIN_CLIENT_SECRET_FILE, encoding="utf-8") as fh: + client_secret = fh.read().strip() + basic = base64.b64encode(f"{MAS_ADMIN_CLIENT_ID}:{client_secret}".encode()).decode() + + status, payload = _form( + "POST", + f"{MAS_BASE}/oauth2/token", + headers={"Authorization": f"Basic {basic}"}, + fields={"grant_type": "client_credentials", "scope": MAS_ADMIN_SCOPE}, + timeout=20, + ) + if status != 200 or "access_token" not in payload: + raise RuntimeError("mas_admin_token_failed") + + _admin_token = payload["access_token"] + _admin_token_at = now + return _admin_token + + def _gql(admin_token, query, variables): + status, payload = _json( + "POST", + f"{MAS_BASE}/graphql", + headers={"Authorization": f"Bearer {admin_token}"}, + body={"query": query, "variables": variables}, + timeout=20, + ) + if status != 200: + raise RuntimeError("gql_http_failed") + if payload.get("errors"): + raise RuntimeError("gql_error") + return payload.get("data") or {} + + def _generate_localpart(): + return "guest-" + secrets.token_hex(6) + + def _generate_displayname(): + return f"{random.choice(ADJ)}-{random.choice(NOUN)}" + + def _add_user(admin_token, username): + data = _gql( + admin_token, + "mutation($input:AddUserInput!){addUser(input:$input){status user{id username}}}", + {"input": {"username": username, "skipHomeserverCheck": True}}, + ) + res = data.get("addUser") or {} + status = res.get("status") + user = res.get("user") or {} + return status, user.get("id"), user.get("username") + + def _set_display_name(admin_token, user_id, displayname): + _gql( + admin_token, + "mutation($input:SetDisplayNameInput!){setDisplayName(input:$input){status}}", + {"input": {"userId": user_id, "displayName": displayname}}, + ) + + def _create_oauth2_session(admin_token, user_id, scope): + data = _gql( + admin_token, + 
"mutation($input:CreateOAuth2SessionInput!){createOauth2Session(input:$input){accessToken}}", + {"input": {"userId": user_id, "scope": scope, "permanent": False}}, + ) + return (data.get("createOauth2Session") or {}).get("accessToken") + def _rate_check(ip, now): win, cnt = _rate.get(ip, (now, 0)) if now - win > RATE_WINDOW_SEC: @@ -109,17 +208,39 @@ data: body = {} except Exception: body = {} + try: + admin_token = _mas_admin_access_token(now) + displayname = _generate_displayname() - status, payload = _json( - "POST", - f"{SYNAPSE_BASE}{GUEST_REGISTER_PATH}", - headers={GUEST_REGISTER_HEADER: GUEST_REGISTER_SHARED_SECRET}, - body=body, - timeout=20, - ) - if "refresh_token" in payload: - payload.pop("refresh_token", None) - return self._send_json(status, payload) + localpart = None + mas_user_id = None + for _ in range(5): + localpart = _generate_localpart() + status, mas_user_id, _ = _add_user(admin_token, localpart) + if status == "ADDED": + break + mas_user_id = None + if not mas_user_id or not localpart: + raise RuntimeError("add_user_failed") + + try: + _set_display_name(admin_token, mas_user_id, displayname) + except Exception: + pass + + access_token = _create_oauth2_session(admin_token, mas_user_id, "urn:matrix:client:api:*") + if not access_token: + raise RuntimeError("session_failed") + except Exception: + return self._send_json(502, {"errcode": "M_UNKNOWN", "error": "guest_provision_failed"}) + + resp = { + "user_id": f"@{localpart}:{SERVER_NAME}", + "access_token": access_token, + "device_id": "guest_device", + "home_server": SERVER_NAME, + } + return self._send_json(200, resp) def main(): port = int(os.environ.get("PORT", "8080")) diff --git a/services/communication/guest-register-deployment.yaml b/services/communication/guest-register-deployment.yaml index cd13f63..41833b2 100644 --- a/services/communication/guest-register-deployment.yaml +++ b/services/communication/guest-register-deployment.yaml @@ -13,7 +13,7 @@ spec: template: metadata: 
annotations: - checksum/config: guest-register-proxy-3 + checksum/config: guest-register-proxy-4 labels: app.kubernetes.io/name: matrix-guest-register spec: @@ -37,13 +37,12 @@ spec: value: "1" - name: PORT value: "8080" - - name: SYNAPSE_BASE - value: http://othrys-synapse-matrix-synapse:8008 - - name: GUEST_REGISTER_SHARED_SECRET - valueFrom: - secretKeyRef: - name: guest-register-shared-secret-runtime - key: secret + - name: MAS_BASE + value: http://matrix-authentication-service:8080 + - name: MAS_ADMIN_CLIENT_ID + value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM + - name: MAS_ADMIN_CLIENT_SECRET_FILE + value: /etc/mas/admin-client/client_secret - name: MATRIX_SERVER_NAME value: live.bstein.dev - name: RATE_WINDOW_SEC @@ -80,6 +79,9 @@ spec: mountPath: /app/server.py subPath: server.py readOnly: true + - name: mas-admin-client + mountPath: /etc/mas/admin-client + readOnly: true command: - python - /app/server.py @@ -90,3 +92,9 @@ spec: items: - key: server.py path: server.py + - name: mas-admin-client + secret: + secretName: mas-admin-client-runtime + items: + - key: client_secret + path: client_secret -- 2.47.2 From e44ee3ab2d8be860a719cd6be0fe03e1a34382d1 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 7 Jan 2026 20:02:03 -0300 Subject: [PATCH 510/684] comms: fix guest registration via MAS admin API --- .../guest-register-configmap.yaml | 89 ++++++++++--------- .../guest-register-deployment.yaml | 8 +- 2 files changed, 53 insertions(+), 44 deletions(-) diff --git a/services/communication/guest-register-configmap.yaml b/services/communication/guest-register-configmap.yaml index b051a10..804c7d7 100644 --- a/services/communication/guest-register-configmap.yaml +++ b/services/communication/guest-register-configmap.yaml @@ -14,11 +14,14 @@ data: from urllib import error, parse, request MAS_BASE = os.environ.get("MAS_BASE", "http://matrix-authentication-service:8080").rstrip("/") + MAS_ADMIN_API_BASE = os.environ.get("MAS_ADMIN_API_BASE", 
"http://matrix-authentication-service:8081/api/admin/v1").rstrip("/") + SYNAPSE_BASE = os.environ.get("SYNAPSE_BASE", "http://othrys-synapse-matrix-synapse:8008").rstrip("/") SERVER_NAME = os.environ.get("MATRIX_SERVER_NAME", "live.bstein.dev") MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"] MAS_ADMIN_CLIENT_SECRET_FILE = os.environ.get("MAS_ADMIN_CLIENT_SECRET_FILE", "/etc/mas/admin-client/client_secret") MAS_ADMIN_SCOPE = os.environ.get("MAS_ADMIN_SCOPE", "urn:mas:admin") + SESSION_TTL_SEC = int(os.environ.get("SESSION_TTL_SEC", "43200")) RATE_WINDOW_SEC = int(os.environ.get("RATE_WINDOW_SEC", "60")) RATE_MAX = int(os.environ.get("RATE_MAX", "30")) @@ -93,51 +96,52 @@ data: _admin_token_at = now return _admin_token - def _gql(admin_token, query, variables): - status, payload = _json( - "POST", - f"{MAS_BASE}/graphql", - headers={"Authorization": f"Bearer {admin_token}"}, - body={"query": query, "variables": variables}, - timeout=20, - ) - if status != 200: - raise RuntimeError("gql_http_failed") - if payload.get("errors"): - raise RuntimeError("gql_error") - return payload.get("data") or {} - def _generate_localpart(): return "guest-" + secrets.token_hex(6) def _generate_displayname(): return f"{random.choice(ADJ)}-{random.choice(NOUN)}" - def _add_user(admin_token, username): - data = _gql( - admin_token, - "mutation($input:AddUserInput!){addUser(input:$input){status user{id username}}}", - {"input": {"username": username, "skipHomeserverCheck": True}}, - ) - res = data.get("addUser") or {} - status = res.get("status") - user = res.get("user") or {} - return status, user.get("id"), user.get("username") - - def _set_display_name(admin_token, user_id, displayname): - _gql( - admin_token, - "mutation($input:SetDisplayNameInput!){setDisplayName(input:$input){status}}", - {"input": {"userId": user_id, "displayName": displayname}}, + def _admin_api(admin_token, method, path, body=None): + return _json( + method, + f"{MAS_ADMIN_API_BASE}{path}", + 
headers={"Authorization": f"Bearer {admin_token}"}, + body=body, + timeout=20, ) - def _create_oauth2_session(admin_token, user_id, scope): - data = _gql( + def _create_user(admin_token, username): + status, payload = _admin_api(admin_token, "POST", "/users", {"username": username}) + if status != 201: + return status, None + user = payload.get("data") or {} + return status, user.get("id") + + def _create_session(admin_token, user_id, scope): + status, payload = _admin_api( admin_token, - "mutation($input:CreateOAuth2SessionInput!){createOauth2Session(input:$input){accessToken}}", - {"input": {"userId": user_id, "scope": scope, "permanent": False}}, + "POST", + "/personal-sessions", + { + "actor_user_id": user_id, + "human_name": "guest session", + "scope": scope, + "expires_in": SESSION_TTL_SEC, + }, + ) + if status != 201: + return None + return (payload.get("data", {}).get("attributes", {}) or {}).get("access_token") + + def _set_display_name(access_token, user_id, displayname): + _json( + "PUT", + f"{SYNAPSE_BASE}/_matrix/client/v3/profile/{parse.quote(user_id, safe='')}/displayname", + headers={"Authorization": f"Bearer {access_token}"}, + body={"displayname": displayname}, + timeout=20, ) - return (data.get("createOauth2Session") or {}).get("accessToken") def _rate_check(ip, now): win, cnt = _rate.get(ip, (now, 0)) @@ -216,21 +220,20 @@ data: mas_user_id = None for _ in range(5): localpart = _generate_localpart() - status, mas_user_id, _ = _add_user(admin_token, localpart) - if status == "ADDED": + status, mas_user_id = _create_user(admin_token, localpart) + if status == 201 and mas_user_id: break mas_user_id = None if not mas_user_id or not localpart: raise RuntimeError("add_user_failed") - try: - _set_display_name(admin_token, mas_user_id, displayname) - except Exception: - pass - - access_token = _create_oauth2_session(admin_token, mas_user_id, "urn:matrix:client:api:*") + access_token = _create_session(admin_token, mas_user_id, "urn:matrix:client:api:*") 
if not access_token: raise RuntimeError("session_failed") + try: + _set_display_name(access_token, f"@{localpart}:{SERVER_NAME}", displayname) + except Exception: + pass except Exception: return self._send_json(502, {"errcode": "M_UNKNOWN", "error": "guest_provision_failed"}) diff --git a/services/communication/guest-register-deployment.yaml b/services/communication/guest-register-deployment.yaml index 41833b2..00e430c 100644 --- a/services/communication/guest-register-deployment.yaml +++ b/services/communication/guest-register-deployment.yaml @@ -13,7 +13,7 @@ spec: template: metadata: annotations: - checksum/config: guest-register-proxy-4 + checksum/config: guest-register-proxy-5 labels: app.kubernetes.io/name: matrix-guest-register spec: @@ -43,6 +43,12 @@ spec: value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM - name: MAS_ADMIN_CLIENT_SECRET_FILE value: /etc/mas/admin-client/client_secret + - name: MAS_ADMIN_API_BASE + value: http://matrix-authentication-service:8081/api/admin/v1 + - name: SYNAPSE_BASE + value: http://othrys-synapse-matrix-synapse:8008 + - name: SESSION_TTL_SEC + value: "43200" - name: MATRIX_SERVER_NAME value: live.bstein.dev - name: RATE_WINDOW_SEC -- 2.47.2 From 47f0ff7c015c7ac8278cf06afb963b684b7dfe0d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 00:13:40 -0300 Subject: [PATCH 511/684] comms: fix guest rename job with MAS admin sessions --- services/communication/guest-name-job.yaml | 131 +++++++++++++++++---- 1 file changed, 106 insertions(+), 25 deletions(-) diff --git a/services/communication/guest-name-job.yaml b/services/communication/guest-name-job.yaml index 6bd0761..56df3f2 100644 --- a/services/communication/guest-name-job.yaml +++ b/services/communication/guest-name-job.yaml @@ -6,28 +6,40 @@ metadata: namespace: comms spec: schedule: "*/1 * * * *" - suspend: true + suspend: false jobTemplate: spec: backoffLimit: 0 template: spec: restartPolicy: Never + volumes: + - name: mas-admin-client + secret: + secretName: 
mas-admin-client-runtime + items: + - key: client_secret + path: client_secret containers: - name: rename image: python:3.11-slim + volumeMounts: + - name: mas-admin-client + mountPath: /etc/mas-admin-client + readOnly: true env: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 - - name: AUTH_BASE - value: http://matrix-authentication-service:8080 + - name: MAS_ADMIN_CLIENT_ID + value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM + - name: MAS_ADMIN_CLIENT_SECRET_FILE + value: /etc/mas-admin-client/client_secret + - name: MAS_ADMIN_API_BASE + value: http://matrix-authentication-service:8081/api/admin/v1 + - name: MAS_TOKEN_URL + value: http://matrix-authentication-service:8080/oauth2/token - name: SEEDER_USER value: othrys-seeder - - name: SEEDER_PASS - valueFrom: - secretKeyRef: - name: atlasbot-credentials-runtime - key: seeder-password command: - /bin/sh - -c @@ -35,24 +47,77 @@ spec: set -euo pipefail pip install --no-cache-dir requests >/dev/null python - <<'PY' - import os, random, requests, urllib.parse + import base64 + import os + import random + import requests + import urllib.parse - ADJ = ["brisk","calm","eager","gentle","merry","nifty","rapid","sunny","witty","zesty"] - NOUN = ["otter","falcon","comet","ember","grove","harbor","meadow","raven","river","summit"] + ADJ = [ + "brisk","calm","eager","gentle","merry","nifty","rapid","sunny","witty","zesty", + "amber","bold","bright","crisp","daring","frosty","glad","jolly","lively","mellow", + "quiet","ripe","serene","spry","tidy","vivid","warm","wild","clever","kind", + ] + NOUN = [ + "otter","falcon","comet","ember","grove","harbor","meadow","raven","river","summit", + "breeze","cedar","cinder","cove","delta","forest","glade","lark","marsh","peak", + "pine","quartz","reef","ridge","sable","sage","shore","thunder","vale","zephyr", + ] BASE = os.environ["SYNAPSE_BASE"] - AUTH_BASE = os.environ.get("AUTH_BASE", BASE) + MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"] + MAS_ADMIN_CLIENT_SECRET_FILE 
= os.environ["MAS_ADMIN_CLIENT_SECRET_FILE"] + MAS_ADMIN_API_BASE = os.environ["MAS_ADMIN_API_BASE"].rstrip("/") + MAS_TOKEN_URL = os.environ["MAS_TOKEN_URL"] + SEEDER_USER = os.environ["SEEDER_USER"] ROOM_ALIAS = "#othrys:live.bstein.dev" - def login(user, password): - r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={ - "type": "m.login.password", - "identifier": {"type": "m.id.user", "user": user}, - "password": password, - }) + def mas_admin_token(): + with open(MAS_ADMIN_CLIENT_SECRET_FILE, "r", encoding="utf-8") as f: + secret = f.read().strip() + basic = base64.b64encode(f"{MAS_ADMIN_CLIENT_ID}:{secret}".encode()).decode() + r = requests.post( + MAS_TOKEN_URL, + headers={"Authorization": f"Basic {basic}"}, + data={"grant_type": "client_credentials", "scope": "urn:mas:admin"}, + timeout=30, + ) r.raise_for_status() return r.json()["access_token"] + def mas_user_id(token, username): + r = requests.get( + f"{MAS_ADMIN_API_BASE}/users/by-username/{urllib.parse.quote(username)}", + headers={"Authorization": f"Bearer {token}"}, + timeout=30, + ) + r.raise_for_status() + return r.json()["data"]["id"] + + def mas_personal_session(token, user_id): + r = requests.post( + f"{MAS_ADMIN_API_BASE}/personal-sessions", + headers={"Authorization": f"Bearer {token}"}, + json={ + "actor_user_id": user_id, + "human_name": "guest-name-randomizer", + "scope": "urn:matrix:client:api:*", + "expires_in": 300, + }, + timeout=30, + ) + r.raise_for_status() + data = r.json().get("data", {}).get("attributes", {}) or {} + return data["access_token"], r.json()["data"]["id"] + + def mas_revoke_session(token, session_id): + requests.post( + f"{MAS_ADMIN_API_BASE}/personal-sessions/{urllib.parse.quote(session_id)}/revoke", + headers={"Authorization": f"Bearer {token}"}, + json={}, + timeout=30, + ) + def resolve_alias(token, alias): headers = {"Authorization": f"Bearer {token}"} enc = urllib.parse.quote(alias) @@ -63,6 +128,7 @@ spec: def list_guests(token): headers = 
{"Authorization": f"Bearer {token}"} users = [] + existing_names = set() from_token = None while True: url = f"{BASE}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100" @@ -73,12 +139,14 @@ spec: data = res.json() for u in data.get("users", []): disp = u.get("displayname", "") + if disp: + existing_names.add(disp) if u.get("is_guest") and (not disp or disp.isdigit()): users.append(u["name"]) from_token = data.get("next_token") if not from_token: break - return users + return users, existing_names def set_displayname(token, room_id, user_id, name): headers = {"Authorization": f"Bearer {token}"} @@ -93,10 +161,23 @@ spec: content["displayname"] = name requests.put(state_url, headers=headers, json=content) - token = login(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"]) - room_id = resolve_alias(token, ROOM_ALIAS) - guests = list_guests(token) - for g in guests: - new = f"{random.choice(ADJ)}-{random.choice(NOUN)}" - set_displayname(token, room_id, g, new) + admin_token = mas_admin_token() + seeder_id = mas_user_id(admin_token, SEEDER_USER) + token, session_id = mas_personal_session(admin_token, seeder_id) + try: + room_id = resolve_alias(token, ROOM_ALIAS) + guests, existing = list_guests(token) + for g in guests: + new = None + for _ in range(30): + candidate = f\"{random.choice(ADJ)}-{random.choice(NOUN)}\" + if candidate not in existing: + new = candidate + existing.add(candidate) + break + if not new: + continue + set_displayname(token, room_id, g, new) + finally: + mas_revoke_session(admin_token, session_id) PY -- 2.47.2 From ca49c8408666cfc249860bfffe2423467b9a839e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 00:15:41 -0300 Subject: [PATCH 512/684] comms: fix guest randomizer syntax --- services/communication/guest-name-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/communication/guest-name-job.yaml b/services/communication/guest-name-job.yaml index 56df3f2..5e9a885 100644 --- 
a/services/communication/guest-name-job.yaml +++ b/services/communication/guest-name-job.yaml @@ -170,7 +170,7 @@ spec: for g in guests: new = None for _ in range(30): - candidate = f\"{random.choice(ADJ)}-{random.choice(NOUN)}\" + candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}" if candidate not in existing: new = candidate existing.add(candidate) -- 2.47.2 From 57e414adc67da773a2b02a5e21ed9cd379ef75cd Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 00:20:55 -0300 Subject: [PATCH 513/684] comms: rerun synapse admin seeder job --- services/communication/synapse-seeder-admin-ensure-job.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/services/communication/synapse-seeder-admin-ensure-job.yaml b/services/communication/synapse-seeder-admin-ensure-job.yaml index 8d92ccd..b21f573 100644 --- a/services/communication/synapse-seeder-admin-ensure-job.yaml +++ b/services/communication/synapse-seeder-admin-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: synapse-seeder-admin-ensure-1 + name: synapse-seeder-admin-ensure-2 namespace: comms spec: backoffLimit: 2 @@ -34,4 +34,3 @@ spec: psql -v ON_ERROR_STOP=1 <<'SQL' UPDATE users SET admin = 1 WHERE name = '@othrys-seeder:live.bstein.dev'; SQL - -- 2.47.2 From d3ac4726e2022862ca1b4081cead6708eafb46ca Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 00:26:20 -0300 Subject: [PATCH 514/684] comms: rename guests via MAS admin sessions --- services/communication/guest-name-job.yaml | 105 ++++++++++++++------- 1 file changed, 72 insertions(+), 33 deletions(-) diff --git a/services/communication/guest-name-job.yaml b/services/communication/guest-name-job.yaml index 5e9a885..3e101f8 100644 --- a/services/communication/guest-name-job.yaml +++ b/services/communication/guest-name-job.yaml @@ -125,49 +125,88 @@ spec: r.raise_for_status() return r.json()["room_id"] - def list_guests(token): + def room_members(token, room_id): + headers = 
{"Authorization": f"Bearer {token}"} + r = requests.get(f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members", headers=headers) + r.raise_for_status() + members = set() + existing_names = set() + for ev in r.json().get("chunk", []): + user_id = ev.get("state_key") + if user_id: + members.add(user_id) + disp = (ev.get("content") or {}).get("displayname") + if disp: + existing_names.add(disp) + return members, existing_names + + def mas_list_users(token): headers = {"Authorization": f"Bearer {token}"} users = [] - existing_names = set() - from_token = None + cursor = None while True: - url = f"{BASE}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100" - if from_token: - url += f"&from={from_token}" - res = requests.get(url, headers=headers) - res.raise_for_status() - data = res.json() - for u in data.get("users", []): - disp = u.get("displayname", "") - if disp: - existing_names.add(disp) - if u.get("is_guest") and (not disp or disp.isdigit()): - users.append(u["name"]) - from_token = data.get("next_token") - if not from_token: + url = f"{MAS_ADMIN_API_BASE}/users?page[size]=100" + if cursor: + url += f"&page[after]={urllib.parse.quote(cursor)}" + r = requests.get(url, headers=headers, timeout=30) + r.raise_for_status() + data = r.json().get("data", []) + if not data: break - return users, existing_names + users.extend(data) + cursor = data[-1].get("meta", {}).get("page", {}).get("cursor") + if not cursor: + break + return users - def set_displayname(token, room_id, user_id, name): + def user_id_for_username(username): + return f"@{username}:live.bstein.dev" + + def get_displayname(token, user_id): + headers = {"Authorization": f"Bearer {token}"} + r = requests.get(f"{BASE}/_matrix/client/v3/profile/{urllib.parse.quote(user_id)}", headers=headers) + r.raise_for_status() + return r.json().get("displayname") + + def set_displayname(token, room_id, user_id, name, in_room): headers = {"Authorization": f"Bearer {token}"} payload = 
{"displayname": name} - # Update global profile - r = requests.put(f"{BASE}/_matrix/client/v3/profile/{urllib.parse.quote(user_id)}/displayname", headers=headers, json=payload) + r = requests.put( + f"{BASE}/_matrix/client/v3/profile/{urllib.parse.quote(user_id)}/displayname", + headers=headers, + json=payload, + ) r.raise_for_status() - # Update Othrys member event so clients see the change quickly + if not in_room: + return state_url = f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.member/{urllib.parse.quote(user_id)}" - r2 = requests.get(state_url, headers=headers) - content = r2.json() if r2.status_code == 200 else {"membership": "join"} - content["displayname"] = name - requests.put(state_url, headers=headers, json=content) + content = {"membership": "join", "displayname": name} + requests.put(state_url, headers=headers, json=content, timeout=30) admin_token = mas_admin_token() seeder_id = mas_user_id(admin_token, SEEDER_USER) - token, session_id = mas_personal_session(admin_token, seeder_id) + seeder_token, seeder_session = mas_personal_session(admin_token, seeder_id) try: - room_id = resolve_alias(token, ROOM_ALIAS) - guests, existing = list_guests(token) - for g in guests: + room_id = resolve_alias(seeder_token, ROOM_ALIAS) + members, existing = room_members(seeder_token, room_id) + finally: + mas_revoke_session(admin_token, seeder_session) + + users = mas_list_users(admin_token) + for user in users: + attrs = user.get("attributes") or {} + username = attrs.get("username") or "" + legacy_guest = attrs.get("legacy_guest") + if not username: + continue + if not (legacy_guest or username.isdigit() or username.startswith("guest-")): + continue + user_id = user_id_for_username(username) + access_token, session_id = mas_personal_session(admin_token, user["id"]) + try: + display = get_displayname(access_token, user_id) + if display and (not display.isdigit()) and (not display.startswith("guest-")): + continue new = None for _ in 
range(30): candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}" @@ -177,7 +216,7 @@ spec: break if not new: continue - set_displayname(token, room_id, g, new) - finally: - mas_revoke_session(admin_token, session_id) + set_displayname(access_token, room_id, user_id, new, user_id in members) + finally: + mas_revoke_session(admin_token, session_id) PY -- 2.47.2 From 660b49bc5d680d60122de651fa2075742fced74e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 01:55:58 -0300 Subject: [PATCH 515/684] comms: consolidate stack manifests --- .../atlas/applications/kustomization.yaml | 2 +- .../applications/comms/kustomization.yaml | 15 - .../communication/kustomization.yaml | 2 +- .../applications/kustomization.yaml | 1 - knowledge/catalog/atlas-summary.json | 6 +- knowledge/catalog/atlas.json | 191 +- knowledge/catalog/atlas.yaml | 145 +- knowledge/catalog/runbooks.json | 16 + knowledge/diagrams/atlas-http.mmd | 37 +- knowledge/runbooks/comms-verify.md | 30 + scripts/comms_sync_kb.sh | 5 + .../atlasbot-configmap.yaml | 2 +- .../atlasbot-deployment.yaml | 2 +- .../atlasbot-rbac.yaml | 2 +- .../bstein-force-leave-job.yaml | 2 +- services/{communication => comms}/coturn.yaml | 2 +- .../element-call-config.yaml | 3 +- .../element-call-deployment.yaml | 5 +- .../element-rendered.yaml | 0 .../guest-name-job.yaml | 2 +- .../guest-register-configmap.yaml | 2 +- .../guest-register-deployment.yaml | 2 +- .../guest-register-service.yaml | 2 +- .../knowledge/catalog/atlas-summary.json | 8 + services/comms/knowledge/catalog/atlas.json | 2771 +++++++++++++++++ services/comms/knowledge/catalog/atlas.yaml | 1799 +++++++++++ .../comms/knowledge/catalog/runbooks.json | 73 + .../comms/knowledge/diagrams/atlas-http.mmd | 189 ++ services/comms/kustomization.yaml | 41 + .../livekit-config.yaml | 2 +- .../livekit-ingress.yaml | 3 +- .../livekit-middlewares.yaml | 5 +- .../livekit-token-deployment.yaml | 2 +- .../{communication => comms}/livekit.yaml | 2 +- 
.../mas-admin-client-secret-ensure-job.yaml | 2 +- .../mas-configmap.yaml | 3 +- .../mas-deployment.yaml | 4 +- .../matrix-ingress.yaml} | 72 +- .../pin-othrys-job.yaml | 2 +- .../reset-othrys-room-job.yaml | 2 +- .../seed-othrys-room.yaml | 2 +- .../synapse-deployment-strategy-patch.yaml | 2 +- .../synapse-rendered.yaml | 100 - .../synapse-seeder-admin-ensure-job.yaml | 2 +- .../synapse-signingkey-ensure-job.yaml | 2 +- .../values-element.yaml | 2 +- .../values-synapse.yaml | 2 +- .../{communication => comms}/wellknown.yaml | 41 +- .../communication/guest-register-ingress.yaml | 34 - ...est-register-shared-secret-ensure-job.yaml | 86 - services/communication/kustomization.yaml | 49 - ...se-guest-appservice-secret-ensure-job.yaml | 111 - ...napse-guest-register-module-configmap.yaml | 89 - 53 files changed, 5332 insertions(+), 646 deletions(-) delete mode 100644 clusters/atlas/flux-system/applications/comms/kustomization.yaml create mode 100644 knowledge/runbooks/comms-verify.md create mode 100755 scripts/comms_sync_kb.sh rename services/{communication => comms}/atlasbot-configmap.yaml (99%) rename services/{communication => comms}/atlasbot-deployment.yaml (98%) rename services/{communication => comms}/atlasbot-rbac.yaml (96%) rename services/{communication => comms}/bstein-force-leave-job.yaml (99%) rename services/{communication => comms}/coturn.yaml (99%) rename services/{communication => comms}/element-call-config.yaml (87%) rename services/{communication => comms}/element-call-deployment.yaml (92%) rename services/{communication => comms}/element-rendered.yaml (100%) rename services/{communication => comms}/guest-name-job.yaml (99%) rename services/{communication => comms}/guest-register-configmap.yaml (99%) rename services/{communication => comms}/guest-register-deployment.yaml (98%) rename services/{communication => comms}/guest-register-service.yaml (84%) create mode 100644 services/comms/knowledge/catalog/atlas-summary.json create mode 100644 
services/comms/knowledge/catalog/atlas.json create mode 100644 services/comms/knowledge/catalog/atlas.yaml create mode 100644 services/comms/knowledge/catalog/runbooks.json create mode 100644 services/comms/knowledge/diagrams/atlas-http.mmd rename services/{communication => comms}/livekit-config.yaml (93%) rename services/{communication => comms}/livekit-ingress.yaml (90%) rename services/{communication => comms}/livekit-middlewares.yaml (88%) rename services/{communication => comms}/livekit-token-deployment.yaml (96%) rename services/{communication => comms}/livekit.yaml (99%) rename services/{communication => comms}/mas-admin-client-secret-ensure-job.yaml (97%) rename services/{communication => comms}/mas-configmap.yaml (97%) rename services/{communication => comms}/mas-deployment.yaml (97%) rename services/{communication/mas-ingress.yaml => comms/matrix-ingress.yaml} (50%) rename services/{communication => comms}/pin-othrys-job.yaml (99%) rename services/{communication => comms}/reset-othrys-room-job.yaml (99%) rename services/{communication => comms}/seed-othrys-room.yaml (99%) rename services/{communication => comms}/synapse-deployment-strategy-patch.yaml (74%) rename services/{communication => comms}/synapse-rendered.yaml (91%) rename services/{communication => comms}/synapse-seeder-admin-ensure-job.yaml (93%) rename services/{communication => comms}/synapse-signingkey-ensure-job.yaml (95%) rename services/{communication => comms}/values-element.yaml (96%) rename services/{communication => comms}/values-synapse.yaml (98%) rename services/{communication => comms}/wellknown.yaml (82%) delete mode 100644 services/communication/guest-register-ingress.yaml delete mode 100644 services/communication/guest-register-shared-secret-ensure-job.yaml delete mode 100644 services/communication/kustomization.yaml delete mode 100644 services/communication/synapse-guest-appservice-secret-ensure-job.yaml delete mode 100644 
services/communication/synapse-guest-register-module-configmap.yaml diff --git a/clusters/atlas/applications/kustomization.yaml b/clusters/atlas/applications/kustomization.yaml index c25257b..f5c64e8 100644 --- a/clusters/atlas/applications/kustomization.yaml +++ b/clusters/atlas/applications/kustomization.yaml @@ -5,7 +5,7 @@ resources: - ../../services/crypto - ../../services/gitea - ../../services/jellyfin - - ../../services/communication + - ../../services/comms - ../../services/monitoring - ../../services/pegasus - ../../services/vault diff --git a/clusters/atlas/flux-system/applications/comms/kustomization.yaml b/clusters/atlas/flux-system/applications/comms/kustomization.yaml deleted file mode 100644 index 42dc736..0000000 --- a/clusters/atlas/flux-system/applications/comms/kustomization.yaml +++ /dev/null @@ -1,15 +0,0 @@ -# clusters/atlas/flux-system/applications/comms/kustomization.yaml -apiVersion: kustomize.toolkit.fluxcd.io/v1 -kind: Kustomization -metadata: - name: comms - namespace: flux-system -spec: - interval: 10m - prune: true - sourceRef: - kind: GitRepository - name: flux-system - path: ./services/comms - targetNamespace: comms - timeout: 2m diff --git a/clusters/atlas/flux-system/applications/communication/kustomization.yaml b/clusters/atlas/flux-system/applications/communication/kustomization.yaml index f9f3531..ab2e7d8 100644 --- a/clusters/atlas/flux-system/applications/communication/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/communication/kustomization.yaml @@ -10,7 +10,7 @@ spec: sourceRef: kind: GitRepository name: flux-system - path: ./services/communication + path: ./services/comms targetNamespace: comms timeout: 2m dependsOn: diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index 9fefc9c..d8e27af 100644 --- a/clusters/atlas/flux-system/applications/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/kustomization.yaml 
@@ -5,7 +5,6 @@ resources: - gitea/kustomization.yaml - vault/kustomization.yaml - vaultwarden/kustomization.yaml - - comms/kustomization.yaml - communication/kustomization.yaml - crypto/kustomization.yaml - monerod/kustomization.yaml diff --git a/knowledge/catalog/atlas-summary.json b/knowledge/catalog/atlas-summary.json index 16e3019..2139e29 100644 --- a/knowledge/catalog/atlas-summary.json +++ b/knowledge/catalog/atlas-summary.json @@ -1,8 +1,8 @@ { "counts": { "helmrelease_host_hints": 7, - "http_endpoints": 32, - "services": 42, - "workloads": 47 + "http_endpoints": 35, + "services": 44, + "workloads": 49 } } diff --git a/knowledge/catalog/atlas.json b/knowledge/catalog/atlas.json index 359af22..92f08f4 100644 --- a/knowledge/catalog/atlas.json +++ b/knowledge/catalog/atlas.json @@ -16,14 +16,9 @@ "path": "services/ci-demo", "targetNamespace": null }, - { - "name": "comms", - "path": "services/comms", - "targetNamespace": "comms" - }, { "name": "communication", - "path": "services/communication", + "path": "services/comms", "targetNamespace": "comms" }, { @@ -324,6 +319,19 @@ "ghcr.io/element-hq/matrix-authentication-service:1.8.0" ] }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "matrix-guest-register", + "labels": { + "app.kubernetes.io/name": "matrix-guest-register" + }, + "serviceAccountName": null, + "nodeSelector": {}, + "images": [ + "python:3.11-slim" + ] + }, { "kind": "Deployment", "namespace": "comms", @@ -777,6 +785,21 @@ "python:3.12-alpine" ] }, + { + "kind": "Deployment", + "namespace": "nextcloud", + "name": "collabora", + "labels": { + "app": "collabora" + }, + "serviceAccountName": null, + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "collabora/code:latest" + ] + }, { "kind": "Deployment", "namespace": "nextcloud", @@ -1399,6 +1422,22 @@ } ] }, + { + "namespace": "comms", + "name": "matrix-guest-register", + "type": "ClusterIP", + "selector": { + "app.kubernetes.io/name": "matrix-guest-register" + }, + 
"ports": [ + { + "name": "http", + "port": 8080, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, { "namespace": "comms", "name": "matrix-wellknown", @@ -1834,6 +1873,22 @@ } ] }, + { + "namespace": "nextcloud", + "name": "collabora", + "type": "ClusterIP", + "selector": { + "app": "collabora" + }, + "ports": [ + { + "name": "http", + "port": 9980, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, { "namespace": "nextcloud", "name": "nextcloud", @@ -2040,21 +2095,41 @@ }, { "host": "bstein.dev", - "path": "/.well-known/matrix", + "path": "/.well-known/matrix/client", "backend": { "namespace": "comms", - "service": "othrys-synapse-matrix-synapse", - "port": 8008, + "service": "matrix-wellknown", + "port": 80, "workloads": [ { "kind": "Deployment", - "name": "othrys-synapse-matrix-synapse" + "name": "matrix-wellknown" } ] }, "via": { "kind": "Ingress", - "name": "othrys-synapse-matrix-synapse", + "name": "matrix-wellknown-bstein-dev", + "source": "communication" + } + }, + { + "host": "bstein.dev", + "path": "/.well-known/matrix/server", + "backend": { + "namespace": "comms", + "service": "matrix-wellknown", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-wellknown" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-wellknown-bstein-dev", "source": "communication" } }, @@ -2218,26 +2293,6 @@ "source": "communication" } }, - { - "host": "live.bstein.dev", - "path": "/.well-known/matrix", - "backend": { - "namespace": "comms", - "service": "othrys-synapse-matrix-synapse", - "port": 8008, - "workloads": [ - { - "kind": "Deployment", - "name": "othrys-synapse-matrix-synapse" - } - ] - }, - "via": { - "kind": "Ingress", - "name": "othrys-synapse-matrix-synapse", - "source": "communication" - } - }, { "host": "live.bstein.dev", "path": "/.well-known/matrix/client", @@ -2294,7 +2349,7 @@ }, "via": { "kind": "Ingress", - "name": "othrys-synapse-matrix-synapse", + "name": "matrix-routing", "source": "communication" } }, 
@@ -2349,7 +2404,7 @@ }, "via": { "kind": "Ingress", - "name": "matrix-authentication-service", + "name": "matrix-routing", "source": "communication" } }, @@ -2409,7 +2464,27 @@ }, "via": { "kind": "Ingress", - "name": "othrys-synapse-matrix-synapse", + "name": "matrix-routing", + "source": "communication" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": "/_matrix/client/r0/register", + "backend": { + "namespace": "comms", + "service": "matrix-guest-register", + "port": 8080, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-guest-register" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-routing", "source": "communication" } }, @@ -2429,7 +2504,7 @@ }, "via": { "kind": "Ingress", - "name": "matrix-authentication-service-compat", + "name": "matrix-routing", "source": "communication" } }, @@ -2449,7 +2524,7 @@ }, "via": { "kind": "Ingress", - "name": "matrix-authentication-service-compat", + "name": "matrix-routing", "source": "communication" } }, @@ -2469,7 +2544,27 @@ }, "via": { "kind": "Ingress", - "name": "matrix-authentication-service-compat", + "name": "matrix-routing", + "source": "communication" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": "/_matrix/client/v3/register", + "backend": { + "namespace": "comms", + "service": "matrix-guest-register", + "port": 8080, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-guest-register" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-routing", "source": "communication" } }, @@ -2489,7 +2584,7 @@ }, "via": { "kind": "Ingress", - "name": "othrys-synapse-matrix-synapse", + "name": "matrix-routing", "source": "communication" } }, @@ -2513,6 +2608,26 @@ "source": "monerod" } }, + { + "host": "office.bstein.dev", + "path": "/", + "backend": { + "namespace": "nextcloud", + "service": "collabora", + "port": 9980, + "workloads": [ + { + "kind": "Deployment", + "name": "collabora" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "collabora", + 
"source": "nextcloud" + } + }, { "host": "pegasus.bstein.dev", "path": "/", diff --git a/knowledge/catalog/atlas.yaml b/knowledge/catalog/atlas.yaml index 4b2e8bd..06e2469 100644 --- a/knowledge/catalog/atlas.yaml +++ b/knowledge/catalog/atlas.yaml @@ -10,11 +10,8 @@ sources: - name: ci-demo path: services/ci-demo targetNamespace: null -- name: comms - path: services/comms - targetNamespace: comms - name: communication - path: services/communication + path: services/comms targetNamespace: comms - name: core path: infrastructure/core @@ -207,6 +204,15 @@ workloads: hardware: rpi5 images: - ghcr.io/element-hq/matrix-authentication-service:1.8.0 +- kind: Deployment + namespace: comms + name: matrix-guest-register + labels: + app.kubernetes.io/name: matrix-guest-register + serviceAccountName: null + nodeSelector: {} + images: + - python:3.11-slim - kind: Deployment namespace: comms name: matrix-wellknown @@ -526,6 +532,16 @@ workloads: nodeSelector: {} images: - python:3.12-alpine +- kind: Deployment + namespace: nextcloud + name: collabora + labels: + app: collabora + serviceAccountName: null + nodeSelector: + hardware: rpi5 + images: + - collabora/code:latest - kind: Deployment namespace: nextcloud name: nextcloud @@ -935,6 +951,16 @@ services: port: 8081 targetPort: internal protocol: TCP +- namespace: comms + name: matrix-guest-register + type: ClusterIP + selector: + app.kubernetes.io/name: matrix-guest-register + ports: + - name: http + port: 8080 + targetPort: http + protocol: TCP - namespace: comms name: matrix-wellknown type: ClusterIP @@ -1214,6 +1240,16 @@ services: port: 8000 targetPort: http protocol: TCP +- namespace: nextcloud + name: collabora + type: ClusterIP + selector: + app: collabora + ports: + - name: http + port: 9980 + targetPort: http + protocol: TCP - namespace: nextcloud name: nextcloud type: ClusterIP @@ -1344,17 +1380,28 @@ http_endpoints: name: bstein-dev-home source: bstein-dev-home - host: bstein.dev - path: /.well-known/matrix + path: 
/.well-known/matrix/client backend: namespace: comms - service: othrys-synapse-matrix-synapse - port: 8008 + service: matrix-wellknown + port: 80 workloads: &id001 - kind: Deployment - name: othrys-synapse-matrix-synapse + name: matrix-wellknown via: kind: Ingress - name: othrys-synapse-matrix-synapse + name: matrix-wellknown-bstein-dev + source: communication +- host: bstein.dev + path: /.well-known/matrix/server + backend: + namespace: comms + service: matrix-wellknown + port: 80 + workloads: *id001 + via: + kind: Ingress + name: matrix-wellknown-bstein-dev source: communication - host: bstein.dev path: /api @@ -1460,26 +1507,13 @@ http_endpoints: kind: Ingress name: othrys-element-element-web source: communication -- host: live.bstein.dev - path: /.well-known/matrix - backend: - namespace: comms - service: othrys-synapse-matrix-synapse - port: 8008 - workloads: *id001 - via: - kind: Ingress - name: othrys-synapse-matrix-synapse - source: communication - host: live.bstein.dev path: /.well-known/matrix/client backend: namespace: comms service: matrix-wellknown port: 80 - workloads: &id002 - - kind: Deployment - name: matrix-wellknown + workloads: *id001 via: kind: Ingress name: matrix-wellknown @@ -1490,7 +1524,7 @@ http_endpoints: namespace: comms service: matrix-wellknown port: 80 - workloads: *id002 + workloads: *id001 via: kind: Ingress name: matrix-wellknown @@ -1501,10 +1535,12 @@ http_endpoints: namespace: comms service: othrys-synapse-matrix-synapse port: 8008 - workloads: *id001 + workloads: &id002 + - kind: Deployment + name: othrys-synapse-matrix-synapse via: kind: Ingress - name: othrys-synapse-matrix-synapse + name: matrix-routing source: communication - host: longhorn.bstein.dev path: / @@ -1541,7 +1577,7 @@ http_endpoints: name: matrix-authentication-service via: kind: Ingress - name: matrix-authentication-service + name: matrix-routing source: communication - host: matrix.live.bstein.dev path: /.well-known/matrix/client @@ -1549,7 +1585,7 @@ 
http_endpoints: namespace: comms service: matrix-wellknown port: 80 - workloads: *id002 + workloads: *id001 via: kind: Ingress name: matrix-wellknown-matrix-live @@ -1560,7 +1596,7 @@ http_endpoints: namespace: comms service: matrix-wellknown port: 80 - workloads: *id002 + workloads: *id001 via: kind: Ingress name: matrix-wellknown-matrix-live @@ -1571,10 +1607,23 @@ http_endpoints: namespace: comms service: othrys-synapse-matrix-synapse port: 8008 - workloads: *id001 + workloads: *id002 via: kind: Ingress - name: othrys-synapse-matrix-synapse + name: matrix-routing + source: communication +- host: matrix.live.bstein.dev + path: /_matrix/client/r0/register + backend: + namespace: comms + service: matrix-guest-register + port: 8080 + workloads: &id004 + - kind: Deployment + name: matrix-guest-register + via: + kind: Ingress + name: matrix-routing source: communication - host: matrix.live.bstein.dev path: /_matrix/client/v3/login @@ -1585,7 +1634,7 @@ http_endpoints: workloads: *id003 via: kind: Ingress - name: matrix-authentication-service-compat + name: matrix-routing source: communication - host: matrix.live.bstein.dev path: /_matrix/client/v3/logout @@ -1596,7 +1645,7 @@ http_endpoints: workloads: *id003 via: kind: Ingress - name: matrix-authentication-service-compat + name: matrix-routing source: communication - host: matrix.live.bstein.dev path: /_matrix/client/v3/refresh @@ -1607,7 +1656,18 @@ http_endpoints: workloads: *id003 via: kind: Ingress - name: matrix-authentication-service-compat + name: matrix-routing + source: communication +- host: matrix.live.bstein.dev + path: /_matrix/client/v3/register + backend: + namespace: comms + service: matrix-guest-register + port: 8080 + workloads: *id004 + via: + kind: Ingress + name: matrix-routing source: communication - host: matrix.live.bstein.dev path: /_synapse @@ -1615,10 +1675,10 @@ http_endpoints: namespace: comms service: othrys-synapse-matrix-synapse port: 8008 - workloads: *id001 + workloads: *id002 via: 
kind: Ingress - name: othrys-synapse-matrix-synapse + name: matrix-routing source: communication - host: monero.bstein.dev path: / @@ -1633,6 +1693,19 @@ http_endpoints: kind: Ingress name: monerod source: monerod +- host: office.bstein.dev + path: / + backend: + namespace: nextcloud + service: collabora + port: 9980 + workloads: + - kind: Deployment + name: collabora + via: + kind: Ingress + name: collabora + source: nextcloud - host: pegasus.bstein.dev path: / backend: diff --git a/knowledge/catalog/runbooks.json b/knowledge/catalog/runbooks.json index d7356ca..0718562 100644 --- a/knowledge/catalog/runbooks.json +++ b/knowledge/catalog/runbooks.json @@ -20,6 +20,22 @@ ], "body": "# CI: Gitea \u2192 Jenkins pipeline\n\n## What this is\nAtlas uses Gitea for source control and Jenkins for CI. Authentication is via Keycloak (SSO).\n\n## Where it is configured\n- Gitea manifests: `services/gitea/`\n- Jenkins manifests: `services/jenkins/`\n- Credential sync helpers: `scripts/gitea_cred_sync.sh`, `scripts/jenkins_cred_sync.sh`\n\n## What users do (typical flow)\n- Create a repo in Gitea.\n- Create/update a Jenkins job/pipeline that can fetch the repo.\n- Configure a webhook (or SCM polling) so pushes trigger builds.\n\n## Troubleshooting (common)\n- \u201cWebhook not firing\u201d: confirm ingress host, webhook URL, and Jenkins job is reachable.\n- \u201cAuth denied cloning\u201d: confirm Keycloak group membership and that Jenkins has a valid token/credential configured." 
}, + { + "path": "runbooks/comms-verify.md", + "title": "Othrys verification checklist", + "tags": [ + "comms", + "matrix", + "element", + "livekit" + ], + "entrypoints": [ + "https://live.bstein.dev", + "https://matrix.live.bstein.dev" + ], + "source_paths": [], + "body": "1) Guest join:\n- Open a private window and visit:\n `https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join`\n- Confirm the guest join flow works and the displayname becomes `-`.\n\n2) Keycloak login:\n- Log in from `https://live.bstein.dev` and confirm MAS -> Keycloak -> Element redirect.\n\n3) Video rooms:\n- Start an Element Call room and confirm audio/video with a second account.\n- Check that guests can read public rooms but cannot start calls.\n\n4) Well-known:\n- `https://live.bstein.dev/.well-known/matrix/client` returns JSON.\n- `https://matrix.live.bstein.dev/.well-known/matrix/client` returns JSON.\n\n5) TURN reachability:\n- Confirm `turn.live.bstein.dev:3478` and `turns:5349` are reachable from WAN." 
+ }, { "path": "runbooks/kb-authoring.md", "title": "KB authoring: what to write (and what not to)", diff --git a/knowledge/diagrams/atlas-http.mmd b/knowledge/diagrams/atlas-http.mmd index a6fc2b5..ddd33d8 100644 --- a/knowledge/diagrams/atlas-http.mmd +++ b/knowledge/diagrams/atlas-http.mmd @@ -9,10 +9,10 @@ flowchart LR host_bstein_dev --> svc_bstein_dev_home_bstein_dev_home_frontend wl_bstein_dev_home_bstein_dev_home_frontend["bstein-dev-home/bstein-dev-home-frontend (Deployment)"] svc_bstein_dev_home_bstein_dev_home_frontend --> wl_bstein_dev_home_bstein_dev_home_frontend - svc_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Service)"] - host_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse - wl_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Deployment)"] - svc_comms_othrys_synapse_matrix_synapse --> wl_comms_othrys_synapse_matrix_synapse + svc_comms_matrix_wellknown["comms/matrix-wellknown (Service)"] + host_bstein_dev --> svc_comms_matrix_wellknown + wl_comms_matrix_wellknown["comms/matrix-wellknown (Deployment)"] + svc_comms_matrix_wellknown --> wl_comms_matrix_wellknown svc_bstein_dev_home_bstein_dev_home_backend["bstein-dev-home/bstein-dev-home-backend (Service)"] host_bstein_dev --> svc_bstein_dev_home_bstein_dev_home_backend wl_bstein_dev_home_bstein_dev_home_backend["bstein-dev-home/bstein-dev-home-backend (Deployment)"] @@ -51,11 +51,11 @@ flowchart LR host_live_bstein_dev --> svc_comms_othrys_element_element_web wl_comms_othrys_element_element_web["comms/othrys-element-element-web (Deployment)"] svc_comms_othrys_element_element_web --> wl_comms_othrys_element_element_web - host_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse - svc_comms_matrix_wellknown["comms/matrix-wellknown (Service)"] host_live_bstein_dev --> svc_comms_matrix_wellknown - wl_comms_matrix_wellknown["comms/matrix-wellknown (Deployment)"] - svc_comms_matrix_wellknown --> wl_comms_matrix_wellknown + 
svc_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Service)"] + host_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse + wl_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Deployment)"] + svc_comms_othrys_synapse_matrix_synapse --> wl_comms_othrys_synapse_matrix_synapse host_longhorn_bstein_dev["longhorn.bstein.dev"] svc_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Service)"] host_longhorn_bstein_dev --> svc_longhorn_system_oauth2_proxy_longhorn @@ -71,11 +71,20 @@ flowchart LR svc_comms_matrix_authentication_service --> wl_comms_matrix_authentication_service host_matrix_live_bstein_dev --> svc_comms_matrix_wellknown host_matrix_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse + svc_comms_matrix_guest_register["comms/matrix-guest-register (Service)"] + host_matrix_live_bstein_dev --> svc_comms_matrix_guest_register + wl_comms_matrix_guest_register["comms/matrix-guest-register (Deployment)"] + svc_comms_matrix_guest_register --> wl_comms_matrix_guest_register host_monero_bstein_dev["monero.bstein.dev"] svc_crypto_monerod["crypto/monerod (Service)"] host_monero_bstein_dev --> svc_crypto_monerod wl_crypto_monerod["crypto/monerod (Deployment)"] svc_crypto_monerod --> wl_crypto_monerod + host_office_bstein_dev["office.bstein.dev"] + svc_nextcloud_collabora["nextcloud/collabora (Service)"] + host_office_bstein_dev --> svc_nextcloud_collabora + wl_nextcloud_collabora["nextcloud/collabora (Deployment)"] + svc_nextcloud_collabora --> wl_nextcloud_collabora host_pegasus_bstein_dev["pegasus.bstein.dev"] svc_jellyfin_pegasus["jellyfin/pegasus (Service)"] host_pegasus_bstein_dev --> svc_jellyfin_pegasus @@ -116,8 +125,8 @@ flowchart LR wl_bstein_dev_home_chat_ai_gateway end subgraph comms[comms] - svc_comms_othrys_synapse_matrix_synapse - wl_comms_othrys_synapse_matrix_synapse + svc_comms_matrix_wellknown + wl_comms_matrix_wellknown svc_comms_element_call 
wl_comms_element_call svc_comms_livekit_token_service @@ -126,10 +135,12 @@ flowchart LR wl_comms_livekit svc_comms_othrys_element_element_web wl_comms_othrys_element_element_web - svc_comms_matrix_wellknown - wl_comms_matrix_wellknown + svc_comms_othrys_synapse_matrix_synapse + wl_comms_othrys_synapse_matrix_synapse svc_comms_matrix_authentication_service wl_comms_matrix_authentication_service + svc_comms_matrix_guest_register + wl_comms_matrix_guest_register end subgraph crypto[crypto] svc_crypto_monerod @@ -159,6 +170,8 @@ flowchart LR subgraph nextcloud[nextcloud] svc_nextcloud_nextcloud wl_nextcloud_nextcloud + svc_nextcloud_collabora + wl_nextcloud_collabora end subgraph sso[sso] svc_sso_oauth2_proxy diff --git a/knowledge/runbooks/comms-verify.md b/knowledge/runbooks/comms-verify.md new file mode 100644 index 0000000..8c09d0a --- /dev/null +++ b/knowledge/runbooks/comms-verify.md @@ -0,0 +1,30 @@ +--- +title: Othrys verification checklist +tags: + - comms + - matrix + - element + - livekit +entrypoints: + - https://live.bstein.dev + - https://matrix.live.bstein.dev +--- + +1) Guest join: +- Open a private window and visit: + `https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join` +- Confirm the guest join flow works and the displayname becomes `-`. + +2) Keycloak login: +- Log in from `https://live.bstein.dev` and confirm MAS -> Keycloak -> Element redirect. + +3) Video rooms: +- Start an Element Call room and confirm audio/video with a second account. +- Check that guests can read public rooms but cannot start calls. + +4) Well-known: +- `https://live.bstein.dev/.well-known/matrix/client` returns JSON. +- `https://matrix.live.bstein.dev/.well-known/matrix/client` returns JSON. + +5) TURN reachability: +- Confirm `turn.live.bstein.dev:3478` and `turns:5349` are reachable from WAN. 
diff --git a/scripts/comms_sync_kb.sh b/scripts/comms_sync_kb.sh new file mode 100755 index 0000000..16f9332 --- /dev/null +++ b/scripts/comms_sync_kb.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +set -euo pipefail + +python scripts/knowledge_render_atlas.py --write +python scripts/knowledge_render_atlas.py --write --out services/comms/knowledge diff --git a/services/communication/atlasbot-configmap.yaml b/services/comms/atlasbot-configmap.yaml similarity index 99% rename from services/communication/atlasbot-configmap.yaml rename to services/comms/atlasbot-configmap.yaml index 672c4f4..d8e74e8 100644 --- a/services/communication/atlasbot-configmap.yaml +++ b/services/comms/atlasbot-configmap.yaml @@ -1,4 +1,4 @@ -# services/communication/atlasbot-configmap.yaml +# services/comms/atlasbot-configmap.yaml apiVersion: v1 kind: ConfigMap metadata: diff --git a/services/communication/atlasbot-deployment.yaml b/services/comms/atlasbot-deployment.yaml similarity index 98% rename from services/communication/atlasbot-deployment.yaml rename to services/comms/atlasbot-deployment.yaml index 528d4b2..86e5c28 100644 --- a/services/communication/atlasbot-deployment.yaml +++ b/services/comms/atlasbot-deployment.yaml @@ -1,4 +1,4 @@ -# services/communication/atlasbot-deployment.yaml +# services/comms/atlasbot-deployment.yaml apiVersion: apps/v1 kind: Deployment metadata: diff --git a/services/communication/atlasbot-rbac.yaml b/services/comms/atlasbot-rbac.yaml similarity index 96% rename from services/communication/atlasbot-rbac.yaml rename to services/comms/atlasbot-rbac.yaml index 59685d0..bc6623b 100644 --- a/services/communication/atlasbot-rbac.yaml +++ b/services/comms/atlasbot-rbac.yaml @@ -1,4 +1,4 @@ -# services/communication/atlasbot-rbac.yaml +# services/comms/atlasbot-rbac.yaml apiVersion: v1 kind: ServiceAccount metadata: diff --git a/services/communication/bstein-force-leave-job.yaml b/services/comms/bstein-force-leave-job.yaml similarity index 99% rename from 
services/communication/bstein-force-leave-job.yaml rename to services/comms/bstein-force-leave-job.yaml index 5763290..4690aa6 100644 --- a/services/communication/bstein-force-leave-job.yaml +++ b/services/comms/bstein-force-leave-job.yaml @@ -1,4 +1,4 @@ -# services/communication/bstein-force-leave-job.yaml +# services/comms/bstein-force-leave-job.yaml apiVersion: batch/v1 kind: Job metadata: diff --git a/services/communication/coturn.yaml b/services/comms/coturn.yaml similarity index 99% rename from services/communication/coturn.yaml rename to services/comms/coturn.yaml index 9051082..12fa78a 100644 --- a/services/communication/coturn.yaml +++ b/services/comms/coturn.yaml @@ -1,4 +1,4 @@ -# services/communication/coturn.yaml +# services/comms/coturn.yaml apiVersion: apps/v1 kind: Deployment metadata: diff --git a/services/communication/element-call-config.yaml b/services/comms/element-call-config.yaml similarity index 87% rename from services/communication/element-call-config.yaml rename to services/comms/element-call-config.yaml index c86bbb6..85368f2 100644 --- a/services/communication/element-call-config.yaml +++ b/services/comms/element-call-config.yaml @@ -1,9 +1,8 @@ -# services/communication/element-call-config.yaml +# services/comms/element-call-config.yaml apiVersion: v1 kind: ConfigMap metadata: name: element-call-config - namespace: communication data: config.json: | { diff --git a/services/communication/element-call-deployment.yaml b/services/comms/element-call-deployment.yaml similarity index 92% rename from services/communication/element-call-deployment.yaml rename to services/comms/element-call-deployment.yaml index f5752ac..7f3581d 100644 --- a/services/communication/element-call-deployment.yaml +++ b/services/comms/element-call-deployment.yaml @@ -1,9 +1,8 @@ -# services/communication/element-call-deployment.yaml +# services/comms/element-call-deployment.yaml apiVersion: apps/v1 kind: Deployment metadata: name: element-call - namespace: 
communication labels: app: element-call spec: @@ -41,7 +40,6 @@ apiVersion: v1 kind: Service metadata: name: element-call - namespace: communication spec: selector: app: element-call @@ -54,7 +52,6 @@ apiVersion: networking.k8s.io/v1 kind: Ingress metadata: name: element-call - namespace: communication annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure diff --git a/services/communication/element-rendered.yaml b/services/comms/element-rendered.yaml similarity index 100% rename from services/communication/element-rendered.yaml rename to services/comms/element-rendered.yaml diff --git a/services/communication/guest-name-job.yaml b/services/comms/guest-name-job.yaml similarity index 99% rename from services/communication/guest-name-job.yaml rename to services/comms/guest-name-job.yaml index 3e101f8..5e5a2e9 100644 --- a/services/communication/guest-name-job.yaml +++ b/services/comms/guest-name-job.yaml @@ -1,4 +1,4 @@ -# services/communication/guest-name-job.yaml +# services/comms/guest-name-job.yaml apiVersion: batch/v1 kind: CronJob metadata: diff --git a/services/communication/guest-register-configmap.yaml b/services/comms/guest-register-configmap.yaml similarity index 99% rename from services/communication/guest-register-configmap.yaml rename to services/comms/guest-register-configmap.yaml index 804c7d7..ded54ec 100644 --- a/services/communication/guest-register-configmap.yaml +++ b/services/comms/guest-register-configmap.yaml @@ -1,4 +1,4 @@ -# services/communication/guest-register-configmap.yaml +# services/comms/guest-register-configmap.yaml apiVersion: v1 kind: ConfigMap metadata: diff --git a/services/communication/guest-register-deployment.yaml b/services/comms/guest-register-deployment.yaml similarity index 98% rename from services/communication/guest-register-deployment.yaml rename to services/comms/guest-register-deployment.yaml index 00e430c..a9dd675 100644 --- 
a/services/communication/guest-register-deployment.yaml +++ b/services/comms/guest-register-deployment.yaml @@ -1,4 +1,4 @@ -# services/communication/guest-register-deployment.yaml +# services/comms/guest-register-deployment.yaml apiVersion: apps/v1 kind: Deployment metadata: diff --git a/services/communication/guest-register-service.yaml b/services/comms/guest-register-service.yaml similarity index 84% rename from services/communication/guest-register-service.yaml rename to services/comms/guest-register-service.yaml index 776e3ab..5bb740a 100644 --- a/services/communication/guest-register-service.yaml +++ b/services/comms/guest-register-service.yaml @@ -1,4 +1,4 @@ -# services/communication/guest-register-service.yaml +# services/comms/guest-register-service.yaml apiVersion: v1 kind: Service metadata: diff --git a/services/comms/knowledge/catalog/atlas-summary.json b/services/comms/knowledge/catalog/atlas-summary.json new file mode 100644 index 0000000..2139e29 --- /dev/null +++ b/services/comms/knowledge/catalog/atlas-summary.json @@ -0,0 +1,8 @@ +{ + "counts": { + "helmrelease_host_hints": 7, + "http_endpoints": 35, + "services": 44, + "workloads": 49 + } +} diff --git a/services/comms/knowledge/catalog/atlas.json b/services/comms/knowledge/catalog/atlas.json new file mode 100644 index 0000000..92f08f4 --- /dev/null +++ b/services/comms/knowledge/catalog/atlas.json @@ -0,0 +1,2771 @@ +{ + "cluster": "atlas", + "sources": [ + { + "name": "ai-llm", + "path": "services/ai-llm", + "targetNamespace": "ai" + }, + { + "name": "bstein-dev-home", + "path": "services/bstein-dev-home", + "targetNamespace": "bstein-dev-home" + }, + { + "name": "ci-demo", + "path": "services/ci-demo", + "targetNamespace": null + }, + { + "name": "communication", + "path": "services/comms", + "targetNamespace": "comms" + }, + { + "name": "core", + "path": "infrastructure/core", + "targetNamespace": null + }, + { + "name": "crypto", + "path": "services/crypto", + "targetNamespace": "crypto" + 
}, + { + "name": "flux-system", + "path": "clusters/atlas/flux-system", + "targetNamespace": null + }, + { + "name": "gitea", + "path": "services/gitea", + "targetNamespace": "gitea" + }, + { + "name": "gitops-ui", + "path": "services/gitops-ui", + "targetNamespace": "flux-system" + }, + { + "name": "harbor", + "path": "services/harbor", + "targetNamespace": "harbor" + }, + { + "name": "helm", + "path": "infrastructure/sources/helm", + "targetNamespace": "flux-system" + }, + { + "name": "jellyfin", + "path": "services/jellyfin", + "targetNamespace": "jellyfin" + }, + { + "name": "jenkins", + "path": "services/jenkins", + "targetNamespace": "jenkins" + }, + { + "name": "keycloak", + "path": "services/keycloak", + "targetNamespace": "sso" + }, + { + "name": "longhorn-ui", + "path": "infrastructure/longhorn/ui-ingress", + "targetNamespace": "longhorn-system" + }, + { + "name": "mailu", + "path": "services/mailu", + "targetNamespace": "mailu-mailserver" + }, + { + "name": "metallb", + "path": "infrastructure/metallb", + "targetNamespace": "metallb-system" + }, + { + "name": "monerod", + "path": "services/crypto/monerod", + "targetNamespace": "crypto" + }, + { + "name": "monitoring", + "path": "services/monitoring", + "targetNamespace": null + }, + { + "name": "nextcloud", + "path": "services/nextcloud", + "targetNamespace": "nextcloud" + }, + { + "name": "nextcloud-mail-sync", + "path": "services/nextcloud-mail-sync", + "targetNamespace": "nextcloud" + }, + { + "name": "oauth2-proxy", + "path": "services/oauth2-proxy", + "targetNamespace": "sso" + }, + { + "name": "openldap", + "path": "services/openldap", + "targetNamespace": "sso" + }, + { + "name": "pegasus", + "path": "services/pegasus", + "targetNamespace": "jellyfin" + }, + { + "name": "sui-metrics", + "path": "services/sui-metrics/overlays/atlas", + "targetNamespace": "sui-metrics" + }, + { + "name": "traefik", + "path": "infrastructure/traefik", + "targetNamespace": "traefik" + }, + { + "name": "vault", + 
"path": "services/vault", + "targetNamespace": "vault" + }, + { + "name": "vault-csi", + "path": "infrastructure/vault-csi", + "targetNamespace": "kube-system" + }, + { + "name": "vaultwarden", + "path": "services/vaultwarden", + "targetNamespace": "vaultwarden" + }, + { + "name": "xmr-miner", + "path": "services/crypto/xmr-miner", + "targetNamespace": "crypto" + } + ], + "workloads": [ + { + "kind": "Deployment", + "namespace": "ai", + "name": "ollama", + "labels": { + "app": "ollama" + }, + "serviceAccountName": null, + "nodeSelector": {}, + "images": [ + "ollama/ollama:latest" + ] + }, + { + "kind": "Deployment", + "namespace": "bstein-dev-home", + "name": "bstein-dev-home-backend", + "labels": { + "app": "bstein-dev-home-backend" + }, + "serviceAccountName": "bstein-dev-home", + "nodeSelector": { + "kubernetes.io/arch": "arm64", + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-84" + ] + }, + { + "kind": "Deployment", + "namespace": "bstein-dev-home", + "name": "bstein-dev-home-frontend", + "labels": { + "app": "bstein-dev-home-frontend" + }, + "serviceAccountName": null, + "nodeSelector": { + "kubernetes.io/arch": "arm64", + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-84" + ] + }, + { + "kind": "Deployment", + "namespace": "bstein-dev-home", + "name": "chat-ai-gateway", + "labels": { + "app": "chat-ai-gateway" + }, + "serviceAccountName": null, + "nodeSelector": { + "kubernetes.io/arch": "arm64", + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "python:3.11-slim" + ] + }, + { + "kind": "Deployment", + "namespace": "ci-demo", + "name": "ci-demo", + "labels": { + "app.kubernetes.io/name": "ci-demo" + }, + "serviceAccountName": null, + "nodeSelector": { + "hardware": "rpi4" + }, + "images": [ + "registry.bstein.dev/infra/ci-demo:v0.0.0-3" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + 
"name": "atlasbot", + "labels": { + "app": "atlasbot" + }, + "serviceAccountName": "atlasbot", + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "python:3.11-slim" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "coturn", + "labels": { + "app": "coturn" + }, + "serviceAccountName": null, + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "ghcr.io/coturn/coturn:4.6.2" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "element-call", + "labels": { + "app": "element-call" + }, + "serviceAccountName": null, + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "ghcr.io/element-hq/element-call:latest" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "livekit", + "labels": { + "app": "livekit" + }, + "serviceAccountName": null, + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "livekit/livekit-server:v1.9.0" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "livekit-token-service", + "labels": { + "app": "livekit-token-service" + }, + "serviceAccountName": null, + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "ghcr.io/element-hq/lk-jwt-service:0.3.0" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "matrix-authentication-service", + "labels": { + "app": "matrix-authentication-service" + }, + "serviceAccountName": null, + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "ghcr.io/element-hq/matrix-authentication-service:1.8.0" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "matrix-guest-register", + "labels": { + "app.kubernetes.io/name": "matrix-guest-register" + }, + "serviceAccountName": null, + "nodeSelector": {}, + "images": [ + "python:3.11-slim" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "matrix-wellknown", + "labels": { + "app": "matrix-wellknown" + }, + "serviceAccountName": null, + "nodeSelector": {}, + "images": [ + "nginx:1.27-alpine" + 
] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "othrys-element-element-web", + "labels": { + "app.kubernetes.io/instance": "othrys-element", + "app.kubernetes.io/name": "element-web" + }, + "serviceAccountName": "othrys-element-element-web", + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "ghcr.io/element-hq/element-web:v1.12.6" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "othrys-synapse-matrix-synapse", + "labels": { + "app.kubernetes.io/component": "synapse", + "app.kubernetes.io/instance": "othrys-synapse", + "app.kubernetes.io/name": "matrix-synapse" + }, + "serviceAccountName": "default", + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "ghcr.io/element-hq/synapse:v1.144.0" + ] + }, + { + "kind": "Deployment", + "namespace": "comms", + "name": "othrys-synapse-redis-master", + "labels": { + "app.kubernetes.io/component": "master", + "app.kubernetes.io/instance": "othrys-synapse", + "app.kubernetes.io/managed-by": "Helm", + "app.kubernetes.io/name": "redis", + "helm.sh/chart": "redis-17.17.1" + }, + "serviceAccountName": "othrys-synapse-redis", + "nodeSelector": {}, + "images": [ + "docker.io/bitnamilegacy/redis:7.0.12-debian-11-r34" + ] + }, + { + "kind": "DaemonSet", + "namespace": "crypto", + "name": "monero-xmrig", + "labels": { + "app": "monero-xmrig" + }, + "serviceAccountName": null, + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "ghcr.io/tari-project/xmrig:latest" + ] + }, + { + "kind": "Deployment", + "namespace": "crypto", + "name": "monero-p2pool", + "labels": { + "app": "monero-p2pool" + }, + "serviceAccountName": null, + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "debian:bookworm-slim" + ] + }, + { + "kind": "Deployment", + "namespace": "crypto", + "name": "monerod", + "labels": { + "app": "monerod" + }, + "serviceAccountName": null, + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, 
+ "images": [ + "registry.bstein.dev/crypto/monerod:0.18.4.1" + ] + }, + { + "kind": "Deployment", + "namespace": "flux-system", + "name": "helm-controller", + "labels": { + "app": "helm-controller", + "app.kubernetes.io/component": "helm-controller", + "app.kubernetes.io/instance": "flux-system", + "app.kubernetes.io/part-of": "flux", + "app.kubernetes.io/version": "v2.7.5" + }, + "serviceAccountName": "helm-controller", + "nodeSelector": { + "kubernetes.io/os": "linux" + }, + "images": [ + "ghcr.io/fluxcd/helm-controller:v1.4.5" + ] + }, + { + "kind": "Deployment", + "namespace": "flux-system", + "name": "image-automation-controller", + "labels": { + "app": "image-automation-controller", + "app.kubernetes.io/component": "image-automation-controller", + "app.kubernetes.io/instance": "flux-system", + "app.kubernetes.io/part-of": "flux", + "app.kubernetes.io/version": "v2.7.5" + }, + "serviceAccountName": "image-automation-controller", + "nodeSelector": { + "kubernetes.io/os": "linux" + }, + "images": [ + "ghcr.io/fluxcd/image-automation-controller:v1.0.4" + ] + }, + { + "kind": "Deployment", + "namespace": "flux-system", + "name": "image-reflector-controller", + "labels": { + "app": "image-reflector-controller", + "app.kubernetes.io/component": "image-reflector-controller", + "app.kubernetes.io/instance": "flux-system", + "app.kubernetes.io/part-of": "flux", + "app.kubernetes.io/version": "v2.7.5" + }, + "serviceAccountName": "image-reflector-controller", + "nodeSelector": { + "kubernetes.io/os": "linux" + }, + "images": [ + "ghcr.io/fluxcd/image-reflector-controller:v1.0.4" + ] + }, + { + "kind": "Deployment", + "namespace": "flux-system", + "name": "kustomize-controller", + "labels": { + "app": "kustomize-controller", + "app.kubernetes.io/component": "kustomize-controller", + "app.kubernetes.io/instance": "flux-system", + "app.kubernetes.io/part-of": "flux", + "app.kubernetes.io/version": "v2.7.5" + }, + "serviceAccountName": "kustomize-controller", + 
"nodeSelector": { + "kubernetes.io/os": "linux" + }, + "images": [ + "ghcr.io/fluxcd/kustomize-controller:v1.7.3" + ] + }, + { + "kind": "Deployment", + "namespace": "flux-system", + "name": "notification-controller", + "labels": { + "app": "notification-controller", + "app.kubernetes.io/component": "notification-controller", + "app.kubernetes.io/instance": "flux-system", + "app.kubernetes.io/part-of": "flux", + "app.kubernetes.io/version": "v2.7.5" + }, + "serviceAccountName": "notification-controller", + "nodeSelector": { + "kubernetes.io/os": "linux" + }, + "images": [ + "ghcr.io/fluxcd/notification-controller:v1.7.5" + ] + }, + { + "kind": "Deployment", + "namespace": "flux-system", + "name": "source-controller", + "labels": { + "app": "source-controller", + "app.kubernetes.io/component": "source-controller", + "app.kubernetes.io/instance": "flux-system", + "app.kubernetes.io/part-of": "flux", + "app.kubernetes.io/version": "v2.7.5" + }, + "serviceAccountName": "source-controller", + "nodeSelector": { + "kubernetes.io/os": "linux" + }, + "images": [ + "ghcr.io/fluxcd/source-controller:v1.7.4" + ] + }, + { + "kind": "Deployment", + "namespace": "gitea", + "name": "gitea", + "labels": { + "app": "gitea" + }, + "serviceAccountName": null, + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "gitea/gitea:1.23" + ] + }, + { + "kind": "Deployment", + "namespace": "jellyfin", + "name": "jellyfin", + "labels": { + "app": "jellyfin" + }, + "serviceAccountName": null, + "nodeSelector": {}, + "images": [ + "docker.io/jellyfin/jellyfin:10.11.5" + ] + }, + { + "kind": "Deployment", + "namespace": "jellyfin", + "name": "pegasus", + "labels": { + "app": "pegasus" + }, + "serviceAccountName": null, + "nodeSelector": { + "kubernetes.io/arch": "arm64", + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "alpine:3.20", + "registry.bstein.dev/streaming/pegasus:1.2.32" + ] + }, + { + "kind": "Deployment", + "namespace": "jenkins", + 
"name": "jenkins", + "labels": { + "app": "jenkins" + }, + "serviceAccountName": "jenkins", + "nodeSelector": { + "kubernetes.io/arch": "arm64", + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "jenkins/jenkins:2.528.3-jdk21" + ] + }, + { + "kind": "DaemonSet", + "namespace": "kube-system", + "name": "nvidia-device-plugin-jetson", + "labels": { + "app.kubernetes.io/instance": "jetson", + "app.kubernetes.io/name": "nvidia-device-plugin" + }, + "serviceAccountName": null, + "nodeSelector": { + "jetson": "true", + "kubernetes.io/arch": "arm64" + }, + "images": [ + "nvcr.io/nvidia/k8s-device-plugin:v0.16.2" + ] + }, + { + "kind": "DaemonSet", + "namespace": "kube-system", + "name": "nvidia-device-plugin-minipc", + "labels": { + "app.kubernetes.io/instance": "titan22", + "app.kubernetes.io/name": "nvidia-device-plugin" + }, + "serviceAccountName": null, + "nodeSelector": { + "kubernetes.io/arch": "amd64", + "kubernetes.io/hostname": "titan-22" + }, + "images": [ + "nvcr.io/nvidia/k8s-device-plugin:v0.16.2" + ] + }, + { + "kind": "DaemonSet", + "namespace": "kube-system", + "name": "nvidia-device-plugin-tethys", + "labels": { + "app.kubernetes.io/instance": "titan24", + "app.kubernetes.io/name": "nvidia-device-plugin" + }, + "serviceAccountName": null, + "nodeSelector": { + "kubernetes.io/arch": "amd64", + "kubernetes.io/hostname": "titan-24" + }, + "images": [ + "nvcr.io/nvidia/k8s-device-plugin:v0.16.2" + ] + }, + { + "kind": "DaemonSet", + "namespace": "kube-system", + "name": "vault-csi-provider", + "labels": { + "app.kubernetes.io/name": "vault-csi-provider" + }, + "serviceAccountName": "vault-csi-provider", + "nodeSelector": { + "kubernetes.io/os": "linux" + }, + "images": [ + "hashicorp/vault-csi-provider:1.7.0" + ] + }, + { + "kind": "Deployment", + "namespace": "longhorn-system", + "name": "oauth2-proxy-longhorn", + "labels": { + "app": "oauth2-proxy-longhorn" + }, + "serviceAccountName": null, + "nodeSelector": { + 
"node-role.kubernetes.io/worker": "true" + }, + "images": [ + "quay.io/oauth2-proxy/oauth2-proxy:v7.6.0" + ] + }, + { + "kind": "DaemonSet", + "namespace": "mailu-mailserver", + "name": "vip-controller", + "labels": { + "app": "vip-controller" + }, + "serviceAccountName": "vip-controller", + "nodeSelector": { + "mailu.bstein.dev/vip": "true" + }, + "images": [ + "lachlanevenson/k8s-kubectl:latest" + ] + }, + { + "kind": "Deployment", + "namespace": "mailu-mailserver", + "name": "mailu-sync-listener", + "labels": { + "app": "mailu-sync-listener" + }, + "serviceAccountName": null, + "nodeSelector": {}, + "images": [ + "python:3.11-alpine" + ] + }, + { + "kind": "DaemonSet", + "namespace": "metallb-system", + "name": "metallb-speaker", + "labels": { + "app.kubernetes.io/component": "speaker", + "app.kubernetes.io/instance": "metallb", + "app.kubernetes.io/name": "metallb" + }, + "serviceAccountName": "metallb-speaker", + "nodeSelector": { + "kubernetes.io/os": "linux" + }, + "images": [ + "quay.io/frrouting/frr:10.4.1", + "quay.io/metallb/speaker:v0.15.3" + ] + }, + { + "kind": "Deployment", + "namespace": "metallb-system", + "name": "metallb-controller", + "labels": { + "app.kubernetes.io/component": "controller", + "app.kubernetes.io/instance": "metallb", + "app.kubernetes.io/name": "metallb" + }, + "serviceAccountName": "metallb-controller", + "nodeSelector": { + "kubernetes.io/os": "linux" + }, + "images": [ + "quay.io/metallb/controller:v0.15.3" + ] + }, + { + "kind": "DaemonSet", + "namespace": "monitoring", + "name": "dcgm-exporter", + "labels": { + "app": "dcgm-exporter" + }, + "serviceAccountName": "default", + "nodeSelector": {}, + "images": [ + "registry.bstein.dev/monitoring/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04" + ] + }, + { + "kind": "Deployment", + "namespace": "monitoring", + "name": "postmark-exporter", + "labels": { + "app": "postmark-exporter" + }, + "serviceAccountName": null, + "nodeSelector": {}, + "images": [ + "python:3.12-alpine" + ] + }, + { 
+ "kind": "Deployment", + "namespace": "nextcloud", + "name": "collabora", + "labels": { + "app": "collabora" + }, + "serviceAccountName": null, + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "collabora/code:latest" + ] + }, + { + "kind": "Deployment", + "namespace": "nextcloud", + "name": "nextcloud", + "labels": { + "app": "nextcloud" + }, + "serviceAccountName": null, + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "nextcloud:29-apache" + ] + }, + { + "kind": "Deployment", + "namespace": "sso", + "name": "keycloak", + "labels": { + "app": "keycloak" + }, + "serviceAccountName": null, + "nodeSelector": {}, + "images": [ + "quay.io/keycloak/keycloak:26.0.7" + ] + }, + { + "kind": "Deployment", + "namespace": "sso", + "name": "oauth2-proxy", + "labels": { + "app": "oauth2-proxy" + }, + "serviceAccountName": null, + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "quay.io/oauth2-proxy/oauth2-proxy:v7.6.0" + ] + }, + { + "kind": "StatefulSet", + "namespace": "sso", + "name": "openldap", + "labels": { + "app": "openldap" + }, + "serviceAccountName": null, + "nodeSelector": { + "kubernetes.io/arch": "arm64", + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "docker.io/osixia/openldap:1.5.0" + ] + }, + { + "kind": "Deployment", + "namespace": "sui-metrics", + "name": "sui-metrics", + "labels": { + "app": "sui-metrics" + }, + "serviceAccountName": "sui-metrics", + "nodeSelector": { + "kubernetes.io/hostname": "titan-24" + }, + "images": [ + "victoriametrics/vmagent:v1.103.0" + ] + }, + { + "kind": "Deployment", + "namespace": "traefik", + "name": "traefik", + "labels": { + "app": "traefik" + }, + "serviceAccountName": "traefik-ingress-controller", + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "traefik:v3.3.3" + ] + }, + { + "kind": "StatefulSet", + "namespace": "vault", + "name": "vault", + "labels": { + "app": "vault" + }, + "serviceAccountName": "vault", + 
"nodeSelector": { + "kubernetes.io/arch": "arm64", + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "hashicorp/vault:1.17.6" + ] + }, + { + "kind": "Deployment", + "namespace": "vaultwarden", + "name": "vaultwarden", + "labels": { + "app": "vaultwarden" + }, + "serviceAccountName": null, + "nodeSelector": {}, + "images": [ + "vaultwarden/server:1.33.2" + ] + } + ], + "services": [ + { + "namespace": "ai", + "name": "ollama", + "type": "ClusterIP", + "selector": { + "app": "ollama" + }, + "ports": [ + { + "name": "http", + "port": 11434, + "targetPort": 11434, + "protocol": "TCP" + } + ] + }, + { + "namespace": "bstein-dev-home", + "name": "bstein-dev-home-backend", + "type": "ClusterIP", + "selector": { + "app": "bstein-dev-home-backend" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": 8080, + "protocol": "TCP" + } + ] + }, + { + "namespace": "bstein-dev-home", + "name": "bstein-dev-home-frontend", + "type": "ClusterIP", + "selector": { + "app": "bstein-dev-home-frontend" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": 80, + "protocol": "TCP" + } + ] + }, + { + "namespace": "bstein-dev-home", + "name": "chat-ai-gateway", + "type": "ClusterIP", + "selector": { + "app": "chat-ai-gateway" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": 8080, + "protocol": "TCP" + } + ] + }, + { + "namespace": "ci-demo", + "name": "ci-demo", + "type": "ClusterIP", + "selector": { + "app.kubernetes.io/name": "ci-demo" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "comms", + "name": "coturn", + "type": "LoadBalancer", + "selector": { + "app": "coturn" + }, + "ports": [ + { + "name": "turn-udp", + "port": 3478, + "targetPort": 3478, + "protocol": "UDP" + }, + { + "name": "turn-tcp", + "port": 3478, + "targetPort": 3478, + "protocol": "TCP" + }, + { + "name": "turn-tls", + "port": 5349, + "targetPort": 5349, + "protocol": "TCP" 
+ }, + { + "name": "relay-50000", + "port": 50000, + "targetPort": 50000, + "protocol": "UDP" + }, + { + "name": "relay-50001", + "port": 50001, + "targetPort": 50001, + "protocol": "UDP" + }, + { + "name": "relay-50002", + "port": 50002, + "targetPort": 50002, + "protocol": "UDP" + }, + { + "name": "relay-50003", + "port": 50003, + "targetPort": 50003, + "protocol": "UDP" + }, + { + "name": "relay-50004", + "port": 50004, + "targetPort": 50004, + "protocol": "UDP" + }, + { + "name": "relay-50005", + "port": 50005, + "targetPort": 50005, + "protocol": "UDP" + }, + { + "name": "relay-50006", + "port": 50006, + "targetPort": 50006, + "protocol": "UDP" + }, + { + "name": "relay-50007", + "port": 50007, + "targetPort": 50007, + "protocol": "UDP" + }, + { + "name": "relay-50008", + "port": 50008, + "targetPort": 50008, + "protocol": "UDP" + }, + { + "name": "relay-50009", + "port": 50009, + "targetPort": 50009, + "protocol": "UDP" + }, + { + "name": "relay-50010", + "port": 50010, + "targetPort": 50010, + "protocol": "UDP" + }, + { + "name": "relay-50011", + "port": 50011, + "targetPort": 50011, + "protocol": "UDP" + }, + { + "name": "relay-50012", + "port": 50012, + "targetPort": 50012, + "protocol": "UDP" + }, + { + "name": "relay-50013", + "port": 50013, + "targetPort": 50013, + "protocol": "UDP" + }, + { + "name": "relay-50014", + "port": 50014, + "targetPort": 50014, + "protocol": "UDP" + }, + { + "name": "relay-50015", + "port": 50015, + "targetPort": 50015, + "protocol": "UDP" + }, + { + "name": "relay-50016", + "port": 50016, + "targetPort": 50016, + "protocol": "UDP" + }, + { + "name": "relay-50017", + "port": 50017, + "targetPort": 50017, + "protocol": "UDP" + }, + { + "name": "relay-50018", + "port": 50018, + "targetPort": 50018, + "protocol": "UDP" + }, + { + "name": "relay-50019", + "port": 50019, + "targetPort": 50019, + "protocol": "UDP" + }, + { + "name": "relay-50020", + "port": 50020, + "targetPort": 50020, + "protocol": "UDP" + }, + { + "name": 
"relay-50021", + "port": 50021, + "targetPort": 50021, + "protocol": "UDP" + }, + { + "name": "relay-50022", + "port": 50022, + "targetPort": 50022, + "protocol": "UDP" + }, + { + "name": "relay-50023", + "port": 50023, + "targetPort": 50023, + "protocol": "UDP" + }, + { + "name": "relay-50024", + "port": 50024, + "targetPort": 50024, + "protocol": "UDP" + }, + { + "name": "relay-50025", + "port": 50025, + "targetPort": 50025, + "protocol": "UDP" + }, + { + "name": "relay-50026", + "port": 50026, + "targetPort": 50026, + "protocol": "UDP" + }, + { + "name": "relay-50027", + "port": 50027, + "targetPort": 50027, + "protocol": "UDP" + }, + { + "name": "relay-50028", + "port": 50028, + "targetPort": 50028, + "protocol": "UDP" + }, + { + "name": "relay-50029", + "port": 50029, + "targetPort": 50029, + "protocol": "UDP" + }, + { + "name": "relay-50030", + "port": 50030, + "targetPort": 50030, + "protocol": "UDP" + }, + { + "name": "relay-50031", + "port": 50031, + "targetPort": 50031, + "protocol": "UDP" + }, + { + "name": "relay-50032", + "port": 50032, + "targetPort": 50032, + "protocol": "UDP" + }, + { + "name": "relay-50033", + "port": 50033, + "targetPort": 50033, + "protocol": "UDP" + }, + { + "name": "relay-50034", + "port": 50034, + "targetPort": 50034, + "protocol": "UDP" + }, + { + "name": "relay-50035", + "port": 50035, + "targetPort": 50035, + "protocol": "UDP" + }, + { + "name": "relay-50036", + "port": 50036, + "targetPort": 50036, + "protocol": "UDP" + }, + { + "name": "relay-50037", + "port": 50037, + "targetPort": 50037, + "protocol": "UDP" + }, + { + "name": "relay-50038", + "port": 50038, + "targetPort": 50038, + "protocol": "UDP" + }, + { + "name": "relay-50039", + "port": 50039, + "targetPort": 50039, + "protocol": "UDP" + }, + { + "name": "relay-50040", + "port": 50040, + "targetPort": 50040, + "protocol": "UDP" + }, + { + "name": "relay-50041", + "port": 50041, + "targetPort": 50041, + "protocol": "UDP" + }, + { + "name": "relay-50042", + "port": 
50042, + "targetPort": 50042, + "protocol": "UDP" + }, + { + "name": "relay-50043", + "port": 50043, + "targetPort": 50043, + "protocol": "UDP" + }, + { + "name": "relay-50044", + "port": 50044, + "targetPort": 50044, + "protocol": "UDP" + }, + { + "name": "relay-50045", + "port": 50045, + "targetPort": 50045, + "protocol": "UDP" + }, + { + "name": "relay-50046", + "port": 50046, + "targetPort": 50046, + "protocol": "UDP" + }, + { + "name": "relay-50047", + "port": 50047, + "targetPort": 50047, + "protocol": "UDP" + }, + { + "name": "relay-50048", + "port": 50048, + "targetPort": 50048, + "protocol": "UDP" + }, + { + "name": "relay-50049", + "port": 50049, + "targetPort": 50049, + "protocol": "UDP" + }, + { + "name": "relay-50050", + "port": 50050, + "targetPort": 50050, + "protocol": "UDP" + } + ] + }, + { + "namespace": "comms", + "name": "element-call", + "type": "ClusterIP", + "selector": { + "app": "element-call" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": 8080, + "protocol": "TCP" + } + ] + }, + { + "namespace": "comms", + "name": "livekit", + "type": "LoadBalancer", + "selector": { + "app": "livekit" + }, + "ports": [ + { + "name": "http", + "port": 7880, + "targetPort": 7880, + "protocol": "TCP" + }, + { + "name": "rtc-tcp", + "port": 7881, + "targetPort": 7881, + "protocol": "TCP" + }, + { + "name": "rtc-udp-7882", + "port": 7882, + "targetPort": 7882, + "protocol": "UDP" + }, + { + "name": "rtc-udp-7883", + "port": 7883, + "targetPort": 7883, + "protocol": "UDP" + } + ] + }, + { + "namespace": "comms", + "name": "livekit-token-service", + "type": "ClusterIP", + "selector": { + "app": "livekit-token-service" + }, + "ports": [ + { + "name": "http", + "port": 8080, + "targetPort": 8080, + "protocol": "TCP" + } + ] + }, + { + "namespace": "comms", + "name": "matrix-authentication-service", + "type": "ClusterIP", + "selector": { + "app": "matrix-authentication-service" + }, + "ports": [ + { + "name": "http", + "port": 8080, + 
"targetPort": "http", + "protocol": "TCP" + }, + { + "name": "internal", + "port": 8081, + "targetPort": "internal", + "protocol": "TCP" + } + ] + }, + { + "namespace": "comms", + "name": "matrix-guest-register", + "type": "ClusterIP", + "selector": { + "app.kubernetes.io/name": "matrix-guest-register" + }, + "ports": [ + { + "name": "http", + "port": 8080, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "comms", + "name": "matrix-wellknown", + "type": "ClusterIP", + "selector": { + "app": "matrix-wellknown" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": 80, + "protocol": "TCP" + } + ] + }, + { + "namespace": "comms", + "name": "othrys-element-element-web", + "type": "ClusterIP", + "selector": { + "app.kubernetes.io/instance": "othrys-element", + "app.kubernetes.io/name": "element-web" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "comms", + "name": "othrys-synapse-matrix-synapse", + "type": "ClusterIP", + "selector": { + "app.kubernetes.io/component": "synapse", + "app.kubernetes.io/instance": "othrys-synapse", + "app.kubernetes.io/name": "matrix-synapse" + }, + "ports": [ + { + "name": "http", + "port": 8008, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "comms", + "name": "othrys-synapse-redis-headless", + "type": "ClusterIP", + "selector": { + "app.kubernetes.io/instance": "othrys-synapse", + "app.kubernetes.io/name": "redis" + }, + "ports": [ + { + "name": "tcp-redis", + "port": 6379, + "targetPort": "redis", + "protocol": "TCP" + } + ] + }, + { + "namespace": "comms", + "name": "othrys-synapse-redis-master", + "type": "ClusterIP", + "selector": { + "app.kubernetes.io/component": "master", + "app.kubernetes.io/instance": "othrys-synapse", + "app.kubernetes.io/name": "redis" + }, + "ports": [ + { + "name": "tcp-redis", + "port": 6379, + "targetPort": "redis", + "protocol": "TCP" + } + ] + }, + { + 
"namespace": "comms", + "name": "othrys-synapse-replication", + "type": "ClusterIP", + "selector": { + "app.kubernetes.io/component": "synapse", + "app.kubernetes.io/instance": "othrys-synapse", + "app.kubernetes.io/name": "matrix-synapse" + }, + "ports": [ + { + "name": "replication", + "port": 9093, + "targetPort": "replication", + "protocol": "TCP" + } + ] + }, + { + "namespace": "crypto", + "name": "monerod", + "type": "ClusterIP", + "selector": { + "app": "monerod" + }, + "ports": [ + { + "name": "rpc", + "port": 18081, + "targetPort": 18081, + "protocol": "TCP" + }, + { + "name": "p2p", + "port": 18080, + "targetPort": 18080, + "protocol": "TCP" + }, + { + "name": "zmq", + "port": 18083, + "targetPort": 18083, + "protocol": "TCP" + } + ] + }, + { + "namespace": "crypto", + "name": "p2pool", + "type": "ClusterIP", + "selector": { + "app": "p2pool" + }, + "ports": [ + { + "name": "stratum", + "port": 3333, + "targetPort": 3333, + "protocol": "TCP" + } + ] + }, + { + "namespace": "flux-system", + "name": "notification-controller", + "type": "ClusterIP", + "selector": { + "app": "notification-controller" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "flux-system", + "name": "source-controller", + "type": "ClusterIP", + "selector": { + "app": "source-controller" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "flux-system", + "name": "webhook-receiver", + "type": "ClusterIP", + "selector": { + "app": "notification-controller" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http-webhook", + "protocol": "TCP" + } + ] + }, + { + "namespace": "gitea", + "name": "gitea", + "type": "ClusterIP", + "selector": { + "app": "gitea" + }, + "ports": [ + { + "name": "http", + "port": 3000, + "targetPort": 3000, + "protocol": "TCP" + } + ] + }, + { + "namespace": "gitea", + "name": 
"gitea-ssh", + "type": "NodePort", + "selector": { + "app": "gitea" + }, + "ports": [ + { + "name": "ssh", + "port": 2242, + "targetPort": 2242, + "protocol": "TCP" + } + ] + }, + { + "namespace": "jellyfin", + "name": "jellyfin", + "type": "ClusterIP", + "selector": { + "app": "jellyfin" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": 8096, + "protocol": "TCP" + } + ] + }, + { + "namespace": "jellyfin", + "name": "pegasus", + "type": "ClusterIP", + "selector": { + "app": "pegasus" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "jenkins", + "name": "jenkins", + "type": "ClusterIP", + "selector": { + "app": "jenkins" + }, + "ports": [ + { + "name": "http", + "port": 8080, + "targetPort": 8080, + "protocol": "TCP" + }, + { + "name": "agent-listener", + "port": 50000, + "targetPort": 50000, + "protocol": "TCP" + } + ] + }, + { + "namespace": "kube-system", + "name": "traefik", + "type": "LoadBalancer", + "selector": { + "app.kubernetes.io/instance": "traefik-kube-system", + "app.kubernetes.io/name": "traefik" + }, + "ports": [ + { + "name": "web", + "port": 80, + "targetPort": "web", + "protocol": "TCP" + }, + { + "name": "websecure", + "port": 443, + "targetPort": "websecure", + "protocol": "TCP" + } + ] + }, + { + "namespace": "longhorn-system", + "name": "oauth2-proxy-longhorn", + "type": "ClusterIP", + "selector": { + "app": "oauth2-proxy-longhorn" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": 4180, + "protocol": "TCP" + } + ] + }, + { + "namespace": "mailu-mailserver", + "name": "mailu-front-lb", + "type": "LoadBalancer", + "selector": { + "app.kubernetes.io/component": "front", + "app.kubernetes.io/instance": "mailu", + "app.kubernetes.io/name": "mailu" + }, + "ports": [ + { + "name": "smtp", + "port": 25, + "targetPort": 25, + "protocol": "TCP" + }, + { + "name": "smtps", + "port": 465, + "targetPort": 465, + "protocol": "TCP" + }, + 
{ + "name": "submission", + "port": 587, + "targetPort": 587, + "protocol": "TCP" + }, + { + "name": "imaps", + "port": 993, + "targetPort": 993, + "protocol": "TCP" + }, + { + "name": "pop3s", + "port": 995, + "targetPort": 995, + "protocol": "TCP" + }, + { + "name": "sieve", + "port": 4190, + "targetPort": 4190, + "protocol": "TCP" + } + ] + }, + { + "namespace": "mailu-mailserver", + "name": "mailu-sync-listener", + "type": "ClusterIP", + "selector": { + "app": "mailu-sync-listener" + }, + "ports": [ + { + "name": "http", + "port": 8080, + "targetPort": 8080, + "protocol": "TCP" + } + ] + }, + { + "namespace": "metallb-system", + "name": "metallb-webhook-service", + "type": "ClusterIP", + "selector": { + "app.kubernetes.io/component": "controller", + "app.kubernetes.io/instance": "metallb", + "app.kubernetes.io/name": "metallb" + }, + "ports": [ + { + "name": null, + "port": 443, + "targetPort": 9443, + "protocol": "TCP" + } + ] + }, + { + "namespace": "monitoring", + "name": "dcgm-exporter", + "type": "ClusterIP", + "selector": { + "app": "dcgm-exporter" + }, + "ports": [ + { + "name": "metrics", + "port": 9400, + "targetPort": "metrics", + "protocol": "TCP" + } + ] + }, + { + "namespace": "monitoring", + "name": "postmark-exporter", + "type": "ClusterIP", + "selector": { + "app": "postmark-exporter" + }, + "ports": [ + { + "name": "http", + "port": 8000, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "nextcloud", + "name": "collabora", + "type": "ClusterIP", + "selector": { + "app": "collabora" + }, + "ports": [ + { + "name": "http", + "port": 9980, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "nextcloud", + "name": "nextcloud", + "type": "ClusterIP", + "selector": { + "app": "nextcloud" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "sso", + "name": "keycloak", + "type": "ClusterIP", + "selector": { + "app": "keycloak" 
+ }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "sso", + "name": "oauth2-proxy", + "type": "ClusterIP", + "selector": { + "app": "oauth2-proxy" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": 4180, + "protocol": "TCP" + } + ] + }, + { + "namespace": "sso", + "name": "openldap", + "type": "ClusterIP", + "selector": { + "app": "openldap" + }, + "ports": [ + { + "name": "ldap", + "port": 389, + "targetPort": "ldap", + "protocol": "TCP" + }, + { + "name": "ldaps", + "port": 636, + "targetPort": "ldaps", + "protocol": "TCP" + } + ] + }, + { + "namespace": "sui-metrics", + "name": "sui-metrics", + "type": "ClusterIP", + "selector": { + "app": "sui-metrics" + }, + "ports": [ + { + "name": "http", + "port": 8429, + "targetPort": 8429, + "protocol": "TCP" + } + ] + }, + { + "namespace": "traefik", + "name": "traefik-metrics", + "type": "ClusterIP", + "selector": { + "app": "traefik" + }, + "ports": [ + { + "name": "metrics", + "port": 9100, + "targetPort": "metrics", + "protocol": "TCP" + } + ] + }, + { + "namespace": "vault", + "name": "vault", + "type": "ClusterIP", + "selector": { + "app": "vault" + }, + "ports": [ + { + "name": "api", + "port": 8200, + "targetPort": 8200, + "protocol": "TCP" + }, + { + "name": "cluster", + "port": 8201, + "targetPort": 8201, + "protocol": "TCP" + } + ] + }, + { + "namespace": "vault", + "name": "vault-internal", + "type": "ClusterIP", + "selector": { + "app": "vault" + }, + "ports": [ + { + "name": "api", + "port": 8200, + "targetPort": 8200, + "protocol": "TCP" + }, + { + "name": "cluster", + "port": 8201, + "targetPort": 8201, + "protocol": "TCP" + } + ] + }, + { + "namespace": "vaultwarden", + "name": "vaultwarden-service", + "type": "ClusterIP", + "selector": { + "app": "vaultwarden" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + } + ], + "http_endpoints": [ + { + 
"host": "auth.bstein.dev", + "path": "/", + "backend": { + "namespace": "sso", + "service": "oauth2-proxy", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "oauth2-proxy" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "oauth2-proxy", + "source": "oauth2-proxy" + } + }, + { + "host": "bstein.dev", + "path": "/", + "backend": { + "namespace": "bstein-dev-home", + "service": "bstein-dev-home-frontend", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "bstein-dev-home-frontend" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "bstein-dev-home", + "source": "bstein-dev-home" + } + }, + { + "host": "bstein.dev", + "path": "/.well-known/matrix/client", + "backend": { + "namespace": "comms", + "service": "matrix-wellknown", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-wellknown" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-wellknown-bstein-dev", + "source": "communication" + } + }, + { + "host": "bstein.dev", + "path": "/.well-known/matrix/server", + "backend": { + "namespace": "comms", + "service": "matrix-wellknown", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-wellknown" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-wellknown-bstein-dev", + "source": "communication" + } + }, + { + "host": "bstein.dev", + "path": "/api", + "backend": { + "namespace": "bstein-dev-home", + "service": "bstein-dev-home-backend", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "bstein-dev-home-backend" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "bstein-dev-home", + "source": "bstein-dev-home" + } + }, + { + "host": "call.live.bstein.dev", + "path": "/", + "backend": { + "namespace": "comms", + "service": "element-call", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "element-call" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "element-call", + "source": "communication" + } + }, 
+ { + "host": "chat.ai.bstein.dev", + "path": "/", + "backend": { + "namespace": "bstein-dev-home", + "service": "chat-ai-gateway", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "chat-ai-gateway" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "bstein-dev-home", + "source": "bstein-dev-home" + } + }, + { + "host": "ci.bstein.dev", + "path": "/", + "backend": { + "namespace": "jenkins", + "service": "jenkins", + "port": "http", + "workloads": [ + { + "kind": "Deployment", + "name": "jenkins" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "jenkins", + "source": "jenkins" + } + }, + { + "host": "cloud.bstein.dev", + "path": "/", + "backend": { + "namespace": "nextcloud", + "service": "nextcloud", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "nextcloud" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "nextcloud", + "source": "nextcloud" + } + }, + { + "host": "kit.live.bstein.dev", + "path": "/livekit/jwt", + "backend": { + "namespace": "comms", + "service": "livekit-token-service", + "port": 8080, + "workloads": [ + { + "kind": "Deployment", + "name": "livekit-token-service" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "livekit-jwt-ingress", + "source": "communication" + } + }, + { + "host": "kit.live.bstein.dev", + "path": "/livekit/sfu", + "backend": { + "namespace": "comms", + "service": "livekit", + "port": 7880, + "workloads": [ + { + "kind": "Deployment", + "name": "livekit" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "livekit-ingress", + "source": "communication" + } + }, + { + "host": "live.bstein.dev", + "path": "/", + "backend": { + "namespace": "comms", + "service": "othrys-element-element-web", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "othrys-element-element-web" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "othrys-element-element-web", + "source": "communication" + } + }, + { + "host": "live.bstein.dev", + "path": 
"/.well-known/matrix/client", + "backend": { + "namespace": "comms", + "service": "matrix-wellknown", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-wellknown" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-wellknown", + "source": "communication" + } + }, + { + "host": "live.bstein.dev", + "path": "/.well-known/matrix/server", + "backend": { + "namespace": "comms", + "service": "matrix-wellknown", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-wellknown" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-wellknown", + "source": "communication" + } + }, + { + "host": "live.bstein.dev", + "path": "/_matrix", + "backend": { + "namespace": "comms", + "service": "othrys-synapse-matrix-synapse", + "port": 8008, + "workloads": [ + { + "kind": "Deployment", + "name": "othrys-synapse-matrix-synapse" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-routing", + "source": "communication" + } + }, + { + "host": "longhorn.bstein.dev", + "path": "/", + "backend": { + "namespace": "longhorn-system", + "service": "oauth2-proxy-longhorn", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "oauth2-proxy-longhorn" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "longhorn-ingress", + "source": "longhorn-ui" + } + }, + { + "host": "mail.bstein.dev", + "path": "/", + "backend": { + "namespace": "mailu-mailserver", + "service": "mailu-front", + "port": 443, + "workloads": [] + }, + "via": { + "kind": "IngressRoute", + "name": "mailu", + "source": "mailu" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": "/", + "backend": { + "namespace": "comms", + "service": "matrix-authentication-service", + "port": 8080, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-authentication-service" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-routing", + "source": "communication" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": 
"/.well-known/matrix/client", + "backend": { + "namespace": "comms", + "service": "matrix-wellknown", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-wellknown" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-wellknown-matrix-live", + "source": "communication" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": "/.well-known/matrix/server", + "backend": { + "namespace": "comms", + "service": "matrix-wellknown", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-wellknown" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-wellknown-matrix-live", + "source": "communication" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": "/_matrix", + "backend": { + "namespace": "comms", + "service": "othrys-synapse-matrix-synapse", + "port": 8008, + "workloads": [ + { + "kind": "Deployment", + "name": "othrys-synapse-matrix-synapse" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-routing", + "source": "communication" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": "/_matrix/client/r0/register", + "backend": { + "namespace": "comms", + "service": "matrix-guest-register", + "port": 8080, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-guest-register" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-routing", + "source": "communication" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": "/_matrix/client/v3/login", + "backend": { + "namespace": "comms", + "service": "matrix-authentication-service", + "port": 8080, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-authentication-service" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-routing", + "source": "communication" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": "/_matrix/client/v3/logout", + "backend": { + "namespace": "comms", + "service": "matrix-authentication-service", + "port": 8080, + "workloads": [ + { + "kind": 
"Deployment", + "name": "matrix-authentication-service" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-routing", + "source": "communication" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": "/_matrix/client/v3/refresh", + "backend": { + "namespace": "comms", + "service": "matrix-authentication-service", + "port": 8080, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-authentication-service" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-routing", + "source": "communication" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": "/_matrix/client/v3/register", + "backend": { + "namespace": "comms", + "service": "matrix-guest-register", + "port": 8080, + "workloads": [ + { + "kind": "Deployment", + "name": "matrix-guest-register" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-routing", + "source": "communication" + } + }, + { + "host": "matrix.live.bstein.dev", + "path": "/_synapse", + "backend": { + "namespace": "comms", + "service": "othrys-synapse-matrix-synapse", + "port": 8008, + "workloads": [ + { + "kind": "Deployment", + "name": "othrys-synapse-matrix-synapse" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "matrix-routing", + "source": "communication" + } + }, + { + "host": "monero.bstein.dev", + "path": "/", + "backend": { + "namespace": "crypto", + "service": "monerod", + "port": 18081, + "workloads": [ + { + "kind": "Deployment", + "name": "monerod" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "monerod", + "source": "monerod" + } + }, + { + "host": "office.bstein.dev", + "path": "/", + "backend": { + "namespace": "nextcloud", + "service": "collabora", + "port": 9980, + "workloads": [ + { + "kind": "Deployment", + "name": "collabora" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "collabora", + "source": "nextcloud" + } + }, + { + "host": "pegasus.bstein.dev", + "path": "/", + "backend": { + "namespace": "jellyfin", + "service": "pegasus", + "port": 80, + 
"workloads": [ + { + "kind": "Deployment", + "name": "pegasus" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "pegasus", + "source": "pegasus" + } + }, + { + "host": "scm.bstein.dev", + "path": "/", + "backend": { + "namespace": "gitea", + "service": "gitea", + "port": 3000, + "workloads": [ + { + "kind": "Deployment", + "name": "gitea" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "gitea-ingress", + "source": "gitea" + } + }, + { + "host": "secret.bstein.dev", + "path": "/", + "backend": { + "namespace": "vault", + "service": "vault", + "port": 8200, + "workloads": [ + { + "kind": "StatefulSet", + "name": "vault" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "vault", + "source": "vault" + } + }, + { + "host": "sso.bstein.dev", + "path": "/", + "backend": { + "namespace": "sso", + "service": "keycloak", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "keycloak" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "keycloak", + "source": "keycloak" + } + }, + { + "host": "stream.bstein.dev", + "path": "/", + "backend": { + "namespace": "jellyfin", + "service": "jellyfin", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "jellyfin" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "jellyfin", + "source": "jellyfin" + } + }, + { + "host": "vault.bstein.dev", + "path": "/", + "backend": { + "namespace": "vaultwarden", + "service": "vaultwarden-service", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "vaultwarden" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "vaultwarden-ingress", + "source": "vaultwarden" + } + } + ], + "helmrelease_host_hints": { + "gitops-ui:flux-system/weave-gitops": [ + "cd.bstein.dev" + ], + "harbor:harbor/harbor": [ + "registry.bstein.dev" + ], + "mailu:mailu-mailserver/mailu": [ + "bstein.dev", + "mail.bstein.dev" + ], + "monitoring:monitoring/alertmanager": [ + "alerts.bstein.dev" + ], + "monitoring:monitoring/grafana": [ + 
"metrics.bstein.dev", + "sso.bstein.dev" + ] + } +} diff --git a/services/comms/knowledge/catalog/atlas.yaml b/services/comms/knowledge/catalog/atlas.yaml new file mode 100644 index 0000000..06e2469 --- /dev/null +++ b/services/comms/knowledge/catalog/atlas.yaml @@ -0,0 +1,1799 @@ +# Generated by scripts/knowledge_render_atlas.py (do not edit by hand) +cluster: atlas +sources: +- name: ai-llm + path: services/ai-llm + targetNamespace: ai +- name: bstein-dev-home + path: services/bstein-dev-home + targetNamespace: bstein-dev-home +- name: ci-demo + path: services/ci-demo + targetNamespace: null +- name: communication + path: services/comms + targetNamespace: comms +- name: core + path: infrastructure/core + targetNamespace: null +- name: crypto + path: services/crypto + targetNamespace: crypto +- name: flux-system + path: clusters/atlas/flux-system + targetNamespace: null +- name: gitea + path: services/gitea + targetNamespace: gitea +- name: gitops-ui + path: services/gitops-ui + targetNamespace: flux-system +- name: harbor + path: services/harbor + targetNamespace: harbor +- name: helm + path: infrastructure/sources/helm + targetNamespace: flux-system +- name: jellyfin + path: services/jellyfin + targetNamespace: jellyfin +- name: jenkins + path: services/jenkins + targetNamespace: jenkins +- name: keycloak + path: services/keycloak + targetNamespace: sso +- name: longhorn-ui + path: infrastructure/longhorn/ui-ingress + targetNamespace: longhorn-system +- name: mailu + path: services/mailu + targetNamespace: mailu-mailserver +- name: metallb + path: infrastructure/metallb + targetNamespace: metallb-system +- name: monerod + path: services/crypto/monerod + targetNamespace: crypto +- name: monitoring + path: services/monitoring + targetNamespace: null +- name: nextcloud + path: services/nextcloud + targetNamespace: nextcloud +- name: nextcloud-mail-sync + path: services/nextcloud-mail-sync + targetNamespace: nextcloud +- name: oauth2-proxy + path: 
services/oauth2-proxy + targetNamespace: sso +- name: openldap + path: services/openldap + targetNamespace: sso +- name: pegasus + path: services/pegasus + targetNamespace: jellyfin +- name: sui-metrics + path: services/sui-metrics/overlays/atlas + targetNamespace: sui-metrics +- name: traefik + path: infrastructure/traefik + targetNamespace: traefik +- name: vault + path: services/vault + targetNamespace: vault +- name: vault-csi + path: infrastructure/vault-csi + targetNamespace: kube-system +- name: vaultwarden + path: services/vaultwarden + targetNamespace: vaultwarden +- name: xmr-miner + path: services/crypto/xmr-miner + targetNamespace: crypto +workloads: +- kind: Deployment + namespace: ai + name: ollama + labels: + app: ollama + serviceAccountName: null + nodeSelector: {} + images: + - ollama/ollama:latest +- kind: Deployment + namespace: bstein-dev-home + name: bstein-dev-home-backend + labels: + app: bstein-dev-home-backend + serviceAccountName: bstein-dev-home + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: 'true' + images: + - registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-84 +- kind: Deployment + namespace: bstein-dev-home + name: bstein-dev-home-frontend + labels: + app: bstein-dev-home-frontend + serviceAccountName: null + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: 'true' + images: + - registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-84 +- kind: Deployment + namespace: bstein-dev-home + name: chat-ai-gateway + labels: + app: chat-ai-gateway + serviceAccountName: null + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: 'true' + images: + - python:3.11-slim +- kind: Deployment + namespace: ci-demo + name: ci-demo + labels: + app.kubernetes.io/name: ci-demo + serviceAccountName: null + nodeSelector: + hardware: rpi4 + images: + - registry.bstein.dev/infra/ci-demo:v0.0.0-3 +- kind: Deployment + namespace: comms + name: atlasbot + labels: + app: 
atlasbot + serviceAccountName: atlasbot + nodeSelector: + hardware: rpi5 + images: + - python:3.11-slim +- kind: Deployment + namespace: comms + name: coturn + labels: + app: coturn + serviceAccountName: null + nodeSelector: + hardware: rpi5 + images: + - ghcr.io/coturn/coturn:4.6.2 +- kind: Deployment + namespace: comms + name: element-call + labels: + app: element-call + serviceAccountName: null + nodeSelector: + hardware: rpi5 + images: + - ghcr.io/element-hq/element-call:latest +- kind: Deployment + namespace: comms + name: livekit + labels: + app: livekit + serviceAccountName: null + nodeSelector: + hardware: rpi5 + images: + - livekit/livekit-server:v1.9.0 +- kind: Deployment + namespace: comms + name: livekit-token-service + labels: + app: livekit-token-service + serviceAccountName: null + nodeSelector: + hardware: rpi5 + images: + - ghcr.io/element-hq/lk-jwt-service:0.3.0 +- kind: Deployment + namespace: comms + name: matrix-authentication-service + labels: + app: matrix-authentication-service + serviceAccountName: null + nodeSelector: + hardware: rpi5 + images: + - ghcr.io/element-hq/matrix-authentication-service:1.8.0 +- kind: Deployment + namespace: comms + name: matrix-guest-register + labels: + app.kubernetes.io/name: matrix-guest-register + serviceAccountName: null + nodeSelector: {} + images: + - python:3.11-slim +- kind: Deployment + namespace: comms + name: matrix-wellknown + labels: + app: matrix-wellknown + serviceAccountName: null + nodeSelector: {} + images: + - nginx:1.27-alpine +- kind: Deployment + namespace: comms + name: othrys-element-element-web + labels: + app.kubernetes.io/instance: othrys-element + app.kubernetes.io/name: element-web + serviceAccountName: othrys-element-element-web + nodeSelector: + hardware: rpi5 + images: + - ghcr.io/element-hq/element-web:v1.12.6 +- kind: Deployment + namespace: comms + name: othrys-synapse-matrix-synapse + labels: + app.kubernetes.io/component: synapse + app.kubernetes.io/instance: othrys-synapse 
+ app.kubernetes.io/name: matrix-synapse + serviceAccountName: default + nodeSelector: + hardware: rpi5 + images: + - ghcr.io/element-hq/synapse:v1.144.0 +- kind: Deployment + namespace: comms + name: othrys-synapse-redis-master + labels: + app.kubernetes.io/component: master + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: redis + helm.sh/chart: redis-17.17.1 + serviceAccountName: othrys-synapse-redis + nodeSelector: {} + images: + - docker.io/bitnamilegacy/redis:7.0.12-debian-11-r34 +- kind: DaemonSet + namespace: crypto + name: monero-xmrig + labels: + app: monero-xmrig + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - ghcr.io/tari-project/xmrig:latest +- kind: Deployment + namespace: crypto + name: monero-p2pool + labels: + app: monero-p2pool + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - debian:bookworm-slim +- kind: Deployment + namespace: crypto + name: monerod + labels: + app: monerod + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - registry.bstein.dev/crypto/monerod:0.18.4.1 +- kind: Deployment + namespace: flux-system + name: helm-controller + labels: + app: helm-controller + app.kubernetes.io/component: helm-controller + app.kubernetes.io/instance: flux-system + app.kubernetes.io/part-of: flux + app.kubernetes.io/version: v2.7.5 + serviceAccountName: helm-controller + nodeSelector: + kubernetes.io/os: linux + images: + - ghcr.io/fluxcd/helm-controller:v1.4.5 +- kind: Deployment + namespace: flux-system + name: image-automation-controller + labels: + app: image-automation-controller + app.kubernetes.io/component: image-automation-controller + app.kubernetes.io/instance: flux-system + app.kubernetes.io/part-of: flux + app.kubernetes.io/version: v2.7.5 + serviceAccountName: image-automation-controller + nodeSelector: + kubernetes.io/os: linux + images: + 
- ghcr.io/fluxcd/image-automation-controller:v1.0.4 +- kind: Deployment + namespace: flux-system + name: image-reflector-controller + labels: + app: image-reflector-controller + app.kubernetes.io/component: image-reflector-controller + app.kubernetes.io/instance: flux-system + app.kubernetes.io/part-of: flux + app.kubernetes.io/version: v2.7.5 + serviceAccountName: image-reflector-controller + nodeSelector: + kubernetes.io/os: linux + images: + - ghcr.io/fluxcd/image-reflector-controller:v1.0.4 +- kind: Deployment + namespace: flux-system + name: kustomize-controller + labels: + app: kustomize-controller + app.kubernetes.io/component: kustomize-controller + app.kubernetes.io/instance: flux-system + app.kubernetes.io/part-of: flux + app.kubernetes.io/version: v2.7.5 + serviceAccountName: kustomize-controller + nodeSelector: + kubernetes.io/os: linux + images: + - ghcr.io/fluxcd/kustomize-controller:v1.7.3 +- kind: Deployment + namespace: flux-system + name: notification-controller + labels: + app: notification-controller + app.kubernetes.io/component: notification-controller + app.kubernetes.io/instance: flux-system + app.kubernetes.io/part-of: flux + app.kubernetes.io/version: v2.7.5 + serviceAccountName: notification-controller + nodeSelector: + kubernetes.io/os: linux + images: + - ghcr.io/fluxcd/notification-controller:v1.7.5 +- kind: Deployment + namespace: flux-system + name: source-controller + labels: + app: source-controller + app.kubernetes.io/component: source-controller + app.kubernetes.io/instance: flux-system + app.kubernetes.io/part-of: flux + app.kubernetes.io/version: v2.7.5 + serviceAccountName: source-controller + nodeSelector: + kubernetes.io/os: linux + images: + - ghcr.io/fluxcd/source-controller:v1.7.4 +- kind: Deployment + namespace: gitea + name: gitea + labels: + app: gitea + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - gitea/gitea:1.23 +- kind: Deployment + namespace: jellyfin + name: 
jellyfin + labels: + app: jellyfin + serviceAccountName: null + nodeSelector: {} + images: + - docker.io/jellyfin/jellyfin:10.11.5 +- kind: Deployment + namespace: jellyfin + name: pegasus + labels: + app: pegasus + serviceAccountName: null + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: 'true' + images: + - alpine:3.20 + - registry.bstein.dev/streaming/pegasus:1.2.32 +- kind: Deployment + namespace: jenkins + name: jenkins + labels: + app: jenkins + serviceAccountName: jenkins + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: 'true' + images: + - jenkins/jenkins:2.528.3-jdk21 +- kind: DaemonSet + namespace: kube-system + name: nvidia-device-plugin-jetson + labels: + app.kubernetes.io/instance: jetson + app.kubernetes.io/name: nvidia-device-plugin + serviceAccountName: null + nodeSelector: + jetson: 'true' + kubernetes.io/arch: arm64 + images: + - nvcr.io/nvidia/k8s-device-plugin:v0.16.2 +- kind: DaemonSet + namespace: kube-system + name: nvidia-device-plugin-minipc + labels: + app.kubernetes.io/instance: titan22 + app.kubernetes.io/name: nvidia-device-plugin + serviceAccountName: null + nodeSelector: + kubernetes.io/arch: amd64 + kubernetes.io/hostname: titan-22 + images: + - nvcr.io/nvidia/k8s-device-plugin:v0.16.2 +- kind: DaemonSet + namespace: kube-system + name: nvidia-device-plugin-tethys + labels: + app.kubernetes.io/instance: titan24 + app.kubernetes.io/name: nvidia-device-plugin + serviceAccountName: null + nodeSelector: + kubernetes.io/arch: amd64 + kubernetes.io/hostname: titan-24 + images: + - nvcr.io/nvidia/k8s-device-plugin:v0.16.2 +- kind: DaemonSet + namespace: kube-system + name: vault-csi-provider + labels: + app.kubernetes.io/name: vault-csi-provider + serviceAccountName: vault-csi-provider + nodeSelector: + kubernetes.io/os: linux + images: + - hashicorp/vault-csi-provider:1.7.0 +- kind: Deployment + namespace: longhorn-system + name: oauth2-proxy-longhorn + labels: + app: 
oauth2-proxy-longhorn + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 +- kind: DaemonSet + namespace: mailu-mailserver + name: vip-controller + labels: + app: vip-controller + serviceAccountName: vip-controller + nodeSelector: + mailu.bstein.dev/vip: 'true' + images: + - lachlanevenson/k8s-kubectl:latest +- kind: Deployment + namespace: mailu-mailserver + name: mailu-sync-listener + labels: + app: mailu-sync-listener + serviceAccountName: null + nodeSelector: {} + images: + - python:3.11-alpine +- kind: DaemonSet + namespace: metallb-system + name: metallb-speaker + labels: + app.kubernetes.io/component: speaker + app.kubernetes.io/instance: metallb + app.kubernetes.io/name: metallb + serviceAccountName: metallb-speaker + nodeSelector: + kubernetes.io/os: linux + images: + - quay.io/frrouting/frr:10.4.1 + - quay.io/metallb/speaker:v0.15.3 +- kind: Deployment + namespace: metallb-system + name: metallb-controller + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/instance: metallb + app.kubernetes.io/name: metallb + serviceAccountName: metallb-controller + nodeSelector: + kubernetes.io/os: linux + images: + - quay.io/metallb/controller:v0.15.3 +- kind: DaemonSet + namespace: monitoring + name: dcgm-exporter + labels: + app: dcgm-exporter + serviceAccountName: default + nodeSelector: {} + images: + - registry.bstein.dev/monitoring/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04 +- kind: Deployment + namespace: monitoring + name: postmark-exporter + labels: + app: postmark-exporter + serviceAccountName: null + nodeSelector: {} + images: + - python:3.12-alpine +- kind: Deployment + namespace: nextcloud + name: collabora + labels: + app: collabora + serviceAccountName: null + nodeSelector: + hardware: rpi5 + images: + - collabora/code:latest +- kind: Deployment + namespace: nextcloud + name: nextcloud + labels: + app: nextcloud + serviceAccountName: null + nodeSelector: 
+ hardware: rpi5 + images: + - nextcloud:29-apache +- kind: Deployment + namespace: sso + name: keycloak + labels: + app: keycloak + serviceAccountName: null + nodeSelector: {} + images: + - quay.io/keycloak/keycloak:26.0.7 +- kind: Deployment + namespace: sso + name: oauth2-proxy + labels: + app: oauth2-proxy + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 +- kind: StatefulSet + namespace: sso + name: openldap + labels: + app: openldap + serviceAccountName: null + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: 'true' + images: + - docker.io/osixia/openldap:1.5.0 +- kind: Deployment + namespace: sui-metrics + name: sui-metrics + labels: + app: sui-metrics + serviceAccountName: sui-metrics + nodeSelector: + kubernetes.io/hostname: titan-24 + images: + - victoriametrics/vmagent:v1.103.0 +- kind: Deployment + namespace: traefik + name: traefik + labels: + app: traefik + serviceAccountName: traefik-ingress-controller + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - traefik:v3.3.3 +- kind: StatefulSet + namespace: vault + name: vault + labels: + app: vault + serviceAccountName: vault + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: 'true' + images: + - hashicorp/vault:1.17.6 +- kind: Deployment + namespace: vaultwarden + name: vaultwarden + labels: + app: vaultwarden + serviceAccountName: null + nodeSelector: {} + images: + - vaultwarden/server:1.33.2 +services: +- namespace: ai + name: ollama + type: ClusterIP + selector: + app: ollama + ports: + - name: http + port: 11434 + targetPort: 11434 + protocol: TCP +- namespace: bstein-dev-home + name: bstein-dev-home-backend + type: ClusterIP + selector: + app: bstein-dev-home-backend + ports: + - name: http + port: 80 + targetPort: 8080 + protocol: TCP +- namespace: bstein-dev-home + name: bstein-dev-home-frontend + type: ClusterIP + selector: + app: 
bstein-dev-home-frontend + ports: + - name: http + port: 80 + targetPort: 80 + protocol: TCP +- namespace: bstein-dev-home + name: chat-ai-gateway + type: ClusterIP + selector: + app: chat-ai-gateway + ports: + - name: http + port: 80 + targetPort: 8080 + protocol: TCP +- namespace: ci-demo + name: ci-demo + type: ClusterIP + selector: + app.kubernetes.io/name: ci-demo + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +- namespace: comms + name: coturn + type: LoadBalancer + selector: + app: coturn + ports: + - name: turn-udp + port: 3478 + targetPort: 3478 + protocol: UDP + - name: turn-tcp + port: 3478 + targetPort: 3478 + protocol: TCP + - name: turn-tls + port: 5349 + targetPort: 5349 + protocol: TCP + - name: relay-50000 + port: 50000 + targetPort: 50000 + protocol: UDP + - name: relay-50001 + port: 50001 + targetPort: 50001 + protocol: UDP + - name: relay-50002 + port: 50002 + targetPort: 50002 + protocol: UDP + - name: relay-50003 + port: 50003 + targetPort: 50003 + protocol: UDP + - name: relay-50004 + port: 50004 + targetPort: 50004 + protocol: UDP + - name: relay-50005 + port: 50005 + targetPort: 50005 + protocol: UDP + - name: relay-50006 + port: 50006 + targetPort: 50006 + protocol: UDP + - name: relay-50007 + port: 50007 + targetPort: 50007 + protocol: UDP + - name: relay-50008 + port: 50008 + targetPort: 50008 + protocol: UDP + - name: relay-50009 + port: 50009 + targetPort: 50009 + protocol: UDP + - name: relay-50010 + port: 50010 + targetPort: 50010 + protocol: UDP + - name: relay-50011 + port: 50011 + targetPort: 50011 + protocol: UDP + - name: relay-50012 + port: 50012 + targetPort: 50012 + protocol: UDP + - name: relay-50013 + port: 50013 + targetPort: 50013 + protocol: UDP + - name: relay-50014 + port: 50014 + targetPort: 50014 + protocol: UDP + - name: relay-50015 + port: 50015 + targetPort: 50015 + protocol: UDP + - name: relay-50016 + port: 50016 + targetPort: 50016 + protocol: UDP + - name: relay-50017 + port: 50017 + 
targetPort: 50017 + protocol: UDP + - name: relay-50018 + port: 50018 + targetPort: 50018 + protocol: UDP + - name: relay-50019 + port: 50019 + targetPort: 50019 + protocol: UDP + - name: relay-50020 + port: 50020 + targetPort: 50020 + protocol: UDP + - name: relay-50021 + port: 50021 + targetPort: 50021 + protocol: UDP + - name: relay-50022 + port: 50022 + targetPort: 50022 + protocol: UDP + - name: relay-50023 + port: 50023 + targetPort: 50023 + protocol: UDP + - name: relay-50024 + port: 50024 + targetPort: 50024 + protocol: UDP + - name: relay-50025 + port: 50025 + targetPort: 50025 + protocol: UDP + - name: relay-50026 + port: 50026 + targetPort: 50026 + protocol: UDP + - name: relay-50027 + port: 50027 + targetPort: 50027 + protocol: UDP + - name: relay-50028 + port: 50028 + targetPort: 50028 + protocol: UDP + - name: relay-50029 + port: 50029 + targetPort: 50029 + protocol: UDP + - name: relay-50030 + port: 50030 + targetPort: 50030 + protocol: UDP + - name: relay-50031 + port: 50031 + targetPort: 50031 + protocol: UDP + - name: relay-50032 + port: 50032 + targetPort: 50032 + protocol: UDP + - name: relay-50033 + port: 50033 + targetPort: 50033 + protocol: UDP + - name: relay-50034 + port: 50034 + targetPort: 50034 + protocol: UDP + - name: relay-50035 + port: 50035 + targetPort: 50035 + protocol: UDP + - name: relay-50036 + port: 50036 + targetPort: 50036 + protocol: UDP + - name: relay-50037 + port: 50037 + targetPort: 50037 + protocol: UDP + - name: relay-50038 + port: 50038 + targetPort: 50038 + protocol: UDP + - name: relay-50039 + port: 50039 + targetPort: 50039 + protocol: UDP + - name: relay-50040 + port: 50040 + targetPort: 50040 + protocol: UDP + - name: relay-50041 + port: 50041 + targetPort: 50041 + protocol: UDP + - name: relay-50042 + port: 50042 + targetPort: 50042 + protocol: UDP + - name: relay-50043 + port: 50043 + targetPort: 50043 + protocol: UDP + - name: relay-50044 + port: 50044 + targetPort: 50044 + protocol: UDP + - name: relay-50045 
+ port: 50045 + targetPort: 50045 + protocol: UDP + - name: relay-50046 + port: 50046 + targetPort: 50046 + protocol: UDP + - name: relay-50047 + port: 50047 + targetPort: 50047 + protocol: UDP + - name: relay-50048 + port: 50048 + targetPort: 50048 + protocol: UDP + - name: relay-50049 + port: 50049 + targetPort: 50049 + protocol: UDP + - name: relay-50050 + port: 50050 + targetPort: 50050 + protocol: UDP +- namespace: comms + name: element-call + type: ClusterIP + selector: + app: element-call + ports: + - name: http + port: 80 + targetPort: 8080 + protocol: TCP +- namespace: comms + name: livekit + type: LoadBalancer + selector: + app: livekit + ports: + - name: http + port: 7880 + targetPort: 7880 + protocol: TCP + - name: rtc-tcp + port: 7881 + targetPort: 7881 + protocol: TCP + - name: rtc-udp-7882 + port: 7882 + targetPort: 7882 + protocol: UDP + - name: rtc-udp-7883 + port: 7883 + targetPort: 7883 + protocol: UDP +- namespace: comms + name: livekit-token-service + type: ClusterIP + selector: + app: livekit-token-service + ports: + - name: http + port: 8080 + targetPort: 8080 + protocol: TCP +- namespace: comms + name: matrix-authentication-service + type: ClusterIP + selector: + app: matrix-authentication-service + ports: + - name: http + port: 8080 + targetPort: http + protocol: TCP + - name: internal + port: 8081 + targetPort: internal + protocol: TCP +- namespace: comms + name: matrix-guest-register + type: ClusterIP + selector: + app.kubernetes.io/name: matrix-guest-register + ports: + - name: http + port: 8080 + targetPort: http + protocol: TCP +- namespace: comms + name: matrix-wellknown + type: ClusterIP + selector: + app: matrix-wellknown + ports: + - name: http + port: 80 + targetPort: 80 + protocol: TCP +- namespace: comms + name: othrys-element-element-web + type: ClusterIP + selector: + app.kubernetes.io/instance: othrys-element + app.kubernetes.io/name: element-web + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +- 
namespace: comms + name: othrys-synapse-matrix-synapse + type: ClusterIP + selector: + app.kubernetes.io/component: synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/name: matrix-synapse + ports: + - name: http + port: 8008 + targetPort: http + protocol: TCP +- namespace: comms + name: othrys-synapse-redis-headless + type: ClusterIP + selector: + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/name: redis + ports: + - name: tcp-redis + port: 6379 + targetPort: redis + protocol: TCP +- namespace: comms + name: othrys-synapse-redis-master + type: ClusterIP + selector: + app.kubernetes.io/component: master + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/name: redis + ports: + - name: tcp-redis + port: 6379 + targetPort: redis + protocol: TCP +- namespace: comms + name: othrys-synapse-replication + type: ClusterIP + selector: + app.kubernetes.io/component: synapse + app.kubernetes.io/instance: othrys-synapse + app.kubernetes.io/name: matrix-synapse + ports: + - name: replication + port: 9093 + targetPort: replication + protocol: TCP +- namespace: crypto + name: monerod + type: ClusterIP + selector: + app: monerod + ports: + - name: rpc + port: 18081 + targetPort: 18081 + protocol: TCP + - name: p2p + port: 18080 + targetPort: 18080 + protocol: TCP + - name: zmq + port: 18083 + targetPort: 18083 + protocol: TCP +- namespace: crypto + name: p2pool + type: ClusterIP + selector: + app: p2pool + ports: + - name: stratum + port: 3333 + targetPort: 3333 + protocol: TCP +- namespace: flux-system + name: notification-controller + type: ClusterIP + selector: + app: notification-controller + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +- namespace: flux-system + name: source-controller + type: ClusterIP + selector: + app: source-controller + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +- namespace: flux-system + name: webhook-receiver + type: ClusterIP + selector: + app: 
notification-controller + ports: + - name: http + port: 80 + targetPort: http-webhook + protocol: TCP +- namespace: gitea + name: gitea + type: ClusterIP + selector: + app: gitea + ports: + - name: http + port: 3000 + targetPort: 3000 + protocol: TCP +- namespace: gitea + name: gitea-ssh + type: NodePort + selector: + app: gitea + ports: + - name: ssh + port: 2242 + targetPort: 2242 + protocol: TCP +- namespace: jellyfin + name: jellyfin + type: ClusterIP + selector: + app: jellyfin + ports: + - name: http + port: 80 + targetPort: 8096 + protocol: TCP +- namespace: jellyfin + name: pegasus + type: ClusterIP + selector: + app: pegasus + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +- namespace: jenkins + name: jenkins + type: ClusterIP + selector: + app: jenkins + ports: + - name: http + port: 8080 + targetPort: 8080 + protocol: TCP + - name: agent-listener + port: 50000 + targetPort: 50000 + protocol: TCP +- namespace: kube-system + name: traefik + type: LoadBalancer + selector: + app.kubernetes.io/instance: traefik-kube-system + app.kubernetes.io/name: traefik + ports: + - name: web + port: 80 + targetPort: web + protocol: TCP + - name: websecure + port: 443 + targetPort: websecure + protocol: TCP +- namespace: longhorn-system + name: oauth2-proxy-longhorn + type: ClusterIP + selector: + app: oauth2-proxy-longhorn + ports: + - name: http + port: 80 + targetPort: 4180 + protocol: TCP +- namespace: mailu-mailserver + name: mailu-front-lb + type: LoadBalancer + selector: + app.kubernetes.io/component: front + app.kubernetes.io/instance: mailu + app.kubernetes.io/name: mailu + ports: + - name: smtp + port: 25 + targetPort: 25 + protocol: TCP + - name: smtps + port: 465 + targetPort: 465 + protocol: TCP + - name: submission + port: 587 + targetPort: 587 + protocol: TCP + - name: imaps + port: 993 + targetPort: 993 + protocol: TCP + - name: pop3s + port: 995 + targetPort: 995 + protocol: TCP + - name: sieve + port: 4190 + targetPort: 4190 + 
protocol: TCP +- namespace: mailu-mailserver + name: mailu-sync-listener + type: ClusterIP + selector: + app: mailu-sync-listener + ports: + - name: http + port: 8080 + targetPort: 8080 + protocol: TCP +- namespace: metallb-system + name: metallb-webhook-service + type: ClusterIP + selector: + app.kubernetes.io/component: controller + app.kubernetes.io/instance: metallb + app.kubernetes.io/name: metallb + ports: + - name: null + port: 443 + targetPort: 9443 + protocol: TCP +- namespace: monitoring + name: dcgm-exporter + type: ClusterIP + selector: + app: dcgm-exporter + ports: + - name: metrics + port: 9400 + targetPort: metrics + protocol: TCP +- namespace: monitoring + name: postmark-exporter + type: ClusterIP + selector: + app: postmark-exporter + ports: + - name: http + port: 8000 + targetPort: http + protocol: TCP +- namespace: nextcloud + name: collabora + type: ClusterIP + selector: + app: collabora + ports: + - name: http + port: 9980 + targetPort: http + protocol: TCP +- namespace: nextcloud + name: nextcloud + type: ClusterIP + selector: + app: nextcloud + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +- namespace: sso + name: keycloak + type: ClusterIP + selector: + app: keycloak + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +- namespace: sso + name: oauth2-proxy + type: ClusterIP + selector: + app: oauth2-proxy + ports: + - name: http + port: 80 + targetPort: 4180 + protocol: TCP +- namespace: sso + name: openldap + type: ClusterIP + selector: + app: openldap + ports: + - name: ldap + port: 389 + targetPort: ldap + protocol: TCP + - name: ldaps + port: 636 + targetPort: ldaps + protocol: TCP +- namespace: sui-metrics + name: sui-metrics + type: ClusterIP + selector: + app: sui-metrics + ports: + - name: http + port: 8429 + targetPort: 8429 + protocol: TCP +- namespace: traefik + name: traefik-metrics + type: ClusterIP + selector: + app: traefik + ports: + - name: metrics + port: 9100 + targetPort: metrics + 
protocol: TCP +- namespace: vault + name: vault + type: ClusterIP + selector: + app: vault + ports: + - name: api + port: 8200 + targetPort: 8200 + protocol: TCP + - name: cluster + port: 8201 + targetPort: 8201 + protocol: TCP +- namespace: vault + name: vault-internal + type: ClusterIP + selector: + app: vault + ports: + - name: api + port: 8200 + targetPort: 8200 + protocol: TCP + - name: cluster + port: 8201 + targetPort: 8201 + protocol: TCP +- namespace: vaultwarden + name: vaultwarden-service + type: ClusterIP + selector: + app: vaultwarden + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +http_endpoints: +- host: auth.bstein.dev + path: / + backend: + namespace: sso + service: oauth2-proxy + port: 80 + workloads: + - kind: Deployment + name: oauth2-proxy + via: + kind: Ingress + name: oauth2-proxy + source: oauth2-proxy +- host: bstein.dev + path: / + backend: + namespace: bstein-dev-home + service: bstein-dev-home-frontend + port: 80 + workloads: + - kind: Deployment + name: bstein-dev-home-frontend + via: + kind: Ingress + name: bstein-dev-home + source: bstein-dev-home +- host: bstein.dev + path: /.well-known/matrix/client + backend: + namespace: comms + service: matrix-wellknown + port: 80 + workloads: &id001 + - kind: Deployment + name: matrix-wellknown + via: + kind: Ingress + name: matrix-wellknown-bstein-dev + source: communication +- host: bstein.dev + path: /.well-known/matrix/server + backend: + namespace: comms + service: matrix-wellknown + port: 80 + workloads: *id001 + via: + kind: Ingress + name: matrix-wellknown-bstein-dev + source: communication +- host: bstein.dev + path: /api + backend: + namespace: bstein-dev-home + service: bstein-dev-home-backend + port: 80 + workloads: + - kind: Deployment + name: bstein-dev-home-backend + via: + kind: Ingress + name: bstein-dev-home + source: bstein-dev-home +- host: call.live.bstein.dev + path: / + backend: + namespace: comms + service: element-call + port: 80 + workloads: + - 
kind: Deployment + name: element-call + via: + kind: Ingress + name: element-call + source: communication +- host: chat.ai.bstein.dev + path: / + backend: + namespace: bstein-dev-home + service: chat-ai-gateway + port: 80 + workloads: + - kind: Deployment + name: chat-ai-gateway + via: + kind: Ingress + name: bstein-dev-home + source: bstein-dev-home +- host: ci.bstein.dev + path: / + backend: + namespace: jenkins + service: jenkins + port: http + workloads: + - kind: Deployment + name: jenkins + via: + kind: Ingress + name: jenkins + source: jenkins +- host: cloud.bstein.dev + path: / + backend: + namespace: nextcloud + service: nextcloud + port: 80 + workloads: + - kind: Deployment + name: nextcloud + via: + kind: Ingress + name: nextcloud + source: nextcloud +- host: kit.live.bstein.dev + path: /livekit/jwt + backend: + namespace: comms + service: livekit-token-service + port: 8080 + workloads: + - kind: Deployment + name: livekit-token-service + via: + kind: Ingress + name: livekit-jwt-ingress + source: communication +- host: kit.live.bstein.dev + path: /livekit/sfu + backend: + namespace: comms + service: livekit + port: 7880 + workloads: + - kind: Deployment + name: livekit + via: + kind: Ingress + name: livekit-ingress + source: communication +- host: live.bstein.dev + path: / + backend: + namespace: comms + service: othrys-element-element-web + port: 80 + workloads: + - kind: Deployment + name: othrys-element-element-web + via: + kind: Ingress + name: othrys-element-element-web + source: communication +- host: live.bstein.dev + path: /.well-known/matrix/client + backend: + namespace: comms + service: matrix-wellknown + port: 80 + workloads: *id001 + via: + kind: Ingress + name: matrix-wellknown + source: communication +- host: live.bstein.dev + path: /.well-known/matrix/server + backend: + namespace: comms + service: matrix-wellknown + port: 80 + workloads: *id001 + via: + kind: Ingress + name: matrix-wellknown + source: communication +- host: 
live.bstein.dev + path: /_matrix + backend: + namespace: comms + service: othrys-synapse-matrix-synapse + port: 8008 + workloads: &id002 + - kind: Deployment + name: othrys-synapse-matrix-synapse + via: + kind: Ingress + name: matrix-routing + source: communication +- host: longhorn.bstein.dev + path: / + backend: + namespace: longhorn-system + service: oauth2-proxy-longhorn + port: 80 + workloads: + - kind: Deployment + name: oauth2-proxy-longhorn + via: + kind: Ingress + name: longhorn-ingress + source: longhorn-ui +- host: mail.bstein.dev + path: / + backend: + namespace: mailu-mailserver + service: mailu-front + port: 443 + workloads: [] + via: + kind: IngressRoute + name: mailu + source: mailu +- host: matrix.live.bstein.dev + path: / + backend: + namespace: comms + service: matrix-authentication-service + port: 8080 + workloads: &id003 + - kind: Deployment + name: matrix-authentication-service + via: + kind: Ingress + name: matrix-routing + source: communication +- host: matrix.live.bstein.dev + path: /.well-known/matrix/client + backend: + namespace: comms + service: matrix-wellknown + port: 80 + workloads: *id001 + via: + kind: Ingress + name: matrix-wellknown-matrix-live + source: communication +- host: matrix.live.bstein.dev + path: /.well-known/matrix/server + backend: + namespace: comms + service: matrix-wellknown + port: 80 + workloads: *id001 + via: + kind: Ingress + name: matrix-wellknown-matrix-live + source: communication +- host: matrix.live.bstein.dev + path: /_matrix + backend: + namespace: comms + service: othrys-synapse-matrix-synapse + port: 8008 + workloads: *id002 + via: + kind: Ingress + name: matrix-routing + source: communication +- host: matrix.live.bstein.dev + path: /_matrix/client/r0/register + backend: + namespace: comms + service: matrix-guest-register + port: 8080 + workloads: &id004 + - kind: Deployment + name: matrix-guest-register + via: + kind: Ingress + name: matrix-routing + source: communication +- host: 
matrix.live.bstein.dev + path: /_matrix/client/v3/login + backend: + namespace: comms + service: matrix-authentication-service + port: 8080 + workloads: *id003 + via: + kind: Ingress + name: matrix-routing + source: communication +- host: matrix.live.bstein.dev + path: /_matrix/client/v3/logout + backend: + namespace: comms + service: matrix-authentication-service + port: 8080 + workloads: *id003 + via: + kind: Ingress + name: matrix-routing + source: communication +- host: matrix.live.bstein.dev + path: /_matrix/client/v3/refresh + backend: + namespace: comms + service: matrix-authentication-service + port: 8080 + workloads: *id003 + via: + kind: Ingress + name: matrix-routing + source: communication +- host: matrix.live.bstein.dev + path: /_matrix/client/v3/register + backend: + namespace: comms + service: matrix-guest-register + port: 8080 + workloads: *id004 + via: + kind: Ingress + name: matrix-routing + source: communication +- host: matrix.live.bstein.dev + path: /_synapse + backend: + namespace: comms + service: othrys-synapse-matrix-synapse + port: 8008 + workloads: *id002 + via: + kind: Ingress + name: matrix-routing + source: communication +- host: monero.bstein.dev + path: / + backend: + namespace: crypto + service: monerod + port: 18081 + workloads: + - kind: Deployment + name: monerod + via: + kind: Ingress + name: monerod + source: monerod +- host: office.bstein.dev + path: / + backend: + namespace: nextcloud + service: collabora + port: 9980 + workloads: + - kind: Deployment + name: collabora + via: + kind: Ingress + name: collabora + source: nextcloud +- host: pegasus.bstein.dev + path: / + backend: + namespace: jellyfin + service: pegasus + port: 80 + workloads: + - kind: Deployment + name: pegasus + via: + kind: Ingress + name: pegasus + source: pegasus +- host: scm.bstein.dev + path: / + backend: + namespace: gitea + service: gitea + port: 3000 + workloads: + - kind: Deployment + name: gitea + via: + kind: Ingress + name: gitea-ingress + source: 
gitea +- host: secret.bstein.dev + path: / + backend: + namespace: vault + service: vault + port: 8200 + workloads: + - kind: StatefulSet + name: vault + via: + kind: Ingress + name: vault + source: vault +- host: sso.bstein.dev + path: / + backend: + namespace: sso + service: keycloak + port: 80 + workloads: + - kind: Deployment + name: keycloak + via: + kind: Ingress + name: keycloak + source: keycloak +- host: stream.bstein.dev + path: / + backend: + namespace: jellyfin + service: jellyfin + port: 80 + workloads: + - kind: Deployment + name: jellyfin + via: + kind: Ingress + name: jellyfin + source: jellyfin +- host: vault.bstein.dev + path: / + backend: + namespace: vaultwarden + service: vaultwarden-service + port: 80 + workloads: + - kind: Deployment + name: vaultwarden + via: + kind: Ingress + name: vaultwarden-ingress + source: vaultwarden +helmrelease_host_hints: + gitops-ui:flux-system/weave-gitops: + - cd.bstein.dev + harbor:harbor/harbor: + - registry.bstein.dev + mailu:mailu-mailserver/mailu: + - bstein.dev + - mail.bstein.dev + monitoring:monitoring/alertmanager: + - alerts.bstein.dev + monitoring:monitoring/grafana: + - metrics.bstein.dev + - sso.bstein.dev diff --git a/services/comms/knowledge/catalog/runbooks.json b/services/comms/knowledge/catalog/runbooks.json new file mode 100644 index 0000000..d7356ca --- /dev/null +++ b/services/comms/knowledge/catalog/runbooks.json @@ -0,0 +1,73 @@ +[ + { + "path": "runbooks/ci-gitea-jenkins.md", + "title": "CI: Gitea \u2192 Jenkins pipeline", + "tags": [ + "atlas", + "ci", + "gitea", + "jenkins" + ], + "entrypoints": [ + "scm.bstein.dev", + "ci.bstein.dev" + ], + "source_paths": [ + "services/gitea", + "services/jenkins", + "scripts/jenkins_cred_sync.sh", + "scripts/gitea_cred_sync.sh" + ], + "body": "# CI: Gitea \u2192 Jenkins pipeline\n\n## What this is\nAtlas uses Gitea for source control and Jenkins for CI. 
Authentication is via Keycloak (SSO).\n\n## Where it is configured\n- Gitea manifests: `services/gitea/`\n- Jenkins manifests: `services/jenkins/`\n- Credential sync helpers: `scripts/gitea_cred_sync.sh`, `scripts/jenkins_cred_sync.sh`\n\n## What users do (typical flow)\n- Create a repo in Gitea.\n- Create/update a Jenkins job/pipeline that can fetch the repo.\n- Configure a webhook (or SCM polling) so pushes trigger builds.\n\n## Troubleshooting (common)\n- \u201cWebhook not firing\u201d: confirm ingress host, webhook URL, and Jenkins job is reachable.\n- \u201cAuth denied cloning\u201d: confirm Keycloak group membership and that Jenkins has a valid token/credential configured." + }, + { + "path": "runbooks/kb-authoring.md", + "title": "KB authoring: what to write (and what not to)", + "tags": [ + "atlas", + "kb", + "runbooks" + ], + "entrypoints": [], + "source_paths": [ + "knowledge/runbooks", + "scripts/knowledge_render_atlas.py" + ], + "body": "# KB authoring: what to write (and what not to)\n\n## The goal\nGive Atlas assistants enough grounded, Atlas-specific context to answer \u201chow do I\u2026?\u201d questions without guessing.\n\n## What to capture (high value)\n- User workflows: \u201cclick here, set X, expected result\u201d\n- Operator workflows: \u201cedit these files, reconcile this kustomization, verify with these commands\u201d\n- Wiring: \u201cthis host routes to this service; this service depends on Postgres/Vault/etc\u201d\n- Failure modes: exact error messages + the 2\u20135 checks that usually resolve them\n- Permissions: Keycloak groups/roles and what they unlock\n\n## What to avoid (low value / fluff)\n- Generic Kubernetes explanations (link to upstream docs instead)\n- Copy-pasting large manifests (prefer file paths + small snippets)\n- Anything that will drift quickly (render it from GitOps instead)\n- Any secret values (reference Secret/Vault locations by name only)\n\n## Document pattern (recommended)\nEach runbook should answer:\n- 
\u201cWhat is this?\u201d\n- \u201cWhat do users do?\u201d\n- \u201cWhat do operators change (where in Git)?\u201d\n- \u201cHow do we verify it works?\u201d\n- \u201cWhat breaks and how to debug it?\u201d" + }, + { + "path": "runbooks/observability.md", + "title": "Observability: Grafana + VictoriaMetrics (how to query safely)", + "tags": [ + "atlas", + "monitoring", + "grafana", + "victoriametrics" + ], + "entrypoints": [ + "metrics.bstein.dev", + "alerts.bstein.dev" + ], + "source_paths": [ + "services/monitoring" + ], + "body": "# Observability: Grafana + VictoriaMetrics (how to query safely)\n\n## Where it is configured\n- `services/monitoring/helmrelease.yaml` (Grafana + Alertmanager + VM values)\n- `services/monitoring/grafana-dashboard-*.yaml` (dashboards and their PromQL)\n\n## Using metrics as a \u201ctool\u201d for Atlas assistants\nThe safest pattern is: map a small set of intents \u2192 fixed PromQL queries, then summarize results.\n\nExamples (intents)\n- \u201cIs the cluster healthy?\u201d \u2192 node readiness + pod restart rate\n- \u201cWhy is Element Call failing?\u201d \u2192 LiveKit/coturn pod restarts + synapse errors + ingress 5xx\n- \u201cIs Jenkins slow?\u201d \u2192 pod CPU/memory + HTTP latency metrics (if exported)\n\n## Why dashboards are not the KB\nDashboards are great references, but the assistant should query VictoriaMetrics directly for live answers and keep the\nKB focused on wiring, runbooks, and stable conventions." 
+ }, + { + "path": "runbooks/template.md", + "title": "", + "tags": [ + "atlas", + "", + "" + ], + "entrypoints": [ + "" + ], + "source_paths": [ + "services/", + "clusters/atlas/<...>" + ], + "body": "# \n\n## What this is\n\n## For users (how to)\n\n## For operators (where configured)\n\n## Troubleshooting (symptoms \u2192 checks)" + } +] diff --git a/services/comms/knowledge/diagrams/atlas-http.mmd b/services/comms/knowledge/diagrams/atlas-http.mmd new file mode 100644 index 0000000..ddd33d8 --- /dev/null +++ b/services/comms/knowledge/diagrams/atlas-http.mmd @@ -0,0 +1,189 @@ +flowchart LR + host_auth_bstein_dev["auth.bstein.dev"] + svc_sso_oauth2_proxy["sso/oauth2-proxy (Service)"] + host_auth_bstein_dev --> svc_sso_oauth2_proxy + wl_sso_oauth2_proxy["sso/oauth2-proxy (Deployment)"] + svc_sso_oauth2_proxy --> wl_sso_oauth2_proxy + host_bstein_dev["bstein.dev"] + svc_bstein_dev_home_bstein_dev_home_frontend["bstein-dev-home/bstein-dev-home-frontend (Service)"] + host_bstein_dev --> svc_bstein_dev_home_bstein_dev_home_frontend + wl_bstein_dev_home_bstein_dev_home_frontend["bstein-dev-home/bstein-dev-home-frontend (Deployment)"] + svc_bstein_dev_home_bstein_dev_home_frontend --> wl_bstein_dev_home_bstein_dev_home_frontend + svc_comms_matrix_wellknown["comms/matrix-wellknown (Service)"] + host_bstein_dev --> svc_comms_matrix_wellknown + wl_comms_matrix_wellknown["comms/matrix-wellknown (Deployment)"] + svc_comms_matrix_wellknown --> wl_comms_matrix_wellknown + svc_bstein_dev_home_bstein_dev_home_backend["bstein-dev-home/bstein-dev-home-backend (Service)"] + host_bstein_dev --> svc_bstein_dev_home_bstein_dev_home_backend + wl_bstein_dev_home_bstein_dev_home_backend["bstein-dev-home/bstein-dev-home-backend (Deployment)"] + svc_bstein_dev_home_bstein_dev_home_backend --> wl_bstein_dev_home_bstein_dev_home_backend + host_call_live_bstein_dev["call.live.bstein.dev"] + svc_comms_element_call["comms/element-call (Service)"] + host_call_live_bstein_dev --> 
svc_comms_element_call + wl_comms_element_call["comms/element-call (Deployment)"] + svc_comms_element_call --> wl_comms_element_call + host_chat_ai_bstein_dev["chat.ai.bstein.dev"] + svc_bstein_dev_home_chat_ai_gateway["bstein-dev-home/chat-ai-gateway (Service)"] + host_chat_ai_bstein_dev --> svc_bstein_dev_home_chat_ai_gateway + wl_bstein_dev_home_chat_ai_gateway["bstein-dev-home/chat-ai-gateway (Deployment)"] + svc_bstein_dev_home_chat_ai_gateway --> wl_bstein_dev_home_chat_ai_gateway + host_ci_bstein_dev["ci.bstein.dev"] + svc_jenkins_jenkins["jenkins/jenkins (Service)"] + host_ci_bstein_dev --> svc_jenkins_jenkins + wl_jenkins_jenkins["jenkins/jenkins (Deployment)"] + svc_jenkins_jenkins --> wl_jenkins_jenkins + host_cloud_bstein_dev["cloud.bstein.dev"] + svc_nextcloud_nextcloud["nextcloud/nextcloud (Service)"] + host_cloud_bstein_dev --> svc_nextcloud_nextcloud + wl_nextcloud_nextcloud["nextcloud/nextcloud (Deployment)"] + svc_nextcloud_nextcloud --> wl_nextcloud_nextcloud + host_kit_live_bstein_dev["kit.live.bstein.dev"] + svc_comms_livekit_token_service["comms/livekit-token-service (Service)"] + host_kit_live_bstein_dev --> svc_comms_livekit_token_service + wl_comms_livekit_token_service["comms/livekit-token-service (Deployment)"] + svc_comms_livekit_token_service --> wl_comms_livekit_token_service + svc_comms_livekit["comms/livekit (Service)"] + host_kit_live_bstein_dev --> svc_comms_livekit + wl_comms_livekit["comms/livekit (Deployment)"] + svc_comms_livekit --> wl_comms_livekit + host_live_bstein_dev["live.bstein.dev"] + svc_comms_othrys_element_element_web["comms/othrys-element-element-web (Service)"] + host_live_bstein_dev --> svc_comms_othrys_element_element_web + wl_comms_othrys_element_element_web["comms/othrys-element-element-web (Deployment)"] + svc_comms_othrys_element_element_web --> wl_comms_othrys_element_element_web + host_live_bstein_dev --> svc_comms_matrix_wellknown + 
svc_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Service)"] + host_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse + wl_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Deployment)"] + svc_comms_othrys_synapse_matrix_synapse --> wl_comms_othrys_synapse_matrix_synapse + host_longhorn_bstein_dev["longhorn.bstein.dev"] + svc_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Service)"] + host_longhorn_bstein_dev --> svc_longhorn_system_oauth2_proxy_longhorn + wl_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Deployment)"] + svc_longhorn_system_oauth2_proxy_longhorn --> wl_longhorn_system_oauth2_proxy_longhorn + host_mail_bstein_dev["mail.bstein.dev"] + svc_mailu_mailserver_mailu_front["mailu-mailserver/mailu-front (Service)"] + host_mail_bstein_dev --> svc_mailu_mailserver_mailu_front + host_matrix_live_bstein_dev["matrix.live.bstein.dev"] + svc_comms_matrix_authentication_service["comms/matrix-authentication-service (Service)"] + host_matrix_live_bstein_dev --> svc_comms_matrix_authentication_service + wl_comms_matrix_authentication_service["comms/matrix-authentication-service (Deployment)"] + svc_comms_matrix_authentication_service --> wl_comms_matrix_authentication_service + host_matrix_live_bstein_dev --> svc_comms_matrix_wellknown + host_matrix_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse + svc_comms_matrix_guest_register["comms/matrix-guest-register (Service)"] + host_matrix_live_bstein_dev --> svc_comms_matrix_guest_register + wl_comms_matrix_guest_register["comms/matrix-guest-register (Deployment)"] + svc_comms_matrix_guest_register --> wl_comms_matrix_guest_register + host_monero_bstein_dev["monero.bstein.dev"] + svc_crypto_monerod["crypto/monerod (Service)"] + host_monero_bstein_dev --> svc_crypto_monerod + wl_crypto_monerod["crypto/monerod (Deployment)"] + svc_crypto_monerod --> wl_crypto_monerod + 
host_office_bstein_dev["office.bstein.dev"] + svc_nextcloud_collabora["nextcloud/collabora (Service)"] + host_office_bstein_dev --> svc_nextcloud_collabora + wl_nextcloud_collabora["nextcloud/collabora (Deployment)"] + svc_nextcloud_collabora --> wl_nextcloud_collabora + host_pegasus_bstein_dev["pegasus.bstein.dev"] + svc_jellyfin_pegasus["jellyfin/pegasus (Service)"] + host_pegasus_bstein_dev --> svc_jellyfin_pegasus + wl_jellyfin_pegasus["jellyfin/pegasus (Deployment)"] + svc_jellyfin_pegasus --> wl_jellyfin_pegasus + host_scm_bstein_dev["scm.bstein.dev"] + svc_gitea_gitea["gitea/gitea (Service)"] + host_scm_bstein_dev --> svc_gitea_gitea + wl_gitea_gitea["gitea/gitea (Deployment)"] + svc_gitea_gitea --> wl_gitea_gitea + host_secret_bstein_dev["secret.bstein.dev"] + svc_vault_vault["vault/vault (Service)"] + host_secret_bstein_dev --> svc_vault_vault + wl_vault_vault["vault/vault (StatefulSet)"] + svc_vault_vault --> wl_vault_vault + host_sso_bstein_dev["sso.bstein.dev"] + svc_sso_keycloak["sso/keycloak (Service)"] + host_sso_bstein_dev --> svc_sso_keycloak + wl_sso_keycloak["sso/keycloak (Deployment)"] + svc_sso_keycloak --> wl_sso_keycloak + host_stream_bstein_dev["stream.bstein.dev"] + svc_jellyfin_jellyfin["jellyfin/jellyfin (Service)"] + host_stream_bstein_dev --> svc_jellyfin_jellyfin + wl_jellyfin_jellyfin["jellyfin/jellyfin (Deployment)"] + svc_jellyfin_jellyfin --> wl_jellyfin_jellyfin + host_vault_bstein_dev["vault.bstein.dev"] + svc_vaultwarden_vaultwarden_service["vaultwarden/vaultwarden-service (Service)"] + host_vault_bstein_dev --> svc_vaultwarden_vaultwarden_service + wl_vaultwarden_vaultwarden["vaultwarden/vaultwarden (Deployment)"] + svc_vaultwarden_vaultwarden_service --> wl_vaultwarden_vaultwarden + + subgraph bstein_dev_home[bstein-dev-home] + svc_bstein_dev_home_bstein_dev_home_frontend + wl_bstein_dev_home_bstein_dev_home_frontend + svc_bstein_dev_home_bstein_dev_home_backend + wl_bstein_dev_home_bstein_dev_home_backend + 
svc_bstein_dev_home_chat_ai_gateway + wl_bstein_dev_home_chat_ai_gateway + end + subgraph comms[comms] + svc_comms_matrix_wellknown + wl_comms_matrix_wellknown + svc_comms_element_call + wl_comms_element_call + svc_comms_livekit_token_service + wl_comms_livekit_token_service + svc_comms_livekit + wl_comms_livekit + svc_comms_othrys_element_element_web + wl_comms_othrys_element_element_web + svc_comms_othrys_synapse_matrix_synapse + wl_comms_othrys_synapse_matrix_synapse + svc_comms_matrix_authentication_service + wl_comms_matrix_authentication_service + svc_comms_matrix_guest_register + wl_comms_matrix_guest_register + end + subgraph crypto[crypto] + svc_crypto_monerod + wl_crypto_monerod + end + subgraph gitea[gitea] + svc_gitea_gitea + wl_gitea_gitea + end + subgraph jellyfin[jellyfin] + svc_jellyfin_pegasus + wl_jellyfin_pegasus + svc_jellyfin_jellyfin + wl_jellyfin_jellyfin + end + subgraph jenkins[jenkins] + svc_jenkins_jenkins + wl_jenkins_jenkins + end + subgraph longhorn_system[longhorn-system] + svc_longhorn_system_oauth2_proxy_longhorn + wl_longhorn_system_oauth2_proxy_longhorn + end + subgraph mailu_mailserver[mailu-mailserver] + svc_mailu_mailserver_mailu_front + end + subgraph nextcloud[nextcloud] + svc_nextcloud_nextcloud + wl_nextcloud_nextcloud + svc_nextcloud_collabora + wl_nextcloud_collabora + end + subgraph sso[sso] + svc_sso_oauth2_proxy + wl_sso_oauth2_proxy + svc_sso_keycloak + wl_sso_keycloak + end + subgraph vault[vault] + svc_vault_vault + wl_vault_vault + end + subgraph vaultwarden[vaultwarden] + svc_vaultwarden_vaultwarden_service + wl_vaultwarden_vaultwarden + end diff --git a/services/comms/kustomization.yaml b/services/comms/kustomization.yaml index 393be76..99b0b4a 100644 --- a/services/comms/kustomization.yaml +++ b/services/comms/kustomization.yaml @@ -1,5 +1,46 @@ # services/comms/kustomization.yaml apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization +namespace: comms resources: - namespace.yaml + - atlasbot-rbac.yaml 
+ - synapse-rendered.yaml + - synapse-signingkey-ensure-job.yaml + - synapse-seeder-admin-ensure-job.yaml + - mas-configmap.yaml + - mas-admin-client-secret-ensure-job.yaml + - mas-deployment.yaml + - element-rendered.yaml + - livekit-config.yaml + - livekit.yaml + - coturn.yaml + - livekit-token-deployment.yaml + - livekit-ingress.yaml + - livekit-middlewares.yaml + - element-call-config.yaml + - element-call-deployment.yaml + - reset-othrys-room-job.yaml + - bstein-force-leave-job.yaml + - pin-othrys-job.yaml + - guest-name-job.yaml + - guest-register-configmap.yaml + - guest-register-deployment.yaml + - guest-register-service.yaml + - matrix-ingress.yaml + - atlasbot-configmap.yaml + - atlasbot-deployment.yaml + - seed-othrys-room.yaml + - wellknown.yaml + +patches: + - path: synapse-deployment-strategy-patch.yaml + +configMapGenerator: + - name: atlas-kb + files: + - INDEX.md=knowledge/INDEX.md + - atlas.json=knowledge/catalog/atlas.json + - atlas-summary.json=knowledge/catalog/atlas-summary.json + - runbooks.json=knowledge/catalog/runbooks.json + - atlas-http.mmd=knowledge/diagrams/atlas-http.mmd diff --git a/services/communication/livekit-config.yaml b/services/comms/livekit-config.yaml similarity index 93% rename from services/communication/livekit-config.yaml rename to services/comms/livekit-config.yaml index c39c783..8b977a4 100644 --- a/services/communication/livekit-config.yaml +++ b/services/comms/livekit-config.yaml @@ -1,4 +1,4 @@ -# services/communication/livekit-config.yaml +# services/comms/livekit-config.yaml apiVersion: v1 kind: ConfigMap metadata: diff --git a/services/communication/livekit-ingress.yaml b/services/comms/livekit-ingress.yaml similarity index 90% rename from services/communication/livekit-ingress.yaml rename to services/comms/livekit-ingress.yaml index c6f1dae..ba30ae3 100644 --- a/services/communication/livekit-ingress.yaml +++ b/services/comms/livekit-ingress.yaml @@ -1,9 +1,8 @@ -# services/communication/livekit-ingress.yaml +# 
services/comms/livekit-ingress.yaml apiVersion: networking.k8s.io/v1 kind: Ingress metadata: name: livekit-ingress - namespace: communication annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure diff --git a/services/communication/livekit-middlewares.yaml b/services/comms/livekit-middlewares.yaml similarity index 88% rename from services/communication/livekit-middlewares.yaml rename to services/comms/livekit-middlewares.yaml index 76632fc..f1b74ed 100644 --- a/services/communication/livekit-middlewares.yaml +++ b/services/comms/livekit-middlewares.yaml @@ -1,9 +1,8 @@ -# services/communication/livekit-middlewares.yaml +# services/comms/livekit-middlewares.yaml apiVersion: traefik.io/v1alpha1 kind: Middleware metadata: name: livekit-sfu-strip - namespace: communication spec: stripPrefix: prefixes: @@ -13,7 +12,6 @@ apiVersion: traefik.io/v1alpha1 kind: Middleware metadata: name: livekit-jwt-strip - namespace: communication spec: stripPrefix: prefixes: @@ -23,7 +21,6 @@ apiVersion: networking.k8s.io/v1 kind: Ingress metadata: name: livekit-jwt-ingress - namespace: communication annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure diff --git a/services/communication/livekit-token-deployment.yaml b/services/comms/livekit-token-deployment.yaml similarity index 96% rename from services/communication/livekit-token-deployment.yaml rename to services/comms/livekit-token-deployment.yaml index f9d1a87..1b4cdca 100644 --- a/services/communication/livekit-token-deployment.yaml +++ b/services/comms/livekit-token-deployment.yaml @@ -1,4 +1,4 @@ -# services/communication/livekit-token-deployment.yaml +# services/comms/livekit-token-deployment.yaml apiVersion: apps/v1 kind: Deployment metadata: diff --git a/services/communication/livekit.yaml b/services/comms/livekit.yaml similarity index 99% rename from services/communication/livekit.yaml rename to 
services/comms/livekit.yaml index 6de11e4..46d57f8 100644 --- a/services/communication/livekit.yaml +++ b/services/comms/livekit.yaml @@ -1,4 +1,4 @@ -# services/communication/livekit.yaml +# services/comms/livekit.yaml apiVersion: apps/v1 kind: Deployment metadata: diff --git a/services/communication/mas-admin-client-secret-ensure-job.yaml b/services/comms/mas-admin-client-secret-ensure-job.yaml similarity index 97% rename from services/communication/mas-admin-client-secret-ensure-job.yaml rename to services/comms/mas-admin-client-secret-ensure-job.yaml index ff8d282..3843877 100644 --- a/services/communication/mas-admin-client-secret-ensure-job.yaml +++ b/services/comms/mas-admin-client-secret-ensure-job.yaml @@ -1,4 +1,4 @@ -# services/communication/mas-admin-client-secret-ensure-job.yaml +# services/comms/mas-admin-client-secret-ensure-job.yaml apiVersion: v1 kind: ServiceAccount metadata: diff --git a/services/communication/mas-configmap.yaml b/services/comms/mas-configmap.yaml similarity index 97% rename from services/communication/mas-configmap.yaml rename to services/comms/mas-configmap.yaml index ea5c33c..a41ebeb 100644 --- a/services/communication/mas-configmap.yaml +++ b/services/comms/mas-configmap.yaml @@ -1,9 +1,8 @@ -# services/communication/mas-configmap.yaml +# services/comms/mas-configmap.yaml apiVersion: v1 kind: ConfigMap metadata: name: matrix-authentication-service-config - namespace: communication data: config.yaml: | http: diff --git a/services/communication/mas-deployment.yaml b/services/comms/mas-deployment.yaml similarity index 97% rename from services/communication/mas-deployment.yaml rename to services/comms/mas-deployment.yaml index 7034fc7..ed88328 100644 --- a/services/communication/mas-deployment.yaml +++ b/services/comms/mas-deployment.yaml @@ -1,9 +1,8 @@ -# services/communication/mas-deployment.yaml +# services/comms/mas-deployment.yaml apiVersion: apps/v1 kind: Deployment metadata: name: matrix-authentication-service - 
namespace: communication labels: app: matrix-authentication-service spec: @@ -139,7 +138,6 @@ apiVersion: v1 kind: Service metadata: name: matrix-authentication-service - namespace: communication spec: selector: app: matrix-authentication-service diff --git a/services/communication/mas-ingress.yaml b/services/comms/matrix-ingress.yaml similarity index 50% rename from services/communication/mas-ingress.yaml rename to services/comms/matrix-ingress.yaml index b6e4bda..caaa593 100644 --- a/services/communication/mas-ingress.yaml +++ b/services/comms/matrix-ingress.yaml @@ -1,50 +1,41 @@ -# services/communication/mas-ingress.yaml +# services/comms/matrix-ingress.yaml apiVersion: networking.k8s.io/v1 kind: Ingress metadata: - name: matrix-authentication-service - namespace: communication + name: matrix-routing annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" cert-manager.io/cluster-issuer: letsencrypt spec: + ingressClassName: traefik tls: - hosts: - matrix.live.bstein.dev secretName: matrix-live-tls + - hosts: + - live.bstein.dev + secretName: live-othrys-tls + # Consolidated Matrix routing: MAS for auth/UI, Synapse for Matrix APIs, guest-register for guest joins. 
rules: - host: matrix.live.bstein.dev http: paths: - - path: / + - path: /_matrix/client/v3/register pathType: Prefix backend: service: - name: matrix-authentication-service + name: matrix-guest-register + port: + number: 8080 + - path: /_matrix/client/r0/register + pathType: Prefix + backend: + service: + name: matrix-guest-register port: number: 8080 ---- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: matrix-authentication-service-compat - namespace: communication - annotations: - kubernetes.io/ingress.class: traefik - traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.tls: "true" - cert-manager.io/cluster-issuer: letsencrypt -spec: - tls: - - hosts: - - matrix.live.bstein.dev - secretName: matrix-live-tls - rules: - - host: matrix.live.bstein.dev - http: - paths: - path: /_matrix/client/v3/login pathType: Prefix backend: @@ -66,3 +57,34 @@ spec: name: matrix-authentication-service port: number: 8080 + - path: /_matrix + pathType: Prefix + backend: + service: + name: othrys-synapse-matrix-synapse + port: + number: 8008 + - path: /_synapse + pathType: Prefix + backend: + service: + name: othrys-synapse-matrix-synapse + port: + number: 8008 + - path: / + pathType: Prefix + backend: + service: + name: matrix-authentication-service + port: + number: 8080 + - host: live.bstein.dev + http: + paths: + - path: /_matrix + pathType: Prefix + backend: + service: + name: othrys-synapse-matrix-synapse + port: + number: 8008 diff --git a/services/communication/pin-othrys-job.yaml b/services/comms/pin-othrys-job.yaml similarity index 99% rename from services/communication/pin-othrys-job.yaml rename to services/comms/pin-othrys-job.yaml index b0a4c4d..c42c815 100644 --- a/services/communication/pin-othrys-job.yaml +++ b/services/comms/pin-othrys-job.yaml @@ -1,4 +1,4 @@ -# services/communication/pin-othrys-job.yaml +# services/comms/pin-othrys-job.yaml apiVersion: batch/v1 kind: CronJob metadata: diff --git 
a/services/communication/reset-othrys-room-job.yaml b/services/comms/reset-othrys-room-job.yaml similarity index 99% rename from services/communication/reset-othrys-room-job.yaml rename to services/comms/reset-othrys-room-job.yaml index e282b44..1ae22ca 100644 --- a/services/communication/reset-othrys-room-job.yaml +++ b/services/comms/reset-othrys-room-job.yaml @@ -1,4 +1,4 @@ -# services/communication/reset-othrys-room-job.yaml +# services/comms/reset-othrys-room-job.yaml apiVersion: batch/v1 kind: Job metadata: diff --git a/services/communication/seed-othrys-room.yaml b/services/comms/seed-othrys-room.yaml similarity index 99% rename from services/communication/seed-othrys-room.yaml rename to services/comms/seed-othrys-room.yaml index a80b388..5085aa3 100644 --- a/services/communication/seed-othrys-room.yaml +++ b/services/comms/seed-othrys-room.yaml @@ -1,4 +1,4 @@ -# services/communication/seed-othrys-room.yaml +# services/comms/seed-othrys-room.yaml apiVersion: batch/v1 kind: CronJob metadata: diff --git a/services/communication/synapse-deployment-strategy-patch.yaml b/services/comms/synapse-deployment-strategy-patch.yaml similarity index 74% rename from services/communication/synapse-deployment-strategy-patch.yaml rename to services/comms/synapse-deployment-strategy-patch.yaml index 0a795c6..59b8e32 100644 --- a/services/communication/synapse-deployment-strategy-patch.yaml +++ b/services/comms/synapse-deployment-strategy-patch.yaml @@ -1,4 +1,4 @@ -# services/communication/synapse-deployment-strategy-patch.yaml +# services/comms/synapse-deployment-strategy-patch.yaml apiVersion: apps/v1 kind: Deployment metadata: diff --git a/services/communication/synapse-rendered.yaml b/services/comms/synapse-rendered.yaml similarity index 91% rename from services/communication/synapse-rendered.yaml rename to services/comms/synapse-rendered.yaml index 9155044..aa6c9d8 100644 --- a/services/communication/synapse-rendered.yaml +++ b/services/comms/synapse-rendered.yaml @@ 
-5,7 +5,6 @@ kind: ServiceAccount automountServiceAccountToken: true metadata: name: othrys-synapse-redis - namespace: "communication" labels: app.kubernetes.io/instance: othrys-synapse app.kubernetes.io/managed-by: Helm @@ -57,7 +56,6 @@ apiVersion: v1 kind: ConfigMap metadata: name: othrys-synapse-redis-configuration - namespace: "communication" labels: app.kubernetes.io/instance: othrys-synapse app.kubernetes.io/managed-by: Helm @@ -89,7 +87,6 @@ apiVersion: v1 kind: ConfigMap metadata: name: othrys-synapse-redis-health - namespace: "communication" labels: app.kubernetes.io/instance: othrys-synapse app.kubernetes.io/managed-by: Helm @@ -196,7 +193,6 @@ apiVersion: v1 kind: ConfigMap metadata: name: othrys-synapse-redis-scripts - namespace: "communication" labels: app.kubernetes.io/instance: othrys-synapse app.kubernetes.io/managed-by: Helm @@ -313,12 +309,6 @@ data: ## Registration ## enable_registration: false - modules: - - module: guest_register.GuestRegisterModule - config: - shared_secret: "@@GUEST_REGISTER_SECRET@@" - header_name: x-guest-register-secret - path: /_matrix/_guest_register ## Metrics ### @@ -415,7 +405,6 @@ apiVersion: v1 kind: Service metadata: name: othrys-synapse-redis-headless - namespace: "communication" labels: app.kubernetes.io/instance: othrys-synapse app.kubernetes.io/managed-by: Helm @@ -439,7 +428,6 @@ apiVersion: v1 kind: Service metadata: name: othrys-synapse-redis-master - namespace: "communication" labels: app.kubernetes.io/instance: othrys-synapse app.kubernetes.io/managed-by: Helm @@ -511,7 +499,6 @@ apiVersion: apps/v1 kind: Deployment metadata: name: othrys-synapse-redis-master - namespace: "communication" labels: app.kubernetes.io/instance: othrys-synapse app.kubernetes.io/managed-by: Helm @@ -708,7 +695,6 @@ spec: export OIDC_CLIENT_SECRET_ESCAPED=$(echo "${OIDC_CLIENT_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ export TURN_SECRET_ESCAPED=$(echo "${TURN_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ export 
MAS_SHARED_SECRET_ESCAPED=$(echo "${MAS_SHARED_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ - export GUEST_REGISTER_SECRET_ESCAPED=$(echo "${GUEST_REGISTER_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ export MACAROON_SECRET_KEY_ESCAPED=$(echo "${MACAROON_SECRET_KEY:-}" | sed 's/[\\/&]/\\&/g') && \ cat /synapse/secrets/*.yaml | \ sed -e "s/@@POSTGRES_PASSWORD@@/${POSTGRES_PASSWORD:-}/" \ @@ -725,9 +711,6 @@ spec: if [ -n "${MAS_SHARED_SECRET_ESCAPED}" ]; then \ sed -i "s/@@MAS_SHARED_SECRET@@/${MAS_SHARED_SECRET_ESCAPED}/g" /synapse/runtime-config/homeserver.yaml; \ fi; \ - if [ -n "${GUEST_REGISTER_SECRET_ESCAPED}" ]; then \ - sed -i "s/@@GUEST_REGISTER_SECRET@@/${GUEST_REGISTER_SECRET_ESCAPED}/g" /synapse/runtime-config/homeserver.yaml; \ - fi; \ if [ -n "${MACAROON_SECRET_KEY_ESCAPED}" ]; then \ sed -i "s/@@MACAROON_SECRET_KEY@@/${MACAROON_SECRET_KEY_ESCAPED}/g" /synapse/runtime-config/homeserver.yaml; \ fi @@ -760,18 +743,11 @@ spec: secretKeyRef: name: mas-secrets-runtime key: matrix_shared_secret - - name: GUEST_REGISTER_SECRET - valueFrom: - secretKeyRef: - name: guest-register-shared-secret-runtime - key: secret - name: MACAROON_SECRET_KEY valueFrom: secretKeyRef: name: synapse-macaroon key: macaroon_secret_key - - name: PYTHONPATH - value: /synapse/modules image: "ghcr.io/element-hq/synapse:v1.144.0" imagePullPolicy: IfNotPresent securityContext: @@ -808,9 +784,6 @@ spec: mountPath: /synapse/config/conf.d - name: secrets mountPath: /synapse/secrets - - name: modules - mountPath: /synapse/modules - readOnly: true - name: signingkey mountPath: /synapse/keys - name: media @@ -831,12 +804,6 @@ spec: - name: secrets secret: secretName: othrys-synapse-matrix-synapse - - name: modules - configMap: - name: synapse-guest-register-module - items: - - key: guest_register.py - path: guest_register.py - name: signingkey secret: secretName: "othrys-synapse-signingkey" @@ -866,73 +833,6 @@ spec: - rpi4 weight: 50 --- -# Source: matrix-synapse/templates/ingress.yaml -apiVersion: 
networking.k8s.io/v1 -kind: Ingress -metadata: - name: othrys-synapse-matrix-synapse - labels: - helm.sh/chart: matrix-synapse-3.12.17 - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/version: "1.144.0" - app.kubernetes.io/managed-by: Helm - annotations: - cert-manager.io/cluster-issuer: letsencrypt - traefik.ingress.kubernetes.io/router.entrypoints: websecure -spec: - ingressClassName: traefik - tls: - - hosts: - - "matrix.live.bstein.dev" - - "live.bstein.dev" - secretName: matrix-live-tls - rules: - - host: "live.bstein.dev" - http: - paths: - - path: /_matrix - backend: - service: - name: othrys-synapse-matrix-synapse - port: - number: 8008 - pathType: Prefix - - path: /.well-known/matrix - backend: - service: - name: othrys-synapse-matrix-synapse - port: - number: 8008 - pathType: Prefix - - host: "matrix.live.bstein.dev" - http: - paths: - - path: /_matrix - backend: - service: - name: othrys-synapse-matrix-synapse - port: - number: 8008 - pathType: Prefix - - path: /_synapse - backend: - service: - name: othrys-synapse-matrix-synapse - port: - number: 8008 - pathType: Prefix - - host: "bstein.dev" - http: - paths: - - path: /.well-known/matrix - backend: - service: - name: othrys-synapse-matrix-synapse - port: - number: 8008 - pathType: Prefix ---- # Source: matrix-synapse/templates/signing-key-job.yaml apiVersion: v1 kind: ServiceAccount diff --git a/services/communication/synapse-seeder-admin-ensure-job.yaml b/services/comms/synapse-seeder-admin-ensure-job.yaml similarity index 93% rename from services/communication/synapse-seeder-admin-ensure-job.yaml rename to services/comms/synapse-seeder-admin-ensure-job.yaml index b21f573..0885722 100644 --- a/services/communication/synapse-seeder-admin-ensure-job.yaml +++ b/services/comms/synapse-seeder-admin-ensure-job.yaml @@ -1,4 +1,4 @@ -# services/communication/synapse-seeder-admin-ensure-job.yaml +# services/comms/synapse-seeder-admin-ensure-job.yaml 
apiVersion: batch/v1 kind: Job metadata: diff --git a/services/communication/synapse-signingkey-ensure-job.yaml b/services/comms/synapse-signingkey-ensure-job.yaml similarity index 95% rename from services/communication/synapse-signingkey-ensure-job.yaml rename to services/comms/synapse-signingkey-ensure-job.yaml index 06e8fa8..a76948d 100644 --- a/services/communication/synapse-signingkey-ensure-job.yaml +++ b/services/comms/synapse-signingkey-ensure-job.yaml @@ -1,4 +1,4 @@ -# services/communication/synapse-signingkey-ensure-job.yaml +# services/comms/synapse-signingkey-ensure-job.yaml apiVersion: batch/v1 kind: Job metadata: diff --git a/services/communication/values-element.yaml b/services/comms/values-element.yaml similarity index 96% rename from services/communication/values-element.yaml rename to services/comms/values-element.yaml index 9ab91de..b8c7d87 100644 --- a/services/communication/values-element.yaml +++ b/services/comms/values-element.yaml @@ -1,4 +1,4 @@ -# services/communication/values-element.yaml +# services/comms/values-element.yaml replicaCount: 1 defaultServer: diff --git a/services/communication/values-synapse.yaml b/services/comms/values-synapse.yaml similarity index 98% rename from services/communication/values-synapse.yaml rename to services/comms/values-synapse.yaml index 7df16b6..650d0e8 100644 --- a/services/communication/values-synapse.yaml +++ b/services/comms/values-synapse.yaml @@ -1,4 +1,4 @@ -# services/communication/values-synapse.yaml +# services/comms/values-synapse.yaml serverName: live.bstein.dev publicServerName: matrix.live.bstein.dev diff --git a/services/communication/wellknown.yaml b/services/comms/wellknown.yaml similarity index 82% rename from services/communication/wellknown.yaml rename to services/comms/wellknown.yaml index d09ce27..601bafa 100644 --- a/services/communication/wellknown.yaml +++ b/services/comms/wellknown.yaml @@ -1,9 +1,8 @@ -# services/communication/wellknown.yaml +# services/comms/wellknown.yaml 
apiVersion: v1 kind: ConfigMap metadata: name: matrix-wellknown - namespace: communication data: client.json: | { @@ -30,7 +29,6 @@ apiVersion: v1 kind: ConfigMap metadata: name: matrix-wellknown-nginx - namespace: communication data: default.conf: | server { @@ -57,7 +55,6 @@ apiVersion: apps/v1 kind: Deployment metadata: name: matrix-wellknown - namespace: communication labels: app: matrix-wellknown spec: @@ -102,7 +99,6 @@ apiVersion: v1 kind: Service metadata: name: matrix-wellknown - namespace: communication spec: selector: app: matrix-wellknown @@ -115,7 +111,6 @@ apiVersion: networking.k8s.io/v1 kind: Ingress metadata: name: matrix-wellknown - namespace: communication annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure @@ -149,7 +144,6 @@ apiVersion: networking.k8s.io/v1 kind: Ingress metadata: name: matrix-wellknown-matrix-live - namespace: communication annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: websecure @@ -177,3 +171,36 @@ spec: name: matrix-wellknown port: number: 80 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: matrix-wellknown-bstein-dev + annotations: + kubernetes.io/ingress.class: traefik + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" + cert-manager.io/cluster-issuer: letsencrypt +spec: + tls: + - hosts: + - bstein.dev + secretName: bstein-dev-home-tls + rules: + - host: bstein.dev + http: + paths: + - path: /.well-known/matrix/client + pathType: Prefix + backend: + service: + name: matrix-wellknown + port: + number: 80 + - path: /.well-known/matrix/server + pathType: Prefix + backend: + service: + name: matrix-wellknown + port: + number: 80 diff --git a/services/communication/guest-register-ingress.yaml b/services/communication/guest-register-ingress.yaml deleted file mode 100644 index c3f38c1..0000000 --- 
a/services/communication/guest-register-ingress.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# services/communication/guest-register-ingress.yaml -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: matrix-guest-register - annotations: - kubernetes.io/ingress.class: traefik - traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.tls: "true" - cert-manager.io/cluster-issuer: letsencrypt -spec: - tls: - - hosts: - - matrix.live.bstein.dev - secretName: matrix-live-tls - rules: - - host: matrix.live.bstein.dev - http: - paths: - - path: /_matrix/client/v3/register - pathType: Prefix - backend: - service: - name: matrix-guest-register - port: - number: 8080 - - path: /_matrix/client/r0/register - pathType: Prefix - backend: - service: - name: matrix-guest-register - port: - number: 8080 - diff --git a/services/communication/guest-register-shared-secret-ensure-job.yaml b/services/communication/guest-register-shared-secret-ensure-job.yaml deleted file mode 100644 index 06f2440..0000000 --- a/services/communication/guest-register-shared-secret-ensure-job.yaml +++ /dev/null @@ -1,86 +0,0 @@ -# services/communication/guest-register-shared-secret-ensure-job.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: guest-register-secret-writer - namespace: comms ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: guest-register-secret-writer - namespace: comms -rules: - - apiGroups: [""] - resources: ["secrets"] - resourceNames: ["guest-register-shared-secret-runtime"] - verbs: ["get", "patch", "update"] - - apiGroups: [""] - resources: ["secrets"] - verbs: ["create"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: guest-register-secret-writer - namespace: comms -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: guest-register-secret-writer -subjects: - - kind: ServiceAccount - name: guest-register-secret-writer - namespace: comms ---- 
-apiVersion: batch/v1 -kind: Job -metadata: - name: guest-register-shared-secret-ensure-1 - namespace: comms -spec: - backoffLimit: 2 - template: - spec: - serviceAccountName: guest-register-secret-writer - restartPolicy: OnFailure - volumes: - - name: work - emptyDir: {} - initContainers: - - name: generate - image: alpine:3.20 - command: ["/bin/sh", "-c"] - args: - - | - set -euo pipefail - umask 077 - dd if=/dev/urandom bs=32 count=1 2>/dev/null | od -An -tx1 | tr -d ' \n' > /work/secret - chmod 0644 /work/secret - volumeMounts: - - name: work - mountPath: /work - containers: - - name: write - image: bitnami/kubectl:latest - command: ["/bin/sh", "-c"] - args: - - | - set -euo pipefail - if kubectl -n comms get secret guest-register-shared-secret-runtime >/dev/null 2>&1; then - if kubectl -n comms get secret guest-register-shared-secret-runtime -o jsonpath='{.data.secret}' 2>/dev/null | grep -q .; then - exit 0 - fi - else - kubectl -n comms create secret generic guest-register-shared-secret-runtime \ - --from-file=secret=/work/secret >/dev/null - exit 0 - fi - - secret_b64="$(base64 /work/secret | tr -d '\n')" - payload="$(printf '{\"data\":{\"secret\":\"%s\"}}' \"${secret_b64}\")" - kubectl -n comms patch secret guest-register-shared-secret-runtime --type=merge -p \"${payload}\" >/dev/null - volumeMounts: - - name: work - mountPath: /work - diff --git a/services/communication/kustomization.yaml b/services/communication/kustomization.yaml deleted file mode 100644 index d2352b8..0000000 --- a/services/communication/kustomization.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# services/communication/kustomization.yaml -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -namespace: comms -resources: - - atlasbot-rbac.yaml - - synapse-rendered.yaml - - synapse-signingkey-ensure-job.yaml - - synapse-seeder-admin-ensure-job.yaml - - synapse-guest-appservice-secret-ensure-job.yaml - - guest-register-shared-secret-ensure-job.yaml - - 
synapse-guest-register-module-configmap.yaml - - mas-configmap.yaml - - mas-admin-client-secret-ensure-job.yaml - - mas-deployment.yaml - - mas-ingress.yaml - - element-rendered.yaml - - livekit-config.yaml - - livekit.yaml - - coturn.yaml - - livekit-token-deployment.yaml - - livekit-ingress.yaml - - livekit-middlewares.yaml - - element-call-config.yaml - - element-call-deployment.yaml - - reset-othrys-room-job.yaml - - bstein-force-leave-job.yaml - - pin-othrys-job.yaml - - guest-name-job.yaml - - guest-register-configmap.yaml - - guest-register-deployment.yaml - - guest-register-service.yaml - - guest-register-ingress.yaml - - atlasbot-configmap.yaml - - atlasbot-deployment.yaml - - seed-othrys-room.yaml - - wellknown.yaml - -patchesStrategicMerge: - - synapse-deployment-strategy-patch.yaml - -configMapGenerator: - - name: atlas-kb - files: - - INDEX.md=../../knowledge/INDEX.md - - atlas.json=../../knowledge/catalog/atlas.json - - atlas-summary.json=../../knowledge/catalog/atlas-summary.json - - runbooks.json=../../knowledge/catalog/runbooks.json - - atlas-http.mmd=../../knowledge/diagrams/atlas-http.mmd diff --git a/services/communication/synapse-guest-appservice-secret-ensure-job.yaml b/services/communication/synapse-guest-appservice-secret-ensure-job.yaml deleted file mode 100644 index 6dd8564..0000000 --- a/services/communication/synapse-guest-appservice-secret-ensure-job.yaml +++ /dev/null @@ -1,111 +0,0 @@ -# services/communication/synapse-guest-appservice-secret-ensure-job.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: synapse-guest-appservice-secret-writer - namespace: comms ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: synapse-guest-appservice-secret-writer - namespace: comms -rules: - - apiGroups: [""] - resources: ["secrets"] - resourceNames: ["synapse-guest-appservice-runtime"] - verbs: ["get", "patch", "update"] - - apiGroups: [""] - resources: ["secrets"] - verbs: ["create"] ---- -apiVersion: 
rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: synapse-guest-appservice-secret-writer - namespace: comms -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: synapse-guest-appservice-secret-writer -subjects: - - kind: ServiceAccount - name: synapse-guest-appservice-secret-writer - namespace: comms ---- -apiVersion: batch/v1 -kind: Job -metadata: - name: synapse-guest-appservice-secret-ensure-1 - namespace: comms -spec: - backoffLimit: 2 - template: - spec: - serviceAccountName: synapse-guest-appservice-secret-writer - restartPolicy: OnFailure - volumes: - - name: work - emptyDir: {} - initContainers: - - name: generate - image: alpine:3.20 - command: ["/bin/sh", "-c"] - args: - - | - set -euo pipefail - umask 077 - AS_TOKEN="$(dd if=/dev/urandom bs=32 count=1 2>/dev/null | od -An -tx1 | tr -d ' \n')" - HS_TOKEN="$(dd if=/dev/urandom bs=32 count=1 2>/dev/null | od -An -tx1 | tr -d ' \n')" - - printf '%s' "${AS_TOKEN}" > /work/as_token - printf '%s' "${HS_TOKEN}" > /work/hs_token - - cat > /work/registration.yaml </dev/null 2>&1; then - if kubectl -n comms get secret synapse-guest-appservice-runtime -o jsonpath='{.data.registration\.yaml}' 2>/dev/null | grep -q .; then - exit 0 - fi - else - kubectl -n comms create secret generic synapse-guest-appservice-runtime \ - --from-file=registration.yaml=/work/registration.yaml \ - --from-file=as_token=/work/as_token \ - --from-file=hs_token=/work/hs_token >/dev/null - exit 0 - fi - - reg_b64="$(base64 /work/registration.yaml | tr -d '\n')" - as_b64="$(base64 /work/as_token | tr -d '\n')" - hs_b64="$(base64 /work/hs_token | tr -d '\n')" - - payload="$(printf '{\"data\":{\"registration.yaml\":\"%s\",\"as_token\":\"%s\",\"hs_token\":\"%s\"}}' \"${reg_b64}\" \"${as_b64}\" \"${hs_b64}\")" - kubectl -n comms patch secret synapse-guest-appservice-runtime --type=merge -p \"${payload}\" >/dev/null - volumeMounts: - - name: work - mountPath: /work - diff --git 
a/services/communication/synapse-guest-register-module-configmap.yaml b/services/communication/synapse-guest-register-module-configmap.yaml deleted file mode 100644 index 3afb3d9..0000000 --- a/services/communication/synapse-guest-register-module-configmap.yaml +++ /dev/null @@ -1,89 +0,0 @@ -# services/communication/synapse-guest-register-module-configmap.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: synapse-guest-register-module -data: - guest_register.py: | - import secrets - import random - - import synapse.api.auth - from synapse.api.errors import Codes, SynapseError - from synapse.http.server import DirectServeJsonResource - from synapse.http.servlet import parse_json_object_from_request - from synapse.types import UserID, create_requester - - - class GuestRegisterResource(DirectServeJsonResource): - def __init__(self, hs, shared_secret: str, header_name: str): - super().__init__(clock=hs.get_clock()) - self._hs = hs - self._shared_secret = shared_secret - self._header_name = header_name - - self._adj = ["brisk", "calm", "eager", "gentle", "merry", "nifty", "rapid", "sunny", "witty", "zesty"] - self._noun = ["otter", "falcon", "comet", "ember", "grove", "harbor", "meadow", "raven", "river", "summit"] - - async def _async_render_POST(self, request): # noqa: N802 - provided = request.requestHeaders.getRawHeaders(self._header_name) - if not provided or not secrets.compare_digest(provided[0], self._shared_secret): - raise SynapseError(403, "Forbidden", errcode=Codes.FORBIDDEN) - - body = parse_json_object_from_request(request) - initial_device_display_name = body.get("initial_device_display_name") - if not isinstance(initial_device_display_name, str): - initial_device_display_name = None - - reg = self._hs.get_registration_handler() - address = request.getClientAddress().host - - user_id = await reg.register_user(make_guest=True, address=address) - - device_id = synapse.api.auth.GUEST_DEVICE_ID - device_id, access_token, valid_until_ms, refresh_token = 
await reg.register_device( - user_id, - device_id, - initial_device_display_name, - is_guest=True, - ) - - displayname = body.get("displayname") - if not isinstance(displayname, str) or not displayname.strip(): - displayname = f"{random.choice(self._adj)}-{random.choice(self._noun)}" - - try: - requester = create_requester(user_id, is_guest=True, device_id=device_id) - await self._hs.get_profile_handler().set_displayname( - UserID.from_string(user_id), - requester, - displayname, - propagate=False, - ) - except Exception: - pass - - result = { - "user_id": user_id, - "device_id": device_id, - "access_token": access_token, - "home_server": self._hs.hostname, - } - - if valid_until_ms is not None: - result["expires_in_ms"] = valid_until_ms - self._hs.get_clock().time_msec() - - if refresh_token is not None: - result["refresh_token"] = refresh_token - - return 200, result - - - class GuestRegisterModule: - def __init__(self, config, api): - shared_secret = config["shared_secret"] - header_name = config.get("header_name", "x-guest-register-secret") - path = config.get("path", "/_matrix/_guest_register") - - hs = api._hs # noqa: SLF001 - api.register_web_resource(path, GuestRegisterResource(hs, shared_secret, header_name)) -- 2.47.2 From 9a76680cc46bc9dc899d5f10ce955cc0c5e9c1a9 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 01:58:17 -0300 Subject: [PATCH 516/684] comms: track local knowledge markdown --- .gitignore | 1 + services/comms/knowledge/INDEX.md | 22 ++++++++++++ .../knowledge/runbooks/ci-gitea-jenkins.md | 27 +++++++++++++++ .../comms/knowledge/runbooks/kb-authoring.md | 34 +++++++++++++++++++ .../comms/knowledge/runbooks/observability.md | 26 ++++++++++++++ services/comms/knowledge/runbooks/template.md | 18 ++++++++++ 6 files changed, 128 insertions(+) create mode 100644 services/comms/knowledge/INDEX.md create mode 100644 services/comms/knowledge/runbooks/ci-gitea-jenkins.md create mode 100644 
services/comms/knowledge/runbooks/kb-authoring.md create mode 100644 services/comms/knowledge/runbooks/observability.md create mode 100644 services/comms/knowledge/runbooks/template.md diff --git a/.gitignore b/.gitignore index 1d2e516..2fcd3f0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *.md !README.md !knowledge/**/*.md +!services/comms/knowledge/**/*.md __pycache__/ *.py[cod] diff --git a/services/comms/knowledge/INDEX.md b/services/comms/knowledge/INDEX.md new file mode 100644 index 0000000..fac9153 --- /dev/null +++ b/services/comms/knowledge/INDEX.md @@ -0,0 +1,22 @@ +Atlas Knowledge Base (KB) + +This folder is the source-of-truth “memory” for Atlas/Titan assistants (and for humans). It is designed to be: +- Accurate (grounded in GitOps + read-only cluster tools) +- Maintainable (small docs + deterministic generators) +- Safe (no secrets; refer to Secret/Vault paths by name only) + +Layout +- `knowledge/runbooks/`: human-written docs (short, chunkable Markdown). +- `knowledge/catalog/`: generated machine-readable facts (YAML/JSON). +- `knowledge/diagrams/`: generated Mermaid diagrams (`.mmd`) derived from the catalog. + +Regeneration +- Update manifests/docs, then regenerate generated artifacts: + - `python scripts/knowledge_render_atlas.py --write` + +Authoring rules +- Never include secret values. Prefer `secretRef` names or Vault paths like `kv/atlas/...`. +- Prefer stable identifiers: Kubernetes `namespace/name`, DNS hostnames, Flux kustomization paths. +- Keep each runbook small; one topic per file; use headings. +- When in doubt, link to the exact file path in this repo that configures the behavior. 
+ diff --git a/services/comms/knowledge/runbooks/ci-gitea-jenkins.md b/services/comms/knowledge/runbooks/ci-gitea-jenkins.md new file mode 100644 index 0000000..48dc91f --- /dev/null +++ b/services/comms/knowledge/runbooks/ci-gitea-jenkins.md @@ -0,0 +1,27 @@ +--- +title: "CI: Gitea → Jenkins pipeline" +tags: ["atlas", "ci", "gitea", "jenkins"] +owners: ["brad"] +entrypoints: ["scm.bstein.dev", "ci.bstein.dev"] +source_paths: ["services/gitea", "services/jenkins", "scripts/jenkins_cred_sync.sh", "scripts/gitea_cred_sync.sh"] +--- + +# CI: Gitea → Jenkins pipeline + +## What this is +Atlas uses Gitea for source control and Jenkins for CI. Authentication is via Keycloak (SSO). + +## Where it is configured +- Gitea manifests: `services/gitea/` +- Jenkins manifests: `services/jenkins/` +- Credential sync helpers: `scripts/gitea_cred_sync.sh`, `scripts/jenkins_cred_sync.sh` + +## What users do (typical flow) +- Create a repo in Gitea. +- Create/update a Jenkins job/pipeline that can fetch the repo. +- Configure a webhook (or SCM polling) so pushes trigger builds. + +## Troubleshooting (common) +- “Webhook not firing”: confirm ingress host, webhook URL, and Jenkins job is reachable. +- “Auth denied cloning”: confirm Keycloak group membership and that Jenkins has a valid token/credential configured. + diff --git a/services/comms/knowledge/runbooks/kb-authoring.md b/services/comms/knowledge/runbooks/kb-authoring.md new file mode 100644 index 0000000..9378d1d --- /dev/null +++ b/services/comms/knowledge/runbooks/kb-authoring.md @@ -0,0 +1,34 @@ +--- +title: "KB authoring: what to write (and what not to)" +tags: ["atlas", "kb", "runbooks"] +owners: ["brad"] +entrypoints: [] +source_paths: ["knowledge/runbooks", "scripts/knowledge_render_atlas.py"] +--- + +# KB authoring: what to write (and what not to) + +## The goal +Give Atlas assistants enough grounded, Atlas-specific context to answer “how do I…?” questions without guessing. 
+ +## What to capture (high value) +- User workflows: “click here, set X, expected result” +- Operator workflows: “edit these files, reconcile this kustomization, verify with these commands” +- Wiring: “this host routes to this service; this service depends on Postgres/Vault/etc” +- Failure modes: exact error messages + the 2–5 checks that usually resolve them +- Permissions: Keycloak groups/roles and what they unlock + +## What to avoid (low value / fluff) +- Generic Kubernetes explanations (link to upstream docs instead) +- Copy-pasting large manifests (prefer file paths + small snippets) +- Anything that will drift quickly (render it from GitOps instead) +- Any secret values (reference Secret/Vault locations by name only) + +## Document pattern (recommended) +Each runbook should answer: +- “What is this?” +- “What do users do?” +- “What do operators change (where in Git)?” +- “How do we verify it works?” +- “What breaks and how to debug it?” + diff --git a/services/comms/knowledge/runbooks/observability.md b/services/comms/knowledge/runbooks/observability.md new file mode 100644 index 0000000..4c5be6e --- /dev/null +++ b/services/comms/knowledge/runbooks/observability.md @@ -0,0 +1,26 @@ +--- +title: "Observability: Grafana + VictoriaMetrics (how to query safely)" +tags: ["atlas", "monitoring", "grafana", "victoriametrics"] +owners: ["brad"] +entrypoints: ["metrics.bstein.dev", "alerts.bstein.dev"] +source_paths: ["services/monitoring"] +--- + +# Observability: Grafana + VictoriaMetrics (how to query safely) + +## Where it is configured +- `services/monitoring/helmrelease.yaml` (Grafana + Alertmanager + VM values) +- `services/monitoring/grafana-dashboard-*.yaml` (dashboards and their PromQL) + +## Using metrics as a “tool” for Atlas assistants +The safest pattern is: map a small set of intents → fixed PromQL queries, then summarize results. 
+ +Examples (intents) +- “Is the cluster healthy?” → node readiness + pod restart rate +- “Why is Element Call failing?” → LiveKit/coturn pod restarts + synapse errors + ingress 5xx +- “Is Jenkins slow?” → pod CPU/memory + HTTP latency metrics (if exported) + +## Why dashboards are not the KB +Dashboards are great references, but the assistant should query VictoriaMetrics directly for live answers and keep the +KB focused on wiring, runbooks, and stable conventions. + diff --git a/services/comms/knowledge/runbooks/template.md b/services/comms/knowledge/runbooks/template.md new file mode 100644 index 0000000..086c65f --- /dev/null +++ b/services/comms/knowledge/runbooks/template.md @@ -0,0 +1,18 @@ +--- +title: "" +tags: ["atlas", "", ""] +owners: ["brad"] +entrypoints: [""] +source_paths: ["services/", "clusters/atlas/<...>"] +--- + +# + +## What this is + +## For users (how to) + +## For operators (where configured) + +## Troubleshooting (symptoms → checks) + -- 2.47.2 From 3aa36e87b830d3417436f30e54a7172d0572b57f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 02:00:52 -0300 Subject: [PATCH 517/684] comms: retry guest rename when MAS restarts --- services/comms/guest-name-job.yaml | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/services/comms/guest-name-job.yaml b/services/comms/guest-name-job.yaml index 5e5a2e9..10dde37 100644 --- a/services/comms/guest-name-job.yaml +++ b/services/comms/guest-name-job.yaml @@ -51,6 +51,7 @@ spec: import os import random import requests + import time import urllib.parse ADJ = [ @@ -76,14 +77,21 @@ spec: with open(MAS_ADMIN_CLIENT_SECRET_FILE, "r", encoding="utf-8") as f: secret = f.read().strip() basic = base64.b64encode(f"{MAS_ADMIN_CLIENT_ID}:{secret}".encode()).decode() - r = requests.post( - MAS_TOKEN_URL, - headers={"Authorization": f"Basic {basic}"}, - data={"grant_type": "client_credentials", "scope": "urn:mas:admin"}, - timeout=30, - ) - r.raise_for_status() - 
return r.json()["access_token"] + last_err = None + for attempt in range(5): + try: + r = requests.post( + MAS_TOKEN_URL, + headers={"Authorization": f"Basic {basic}"}, + data={"grant_type": "client_credentials", "scope": "urn:mas:admin"}, + timeout=30, + ) + r.raise_for_status() + return r.json()["access_token"] + except Exception as exc: # noqa: BLE001 + last_err = exc + time.sleep(2 ** attempt) + raise last_err def mas_user_id(token, username): r = requests.get( -- 2.47.2 From 05848223eba98e4d9c046ae02eaa3fa6632c54c0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 02:09:23 -0300 Subject: [PATCH 518/684] comms: ensure MAS secrets via keycloak admin job --- services/comms/kustomization.yaml | 1 + services/comms/mas-secrets-ensure-rbac.yaml | 25 +++++ services/keycloak/kustomization.yaml | 1 + services/keycloak/mas-secrets-ensure-job.yaml | 95 +++++++++++++++++++ 4 files changed, 122 insertions(+) create mode 100644 services/comms/mas-secrets-ensure-rbac.yaml create mode 100644 services/keycloak/mas-secrets-ensure-job.yaml diff --git a/services/comms/kustomization.yaml b/services/comms/kustomization.yaml index 99b0b4a..e3e182f 100644 --- a/services/comms/kustomization.yaml +++ b/services/comms/kustomization.yaml @@ -10,6 +10,7 @@ resources: - synapse-seeder-admin-ensure-job.yaml - mas-configmap.yaml - mas-admin-client-secret-ensure-job.yaml + - mas-secrets-ensure-rbac.yaml - mas-deployment.yaml - element-rendered.yaml - livekit-config.yaml diff --git a/services/comms/mas-secrets-ensure-rbac.yaml b/services/comms/mas-secrets-ensure-rbac.yaml new file mode 100644 index 0000000..9723d03 --- /dev/null +++ b/services/comms/mas-secrets-ensure-rbac.yaml @@ -0,0 +1,25 @@ +# services/comms/mas-secrets-ensure-rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: mas-secrets-ensure + namespace: comms +rules: + - apiGroups: [""] + resources: ["secrets"] + resourceNames: ["mas-secrets-runtime"] + verbs: ["get", "create", "patch", 
"update"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: mas-secrets-ensure + namespace: comms +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: mas-secrets-ensure +subjects: + - kind: ServiceAccount + name: mas-secrets-ensure + namespace: sso diff --git a/services/keycloak/kustomization.yaml b/services/keycloak/kustomization.yaml index 24490de..d6dd32e 100644 --- a/services/keycloak/kustomization.yaml +++ b/services/keycloak/kustomization.yaml @@ -16,6 +16,7 @@ resources: - portal-e2e-execute-actions-email-test-job.yaml - ldap-federation-job.yaml - user-overrides-job.yaml + - mas-secrets-ensure-job.yaml - service.yaml - ingress.yaml generatorOptions: diff --git a/services/keycloak/mas-secrets-ensure-job.yaml b/services/keycloak/mas-secrets-ensure-job.yaml new file mode 100644 index 0000000..7a6972a --- /dev/null +++ b/services/keycloak/mas-secrets-ensure-job.yaml @@ -0,0 +1,95 @@ +# services/keycloak/mas-secrets-ensure-job.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: mas-secrets-ensure + namespace: sso +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: mas-secrets-ensure-1 + namespace: sso +spec: + backoffLimit: 2 + template: + spec: + serviceAccountName: mas-secrets-ensure + restartPolicy: OnFailure + volumes: + - name: work + emptyDir: {} + initContainers: + - name: generate + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + umask 077 + apk add --no-cache curl openssl jq >/dev/null + + KC_URL="http://keycloak.sso.svc.cluster.local" + TOKEN_JSON="$(curl -sS -X POST "$KC_URL/realms/master/protocol/openid-connect/token" \ + -H 'Content-Type: application/x-www-form-urlencoded' \ + -d "grant_type=password" \ + -d "client_id=admin-cli" \ + -d "username=${KEYCLOAK_ADMIN}" \ + -d "password=${KEYCLOAK_ADMIN_PASSWORD}")" + ACCESS_TOKEN="$(echo "$TOKEN_JSON" | jq -r '.access_token')" + if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" = "null" ]; then + echo 
"Failed to fetch Keycloak admin token" >&2 + exit 1 + fi + + CLIENT_ID="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients?clientId=othrys-mas" | jq -r '.[0].id')" + if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then + echo "Keycloak client othrys-mas not found" >&2 + exit 1 + fi + + CLIENT_SECRET="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/client-secret" | jq -r '.value')" + if [ -z "$CLIENT_SECRET" ] || [ "$CLIENT_SECRET" = "null" ]; then + echo "Keycloak client secret not found" >&2 + exit 1 + fi + + printf '%s' "$CLIENT_SECRET" > /work/keycloak_client_secret + openssl rand -base64 32 > /work/encryption + openssl rand -hex 32 > /work/matrix_shared_secret + openssl genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:4096 -out /work/rsa_key >/dev/null 2>&1 + chmod 0600 /work/* + env: + - name: KEYCLOAK_ADMIN + valueFrom: + secretKeyRef: + name: keycloak-admin + key: username + - name: KEYCLOAK_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: keycloak-admin + key: password + volumeMounts: + - name: work + mountPath: /work + containers: + - name: apply + image: bitnami/kubectl:latest + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + if kubectl -n comms get secret mas-secrets-runtime >/dev/null 2>&1; then + exit 0 + fi + kubectl -n comms create secret generic mas-secrets-runtime \ + --from-file=encryption=/work/encryption \ + --from-file=matrix_shared_secret=/work/matrix_shared_secret \ + --from-file=keycloak_client_secret=/work/keycloak_client_secret \ + --from-file=rsa_key=/work/rsa_key >/dev/null + volumeMounts: + - name: work + mountPath: /work -- 2.47.2 From 2aea7e36018d5e749f21c0adb114c92a93634864 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 02:12:40 -0300 Subject: [PATCH 519/684] keycloak: retry MAS secret bootstrap --- services/keycloak/mas-secrets-ensure-job.yaml | 30 +++++++++++-------- 1 file changed, 18 
insertions(+), 12 deletions(-) diff --git a/services/keycloak/mas-secrets-ensure-job.yaml b/services/keycloak/mas-secrets-ensure-job.yaml index 7a6972a..99725c3 100644 --- a/services/keycloak/mas-secrets-ensure-job.yaml +++ b/services/keycloak/mas-secrets-ensure-job.yaml @@ -8,7 +8,7 @@ metadata: apiVersion: batch/v1 kind: Job metadata: - name: mas-secrets-ensure-1 + name: mas-secrets-ensure-2 namespace: sso spec: backoffLimit: 2 @@ -30,27 +30,33 @@ spec: apk add --no-cache curl openssl jq >/dev/null KC_URL="http://keycloak.sso.svc.cluster.local" - TOKEN_JSON="$(curl -sS -X POST "$KC_URL/realms/master/protocol/openid-connect/token" \ - -H 'Content-Type: application/x-www-form-urlencoded' \ - -d "grant_type=password" \ - -d "client_id=admin-cli" \ - -d "username=${KEYCLOAK_ADMIN}" \ - -d "password=${KEYCLOAK_ADMIN_PASSWORD}")" - ACCESS_TOKEN="$(echo "$TOKEN_JSON" | jq -r '.access_token')" + ACCESS_TOKEN="" + for attempt in 1 2 3 4 5; do + TOKEN_JSON="$(curl -sS -X POST "$KC_URL/realms/master/protocol/openid-connect/token" \ + -H 'Content-Type: application/x-www-form-urlencoded' \ + -d "grant_type=password" \ + -d "client_id=admin-cli" \ + -d "username=${KEYCLOAK_ADMIN}" \ + -d "password=${KEYCLOAK_ADMIN_PASSWORD}" || true)" + ACCESS_TOKEN="$(echo "$TOKEN_JSON" | jq -r '.access_token' 2>/dev/null || true)" + if [ -n "$ACCESS_TOKEN" ] && [ "$ACCESS_TOKEN" != "null" ]; then + break + fi + echo "Keycloak token request failed (attempt ${attempt})" >&2 + sleep $((attempt * 2)) + done if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" = "null" ]; then echo "Failed to fetch Keycloak admin token" >&2 exit 1 fi - CLIENT_ID="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ - "$KC_URL/admin/realms/atlas/clients?clientId=othrys-mas" | jq -r '.[0].id')" + "$KC_URL/admin/realms/atlas/clients?clientId=othrys-mas" | jq -r '.[0].id' 2>/dev/null || true)" if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then echo "Keycloak client othrys-mas not found" >&2 exit 1 fi - 
CLIENT_SECRET="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ - "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/client-secret" | jq -r '.value')" + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/client-secret" | jq -r '.value' 2>/dev/null || true)" if [ -z "$CLIENT_SECRET" ] || [ "$CLIENT_SECRET" = "null" ]; then echo "Keycloak client secret not found" >&2 exit 1 -- 2.47.2 From afce04b9b271d45b0eefdf21f8605ce88d841c98 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 02:17:04 -0300 Subject: [PATCH 520/684] keycloak: rerun MAS secrets bootstrap --- services/keycloak/mas-secrets-ensure-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/keycloak/mas-secrets-ensure-job.yaml b/services/keycloak/mas-secrets-ensure-job.yaml index 99725c3..8e43cbf 100644 --- a/services/keycloak/mas-secrets-ensure-job.yaml +++ b/services/keycloak/mas-secrets-ensure-job.yaml @@ -8,7 +8,7 @@ metadata: apiVersion: batch/v1 kind: Job metadata: - name: mas-secrets-ensure-2 + name: mas-secrets-ensure-3 namespace: sso spec: backoffLimit: 2 -- 2.47.2 From e9fb11af40bbd6fb30f909be3493c1ecb2ffe333 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 02:19:21 -0300 Subject: [PATCH 521/684] keycloak: allow MAS secret apply read access --- services/keycloak/mas-secrets-ensure-job.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/keycloak/mas-secrets-ensure-job.yaml b/services/keycloak/mas-secrets-ensure-job.yaml index 8e43cbf..8098062 100644 --- a/services/keycloak/mas-secrets-ensure-job.yaml +++ b/services/keycloak/mas-secrets-ensure-job.yaml @@ -8,7 +8,7 @@ metadata: apiVersion: batch/v1 kind: Job metadata: - name: mas-secrets-ensure-3 + name: mas-secrets-ensure-4 namespace: sso spec: backoffLimit: 2 @@ -66,7 +66,7 @@ spec: openssl rand -base64 32 > /work/encryption openssl rand -hex 32 > /work/matrix_shared_secret openssl genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:4096 -out /work/rsa_key 
>/dev/null 2>&1 - chmod 0600 /work/* + chmod 0644 /work/* env: - name: KEYCLOAK_ADMIN valueFrom: -- 2.47.2 From 6c99eb452e033f7972bab58dd0f719f043f6d282 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 02:21:37 -0300 Subject: [PATCH 522/684] keycloak: make MAS secret job idempotent --- services/keycloak/mas-secrets-ensure-job.yaml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/services/keycloak/mas-secrets-ensure-job.yaml b/services/keycloak/mas-secrets-ensure-job.yaml index 8098062..dc5f92f 100644 --- a/services/keycloak/mas-secrets-ensure-job.yaml +++ b/services/keycloak/mas-secrets-ensure-job.yaml @@ -8,14 +8,15 @@ metadata: apiVersion: batch/v1 kind: Job metadata: - name: mas-secrets-ensure-4 + name: mas-secrets-ensure-5 namespace: sso spec: - backoffLimit: 2 + backoffLimit: 0 + ttlSecondsAfterFinished: 3600 template: spec: serviceAccountName: mas-secrets-ensure - restartPolicy: OnFailure + restartPolicy: Never volumes: - name: work emptyDir: {} @@ -95,7 +96,8 @@ spec: --from-file=encryption=/work/encryption \ --from-file=matrix_shared_secret=/work/matrix_shared_secret \ --from-file=keycloak_client_secret=/work/keycloak_client_secret \ - --from-file=rsa_key=/work/rsa_key >/dev/null + --from-file=rsa_key=/work/rsa_key \ + --dry-run=client -o yaml | kubectl -n comms apply -f - >/dev/null volumeMounts: - name: work mountPath: /work -- 2.47.2 From 6e7118c14d7f2553fa53bb78f5ac904dbc0a8d3a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 02:23:40 -0300 Subject: [PATCH 523/684] keycloak: use create for MAS secrets --- services/keycloak/mas-secrets-ensure-job.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/services/keycloak/mas-secrets-ensure-job.yaml b/services/keycloak/mas-secrets-ensure-job.yaml index dc5f92f..d7550c6 100644 --- a/services/keycloak/mas-secrets-ensure-job.yaml +++ b/services/keycloak/mas-secrets-ensure-job.yaml @@ -8,7 +8,7 @@ metadata: apiVersion: batch/v1 kind: 
Job metadata: - name: mas-secrets-ensure-5 + name: mas-secrets-ensure-6 namespace: sso spec: backoffLimit: 0 @@ -96,8 +96,7 @@ spec: --from-file=encryption=/work/encryption \ --from-file=matrix_shared_secret=/work/matrix_shared_secret \ --from-file=keycloak_client_secret=/work/keycloak_client_secret \ - --from-file=rsa_key=/work/rsa_key \ - --dry-run=client -o yaml | kubectl -n comms apply -f - >/dev/null + --from-file=rsa_key=/work/rsa_key >/dev/null volumeMounts: - name: work mountPath: /work -- 2.47.2 From 1c9efd680829e1745add8d5e28c9aa768ab927bb Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 02:25:19 -0300 Subject: [PATCH 524/684] comms: allow MAS secrets create --- services/comms/mas-secrets-ensure-rbac.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/services/comms/mas-secrets-ensure-rbac.yaml b/services/comms/mas-secrets-ensure-rbac.yaml index 9723d03..11f59c3 100644 --- a/services/comms/mas-secrets-ensure-rbac.yaml +++ b/services/comms/mas-secrets-ensure-rbac.yaml @@ -5,10 +5,13 @@ metadata: name: mas-secrets-ensure namespace: comms rules: + - apiGroups: [""] + resources: ["secrets"] + verbs: ["create"] - apiGroups: [""] resources: ["secrets"] resourceNames: ["mas-secrets-runtime"] - verbs: ["get", "create", "patch", "update"] + verbs: ["get", "patch", "update"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding -- 2.47.2 From 9d3b27e5673e860e8d9bdfc6642e2405fa5fc05e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 02:25:55 -0300 Subject: [PATCH 525/684] keycloak: rerun MAS secrets bootstrap --- services/keycloak/mas-secrets-ensure-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/keycloak/mas-secrets-ensure-job.yaml b/services/keycloak/mas-secrets-ensure-job.yaml index d7550c6..edfd2c3 100644 --- a/services/keycloak/mas-secrets-ensure-job.yaml +++ b/services/keycloak/mas-secrets-ensure-job.yaml @@ -8,7 +8,7 @@ metadata: apiVersion: batch/v1 kind: Job 
metadata: - name: mas-secrets-ensure-6 + name: mas-secrets-ensure-7 namespace: sso spec: backoffLimit: 0 -- 2.47.2 From 9a544010fbc66811c8875d45d7d27635a7f024ba Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 02:31:54 -0300 Subject: [PATCH 526/684] comms: grant MAS secret bootstrap cluster role --- services/comms/mas-secrets-ensure-rbac.yaml | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/services/comms/mas-secrets-ensure-rbac.yaml b/services/comms/mas-secrets-ensure-rbac.yaml index 11f59c3..22ff987 100644 --- a/services/comms/mas-secrets-ensure-rbac.yaml +++ b/services/comms/mas-secrets-ensure-rbac.yaml @@ -1,26 +1,20 @@ # services/comms/mas-secrets-ensure-rbac.yaml apiVersion: rbac.authorization.k8s.io/v1 -kind: Role +kind: ClusterRole metadata: name: mas-secrets-ensure - namespace: comms rules: - apiGroups: [""] resources: ["secrets"] - verbs: ["create"] - - apiGroups: [""] - resources: ["secrets"] - resourceNames: ["mas-secrets-runtime"] - verbs: ["get", "patch", "update"] + verbs: ["get", "create", "patch", "update"] --- apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding +kind: ClusterRoleBinding metadata: name: mas-secrets-ensure - namespace: comms roleRef: apiGroup: rbac.authorization.k8s.io - kind: Role + kind: ClusterRole name: mas-secrets-ensure subjects: - kind: ServiceAccount -- 2.47.2 From 6e0b3c43bd4e7b67cf493d5832a8e0b45f4b892d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 02:32:31 -0300 Subject: [PATCH 527/684] keycloak: rerun MAS secrets bootstrap --- services/keycloak/mas-secrets-ensure-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/keycloak/mas-secrets-ensure-job.yaml b/services/keycloak/mas-secrets-ensure-job.yaml index edfd2c3..60d6d15 100644 --- a/services/keycloak/mas-secrets-ensure-job.yaml +++ b/services/keycloak/mas-secrets-ensure-job.yaml @@ -8,7 +8,7 @@ metadata: apiVersion: batch/v1 kind: Job metadata: - name: 
mas-secrets-ensure-7 + name: mas-secrets-ensure-8 namespace: sso spec: backoffLimit: 0 -- 2.47.2 From b95683da2aa9f31e4e6d71f44ab7729b895a44a9 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 02:35:09 -0300 Subject: [PATCH 528/684] comms: restart MAS after secret bootstrap --- services/comms/mas-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/comms/mas-deployment.yaml b/services/comms/mas-deployment.yaml index ed88328..d46de89 100644 --- a/services/comms/mas-deployment.yaml +++ b/services/comms/mas-deployment.yaml @@ -13,7 +13,7 @@ spec: template: metadata: annotations: - checksum/config: v5-adminapi-1 + checksum/config: v5-adminapi-2 labels: app: matrix-authentication-service spec: -- 2.47.2 From 0e55dbeaa9ec1c8157a37fd31d694b5d3a7f5468 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 02:45:00 -0300 Subject: [PATCH 529/684] comms: ensure mas db secret --- services/comms/kustomization.yaml | 2 + services/comms/mas-db-ensure-job.yaml | 72 ++++++++++++++++++++++++++ services/comms/mas-db-ensure-rbac.yaml | 56 ++++++++++++++++++++ 3 files changed, 130 insertions(+) create mode 100644 services/comms/mas-db-ensure-job.yaml create mode 100644 services/comms/mas-db-ensure-rbac.yaml diff --git a/services/comms/kustomization.yaml b/services/comms/kustomization.yaml index e3e182f..b08f6db 100644 --- a/services/comms/kustomization.yaml +++ b/services/comms/kustomization.yaml @@ -11,6 +11,8 @@ resources: - mas-configmap.yaml - mas-admin-client-secret-ensure-job.yaml - mas-secrets-ensure-rbac.yaml + - mas-db-ensure-rbac.yaml + - mas-db-ensure-job.yaml - mas-deployment.yaml - element-rendered.yaml - livekit-config.yaml diff --git a/services/comms/mas-db-ensure-job.yaml b/services/comms/mas-db-ensure-job.yaml new file mode 100644 index 0000000..f4b4653 --- /dev/null +++ b/services/comms/mas-db-ensure-job.yaml @@ -0,0 +1,72 @@ +# services/comms/mas-db-ensure-job.yaml +apiVersion: batch/v1 +kind: Job 
+metadata: + name: mas-db-ensure-2 + namespace: comms +spec: + backoffLimit: 2 + template: + spec: + serviceAccountName: mas-db-ensure + restartPolicy: OnFailure + volumes: + - name: work + emptyDir: {} + initContainers: + - name: prepare + image: bitnami/kubectl:latest + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + umask 077 + kubectl -n postgres get secret postgres-auth -o jsonpath='{.data.POSTGRES_PASSWORD}' | base64 -d > /work/postgres_password + if kubectl -n comms get secret mas-db >/dev/null 2>&1; then + kubectl -n comms get secret mas-db -o jsonpath='{.data.password}' | base64 -d > /work/mas_password + else + head -c 32 /dev/urandom | base64 | tr -d '\n' > /work/mas_password + kubectl -n comms create secret generic mas-db --from-file=password=/work/mas_password >/dev/null + fi + volumeMounts: + - name: work + mountPath: /work + containers: + - name: ensure + image: postgres:15 + env: + - name: PGHOST + value: postgres-service.postgres.svc.cluster.local + - name: PGPORT + value: "5432" + - name: PGDATABASE + value: postgres + - name: PGUSER + value: postgres + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + export PGPASSWORD="$(cat /work/postgres_password)" + MAS_PASS="$(cat /work/mas_password)" + psql -v ON_ERROR_STOP=1 -v mas_pass="${MAS_PASS}" <<'SQL' + DO $$ + BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = 'mas') THEN + EXECUTE format('CREATE ROLE mas LOGIN PASSWORD %L', :mas_pass); + ELSE + EXECUTE format('ALTER ROLE mas WITH PASSWORD %L', :mas_pass); + END IF; + END + $$; + DO $$ + BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_database WHERE datname = 'mas') THEN + CREATE DATABASE mas OWNER mas; + END IF; + END + $$; + SQL + volumeMounts: + - name: work + mountPath: /work diff --git a/services/comms/mas-db-ensure-rbac.yaml b/services/comms/mas-db-ensure-rbac.yaml new file mode 100644 index 0000000..fe075d6 --- /dev/null +++ b/services/comms/mas-db-ensure-rbac.yaml @@ -0,0 +1,56 @@ +# 
services/comms/mas-db-ensure-rbac.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: mas-db-ensure + namespace: comms +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: mas-db-ensure-postgres + namespace: postgres +rules: + - apiGroups: [""] + resources: ["secrets"] + resourceNames: ["postgres-auth"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: mas-db-ensure-postgres + namespace: postgres +subjects: + - kind: ServiceAccount + name: mas-db-ensure + namespace: comms +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: mas-db-ensure-postgres +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: mas-db-ensure-comms + namespace: comms +rules: + - apiGroups: [""] + resources: ["secrets"] + resourceNames: ["mas-db"] + verbs: ["get", "create", "patch", "update"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: mas-db-ensure-comms + namespace: comms +subjects: + - kind: ServiceAccount + name: mas-db-ensure + namespace: comms +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: mas-db-ensure-comms -- 2.47.2 From c8fc1dd10a3fb0760b3c1e8ac867f6c14aad3f5e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 02:47:47 -0300 Subject: [PATCH 530/684] comms: fix mas db ensure rbac --- services/comms/mas-db-ensure-job.yaml | 2 +- services/comms/mas-db-ensure-rbac.yaml | 45 ++++++-------------------- 2 files changed, 10 insertions(+), 37 deletions(-) diff --git a/services/comms/mas-db-ensure-job.yaml b/services/comms/mas-db-ensure-job.yaml index f4b4653..9ac32c2 100644 --- a/services/comms/mas-db-ensure-job.yaml +++ b/services/comms/mas-db-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-db-ensure-2 + name: mas-db-ensure-3 namespace: comms spec: backoffLimit: 2 diff --git a/services/comms/mas-db-ensure-rbac.yaml b/services/comms/mas-db-ensure-rbac.yaml 
index fe075d6..bbf5224 100644 --- a/services/comms/mas-db-ensure-rbac.yaml +++ b/services/comms/mas-db-ensure-rbac.yaml @@ -6,51 +6,24 @@ metadata: namespace: comms --- apiVersion: rbac.authorization.k8s.io/v1 -kind: Role +kind: ClusterRole metadata: - name: mas-db-ensure-postgres - namespace: postgres + name: mas-db-ensure rules: - apiGroups: [""] resources: ["secrets"] - resourceNames: ["postgres-auth"] - verbs: ["get"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: mas-db-ensure-postgres - namespace: postgres -subjects: - - kind: ServiceAccount - name: mas-db-ensure - namespace: comms -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: mas-db-ensure-postgres ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: mas-db-ensure-comms - namespace: comms -rules: - - apiGroups: [""] - resources: ["secrets"] - resourceNames: ["mas-db"] + resourceNames: ["postgres-auth", "mas-db"] verbs: ["get", "create", "patch", "update"] --- apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding +kind: ClusterRoleBinding metadata: - name: mas-db-ensure-comms - namespace: comms + name: mas-db-ensure +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: mas-db-ensure subjects: - kind: ServiceAccount name: mas-db-ensure namespace: comms -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: mas-db-ensure-comms -- 2.47.2 From ef064ed2bb2ac4ea62c947e4906b5dc70843ac2f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 02:53:53 -0300 Subject: [PATCH 531/684] comms: bootstrap mas db secret --- services/comms/mas-db-ensure-job.yaml | 14 ++++++++++++-- services/comms/mas-db-ensure-rbac.yaml | 4 ++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/services/comms/mas-db-ensure-job.yaml b/services/comms/mas-db-ensure-job.yaml index 9ac32c2..71a9dad 100644 --- a/services/comms/mas-db-ensure-job.yaml +++ b/services/comms/mas-db-ensure-job.yaml @@ -2,7 
+2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-db-ensure-3 + name: mas-db-ensure-4 namespace: comms spec: backoffLimit: 2 @@ -21,7 +21,17 @@ spec: - | set -euo pipefail umask 077 - kubectl -n postgres get secret postgres-auth -o jsonpath='{.data.POSTGRES_PASSWORD}' | base64 -d > /work/postgres_password + if kubectl -n postgres get secret postgres-auth >/dev/null 2>&1; then + kubectl -n postgres get secret postgres-auth -o jsonpath='{.data.POSTGRES_PASSWORD}' | base64 -d > /work/postgres_password + else + POSTGRES_PASS="$(kubectl -n postgres get statefulset postgres -o jsonpath='{.spec.template.spec.containers[0].env[?(@.name=="POSTGRES_PASSWORD")].value}')" + if [ -z "${POSTGRES_PASS}" ]; then + echo "postgres password not found in statefulset env" >&2 + exit 1 + fi + printf '%s' "${POSTGRES_PASS}" > /work/postgres_password + kubectl -n postgres create secret generic postgres-auth --from-file=POSTGRES_PASSWORD=/work/postgres_password >/dev/null + fi if kubectl -n comms get secret mas-db >/dev/null 2>&1; then kubectl -n comms get secret mas-db -o jsonpath='{.data.password}' | base64 -d > /work/mas_password else diff --git a/services/comms/mas-db-ensure-rbac.yaml b/services/comms/mas-db-ensure-rbac.yaml index bbf5224..547b29b 100644 --- a/services/comms/mas-db-ensure-rbac.yaml +++ b/services/comms/mas-db-ensure-rbac.yaml @@ -14,6 +14,10 @@ rules: resources: ["secrets"] resourceNames: ["postgres-auth", "mas-db"] verbs: ["get", "create", "patch", "update"] + - apiGroups: ["apps"] + resources: ["statefulsets"] + resourceNames: ["postgres"] + verbs: ["get"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding -- 2.47.2 From 72d4766d68256273180a0796e16d67f8d80d2bf7 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:00:19 -0300 Subject: [PATCH 532/684] comms: stabilize mas db job --- services/comms/mas-db-ensure-job.yaml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git 
a/services/comms/mas-db-ensure-job.yaml b/services/comms/mas-db-ensure-job.yaml index 71a9dad..9a8cebd 100644 --- a/services/comms/mas-db-ensure-job.yaml +++ b/services/comms/mas-db-ensure-job.yaml @@ -2,10 +2,11 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-db-ensure-4 + name: mas-db-ensure-6 namespace: comms spec: - backoffLimit: 2 + backoffLimit: 0 + ttlSecondsAfterFinished: 600 template: spec: serviceAccountName: mas-db-ensure @@ -19,8 +20,9 @@ spec: command: ["/bin/sh", "-c"] args: - | - set -euo pipefail + set -eu umask 077 + echo "ensuring postgres auth secret" if kubectl -n postgres get secret postgres-auth >/dev/null 2>&1; then kubectl -n postgres get secret postgres-auth -o jsonpath='{.data.POSTGRES_PASSWORD}' | base64 -d > /work/postgres_password else @@ -32,6 +34,7 @@ spec: printf '%s' "${POSTGRES_PASS}" > /work/postgres_password kubectl -n postgres create secret generic postgres-auth --from-file=POSTGRES_PASSWORD=/work/postgres_password >/dev/null fi + echo "ensuring mas db secret" if kubectl -n comms get secret mas-db >/dev/null 2>&1; then kubectl -n comms get secret mas-db -o jsonpath='{.data.password}' | base64 -d > /work/mas_password else @@ -56,9 +59,10 @@ spec: command: ["/bin/sh", "-c"] args: - | - set -euo pipefail + set -eu export PGPASSWORD="$(cat /work/postgres_password)" MAS_PASS="$(cat /work/mas_password)" + echo "ensuring mas role/database" psql -v ON_ERROR_STOP=1 -v mas_pass="${MAS_PASS}" <<'SQL' DO $$ BEGIN -- 2.47.2 From 0250de86368a9bff1267e9badaf0083cbc568373 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:04:33 -0300 Subject: [PATCH 533/684] comms: ensure mas db via postgres exec --- services/comms/mas-db-ensure-job.yaml | 62 ++++++-------------------- services/comms/mas-db-ensure-rbac.yaml | 12 +++-- 2 files changed, 22 insertions(+), 52 deletions(-) diff --git a/services/comms/mas-db-ensure-job.yaml b/services/comms/mas-db-ensure-job.yaml index 9a8cebd..a56b76f 100644 --- 
a/services/comms/mas-db-ensure-job.yaml +++ b/services/comms/mas-db-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-db-ensure-6 + name: mas-db-ensure-7 namespace: comms spec: backoffLimit: 0 @@ -11,59 +11,28 @@ spec: spec: serviceAccountName: mas-db-ensure restartPolicy: OnFailure - volumes: - - name: work - emptyDir: {} - initContainers: - - name: prepare + containers: + - name: ensure image: bitnami/kubectl:latest command: ["/bin/sh", "-c"] args: - | set -eu umask 077 - echo "ensuring postgres auth secret" - if kubectl -n postgres get secret postgres-auth >/dev/null 2>&1; then - kubectl -n postgres get secret postgres-auth -o jsonpath='{.data.POSTGRES_PASSWORD}' | base64 -d > /work/postgres_password - else - POSTGRES_PASS="$(kubectl -n postgres get statefulset postgres -o jsonpath='{.spec.template.spec.containers[0].env[?(@.name=="POSTGRES_PASSWORD")].value}')" - if [ -z "${POSTGRES_PASS}" ]; then - echo "postgres password not found in statefulset env" >&2 - exit 1 - fi - printf '%s' "${POSTGRES_PASS}" > /work/postgres_password - kubectl -n postgres create secret generic postgres-auth --from-file=POSTGRES_PASSWORD=/work/postgres_password >/dev/null - fi - echo "ensuring mas db secret" if kubectl -n comms get secret mas-db >/dev/null 2>&1; then - kubectl -n comms get secret mas-db -o jsonpath='{.data.password}' | base64 -d > /work/mas_password + MAS_PASS="$(kubectl -n comms get secret mas-db -o jsonpath='{.data.password}' | base64 -d)" else - head -c 32 /dev/urandom | base64 | tr -d '\n' > /work/mas_password - kubectl -n comms create secret generic mas-db --from-file=password=/work/mas_password >/dev/null + MAS_PASS="$(head -c 32 /dev/urandom | base64 | tr -d '\n')" + kubectl -n comms create secret generic mas-db --from-literal=password="${MAS_PASS}" >/dev/null fi - volumeMounts: - - name: work - mountPath: /work - containers: - - name: ensure - image: postgres:15 - env: - - name: PGHOST - value: 
postgres-service.postgres.svc.cluster.local - - name: PGPORT - value: "5432" - - name: PGDATABASE - value: postgres - - name: PGUSER - value: postgres - command: ["/bin/sh", "-c"] - args: - - | - set -eu - export PGPASSWORD="$(cat /work/postgres_password)" - MAS_PASS="$(cat /work/mas_password)" - echo "ensuring mas role/database" - psql -v ON_ERROR_STOP=1 -v mas_pass="${MAS_PASS}" <<'SQL' + + POD_NAME="postgres-0" + if ! kubectl -n postgres get pod "${POD_NAME}" >/dev/null 2>&1; then + echo "postgres pod ${POD_NAME} not found" >&2 + exit 1 + fi + + kubectl -n postgres exec -i "${POD_NAME}" -- psql -U postgres -d postgres -v ON_ERROR_STOP=1 -v mas_pass="${MAS_PASS}" <<'SQL' DO $$ BEGIN IF NOT EXISTS (SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = 'mas') THEN @@ -81,6 +50,3 @@ spec: END $$; SQL - volumeMounts: - - name: work - mountPath: /work diff --git a/services/comms/mas-db-ensure-rbac.yaml b/services/comms/mas-db-ensure-rbac.yaml index 547b29b..39aa898 100644 --- a/services/comms/mas-db-ensure-rbac.yaml +++ b/services/comms/mas-db-ensure-rbac.yaml @@ -12,12 +12,16 @@ metadata: rules: - apiGroups: [""] resources: ["secrets"] - resourceNames: ["postgres-auth", "mas-db"] + resourceNames: ["mas-db"] verbs: ["get", "create", "patch", "update"] - - apiGroups: ["apps"] - resources: ["statefulsets"] - resourceNames: ["postgres"] + - apiGroups: [""] + resources: ["pods"] + resourceNames: ["postgres-0"] verbs: ["get"] + - apiGroups: [""] + resources: ["pods/exec"] + resourceNames: ["postgres-0"] + verbs: ["create"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding -- 2.47.2 From e18accc0992000a49e2e1e1804b83bc43676a572 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:06:34 -0300 Subject: [PATCH 534/684] comms: allow postgres exec for mas db --- services/comms/mas-db-ensure-job.yaml | 8 ++++---- services/comms/mas-db-ensure-rbac.yaml | 4 +--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git 
a/services/comms/mas-db-ensure-job.yaml b/services/comms/mas-db-ensure-job.yaml index a56b76f..8e6666d 100644 --- a/services/comms/mas-db-ensure-job.yaml +++ b/services/comms/mas-db-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-db-ensure-7 + name: mas-db-ensure-8 namespace: comms spec: backoffLimit: 0 @@ -26,9 +26,9 @@ spec: kubectl -n comms create secret generic mas-db --from-literal=password="${MAS_PASS}" >/dev/null fi - POD_NAME="postgres-0" - if ! kubectl -n postgres get pod "${POD_NAME}" >/dev/null 2>&1; then - echo "postgres pod ${POD_NAME} not found" >&2 + POD_NAME="$(kubectl -n postgres get pods -l app=postgres -o jsonpath='{.items[0].metadata.name}')" + if [ -z "${POD_NAME}" ]; then + echo "postgres pod not found" >&2 exit 1 fi diff --git a/services/comms/mas-db-ensure-rbac.yaml b/services/comms/mas-db-ensure-rbac.yaml index 39aa898..d65cd97 100644 --- a/services/comms/mas-db-ensure-rbac.yaml +++ b/services/comms/mas-db-ensure-rbac.yaml @@ -16,11 +16,9 @@ rules: verbs: ["get", "create", "patch", "update"] - apiGroups: [""] resources: ["pods"] - resourceNames: ["postgres-0"] - verbs: ["get"] + verbs: ["get", "list"] - apiGroups: [""] resources: ["pods/exec"] - resourceNames: ["postgres-0"] verbs: ["create"] --- apiVersion: rbac.authorization.k8s.io/v1 -- 2.47.2 From 8950306c5346c48884ae497c61a5eb8e40e868a5 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:09:27 -0300 Subject: [PATCH 535/684] comms: keep mas db job logs on failure --- services/comms/mas-db-ensure-job.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/services/comms/mas-db-ensure-job.yaml b/services/comms/mas-db-ensure-job.yaml index 8e6666d..9e5cf3b 100644 --- a/services/comms/mas-db-ensure-job.yaml +++ b/services/comms/mas-db-ensure-job.yaml @@ -2,15 +2,15 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-db-ensure-8 + name: mas-db-ensure-9 namespace: comms spec: - backoffLimit: 0 + backoffLimit: 1 
ttlSecondsAfterFinished: 600 template: spec: serviceAccountName: mas-db-ensure - restartPolicy: OnFailure + restartPolicy: Never containers: - name: ensure image: bitnami/kubectl:latest @@ -18,6 +18,7 @@ spec: args: - | set -eu + trap 'echo "mas-db-ensure failed"; sleep 300' ERR umask 077 if kubectl -n comms get secret mas-db >/dev/null 2>&1; then MAS_PASS="$(kubectl -n comms get secret mas-db -o jsonpath='{.data.password}' | base64 -d)" -- 2.47.2 From df5a5127f124c58442b7b722de24c234c068f8e9 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:12:16 -0300 Subject: [PATCH 536/684] comms: add mas db secret stub --- services/comms/kustomization.yaml | 1 + services/comms/mas-db-ensure-job.yaml | 11 +++++++---- services/comms/mas-db-ensure-rbac.yaml | 2 +- services/comms/mas-db-secret.yaml | 7 +++++++ 4 files changed, 16 insertions(+), 5 deletions(-) create mode 100644 services/comms/mas-db-secret.yaml diff --git a/services/comms/kustomization.yaml b/services/comms/kustomization.yaml index b08f6db..24e153c 100644 --- a/services/comms/kustomization.yaml +++ b/services/comms/kustomization.yaml @@ -12,6 +12,7 @@ resources: - mas-admin-client-secret-ensure-job.yaml - mas-secrets-ensure-rbac.yaml - mas-db-ensure-rbac.yaml + - mas-db-secret.yaml - mas-db-ensure-job.yaml - mas-deployment.yaml - element-rendered.yaml diff --git a/services/comms/mas-db-ensure-job.yaml b/services/comms/mas-db-ensure-job.yaml index 9e5cf3b..6a31080 100644 --- a/services/comms/mas-db-ensure-job.yaml +++ b/services/comms/mas-db-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-db-ensure-9 + name: mas-db-ensure-10 namespace: comms spec: backoffLimit: 1 @@ -20,11 +20,14 @@ spec: set -eu trap 'echo "mas-db-ensure failed"; sleep 300' ERR umask 077 - if kubectl -n comms get secret mas-db >/dev/null 2>&1; then - MAS_PASS="$(kubectl -n comms get secret mas-db -o jsonpath='{.data.password}' | base64 -d)" + EXISTING_B64="$(kubectl -n comms get secret mas-db 
-o jsonpath='{.data.password}' 2>/dev/null || true)" + if [ -n "${EXISTING_B64}" ]; then + MAS_PASS="$(printf '%s' "${EXISTING_B64}" | base64 -d)" else MAS_PASS="$(head -c 32 /dev/urandom | base64 | tr -d '\n')" - kubectl -n comms create secret generic mas-db --from-literal=password="${MAS_PASS}" >/dev/null + MAS_B64="$(printf '%s' "${MAS_PASS}" | base64 | tr -d '\n')" + payload="$(printf '{"data":{"password":"%s"}}' "${MAS_B64}")" + kubectl -n comms patch secret mas-db --type=merge -p "${payload}" >/dev/null fi POD_NAME="$(kubectl -n postgres get pods -l app=postgres -o jsonpath='{.items[0].metadata.name}')" diff --git a/services/comms/mas-db-ensure-rbac.yaml b/services/comms/mas-db-ensure-rbac.yaml index d65cd97..06522b9 100644 --- a/services/comms/mas-db-ensure-rbac.yaml +++ b/services/comms/mas-db-ensure-rbac.yaml @@ -13,7 +13,7 @@ rules: - apiGroups: [""] resources: ["secrets"] resourceNames: ["mas-db"] - verbs: ["get", "create", "patch", "update"] + verbs: ["get", "patch", "update"] - apiGroups: [""] resources: ["pods"] verbs: ["get", "list"] diff --git a/services/comms/mas-db-secret.yaml b/services/comms/mas-db-secret.yaml new file mode 100644 index 0000000..21b408d --- /dev/null +++ b/services/comms/mas-db-secret.yaml @@ -0,0 +1,7 @@ +# services/comms/mas-db-secret.yaml +apiVersion: v1 +kind: Secret +metadata: + name: mas-db + namespace: comms +type: Opaque -- 2.47.2 From 3d2f04d67218c87d175c9f210627d87ca8f7bf29 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:15:25 -0300 Subject: [PATCH 537/684] comms: fix mas db psql exec --- services/comms/mas-db-ensure-job.yaml | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/services/comms/mas-db-ensure-job.yaml b/services/comms/mas-db-ensure-job.yaml index 6a31080..f5f95f7 100644 --- a/services/comms/mas-db-ensure-job.yaml +++ b/services/comms/mas-db-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-db-ensure-10 + name: 
mas-db-ensure-11 namespace: comms spec: backoffLimit: 1 @@ -36,21 +36,6 @@ spec: exit 1 fi - kubectl -n postgres exec -i "${POD_NAME}" -- psql -U postgres -d postgres -v ON_ERROR_STOP=1 -v mas_pass="${MAS_PASS}" <<'SQL' - DO $$ - BEGIN - IF NOT EXISTS (SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = 'mas') THEN - EXECUTE format('CREATE ROLE mas LOGIN PASSWORD %L', :mas_pass); - ELSE - EXECUTE format('ALTER ROLE mas WITH PASSWORD %L', :mas_pass); - END IF; - END - $$; - DO $$ - BEGIN - IF NOT EXISTS (SELECT 1 FROM pg_database WHERE datname = 'mas') THEN - CREATE DATABASE mas OWNER mas; - END IF; - END - $$; - SQL + kubectl -n postgres exec -i "${POD_NAME}" -- psql -U postgres -d postgres -v ON_ERROR_STOP=1 -v mas_pass="${MAS_PASS}" \ + -c "DO \\$\\$ BEGIN IF NOT EXISTS (SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = 'mas') THEN EXECUTE format('CREATE ROLE mas LOGIN PASSWORD %L', :'mas_pass'); ELSE EXECUTE format('ALTER ROLE mas WITH PASSWORD %L', :'mas_pass'); END IF; END \\$\\$;" \ + -c "DO \\$\\$ BEGIN IF NOT EXISTS (SELECT 1 FROM pg_database WHERE datname = 'mas') THEN CREATE DATABASE mas OWNER mas; END IF; END \\$\\$;" -- 2.47.2 From 898a33d8ee99db79476db125b5601b445b6fe2eb Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:18:03 -0300 Subject: [PATCH 538/684] comms: simplify mas db creation --- services/comms/mas-db-ensure-job.yaml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/services/comms/mas-db-ensure-job.yaml b/services/comms/mas-db-ensure-job.yaml index f5f95f7..f0237f6 100644 --- a/services/comms/mas-db-ensure-job.yaml +++ b/services/comms/mas-db-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-db-ensure-11 + name: mas-db-ensure-12 namespace: comms spec: backoffLimit: 1 @@ -36,6 +36,9 @@ spec: exit 1 fi - kubectl -n postgres exec -i "${POD_NAME}" -- psql -U postgres -d postgres -v ON_ERROR_STOP=1 -v mas_pass="${MAS_PASS}" \ - -c "DO \\$\\$ BEGIN IF NOT EXISTS (SELECT 
1 FROM pg_catalog.pg_roles WHERE rolname = 'mas') THEN EXECUTE format('CREATE ROLE mas LOGIN PASSWORD %L', :'mas_pass'); ELSE EXECUTE format('ALTER ROLE mas WITH PASSWORD %L', :'mas_pass'); END IF; END \\$\\$;" \ - -c "DO \\$\\$ BEGIN IF NOT EXISTS (SELECT 1 FROM pg_database WHERE datname = 'mas') THEN CREATE DATABASE mas OWNER mas; END IF; END \\$\\$;" + kubectl -n postgres exec -i "${POD_NAME}" -- psql -U postgres -d postgres -v mas_pass="${MAS_PASS}" \ + -c "CREATE ROLE mas LOGIN PASSWORD :'mas_pass';" || true + kubectl -n postgres exec -i "${POD_NAME}" -- psql -U postgres -d postgres -v mas_pass="${MAS_PASS}" \ + -c "ALTER ROLE mas WITH PASSWORD :'mas_pass';" + kubectl -n postgres exec -i "${POD_NAME}" -- psql -U postgres -d postgres \ + -c "CREATE DATABASE mas OWNER mas;" || true -- 2.47.2 From e384a9e417da8881d78571e4105ea0bbb6721c6a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:20:28 -0300 Subject: [PATCH 539/684] comms: avoid psql vars for mas --- services/comms/mas-db-ensure-job.yaml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/services/comms/mas-db-ensure-job.yaml b/services/comms/mas-db-ensure-job.yaml index f0237f6..5d30268 100644 --- a/services/comms/mas-db-ensure-job.yaml +++ b/services/comms/mas-db-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-db-ensure-12 + name: mas-db-ensure-13 namespace: comms spec: backoffLimit: 1 @@ -36,9 +36,10 @@ spec: exit 1 fi - kubectl -n postgres exec -i "${POD_NAME}" -- psql -U postgres -d postgres -v mas_pass="${MAS_PASS}" \ - -c "CREATE ROLE mas LOGIN PASSWORD :'mas_pass';" || true - kubectl -n postgres exec -i "${POD_NAME}" -- psql -U postgres -d postgres -v mas_pass="${MAS_PASS}" \ - -c "ALTER ROLE mas WITH PASSWORD :'mas_pass';" + MAS_PASS_SQL="$(printf '%s' "${MAS_PASS}" | sed "s/'/''/g")" + kubectl -n postgres exec -i "${POD_NAME}" -- psql -U postgres -d postgres \ + -c "CREATE ROLE mas LOGIN PASSWORD '${MAS_PASS_SQL}';" 
|| true + kubectl -n postgres exec -i "${POD_NAME}" -- psql -U postgres -d postgres \ + -c "ALTER ROLE mas WITH PASSWORD '${MAS_PASS_SQL}';" kubectl -n postgres exec -i "${POD_NAME}" -- psql -U postgres -d postgres \ -c "CREATE DATABASE mas OWNER mas;" || true -- 2.47.2 From 05c2d245b9b5e3fb3b42836101b1a53842f78a94 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:23:09 -0300 Subject: [PATCH 540/684] comms: ensure mas password is url-safe --- services/comms/mas-db-ensure-job.yaml | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/services/comms/mas-db-ensure-job.yaml b/services/comms/mas-db-ensure-job.yaml index 5d30268..4a10fcd 100644 --- a/services/comms/mas-db-ensure-job.yaml +++ b/services/comms/mas-db-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-db-ensure-13 + name: mas-db-ensure-14 namespace: comms spec: backoffLimit: 1 @@ -20,11 +20,21 @@ spec: set -eu trap 'echo "mas-db-ensure failed"; sleep 300' ERR umask 077 + safe_pass() { + head -c 32 /dev/urandom | base64 | tr -d '\n' | tr '+/' '-_' | tr -d '=' + } + EXISTING_B64="$(kubectl -n comms get secret mas-db -o jsonpath='{.data.password}' 2>/dev/null || true)" if [ -n "${EXISTING_B64}" ]; then MAS_PASS="$(printf '%s' "${EXISTING_B64}" | base64 -d)" + if printf '%s' "${MAS_PASS}" | grep -Eq '[^A-Za-z0-9_-]'; then + MAS_PASS="$(safe_pass)" + MAS_B64="$(printf '%s' "${MAS_PASS}" | base64 | tr -d '\n')" + payload="$(printf '{"data":{"password":"%s"}}' "${MAS_B64}")" + kubectl -n comms patch secret mas-db --type=merge -p "${payload}" >/dev/null + fi else - MAS_PASS="$(head -c 32 /dev/urandom | base64 | tr -d '\n')" + MAS_PASS="$(safe_pass)" MAS_B64="$(printf '%s' "${MAS_PASS}" | base64 | tr -d '\n')" payload="$(printf '{"data":{"password":"%s"}}' "${MAS_B64}")" kubectl -n comms patch secret mas-db --type=merge -p "${payload}" >/dev/null -- 2.47.2 From 4f462b8fa7823be47b1c364afaf0a65f2c7099ea Mon Sep 17 00:00:00 2001 From: Brad 
Stein Date: Thu, 8 Jan 2026 03:26:14 -0300 Subject: [PATCH 541/684] comms: verify mas db login --- services/comms/mas-db-ensure-job.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/services/comms/mas-db-ensure-job.yaml b/services/comms/mas-db-ensure-job.yaml index 4a10fcd..92252a2 100644 --- a/services/comms/mas-db-ensure-job.yaml +++ b/services/comms/mas-db-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-db-ensure-14 + name: mas-db-ensure-15 namespace: comms spec: backoffLimit: 1 @@ -53,3 +53,5 @@ spec: -c "ALTER ROLE mas WITH PASSWORD '${MAS_PASS_SQL}';" kubectl -n postgres exec -i "${POD_NAME}" -- psql -U postgres -d postgres \ -c "CREATE DATABASE mas OWNER mas;" || true + kubectl -n postgres exec -i "${POD_NAME}" -- /bin/sh -c \ + "PGPASSWORD='${MAS_PASS_SQL}' psql -U mas -d mas -c 'select 1;'" -- 2.47.2 From bebb87fcf845f5593a90243485bb4ce0066e8184 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:28:22 -0300 Subject: [PATCH 542/684] comms: restart mas after db sync --- services/comms/mas-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/comms/mas-deployment.yaml b/services/comms/mas-deployment.yaml index d46de89..b29f0a8 100644 --- a/services/comms/mas-deployment.yaml +++ b/services/comms/mas-deployment.yaml @@ -13,7 +13,7 @@ spec: template: metadata: annotations: - checksum/config: v5-adminapi-2 + checksum/config: v5-adminapi-3 labels: app: matrix-authentication-service spec: -- 2.47.2 From 4db5ff68ebeb6bf8c92d1305eb477f37ef63c1e0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:31:19 -0300 Subject: [PATCH 543/684] comms: let mas db secret be job-owned --- services/comms/kustomization.yaml | 1 - services/comms/mas-db-ensure-job.yaml | 6 ++---- services/comms/mas-db-ensure-rbac.yaml | 3 +++ 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/services/comms/kustomization.yaml b/services/comms/kustomization.yaml 
index 24e153c..b08f6db 100644 --- a/services/comms/kustomization.yaml +++ b/services/comms/kustomization.yaml @@ -12,7 +12,6 @@ resources: - mas-admin-client-secret-ensure-job.yaml - mas-secrets-ensure-rbac.yaml - mas-db-ensure-rbac.yaml - - mas-db-secret.yaml - mas-db-ensure-job.yaml - mas-deployment.yaml - element-rendered.yaml diff --git a/services/comms/mas-db-ensure-job.yaml b/services/comms/mas-db-ensure-job.yaml index 92252a2..1c8b5c4 100644 --- a/services/comms/mas-db-ensure-job.yaml +++ b/services/comms/mas-db-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-db-ensure-15 + name: mas-db-ensure-16 namespace: comms spec: backoffLimit: 1 @@ -35,9 +35,7 @@ spec: fi else MAS_PASS="$(safe_pass)" - MAS_B64="$(printf '%s' "${MAS_PASS}" | base64 | tr -d '\n')" - payload="$(printf '{"data":{"password":"%s"}}' "${MAS_B64}")" - kubectl -n comms patch secret mas-db --type=merge -p "${payload}" >/dev/null + kubectl -n comms create secret generic mas-db --from-literal=password="${MAS_PASS}" >/dev/null fi POD_NAME="$(kubectl -n postgres get pods -l app=postgres -o jsonpath='{.items[0].metadata.name}')" diff --git a/services/comms/mas-db-ensure-rbac.yaml b/services/comms/mas-db-ensure-rbac.yaml index 06522b9..19691d7 100644 --- a/services/comms/mas-db-ensure-rbac.yaml +++ b/services/comms/mas-db-ensure-rbac.yaml @@ -14,6 +14,9 @@ rules: resources: ["secrets"] resourceNames: ["mas-db"] verbs: ["get", "patch", "update"] + - apiGroups: [""] + resources: ["secrets"] + verbs: ["create"] - apiGroups: [""] resources: ["pods"] verbs: ["get", "list"] -- 2.47.2 From 39d8c9e687d2075086cb458a336124b2b8821949 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:33:14 -0300 Subject: [PATCH 544/684] comms: restart mas after secret fix --- services/comms/mas-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/comms/mas-deployment.yaml b/services/comms/mas-deployment.yaml index b29f0a8..6176ea0 100644 
--- a/services/comms/mas-deployment.yaml +++ b/services/comms/mas-deployment.yaml @@ -13,7 +13,7 @@ spec: template: metadata: annotations: - checksum/config: v5-adminapi-3 + checksum/config: v5-adminapi-4 labels: app: matrix-authentication-service spec: -- 2.47.2 From 072af083bc14c9e2c68affc1a46093573a7e13a8 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:35:40 -0300 Subject: [PATCH 545/684] sso: fix mas encryption secret --- services/keycloak/mas-secrets-ensure-job.yaml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/services/keycloak/mas-secrets-ensure-job.yaml b/services/keycloak/mas-secrets-ensure-job.yaml index 60d6d15..88a4ef4 100644 --- a/services/keycloak/mas-secrets-ensure-job.yaml +++ b/services/keycloak/mas-secrets-ensure-job.yaml @@ -8,7 +8,7 @@ metadata: apiVersion: batch/v1 kind: Job metadata: - name: mas-secrets-ensure-8 + name: mas-secrets-ensure-9 namespace: sso spec: backoffLimit: 0 @@ -64,7 +64,7 @@ spec: fi printf '%s' "$CLIENT_SECRET" > /work/keycloak_client_secret - openssl rand -base64 32 > /work/encryption + openssl rand -hex 32 > /work/encryption openssl rand -hex 32 > /work/matrix_shared_secret openssl genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:4096 -out /work/rsa_key >/dev/null 2>&1 chmod 0644 /work/* @@ -89,14 +89,19 @@ spec: args: - | set -euo pipefail + current="" if kubectl -n comms get secret mas-secrets-runtime >/dev/null 2>&1; then - exit 0 + current="$(kubectl -n comms get secret mas-secrets-runtime -o jsonpath='{.data.encryption}' | base64 -d 2>/dev/null || true)" + if printf '%s' "${current}" | grep -Eq '^[0-9a-fA-F]{64}$'; then + exit 0 + fi fi kubectl -n comms create secret generic mas-secrets-runtime \ --from-file=encryption=/work/encryption \ --from-file=matrix_shared_secret=/work/matrix_shared_secret \ --from-file=keycloak_client_secret=/work/keycloak_client_secret \ - --from-file=rsa_key=/work/rsa_key >/dev/null + --from-file=rsa_key=/work/rsa_key \ + --dry-run=client 
-o yaml | kubectl -n comms apply -f - >/dev/null volumeMounts: - name: work mountPath: /work -- 2.47.2 From c1e74c1001a05b055d3391368c751b297c1ac840 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:36:33 -0300 Subject: [PATCH 546/684] comms: restart mas after encryption fix --- services/comms/mas-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/comms/mas-deployment.yaml b/services/comms/mas-deployment.yaml index 6176ea0..0a0a257 100644 --- a/services/comms/mas-deployment.yaml +++ b/services/comms/mas-deployment.yaml @@ -13,7 +13,7 @@ spec: template: metadata: annotations: - checksum/config: v5-adminapi-4 + checksum/config: v5-adminapi-5 labels: app: matrix-authentication-service spec: -- 2.47.2 From 04817691c6d9fcb4a975d982d0a2531d413df058 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:38:51 -0300 Subject: [PATCH 547/684] sso: strip mas secret newlines --- services/keycloak/mas-secrets-ensure-job.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/keycloak/mas-secrets-ensure-job.yaml b/services/keycloak/mas-secrets-ensure-job.yaml index 88a4ef4..230473c 100644 --- a/services/keycloak/mas-secrets-ensure-job.yaml +++ b/services/keycloak/mas-secrets-ensure-job.yaml @@ -8,7 +8,7 @@ metadata: apiVersion: batch/v1 kind: Job metadata: - name: mas-secrets-ensure-9 + name: mas-secrets-ensure-10 namespace: sso spec: backoffLimit: 0 @@ -64,8 +64,8 @@ spec: fi printf '%s' "$CLIENT_SECRET" > /work/keycloak_client_secret - openssl rand -hex 32 > /work/encryption - openssl rand -hex 32 > /work/matrix_shared_secret + openssl rand -hex 32 | tr -d '\n' > /work/encryption + openssl rand -hex 32 | tr -d '\n' > /work/matrix_shared_secret openssl genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:4096 -out /work/rsa_key >/dev/null 2>&1 chmod 0644 /work/* env: -- 2.47.2 From f8d172c5a2600b28ca57a8471d145d17a3ac151e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 
03:39:46 -0300 Subject: [PATCH 548/684] comms: restart mas after secret regen --- services/comms/mas-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/comms/mas-deployment.yaml b/services/comms/mas-deployment.yaml index 0a0a257..b68fba5 100644 --- a/services/comms/mas-deployment.yaml +++ b/services/comms/mas-deployment.yaml @@ -13,7 +13,7 @@ spec: template: metadata: annotations: - checksum/config: v5-adminapi-5 + checksum/config: v5-adminapi-6 labels: app: matrix-authentication-service spec: -- 2.47.2 From 8d1284412fec7b987098aab5b6456c51af5b1983 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:43:06 -0300 Subject: [PATCH 549/684] sso: validate mas encryption length --- services/keycloak/mas-secrets-ensure-job.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/services/keycloak/mas-secrets-ensure-job.yaml b/services/keycloak/mas-secrets-ensure-job.yaml index 230473c..2ba6104 100644 --- a/services/keycloak/mas-secrets-ensure-job.yaml +++ b/services/keycloak/mas-secrets-ensure-job.yaml @@ -8,7 +8,7 @@ metadata: apiVersion: batch/v1 kind: Job metadata: - name: mas-secrets-ensure-10 + name: mas-secrets-ensure-11 namespace: sso spec: backoffLimit: 0 @@ -92,7 +92,8 @@ spec: current="" if kubectl -n comms get secret mas-secrets-runtime >/dev/null 2>&1; then current="$(kubectl -n comms get secret mas-secrets-runtime -o jsonpath='{.data.encryption}' | base64 -d 2>/dev/null || true)" - if printf '%s' "${current}" | grep -Eq '^[0-9a-fA-F]{64}$'; then + current_len="$(printf '%s' "${current}" | wc -c | tr -d ' ')" + if [ "${current_len}" = "64" ] && printf '%s' "${current}" | grep -Eq '^[0-9a-fA-F]{64}$'; then exit 0 fi fi -- 2.47.2 From d3c3db612d983eff30baf396ede6ffaee3eefa97 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:44:54 -0300 Subject: [PATCH 550/684] sso: recheck mas encryption bytes --- services/keycloak/mas-secrets-ensure-job.yaml | 9 ++++----- 1 file changed, 4 
insertions(+), 5 deletions(-) diff --git a/services/keycloak/mas-secrets-ensure-job.yaml b/services/keycloak/mas-secrets-ensure-job.yaml index 2ba6104..b949e2e 100644 --- a/services/keycloak/mas-secrets-ensure-job.yaml +++ b/services/keycloak/mas-secrets-ensure-job.yaml @@ -8,7 +8,7 @@ metadata: apiVersion: batch/v1 kind: Job metadata: - name: mas-secrets-ensure-11 + name: mas-secrets-ensure-12 namespace: sso spec: backoffLimit: 0 @@ -89,11 +89,10 @@ spec: args: - | set -euo pipefail - current="" if kubectl -n comms get secret mas-secrets-runtime >/dev/null 2>&1; then - current="$(kubectl -n comms get secret mas-secrets-runtime -o jsonpath='{.data.encryption}' | base64 -d 2>/dev/null || true)" - current_len="$(printf '%s' "${current}" | wc -c | tr -d ' ')" - if [ "${current_len}" = "64" ] && printf '%s' "${current}" | grep -Eq '^[0-9a-fA-F]{64}$'; then + kubectl -n comms get secret mas-secrets-runtime -o jsonpath='{.data.encryption}' | base64 -d 2>/dev/null > /tmp/encryption.current || true + current_len="$(wc -c < /tmp/encryption.current | tr -d ' ')" + if [ "${current_len}" = "64" ] && grep -Eq '^[0-9a-fA-F]{64}$' /tmp/encryption.current; then exit 0 fi fi -- 2.47.2 From aa30a3482839281276b2f01572703db7ed47dd44 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:46:02 -0300 Subject: [PATCH 551/684] comms: restart mas after secret cleanup --- services/comms/mas-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/comms/mas-deployment.yaml b/services/comms/mas-deployment.yaml index b68fba5..2117c17 100644 --- a/services/comms/mas-deployment.yaml +++ b/services/comms/mas-deployment.yaml @@ -13,7 +13,7 @@ spec: template: metadata: annotations: - checksum/config: v5-adminapi-6 + checksum/config: v5-adminapi-7 labels: app: matrix-authentication-service spec: -- 2.47.2 From 76deb9a160d31607a39b6d30b5ea31702730586c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:53:49 -0300 Subject: [PATCH 552/684] 
comms: ensure core secrets and synapse oidc --- services/comms/comms-secrets-ensure-job.yaml | 102 ++++++++++++++++++ services/comms/comms-secrets-ensure-rbac.yaml | 34 ++++++ services/comms/kustomization.yaml | 2 + services/keycloak/kustomization.yaml | 1 + .../synapse-oidc-secret-ensure-job.yaml | 75 +++++++++++++ 5 files changed, 214 insertions(+) create mode 100644 services/comms/comms-secrets-ensure-job.yaml create mode 100644 services/comms/comms-secrets-ensure-rbac.yaml create mode 100644 services/keycloak/synapse-oidc-secret-ensure-job.yaml diff --git a/services/comms/comms-secrets-ensure-job.yaml b/services/comms/comms-secrets-ensure-job.yaml new file mode 100644 index 0000000..877649b --- /dev/null +++ b/services/comms/comms-secrets-ensure-job.yaml @@ -0,0 +1,102 @@ +# services/comms/comms-secrets-ensure-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: comms-secrets-ensure-1 + namespace: comms +spec: + backoffLimit: 1 + ttlSecondsAfterFinished: 3600 + template: + spec: + serviceAccountName: comms-secrets-ensure + restartPolicy: Never + containers: + - name: ensure + image: bitnami/kubectl:latest + command: ["/bin/sh", "-c"] + args: + - | + set -eu + trap 'echo "comms-secrets-ensure failed"; sleep 300' ERR + umask 077 + + safe_pass() { + head -c 32 /dev/urandom | base64 | tr -d '\n' | tr '+/' '-_' | tr -d '=' + } + + get_secret_value() { + ns="$1" + name="$2" + key="$3" + kubectl -n "${ns}" get secret "${name}" -o "jsonpath={.data.${key}}" 2>/dev/null | base64 -d 2>/dev/null || true + } + + ensure_secret_key() { + ns="$1" + name="$2" + key="$3" + value="$4" + if ! 
kubectl -n "${ns}" get secret "${name}" >/dev/null 2>&1; then + kubectl -n "${ns}" create secret generic "${name}" --from-literal="${key}=${value}" >/dev/null + return + fi + existing="$(kubectl -n "${ns}" get secret "${name}" -o "jsonpath={.data.${key}}" 2>/dev/null || true)" + if [ -z "${existing}" ]; then + b64="$(printf '%s' "${value}" | base64 | tr -d '\n')" + payload="$(printf '{"data":{"%s":"%s"}}' "${key}" "${b64}")" + kubectl -n "${ns}" patch secret "${name}" --type=merge -p "${payload}" >/dev/null + fi + } + + ensure_chat_secret() { + ns="$1" + if ! kubectl -n "${ns}" get secret chat-ai-keys-runtime >/dev/null 2>&1; then + kubectl -n "${ns}" create secret generic chat-ai-keys-runtime \ + --from-literal=matrix="${CHAT_KEY_MATRIX}" \ + --from-literal=homepage="${CHAT_KEY_HOMEPAGE}" >/dev/null + return + fi + ensure_secret_key "${ns}" chat-ai-keys-runtime matrix "${CHAT_KEY_MATRIX}" + ensure_secret_key "${ns}" chat-ai-keys-runtime homepage "${CHAT_KEY_HOMEPAGE}" + } + + CHAT_KEY_MATRIX="$(get_secret_value comms chat-ai-keys-runtime matrix)" + CHAT_KEY_HOMEPAGE="$(get_secret_value comms chat-ai-keys-runtime homepage)" + if [ -z "${CHAT_KEY_MATRIX}" ] || [ -z "${CHAT_KEY_HOMEPAGE}" ]; then + ALT_MATRIX="$(get_secret_value bstein-dev-home chat-ai-keys-runtime matrix)" + ALT_HOMEPAGE="$(get_secret_value bstein-dev-home chat-ai-keys-runtime homepage)" + [ -z "${CHAT_KEY_MATRIX}" ] && CHAT_KEY_MATRIX="${ALT_MATRIX}" + [ -z "${CHAT_KEY_HOMEPAGE}" ] && CHAT_KEY_HOMEPAGE="${ALT_HOMEPAGE}" + fi + [ -z "${CHAT_KEY_MATRIX}" ] && CHAT_KEY_MATRIX="$(safe_pass)" + [ -z "${CHAT_KEY_HOMEPAGE}" ] && CHAT_KEY_HOMEPAGE="$(safe_pass)" + + ensure_chat_secret comms + ensure_chat_secret bstein-dev-home + + ensure_secret_key comms turn-shared-secret TURN_STATIC_AUTH_SECRET "$(safe_pass)" + ensure_secret_key comms livekit-api primary "$(safe_pass)" + ensure_secret_key comms synapse-redis redis-password "$(safe_pass)" + ensure_secret_key comms synapse-macaroon macaroon_secret_key 
"$(safe_pass)" + ensure_secret_key comms atlasbot-credentials-runtime bot-password "$(safe_pass)" + ensure_secret_key comms atlasbot-credentials-runtime seeder-password "$(safe_pass)" + + SYN_PASS="$(get_secret_value comms synapse-db POSTGRES_PASSWORD)" + if [ -z "${SYN_PASS}" ]; then + SYN_PASS="$(safe_pass)" + kubectl -n comms create secret generic synapse-db --from-literal=POSTGRES_PASSWORD="${SYN_PASS}" >/dev/null + fi + + POD_NAME="$(kubectl -n postgres get pods -l app=postgres -o jsonpath='{.items[0].metadata.name}')" + if [ -z "${POD_NAME}" ]; then + echo "postgres pod not found" >&2 + exit 1 + fi + SYN_PASS_SQL="$(printf '%s' "${SYN_PASS}" | sed "s/'/''/g")" + kubectl -n postgres exec -i "${POD_NAME}" -- psql -U postgres -d postgres \ + -c "CREATE ROLE synapse LOGIN PASSWORD '${SYN_PASS_SQL}';" || true + kubectl -n postgres exec -i "${POD_NAME}" -- psql -U postgres -d postgres \ + -c "ALTER ROLE synapse WITH PASSWORD '${SYN_PASS_SQL}';" + kubectl -n postgres exec -i "${POD_NAME}" -- psql -U postgres -d postgres \ + -c "CREATE DATABASE synapse OWNER synapse;" || true diff --git a/services/comms/comms-secrets-ensure-rbac.yaml b/services/comms/comms-secrets-ensure-rbac.yaml new file mode 100644 index 0000000..dfb4f21 --- /dev/null +++ b/services/comms/comms-secrets-ensure-rbac.yaml @@ -0,0 +1,34 @@ +# services/comms/comms-secrets-ensure-rbac.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: comms-secrets-ensure + namespace: comms +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: comms-secrets-ensure +rules: + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "create", "patch", "update"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list"] + - apiGroups: [""] + resources: ["pods/exec"] + verbs: ["create"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: comms-secrets-ensure +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: 
comms-secrets-ensure +subjects: + - kind: ServiceAccount + name: comms-secrets-ensure + namespace: comms diff --git a/services/comms/kustomization.yaml b/services/comms/kustomization.yaml index b08f6db..f1e0c80 100644 --- a/services/comms/kustomization.yaml +++ b/services/comms/kustomization.yaml @@ -11,8 +11,10 @@ resources: - mas-configmap.yaml - mas-admin-client-secret-ensure-job.yaml - mas-secrets-ensure-rbac.yaml + - comms-secrets-ensure-rbac.yaml - mas-db-ensure-rbac.yaml - mas-db-ensure-job.yaml + - comms-secrets-ensure-job.yaml - mas-deployment.yaml - element-rendered.yaml - livekit-config.yaml diff --git a/services/keycloak/kustomization.yaml b/services/keycloak/kustomization.yaml index d6dd32e..e3d6513 100644 --- a/services/keycloak/kustomization.yaml +++ b/services/keycloak/kustomization.yaml @@ -17,6 +17,7 @@ resources: - ldap-federation-job.yaml - user-overrides-job.yaml - mas-secrets-ensure-job.yaml + - synapse-oidc-secret-ensure-job.yaml - service.yaml - ingress.yaml generatorOptions: diff --git a/services/keycloak/synapse-oidc-secret-ensure-job.yaml b/services/keycloak/synapse-oidc-secret-ensure-job.yaml new file mode 100644 index 0000000..6468fd8 --- /dev/null +++ b/services/keycloak/synapse-oidc-secret-ensure-job.yaml @@ -0,0 +1,75 @@ +# services/keycloak/synapse-oidc-secret-ensure-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: synapse-oidc-secret-ensure-1 + namespace: sso +spec: + backoffLimit: 0 + ttlSecondsAfterFinished: 3600 + template: + spec: + serviceAccountName: mas-secrets-ensure + restartPolicy: Never + containers: + - name: apply + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + apk add --no-cache curl jq >/dev/null + + KC_URL="http://keycloak.sso.svc.cluster.local" + ACCESS_TOKEN="" + for attempt in 1 2 3 4 5; do + TOKEN_JSON="$(curl -sS -X POST "$KC_URL/realms/master/protocol/openid-connect/token" \ + -H 'Content-Type: application/x-www-form-urlencoded' \ + -d "grant_type=password" \ + 
-d "client_id=admin-cli" \ + -d "username=${KEYCLOAK_ADMIN}" \ + -d "password=${KEYCLOAK_ADMIN_PASSWORD}" || true)" + ACCESS_TOKEN="$(echo "$TOKEN_JSON" | jq -r '.access_token' 2>/dev/null || true)" + if [ -n "$ACCESS_TOKEN" ] && [ "$ACCESS_TOKEN" != "null" ]; then + break + fi + echo "Keycloak token request failed (attempt ${attempt})" >&2 + sleep $((attempt * 2)) + done + if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" = "null" ]; then + echo "Failed to fetch Keycloak admin token" >&2 + exit 1 + fi + + CLIENT_ID="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients?clientId=synapse" | jq -r '.[0].id' 2>/dev/null || true)" + if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then + echo "Keycloak client synapse not found" >&2 + exit 1 + fi + CLIENT_SECRET="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/client-secret" | jq -r '.value' 2>/dev/null || true)" + if [ -z "$CLIENT_SECRET" ] || [ "$CLIENT_SECRET" = "null" ]; then + echo "Keycloak client secret not found" >&2 + exit 1 + fi + + existing="$(kubectl -n comms get secret synapse-oidc -o jsonpath='{.data.client-secret}' 2>/dev/null || true)" + if [ -n "${existing}" ]; then + exit 0 + fi + + kubectl -n comms create secret generic synapse-oidc \ + --from-literal=client-secret="${CLIENT_SECRET}" \ + --dry-run=client -o yaml | kubectl -n comms apply -f - >/dev/null + env: + - name: KEYCLOAK_ADMIN + valueFrom: + secretKeyRef: + name: keycloak-admin + key: username + - name: KEYCLOAK_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: keycloak-admin + key: password -- 2.47.2 From 220cc1f31a7c033c068efe568b31024c6aa76509 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:56:18 -0300 Subject: [PATCH 553/684] sso: run synapse oidc job with kubectl --- services/keycloak/synapse-oidc-secret-ensure-job.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git 
a/services/keycloak/synapse-oidc-secret-ensure-job.yaml b/services/keycloak/synapse-oidc-secret-ensure-job.yaml index 6468fd8..69a6698 100644 --- a/services/keycloak/synapse-oidc-secret-ensure-job.yaml +++ b/services/keycloak/synapse-oidc-secret-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: synapse-oidc-secret-ensure-1 + name: synapse-oidc-secret-ensure-2 namespace: sso spec: backoffLimit: 0 @@ -13,12 +13,13 @@ spec: restartPolicy: Never containers: - name: apply - image: alpine:3.20 + image: bitnami/kubectl:latest command: ["/bin/sh", "-c"] args: - | set -euo pipefail - apk add --no-cache curl jq >/dev/null + apt-get update >/dev/null + apt-get install -y curl jq >/dev/null KC_URL="http://keycloak.sso.svc.cluster.local" ACCESS_TOKEN="" -- 2.47.2 From 5a23514a307f874a4390ddf2eb43c5d62befb7fc Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 03:57:35 -0300 Subject: [PATCH 554/684] sso: install kubectl in synapse oidc job --- services/keycloak/synapse-oidc-secret-ensure-job.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/services/keycloak/synapse-oidc-secret-ensure-job.yaml b/services/keycloak/synapse-oidc-secret-ensure-job.yaml index 69a6698..16a7283 100644 --- a/services/keycloak/synapse-oidc-secret-ensure-job.yaml +++ b/services/keycloak/synapse-oidc-secret-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: synapse-oidc-secret-ensure-2 + name: synapse-oidc-secret-ensure-3 namespace: sso spec: backoffLimit: 0 @@ -13,13 +13,12 @@ spec: restartPolicy: Never containers: - name: apply - image: bitnami/kubectl:latest + image: alpine:3.20 command: ["/bin/sh", "-c"] args: - | set -euo pipefail - apt-get update >/dev/null - apt-get install -y curl jq >/dev/null + apk add --no-cache curl jq kubectl >/dev/null KC_URL="http://keycloak.sso.svc.cluster.local" ACCESS_TOKEN="" -- 2.47.2 From 85dce4f97516c4354714f3b5cb97367e47d885ea Mon Sep 17 00:00:00 2001 From: Brad Stein 
Date: Thu, 8 Jan 2026 04:00:27 -0300 Subject: [PATCH 555/684] comms: use synapse auth for bot jobs --- services/comms/atlasbot-deployment.yaml | 2 +- services/comms/pin-othrys-job.yaml | 4 ++-- services/comms/reset-othrys-room-job.yaml | 2 +- services/comms/seed-othrys-room.yaml | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/services/comms/atlasbot-deployment.yaml b/services/comms/atlasbot-deployment.yaml index 86e5c28..f9b6b61 100644 --- a/services/comms/atlasbot-deployment.yaml +++ b/services/comms/atlasbot-deployment.yaml @@ -32,7 +32,7 @@ spec: - name: MATRIX_BASE value: http://othrys-synapse-matrix-synapse:8008 - name: AUTH_BASE - value: http://matrix-authentication-service:8080 + value: http://othrys-synapse-matrix-synapse:8008 - name: KB_DIR value: /kb - name: VM_URL diff --git a/services/comms/pin-othrys-job.yaml b/services/comms/pin-othrys-job.yaml index c42c815..6d7b417 100644 --- a/services/comms/pin-othrys-job.yaml +++ b/services/comms/pin-othrys-job.yaml @@ -22,8 +22,8 @@ spec: env: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 - - name: AUTH_BASE - value: http://matrix-authentication-service:8080 + - name: AUTH_BASE + value: http://othrys-synapse-matrix-synapse:8008 - name: SEEDER_USER value: othrys-seeder - name: SEEDER_PASS diff --git a/services/comms/reset-othrys-room-job.yaml b/services/comms/reset-othrys-room-job.yaml index 1ae22ca..c6a0a60 100644 --- a/services/comms/reset-othrys-room-job.yaml +++ b/services/comms/reset-othrys-room-job.yaml @@ -16,7 +16,7 @@ spec: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 - name: AUTH_BASE - value: http://matrix-authentication-service:8080 + value: http://othrys-synapse-matrix-synapse:8008 - name: SERVER_NAME value: live.bstein.dev - name: ROOM_ALIAS diff --git a/services/comms/seed-othrys-room.yaml b/services/comms/seed-othrys-room.yaml index 5085aa3..65a1ade 100644 --- a/services/comms/seed-othrys-room.yaml +++ 
b/services/comms/seed-othrys-room.yaml @@ -20,8 +20,8 @@ spec: env: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 - - name: AUTH_BASE - value: http://matrix-authentication-service:8080 + - name: AUTH_BASE + value: http://othrys-synapse-matrix-synapse:8008 - name: SEEDER_USER value: othrys-seeder - name: SEEDER_PASS -- 2.47.2 From 99ed78ea7fd5a998ced9ee59cf499b1b5bf81d3a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 04:05:03 -0300 Subject: [PATCH 556/684] comms: fix auth env indentation --- services/comms/pin-othrys-job.yaml | 4 ++-- services/comms/seed-othrys-room.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/services/comms/pin-othrys-job.yaml b/services/comms/pin-othrys-job.yaml index 6d7b417..b735273 100644 --- a/services/comms/pin-othrys-job.yaml +++ b/services/comms/pin-othrys-job.yaml @@ -22,8 +22,8 @@ spec: env: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 - - name: AUTH_BASE - value: http://othrys-synapse-matrix-synapse:8008 + - name: AUTH_BASE + value: http://othrys-synapse-matrix-synapse:8008 - name: SEEDER_USER value: othrys-seeder - name: SEEDER_PASS diff --git a/services/comms/seed-othrys-room.yaml b/services/comms/seed-othrys-room.yaml index 65a1ade..1dc5090 100644 --- a/services/comms/seed-othrys-room.yaml +++ b/services/comms/seed-othrys-room.yaml @@ -20,8 +20,8 @@ spec: env: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 - - name: AUTH_BASE - value: http://othrys-synapse-matrix-synapse:8008 + - name: AUTH_BASE + value: http://othrys-synapse-matrix-synapse:8008 - name: SEEDER_USER value: othrys-seeder - name: SEEDER_PASS -- 2.47.2 From bfe623892a5e21cb15c1bbe3ad7be6ea2e822886 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 04:07:57 -0300 Subject: [PATCH 557/684] comms: bump othrys reset job --- services/comms/reset-othrys-room-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/services/comms/reset-othrys-room-job.yaml b/services/comms/reset-othrys-room-job.yaml index c6a0a60..6fd0ee5 100644 --- a/services/comms/reset-othrys-room-job.yaml +++ b/services/comms/reset-othrys-room-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: othrys-room-reset-2 + name: othrys-room-reset-3 namespace: comms spec: backoffLimit: 0 -- 2.47.2 From 12ab281528bc732fd6ee04568988ab347682e772 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 04:11:20 -0300 Subject: [PATCH 558/684] comms: revert bot auth to mas --- services/comms/atlasbot-deployment.yaml | 2 +- services/comms/pin-othrys-job.yaml | 2 +- services/comms/reset-othrys-room-job.yaml | 4 ++-- services/comms/seed-othrys-room.yaml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/services/comms/atlasbot-deployment.yaml b/services/comms/atlasbot-deployment.yaml index f9b6b61..86e5c28 100644 --- a/services/comms/atlasbot-deployment.yaml +++ b/services/comms/atlasbot-deployment.yaml @@ -32,7 +32,7 @@ spec: - name: MATRIX_BASE value: http://othrys-synapse-matrix-synapse:8008 - name: AUTH_BASE - value: http://othrys-synapse-matrix-synapse:8008 + value: http://matrix-authentication-service:8080 - name: KB_DIR value: /kb - name: VM_URL diff --git a/services/comms/pin-othrys-job.yaml b/services/comms/pin-othrys-job.yaml index b735273..c42c815 100644 --- a/services/comms/pin-othrys-job.yaml +++ b/services/comms/pin-othrys-job.yaml @@ -23,7 +23,7 @@ spec: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 - name: AUTH_BASE - value: http://othrys-synapse-matrix-synapse:8008 + value: http://matrix-authentication-service:8080 - name: SEEDER_USER value: othrys-seeder - name: SEEDER_PASS diff --git a/services/comms/reset-othrys-room-job.yaml b/services/comms/reset-othrys-room-job.yaml index 6fd0ee5..453363c 100644 --- a/services/comms/reset-othrys-room-job.yaml +++ b/services/comms/reset-othrys-room-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 
kind: Job metadata: - name: othrys-room-reset-3 + name: othrys-room-reset-4 namespace: comms spec: backoffLimit: 0 @@ -16,7 +16,7 @@ spec: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 - name: AUTH_BASE - value: http://othrys-synapse-matrix-synapse:8008 + value: http://matrix-authentication-service:8080 - name: SERVER_NAME value: live.bstein.dev - name: ROOM_ALIAS diff --git a/services/comms/seed-othrys-room.yaml b/services/comms/seed-othrys-room.yaml index 1dc5090..5085aa3 100644 --- a/services/comms/seed-othrys-room.yaml +++ b/services/comms/seed-othrys-room.yaml @@ -21,7 +21,7 @@ spec: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 - name: AUTH_BASE - value: http://othrys-synapse-matrix-synapse:8008 + value: http://matrix-authentication-service:8080 - name: SEEDER_USER value: othrys-seeder - name: SEEDER_PASS -- 2.47.2 From 9172f1e140122b57ab328e929fc06dff8709029a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 04:16:40 -0300 Subject: [PATCH 559/684] comms: enable synapse password login --- services/comms/synapse-rendered.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/comms/synapse-rendered.yaml b/services/comms/synapse-rendered.yaml index aa6c9d8..bf88bcc 100644 --- a/services/comms/synapse-rendered.yaml +++ b/services/comms/synapse-rendered.yaml @@ -340,7 +340,7 @@ data: msc4222_enabled: true max_event_delay_duration: 24h password_config: - enabled: false + enabled: true turn_uris: - "turn:turn.live.bstein.dev:3478?transport=udp" - "turn:turn.live.bstein.dev:3478?transport=tcp" @@ -671,7 +671,7 @@ spec: template: metadata: annotations: - checksum/config: manual-rtc-enable-8 + checksum/config: manual-rtc-enable-9 checksum/secrets: ec9f3b254a562a0f0709461eb74a8cc91b8c1a2fb06be2594a131776c2541773 labels: app.kubernetes.io/name: matrix-synapse -- 2.47.2 From ac7217a32ca517bd9581d6f8706e4b060f6c0144 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 
04:19:20 -0300 Subject: [PATCH 560/684] comms: switch bot auth back to synapse --- services/comms/atlasbot-deployment.yaml | 2 +- services/comms/pin-othrys-job.yaml | 2 +- services/comms/reset-othrys-room-job.yaml | 4 ++-- services/comms/seed-othrys-room.yaml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/services/comms/atlasbot-deployment.yaml b/services/comms/atlasbot-deployment.yaml index 86e5c28..f9b6b61 100644 --- a/services/comms/atlasbot-deployment.yaml +++ b/services/comms/atlasbot-deployment.yaml @@ -32,7 +32,7 @@ spec: - name: MATRIX_BASE value: http://othrys-synapse-matrix-synapse:8008 - name: AUTH_BASE - value: http://matrix-authentication-service:8080 + value: http://othrys-synapse-matrix-synapse:8008 - name: KB_DIR value: /kb - name: VM_URL diff --git a/services/comms/pin-othrys-job.yaml b/services/comms/pin-othrys-job.yaml index c42c815..b735273 100644 --- a/services/comms/pin-othrys-job.yaml +++ b/services/comms/pin-othrys-job.yaml @@ -23,7 +23,7 @@ spec: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 - name: AUTH_BASE - value: http://matrix-authentication-service:8080 + value: http://othrys-synapse-matrix-synapse:8008 - name: SEEDER_USER value: othrys-seeder - name: SEEDER_PASS diff --git a/services/comms/reset-othrys-room-job.yaml b/services/comms/reset-othrys-room-job.yaml index 453363c..307d060 100644 --- a/services/comms/reset-othrys-room-job.yaml +++ b/services/comms/reset-othrys-room-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: othrys-room-reset-4 + name: othrys-room-reset-5 namespace: comms spec: backoffLimit: 0 @@ -16,7 +16,7 @@ spec: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 - name: AUTH_BASE - value: http://matrix-authentication-service:8080 + value: http://othrys-synapse-matrix-synapse:8008 - name: SERVER_NAME value: live.bstein.dev - name: ROOM_ALIAS diff --git a/services/comms/seed-othrys-room.yaml 
b/services/comms/seed-othrys-room.yaml index 5085aa3..1dc5090 100644 --- a/services/comms/seed-othrys-room.yaml +++ b/services/comms/seed-othrys-room.yaml @@ -21,7 +21,7 @@ spec: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 - name: AUTH_BASE - value: http://matrix-authentication-service:8080 + value: http://othrys-synapse-matrix-synapse:8008 - name: SEEDER_USER value: othrys-seeder - name: SEEDER_PASS -- 2.47.2 From 97e7c69244e363d1d1affe7fe72c07bffb5f43cf Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 04:22:21 -0300 Subject: [PATCH 561/684] comms: retry atlasbot login --- services/comms/atlasbot-configmap.yaml | 12 +++++++++++- services/comms/atlasbot-deployment.yaml | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/services/comms/atlasbot-configmap.yaml b/services/comms/atlasbot-configmap.yaml index d8e74e8..14eb75c 100644 --- a/services/comms/atlasbot-configmap.yaml +++ b/services/comms/atlasbot-configmap.yaml @@ -604,9 +604,19 @@ data: reply = ollama_reply(hist_key, body, context=context) send_msg(token, rid, reply) + def login_with_retry(): + last_err = None + for attempt in range(10): + try: + return login() + except Exception as exc: # noqa: BLE001 + last_err = exc + time.sleep(min(30, 2 ** attempt)) + raise last_err + def main(): load_kb() - token = login() + token = login_with_retry() try: room_id = resolve_alias(token, ROOM_ALIAS) join_room(token, room_id) diff --git a/services/comms/atlasbot-deployment.yaml b/services/comms/atlasbot-deployment.yaml index f9b6b61..b2edb76 100644 --- a/services/comms/atlasbot-deployment.yaml +++ b/services/comms/atlasbot-deployment.yaml @@ -16,7 +16,7 @@ spec: labels: app: atlasbot annotations: - checksum/atlasbot-configmap: 80fa4d62ccafbfbcdeb63f0976cbea36aada12649f15f8570932296db5d48949 + checksum/atlasbot-configmap: manual-atlasbot-2 spec: serviceAccountName: atlasbot nodeSelector: -- 2.47.2 From 52df8094f506dbc1610be94cc9c1eeaae5fa8994 Mon Sep 17 00:00:00 
2001 From: Brad Stein Date: Thu, 8 Jan 2026 04:26:48 -0300 Subject: [PATCH 562/684] comms: rerun bstein room cleanup --- services/comms/bstein-force-leave-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/comms/bstein-force-leave-job.yaml b/services/comms/bstein-force-leave-job.yaml index 4690aa6..c83efe9 100644 --- a/services/comms/bstein-force-leave-job.yaml +++ b/services/comms/bstein-force-leave-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: bstein-leave-rooms-1 + name: bstein-leave-rooms-2 namespace: comms spec: backoffLimit: 0 -- 2.47.2 From 31ca499c04af6f0d90eadf2ad2a93cc392447fb6 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 04:29:29 -0300 Subject: [PATCH 563/684] comms: retry mas token for room cleanup --- services/comms/bstein-force-leave-job.yaml | 25 +++++++++++++++------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/services/comms/bstein-force-leave-job.yaml b/services/comms/bstein-force-leave-job.yaml index c83efe9..eef6721 100644 --- a/services/comms/bstein-force-leave-job.yaml +++ b/services/comms/bstein-force-leave-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: bstein-leave-rooms-2 + name: bstein-leave-rooms-3 namespace: comms spec: backoffLimit: 0 @@ -50,6 +50,7 @@ spec: import urllib.error import urllib.parse import urllib.request + import time MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"] MAS_ADMIN_CLIENT_SECRET_FILE = os.environ["MAS_ADMIN_CLIENT_SECRET_FILE"] @@ -87,6 +88,8 @@ spec: except Exception: payload = None return e.code, payload + except urllib.error.URLError: + return 0, None with open(MAS_ADMIN_CLIENT_SECRET_FILE, "r", encoding="utf-8") as f: mas_admin_client_secret = f.read().strip() @@ -94,13 +97,19 @@ spec: raise RuntimeError("MAS admin client secret file is empty") basic = base64.b64encode(f"{MAS_ADMIN_CLIENT_ID}:{mas_admin_client_secret}".encode()).decode() - token_status, token_payload = 
http_json( - "POST", - MAS_TOKEN_URL, - headers={"Authorization": f"Basic {basic}"}, - form={"grant_type": "client_credentials", "scope": "urn:mas:admin"}, - timeout=30, - ) + token_status = 0 + token_payload = None + for attempt in range(1, 6): + token_status, token_payload = http_json( + "POST", + MAS_TOKEN_URL, + headers={"Authorization": f"Basic {basic}"}, + form={"grant_type": "client_credentials", "scope": "urn:mas:admin"}, + timeout=30, + ) + if token_status == 200 and token_payload and "access_token" in token_payload: + break + time.sleep(attempt * 2) if token_status != 200 or not token_payload or "access_token" not in token_payload: raise RuntimeError(f"MAS admin token request failed (HTTP {token_status})") mas_admin_token = token_payload["access_token"] -- 2.47.2 From 7860003f150642f72da7fb3719704b0fca7efa68 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 04:32:05 -0300 Subject: [PATCH 564/684] comms: retry room leave actions --- services/comms/bstein-force-leave-job.yaml | 36 +++++++++++++--------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/services/comms/bstein-force-leave-job.yaml b/services/comms/bstein-force-leave-job.yaml index eef6721..5465f25 100644 --- a/services/comms/bstein-force-leave-job.yaml +++ b/services/comms/bstein-force-leave-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: bstein-leave-rooms-3 + name: bstein-leave-rooms-4 namespace: comms spec: backoffLimit: 0 @@ -150,20 +150,26 @@ spec: try: for room_id in TARGET_ROOMS: room_q = urllib.parse.quote(room_id, safe="") - leave_status, _ = http_json( - "POST", - f"{SYNAPSE_BASE}/_matrix/client/v3/rooms/{room_q}/leave", - headers={"Authorization": f"Bearer {personal_token}"}, - json_body={}, - timeout=30, - ) - forget_status, _ = http_json( - "POST", - f"{SYNAPSE_BASE}/_matrix/client/v3/rooms/{room_q}/forget", - headers={"Authorization": f"Bearer {personal_token}"}, - json_body={}, - timeout=30, - ) + leave_status = 0 + 
forget_status = 0 + for attempt in range(1, 6): + leave_status, _ = http_json( + "POST", + f"{SYNAPSE_BASE}/_matrix/client/v3/rooms/{room_q}/leave", + headers={"Authorization": f"Bearer {personal_token}"}, + json_body={}, + timeout=30, + ) + forget_status, _ = http_json( + "POST", + f"{SYNAPSE_BASE}/_matrix/client/v3/rooms/{room_q}/forget", + headers={"Authorization": f"Bearer {personal_token}"}, + json_body={}, + timeout=30, + ) + if leave_status == 200 and forget_status == 200: + break + time.sleep(attempt * 2) results["rooms"][room_id] = {"leave": leave_status, "forget": forget_status} if leave_status != 200 or forget_status != 200: failures.append(room_id) -- 2.47.2 From 6b5deb886f165021d68094b8dd4041b3915063e5 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 04:37:33 -0300 Subject: [PATCH 565/684] comms: use mas proxy for leave job --- services/comms/bstein-force-leave-job.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/comms/bstein-force-leave-job.yaml b/services/comms/bstein-force-leave-job.yaml index 5465f25..85bfa10 100644 --- a/services/comms/bstein-force-leave-job.yaml +++ b/services/comms/bstein-force-leave-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: bstein-leave-rooms-4 + name: bstein-leave-rooms-5 namespace: comms spec: backoffLimit: 0 @@ -33,7 +33,7 @@ spec: - name: MAS_ADMIN_API_BASE value: http://matrix-authentication-service:8081/api/admin/v1 - name: SYNAPSE_BASE - value: http://othrys-synapse-matrix-synapse:8008 + value: http://matrix-authentication-service:8080 - name: TARGET_USERNAME value: bstein - name: TARGET_ROOMS -- 2.47.2 From 0b09f46bb10deb19f6efeabcdd6af6a682d702b0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 04:42:19 -0300 Subject: [PATCH 566/684] comms: accept missing rooms in cleanup --- services/comms/bstein-force-leave-job.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/services/comms/bstein-force-leave-job.yaml b/services/comms/bstein-force-leave-job.yaml index 85bfa10..956330b 100644 --- a/services/comms/bstein-force-leave-job.yaml +++ b/services/comms/bstein-force-leave-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: bstein-leave-rooms-5 + name: bstein-leave-rooms-6 namespace: comms spec: backoffLimit: 0 @@ -167,11 +167,11 @@ spec: json_body={}, timeout=30, ) - if leave_status == 200 and forget_status == 200: + if leave_status in (200, 404) and forget_status in (200, 404): break time.sleep(attempt * 2) results["rooms"][room_id] = {"leave": leave_status, "forget": forget_status} - if leave_status != 200 or forget_status != 200: + if leave_status not in (200, 404) or forget_status not in (200, 404): failures.append(room_id) finally: revoke_status, _ = http_json( -- 2.47.2 From a1f1c9ada0c905c9c25bf36ef9887933d499c4ab Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 04:45:01 -0300 Subject: [PATCH 567/684] comms: retry othrys reset login --- services/comms/reset-othrys-room-job.yaml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/services/comms/reset-othrys-room-job.yaml b/services/comms/reset-othrys-room-job.yaml index 307d060..0886401 100644 --- a/services/comms/reset-othrys-room-job.yaml +++ b/services/comms/reset-othrys-room-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: othrys-room-reset-5 + name: othrys-room-reset-6 namespace: comms spec: backoffLimit: 0 @@ -41,7 +41,7 @@ spec: set -euo pipefail pip install --no-cache-dir requests >/dev/null python - <<'PY' - import os, sys, urllib.parse, requests + import os, sys, time, urllib.parse, requests BASE = os.environ["SYNAPSE_BASE"] AUTH_BASE = os.environ.get("AUTH_BASE", BASE) @@ -172,7 +172,17 @@ spec: r.raise_for_status() return r.json()["event_id"] - token = login(SEEDER_USER, SEEDER_PASS) + def login_with_retry(): + last = None + for attempt in range(1, 6): + try: + return 
login(SEEDER_USER, SEEDER_PASS) + except Exception as exc: # noqa: BLE001 + last = exc + time.sleep(attempt * 2) + raise last + + token = login_with_retry() old_room_id = resolve_alias(token, ROOM_ALIAS) if not old_room_id: -- 2.47.2 From fce33f02ff703e5148bcc94ce04cd1620c814e1b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 04:51:13 -0300 Subject: [PATCH 568/684] comms: route othrys reset via mas --- services/comms/reset-othrys-room-job.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/comms/reset-othrys-room-job.yaml b/services/comms/reset-othrys-room-job.yaml index 0886401..2e0da95 100644 --- a/services/comms/reset-othrys-room-job.yaml +++ b/services/comms/reset-othrys-room-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: othrys-room-reset-6 + name: othrys-room-reset-7 namespace: comms spec: backoffLimit: 0 @@ -14,9 +14,9 @@ spec: image: python:3.11-slim env: - name: SYNAPSE_BASE - value: http://othrys-synapse-matrix-synapse:8008 + value: http://matrix-authentication-service:8080 - name: AUTH_BASE - value: http://othrys-synapse-matrix-synapse:8008 + value: http://matrix-authentication-service:8080 - name: SERVER_NAME value: live.bstein.dev - name: ROOM_ALIAS -- 2.47.2 From 28bcf716d01c0047dbeb4140e5ac603ea96321ac Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 04:55:52 -0300 Subject: [PATCH 569/684] comms: seed synapse bot users --- services/comms/kustomization.yaml | 1 + services/comms/synapse-user-seed-job.yaml | 112 ++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 services/comms/synapse-user-seed-job.yaml diff --git a/services/comms/kustomization.yaml b/services/comms/kustomization.yaml index f1e0c80..6947617 100644 --- a/services/comms/kustomization.yaml +++ b/services/comms/kustomization.yaml @@ -15,6 +15,7 @@ resources: - mas-db-ensure-rbac.yaml - mas-db-ensure-job.yaml - comms-secrets-ensure-job.yaml + - synapse-user-seed-job.yaml - 
mas-deployment.yaml - element-rendered.yaml - livekit-config.yaml diff --git a/services/comms/synapse-user-seed-job.yaml b/services/comms/synapse-user-seed-job.yaml new file mode 100644 index 0000000..ea0dc49 --- /dev/null +++ b/services/comms/synapse-user-seed-job.yaml @@ -0,0 +1,112 @@ +# services/comms/synapse-user-seed-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: synapse-user-seed-1 + namespace: comms +spec: + backoffLimit: 1 + ttlSecondsAfterFinished: 3600 + template: + spec: + restartPolicy: Never + containers: + - name: seed + image: python:3.11-slim + env: + - name: PGHOST + value: postgres-service.postgres.svc.cluster.local + - name: PGPORT + value: "5432" + - name: PGDATABASE + value: synapse + - name: PGUSER + value: synapse + - name: PGPASSWORD + valueFrom: + secretKeyRef: + name: synapse-db + key: POSTGRES_PASSWORD + - name: SEEDER_USER + value: othrys-seeder + - name: SEEDER_PASS + valueFrom: + secretKeyRef: + name: atlasbot-credentials-runtime + key: seeder-password + - name: BOT_USER + value: atlasbot + - name: BOT_PASS + valueFrom: + secretKeyRef: + name: atlasbot-credentials-runtime + key: bot-password + command: + - /bin/sh + - -c + - | + set -euo pipefail + pip install --no-cache-dir psycopg2-binary bcrypt >/dev/null + python - <<'PY' + import os + import time + import bcrypt + import psycopg2 + + def get_cols(cur): + cur.execute( + """ + SELECT column_name, is_nullable, column_default + FROM information_schema.columns + WHERE table_schema = 'public' AND table_name = 'users' + """ + ) + cols = {} + for name, is_nullable, default in cur.fetchall(): + cols[name] = {"nullable": is_nullable == "YES", "default": default} + return cols + + def upsert_user(cur, cols, user_id, password, admin): + now_ms = int(time.time() * 1000) + values = { + "name": user_id, + "password_hash": bcrypt.hashpw(password.encode(), bcrypt.gensalt()).decode(), + "creation_ts": now_ms, + } + if "admin" in cols: + values["admin"] = admin + if "deactivated" in 
cols: + values["deactivated"] = False + if "shadow_banned" in cols: + values["shadow_banned"] = False + if "is_guest" in cols: + values["is_guest"] = False + + columns = list(values.keys()) + placeholders = ", ".join(["%s"] * len(columns)) + updates = ", ".join([f"{col}=EXCLUDED.{col}" for col in columns if col != "name"]) + query = f"INSERT INTO users ({', '.join(columns)}) VALUES ({placeholders}) ON CONFLICT (name) DO UPDATE SET {updates};" + cur.execute(query, [values[c] for c in columns]) + + seeder_user = os.environ["SEEDER_USER"] + bot_user = os.environ["BOT_USER"] + server = "live.bstein.dev" + seeder_id = f"@{seeder_user}:{server}" + bot_id = f"@{bot_user}:{server}" + + conn = psycopg2.connect( + host=os.environ["PGHOST"], + port=int(os.environ["PGPORT"]), + dbname=os.environ["PGDATABASE"], + user=os.environ["PGUSER"], + password=os.environ["PGPASSWORD"], + ) + try: + with conn: + with conn.cursor() as cur: + cols = get_cols(cur) + upsert_user(cur, cols, seeder_id, os.environ["SEEDER_PASS"], True) + upsert_user(cur, cols, bot_id, os.environ["BOT_PASS"], False) + finally: + conn.close() + PY -- 2.47.2 From c05cb414aa6283f52d9f18e4c6b12beb1ea2fb29 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 05:00:58 -0300 Subject: [PATCH 570/684] comms: fix synapse seed booleans --- services/comms/synapse-user-seed-job.yaml | 32 ++++++++++++++--------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/services/comms/synapse-user-seed-job.yaml b/services/comms/synapse-user-seed-job.yaml index ea0dc49..083f72e 100644 --- a/services/comms/synapse-user-seed-job.yaml +++ b/services/comms/synapse-user-seed-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: synapse-user-seed-1 + name: synapse-user-seed-2 namespace: comms spec: backoffLimit: 1 @@ -56,14 +56,18 @@ spec: def get_cols(cur): cur.execute( """ - SELECT column_name, is_nullable, column_default + SELECT column_name, is_nullable, column_default, data_type FROM 
information_schema.columns WHERE table_schema = 'public' AND table_name = 'users' """ ) cols = {} - for name, is_nullable, default in cur.fetchall(): - cols[name] = {"nullable": is_nullable == "YES", "default": default} + for name, is_nullable, default, data_type in cur.fetchall(): + cols[name] = { + "nullable": is_nullable == "YES", + "default": default, + "type": data_type, + } return cols def upsert_user(cur, cols, user_id, password, admin): @@ -73,14 +77,18 @@ spec: "password_hash": bcrypt.hashpw(password.encode(), bcrypt.gensalt()).decode(), "creation_ts": now_ms, } - if "admin" in cols: - values["admin"] = admin - if "deactivated" in cols: - values["deactivated"] = False - if "shadow_banned" in cols: - values["shadow_banned"] = False - if "is_guest" in cols: - values["is_guest"] = False + def add_flag(name, flag): + if name not in cols: + return + if cols[name]["type"] in ("smallint", "integer"): + values[name] = int(flag) + else: + values[name] = bool(flag) + + add_flag("admin", admin) + add_flag("deactivated", False) + add_flag("shadow_banned", False) + add_flag("is_guest", False) columns = list(values.keys()) placeholders = ", ".join(["%s"] * len(columns)) -- 2.47.2 From acedad02c0d019a02829e53f96525e62fe64c646 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 05:03:43 -0300 Subject: [PATCH 571/684] comms: bind synapse to ipv4 --- services/comms/synapse-rendered.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/comms/synapse-rendered.yaml b/services/comms/synapse-rendered.yaml index bf88bcc..f7d9188 100644 --- a/services/comms/synapse-rendered.yaml +++ b/services/comms/synapse-rendered.yaml @@ -273,7 +273,7 @@ data: listeners: - port: 8008 tls: false - bind_addresses: ["::"] + bind_addresses: ["0.0.0.0"] type: http x_forwarded: true @@ -671,7 +671,7 @@ spec: template: metadata: annotations: - checksum/config: manual-rtc-enable-9 + checksum/config: manual-rtc-enable-10 checksum/secrets: 
ec9f3b254a562a0f0709461eb74a8cc91b8c1a2fb06be2594a131776c2541773 labels: app.kubernetes.io/name: matrix-synapse -- 2.47.2 From 7bea022311cc5cd558c0e851f2db496b0fb62567 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 05:12:14 -0300 Subject: [PATCH 572/684] comms: add mas bot users and revert synapse auth --- services/comms/atlasbot-deployment.yaml | 2 +- services/comms/kustomization.yaml | 1 + .../comms/mas-local-users-ensure-job.yaml | 155 ++++++++++++++++++ services/comms/pin-othrys-job.yaml | 2 +- services/comms/seed-othrys-room.yaml | 2 +- services/comms/synapse-rendered.yaml | 4 +- 6 files changed, 161 insertions(+), 5 deletions(-) create mode 100644 services/comms/mas-local-users-ensure-job.yaml diff --git a/services/comms/atlasbot-deployment.yaml b/services/comms/atlasbot-deployment.yaml index b2edb76..0d45fe0 100644 --- a/services/comms/atlasbot-deployment.yaml +++ b/services/comms/atlasbot-deployment.yaml @@ -32,7 +32,7 @@ spec: - name: MATRIX_BASE value: http://othrys-synapse-matrix-synapse:8008 - name: AUTH_BASE - value: http://othrys-synapse-matrix-synapse:8008 + value: http://matrix-authentication-service:8080 - name: KB_DIR value: /kb - name: VM_URL diff --git a/services/comms/kustomization.yaml b/services/comms/kustomization.yaml index 6947617..206dda1 100644 --- a/services/comms/kustomization.yaml +++ b/services/comms/kustomization.yaml @@ -16,6 +16,7 @@ resources: - mas-db-ensure-job.yaml - comms-secrets-ensure-job.yaml - synapse-user-seed-job.yaml + - mas-local-users-ensure-job.yaml - mas-deployment.yaml - element-rendered.yaml - livekit-config.yaml diff --git a/services/comms/mas-local-users-ensure-job.yaml b/services/comms/mas-local-users-ensure-job.yaml new file mode 100644 index 0000000..04b41f6 --- /dev/null +++ b/services/comms/mas-local-users-ensure-job.yaml @@ -0,0 +1,155 @@ +# services/comms/mas-local-users-ensure-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: mas-local-users-ensure-1 + namespace: comms +spec: 
+ backoffLimit: 1 + ttlSecondsAfterFinished: 3600 + template: + spec: + restartPolicy: Never + volumes: + - name: mas-admin-client + secret: + secretName: mas-admin-client-runtime + items: + - key: client_secret + path: client_secret + containers: + - name: ensure + image: python:3.11-slim + volumeMounts: + - name: mas-admin-client + mountPath: /etc/mas-admin-client + readOnly: true + env: + - name: MAS_ADMIN_CLIENT_ID + value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM + - name: MAS_ADMIN_CLIENT_SECRET_FILE + value: /etc/mas-admin-client/client_secret + - name: MAS_TOKEN_URL + value: http://matrix-authentication-service:8080/oauth2/token + - name: MAS_ADMIN_API_BASE + value: http://matrix-authentication-service:8081/api/admin/v1 + - name: SEEDER_USER + value: othrys-seeder + - name: SEEDER_PASS + valueFrom: + secretKeyRef: + name: atlasbot-credentials-runtime + key: seeder-password + - name: BOT_USER + value: atlasbot + - name: BOT_PASS + valueFrom: + secretKeyRef: + name: atlasbot-credentials-runtime + key: bot-password + command: + - /bin/sh + - -c + - | + set -euo pipefail + pip install --no-cache-dir requests >/dev/null + python - <<'PY' + import base64 + import os + import time + import requests + import urllib.parse + + MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"] + MAS_ADMIN_CLIENT_SECRET_FILE = os.environ["MAS_ADMIN_CLIENT_SECRET_FILE"] + MAS_TOKEN_URL = os.environ["MAS_TOKEN_URL"] + MAS_ADMIN_API_BASE = os.environ["MAS_ADMIN_API_BASE"].rstrip("/") + + def admin_token(): + with open(MAS_ADMIN_CLIENT_SECRET_FILE, "r", encoding="utf-8") as f: + secret = f.read().strip() + basic = base64.b64encode(f"{MAS_ADMIN_CLIENT_ID}:{secret}".encode()).decode() + last = None + for attempt in range(1, 6): + try: + r = requests.post( + MAS_TOKEN_URL, + headers={"Authorization": f"Basic {basic}"}, + data={"grant_type": "client_credentials", "scope": "urn:mas:admin"}, + timeout=30, + ) + if r.status_code == 200: + return r.json()["access_token"] + except Exception as exc: # noqa: 
BLE001 + last = exc + time.sleep(attempt * 2) + raise RuntimeError(f"MAS admin token request failed: {last}") + + def get_user(token, username): + r = requests.get( + f"{MAS_ADMIN_API_BASE}/users/by-username/{urllib.parse.quote(username)}", + headers={"Authorization": f"Bearer {token}"}, + timeout=30, + ) + if r.status_code == 404: + return None + r.raise_for_status() + return r.json()["data"] + + def create_user(token, username, password): + payload = { + "data": { + "type": "user", + "attributes": { + "username": username, + "password": password, + }, + } + } + r = requests.post( + f"{MAS_ADMIN_API_BASE}/users", + headers={"Authorization": f"Bearer {token}"}, + json=payload, + timeout=30, + ) + if r.status_code in (200, 201): + return r.json()["data"] + if r.status_code == 409: + return None + r.raise_for_status() + return None + + def update_password(token, user_id, password): + payload = { + "data": { + "type": "user", + "id": user_id, + "attributes": { + "password": password, + }, + } + } + r = requests.patch( + f"{MAS_ADMIN_API_BASE}/users/{urllib.parse.quote(user_id)}", + headers={"Authorization": f"Bearer {token}"}, + json=payload, + timeout=30, + ) + if r.status_code in (200, 204): + return True + return False + + def ensure_user(token, username, password): + user = get_user(token, username) + if user is None: + user = create_user(token, username, password) + if user is None: + user = get_user(token, username) + if user is None: + raise RuntimeError(f"failed to ensure user {username}") + update_password(token, user["id"], password) + + token = admin_token() + ensure_user(token, os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"]) + ensure_user(token, os.environ["BOT_USER"], os.environ["BOT_PASS"]) + PY diff --git a/services/comms/pin-othrys-job.yaml b/services/comms/pin-othrys-job.yaml index b735273..c42c815 100644 --- a/services/comms/pin-othrys-job.yaml +++ b/services/comms/pin-othrys-job.yaml @@ -23,7 +23,7 @@ spec: - name: SYNAPSE_BASE value: 
http://othrys-synapse-matrix-synapse:8008 - name: AUTH_BASE - value: http://othrys-synapse-matrix-synapse:8008 + value: http://matrix-authentication-service:8080 - name: SEEDER_USER value: othrys-seeder - name: SEEDER_PASS diff --git a/services/comms/seed-othrys-room.yaml b/services/comms/seed-othrys-room.yaml index 1dc5090..5085aa3 100644 --- a/services/comms/seed-othrys-room.yaml +++ b/services/comms/seed-othrys-room.yaml @@ -21,7 +21,7 @@ spec: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 - name: AUTH_BASE - value: http://othrys-synapse-matrix-synapse:8008 + value: http://matrix-authentication-service:8080 - name: SEEDER_USER value: othrys-seeder - name: SEEDER_PASS diff --git a/services/comms/synapse-rendered.yaml b/services/comms/synapse-rendered.yaml index f7d9188..097189a 100644 --- a/services/comms/synapse-rendered.yaml +++ b/services/comms/synapse-rendered.yaml @@ -340,7 +340,7 @@ data: msc4222_enabled: true max_event_delay_duration: 24h password_config: - enabled: true + enabled: false turn_uris: - "turn:turn.live.bstein.dev:3478?transport=udp" - "turn:turn.live.bstein.dev:3478?transport=tcp" @@ -671,7 +671,7 @@ spec: template: metadata: annotations: - checksum/config: manual-rtc-enable-10 + checksum/config: manual-rtc-enable-11 checksum/secrets: ec9f3b254a562a0f0709461eb74a8cc91b8c1a2fb06be2594a131776c2541773 labels: app.kubernetes.io/name: matrix-synapse -- 2.47.2 From fa6566ffc85b33f8aa5f61688d6a60f7e5736ee8 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 05:18:20 -0300 Subject: [PATCH 573/684] comms: rerun othrys room reset --- services/comms/reset-othrys-room-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/comms/reset-othrys-room-job.yaml b/services/comms/reset-othrys-room-job.yaml index 2e0da95..7561313 100644 --- a/services/comms/reset-othrys-room-job.yaml +++ b/services/comms/reset-othrys-room-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: 
othrys-room-reset-7 + name: othrys-room-reset-8 namespace: comms spec: backoffLimit: 0 -- 2.47.2 From 94c1395c8c78a9fd396f070369a14b4d965db34d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 05:21:30 -0300 Subject: [PATCH 574/684] comms: verify mas bot logins --- .../comms/mas-local-users-ensure-job.yaml | 100 +++++++++++------- 1 file changed, 62 insertions(+), 38 deletions(-) diff --git a/services/comms/mas-local-users-ensure-job.yaml b/services/comms/mas-local-users-ensure-job.yaml index 04b41f6..6b7f6bf 100644 --- a/services/comms/mas-local-users-ensure-job.yaml +++ b/services/comms/mas-local-users-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-local-users-ensure-1 + name: mas-local-users-ensure-2 namespace: comms spec: backoffLimit: 1 @@ -64,6 +64,7 @@ spec: MAS_ADMIN_CLIENT_SECRET_FILE = os.environ["MAS_ADMIN_CLIENT_SECRET_FILE"] MAS_TOKEN_URL = os.environ["MAS_TOKEN_URL"] MAS_ADMIN_API_BASE = os.environ["MAS_ADMIN_API_BASE"].rstrip("/") + AUTH_BASE = "http://matrix-authentication-service:8080" def admin_token(): with open(MAS_ADMIN_CLIENT_SECRET_FILE, "r", encoding="utf-8") as f: @@ -97,57 +98,80 @@ spec: return r.json()["data"] def create_user(token, username, password): - payload = { - "data": { - "type": "user", - "attributes": { - "username": username, - "password": password, - }, - } - } - r = requests.post( - f"{MAS_ADMIN_API_BASE}/users", - headers={"Authorization": f"Bearer {token}"}, - json=payload, - timeout=30, - ) - if r.status_code in (200, 201): - return r.json()["data"] - if r.status_code == 409: - return None - r.raise_for_status() + payloads = [ + { + "data": { + "type": "user", + "attributes": { + "username": username, + "password": password, + }, + } + }, + {"username": username, "password": password}, + ] + for payload in payloads: + r = requests.post( + f"{MAS_ADMIN_API_BASE}/users", + headers={"Authorization": f"Bearer {token}"}, + json=payload, + timeout=30, + ) + if r.status_code 
in (200, 201): + return r.json().get("data") or {} + if r.status_code == 409: + return None return None def update_password(token, user_id, password): - payload = { - "data": { - "type": "user", - "id": user_id, - "attributes": { - "password": password, - }, - } - } - r = requests.patch( - f"{MAS_ADMIN_API_BASE}/users/{urllib.parse.quote(user_id)}", + payloads = [ + { + "data": { + "type": "user", + "id": user_id, + "attributes": { + "password": password, + }, + } + }, + {"password": password}, + ] + for payload in payloads: + r = requests.patch( + f"{MAS_ADMIN_API_BASE}/users/{urllib.parse.quote(user_id)}", + headers={"Authorization": f"Bearer {token}"}, + json=payload, + timeout=30, + ) + if r.status_code in (200, 204): + return True + r = requests.post( + f"{MAS_ADMIN_API_BASE}/users/{urllib.parse.quote(user_id)}/password", headers={"Authorization": f"Bearer {token}"}, - json=payload, + json={"password": password}, timeout=30, ) - if r.status_code in (200, 204): - return True - return False + return r.status_code in (200, 204) def ensure_user(token, username, password): user = get_user(token, username) if user is None: user = create_user(token, username, password) - if user is None: - user = get_user(token, username) + user = get_user(token, username) if user is None: raise RuntimeError(f"failed to ensure user {username}") update_password(token, user["id"], password) + r = requests.post( + f"{AUTH_BASE}/_matrix/client/v3/login", + json={ + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": username}, + "password": password, + }, + timeout=30, + ) + if r.status_code != 200: + raise RuntimeError(f"login failed for {username}: {r.status_code} {r.text}") token = admin_token() ensure_user(token, os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"]) -- 2.47.2 From 2a6f0a8db3beb86dd2b21d0585e885834d0e68ef Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 05:34:03 -0300 Subject: [PATCH 575/684] comms: tidy stack and guest naming --- 
.../kustomization.yaml | 2 +- .../applications/kustomization.yaml | 2 +- services/comms/NOTES.md | 32 ++++++ services/comms/guest-name-job.yaml | 105 +++++++++++++++++- services/comms/guest-register-configmap.yaml | 12 +- services/comms/kustomization.yaml | 48 ++++---- services/comms/mas-db-secret.yaml | 7 -- 7 files changed, 171 insertions(+), 37 deletions(-) rename clusters/atlas/flux-system/applications/{communication => comms}/kustomization.yaml (94%) create mode 100644 services/comms/NOTES.md delete mode 100644 services/comms/mas-db-secret.yaml diff --git a/clusters/atlas/flux-system/applications/communication/kustomization.yaml b/clusters/atlas/flux-system/applications/comms/kustomization.yaml similarity index 94% rename from clusters/atlas/flux-system/applications/communication/kustomization.yaml rename to clusters/atlas/flux-system/applications/comms/kustomization.yaml index ab2e7d8..0fb664a 100644 --- a/clusters/atlas/flux-system/applications/communication/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/comms/kustomization.yaml @@ -2,7 +2,7 @@ apiVersion: kustomize.toolkit.fluxcd.io/v1 kind: Kustomization metadata: - name: communication + name: comms namespace: flux-system spec: interval: 10m diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index d8e27af..4c9fb58 100644 --- a/clusters/atlas/flux-system/applications/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/kustomization.yaml @@ -5,7 +5,7 @@ resources: - gitea/kustomization.yaml - vault/kustomization.yaml - vaultwarden/kustomization.yaml - - communication/kustomization.yaml + - comms/kustomization.yaml - crypto/kustomization.yaml - monerod/kustomization.yaml - pegasus/kustomization.yaml diff --git a/services/comms/NOTES.md b/services/comms/NOTES.md new file mode 100644 index 0000000..e6868fe --- /dev/null +++ b/services/comms/NOTES.md @@ -0,0 +1,32 @@ +# services/comms/NOTES.md + 
+Purpose: Matrix + Element + LiveKit stack for Othrys (live.bstein.dev). + +Core flow +- Matrix Authentication Service (MAS) handles login/SSO and issues Matrix access tokens. +- Synapse is the homeserver; MAS fronts login, Synapse serves client/server APIs. +- Element Web provides the main UI; Element Call embeds LiveKit for group video. +- LiveKit handles SFU media; Coturn provides TURN for NAT traversal. +- matrix-guest-register provides guest accounts + guest sessions (no Keycloak). + +Operational jobs +- mas-db-ensure-job: ensures MAS database role/database + secret in comms. +- comms-secrets-ensure-job: creates runtime secrets (TURN, LiveKit, Synapse, atlasbot). +- synapse-signingkey-ensure-job: ensures Synapse signing key secret. +- synapse-seeder-admin-ensure-job: ensures Synapse admin user exists. +- synapse-user-seed-job: seeds atlasbot + othrys-seeder users/passwords. +- mas-local-users-ensure-job: ensures MAS local users exist (seeder/bot). +- seed-othrys-room: (suspended) creates Othrys + joins locals. +- reset-othrys-room: one-off room reset + pin invite. +- pin-othrys-invite: (suspended) pin invite message if missing. +- guest-name-randomizer: renames numeric/guest users to adj-noun names. +- bstein-force-leave: one-off room leave cleanup. + +Manual re-runs +- Bump the job name suffix (e.g., reset-othrys-room-9) to re-run a one-off job. +- Unsuspend a CronJob only when needed; re-suspend after completion. + +Ports +- Traefik (HTTPS) via LB on 192.168.22.9. +- Coturn LB on 192.168.22.5 (3478/5349 + UDP range). +- LiveKit LB on 192.168.22.6 (7880/7881/7882/7883). 
diff --git a/services/comms/guest-name-job.yaml b/services/comms/guest-name-job.yaml index 10dde37..f3ea00b 100644 --- a/services/comms/guest-name-job.yaml +++ b/services/comms/guest-name-job.yaml @@ -7,6 +7,8 @@ metadata: spec: schedule: "*/1 * * * *" suspend: false + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 1 jobTemplate: spec: backoffLimit: 0 @@ -30,6 +32,8 @@ spec: env: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 + - name: AUTH_BASE + value: http://matrix-authentication-service:8080 - name: MAS_ADMIN_CLIENT_ID value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM - name: MAS_ADMIN_CLIENT_SECRET_FILE @@ -40,6 +44,11 @@ spec: value: http://matrix-authentication-service:8080/oauth2/token - name: SEEDER_USER value: othrys-seeder + - name: SEEDER_PASS + valueFrom: + secretKeyRef: + name: atlasbot-credentials-runtime + key: seeder-password command: - /bin/sh - -c @@ -66,11 +75,13 @@ spec: ] BASE = os.environ["SYNAPSE_BASE"] + AUTH_BASE = os.environ.get("AUTH_BASE", BASE) MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"] MAS_ADMIN_CLIENT_SECRET_FILE = os.environ["MAS_ADMIN_CLIENT_SECRET_FILE"] MAS_ADMIN_API_BASE = os.environ["MAS_ADMIN_API_BASE"].rstrip("/") MAS_TOKEN_URL = os.environ["MAS_TOKEN_URL"] SEEDER_USER = os.environ["SEEDER_USER"] + SEEDER_PASS = os.environ["SEEDER_PASS"] ROOM_ALIAS = "#othrys:live.bstein.dev" def mas_admin_token(): @@ -126,6 +137,19 @@ spec: timeout=30, ) + def login(user, password): + r = requests.post( + f"{AUTH_BASE}/_matrix/client/v3/login", + json={ + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": user}, + "password": password, + }, + timeout=30, + ) + r.raise_for_status() + return r.json()["access_token"] + def resolve_alias(token, alias): headers = {"Authorization": f"Bearer {token}"} enc = urllib.parse.quote(alias) @@ -167,6 +191,23 @@ spec: break return users + def synapse_list_users(token): + headers = {"Authorization": f"Bearer {token}"} + users = [] + from_token = None + 
while True: + url = f"{BASE}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100" + if from_token: + url += f"&from={urllib.parse.quote(from_token)}" + r = requests.get(url, headers=headers, timeout=30) + r.raise_for_status() + payload = r.json() + users.extend(payload.get("users", [])) + from_token = payload.get("next_token") + if not from_token: + break + return users + def user_id_for_username(username): return f"@{username}:live.bstein.dev" @@ -176,6 +217,18 @@ spec: r.raise_for_status() return r.json().get("displayname") + def get_displayname_admin(token, user_id): + headers = {"Authorization": f"Bearer {token}"} + r = requests.get( + f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}", + headers=headers, + timeout=30, + ) + if r.status_code == 404: + return None + r.raise_for_status() + return r.json().get("displayname") + def set_displayname(token, room_id, user_id, name, in_room): headers = {"Authorization": f"Bearer {token}"} payload = {"displayname": name} @@ -191,6 +244,25 @@ spec: content = {"membership": "join", "displayname": name} requests.put(state_url, headers=headers, json=content, timeout=30) + def set_displayname_admin(token, user_id, name): + headers = {"Authorization": f"Bearer {token}"} + payload = {"displayname": name} + r = requests.put( + f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}", + headers=headers, + json=payload, + timeout=30, + ) + if r.status_code in (200, 201, 204): + return True + return False + + def needs_rename_username(username): + return username.isdigit() or username.startswith("guest-") + + def needs_rename_display(display): + return not display or display.isdigit() or display.startswith("guest-") + admin_token = mas_admin_token() seeder_id = mas_user_id(admin_token, SEEDER_USER) seeder_token, seeder_session = mas_personal_session(admin_token, seeder_id) @@ -201,19 +273,22 @@ spec: mas_revoke_session(admin_token, seeder_session) users = mas_list_users(admin_token) + mas_usernames = 
set() for user in users: attrs = user.get("attributes") or {} username = attrs.get("username") or "" + if username: + mas_usernames.add(username) legacy_guest = attrs.get("legacy_guest") if not username: continue - if not (legacy_guest or username.isdigit() or username.startswith("guest-")): + if not (legacy_guest or needs_rename_username(username)): continue user_id = user_id_for_username(username) access_token, session_id = mas_personal_session(admin_token, user["id"]) try: display = get_displayname(access_token, user_id) - if display and (not display.isdigit()) and (not display.startswith("guest-")): + if display and not needs_rename_display(display): continue new = None for _ in range(30): @@ -227,4 +302,30 @@ spec: set_displayname(access_token, room_id, user_id, new, user_id in members) finally: mas_revoke_session(admin_token, session_id) + + seeder_token = login(SEEDER_USER, SEEDER_PASS) + for entry in synapse_list_users(seeder_token): + user_id = entry.get("name") or "" + if not user_id.startswith("@"): + continue + localpart = user_id.split(":", 1)[0].lstrip("@") + if localpart in mas_usernames: + continue + is_guest = entry.get("is_guest") + if not (is_guest or needs_rename_username(localpart)): + continue + display = get_displayname_admin(seeder_token, user_id) + if display and not needs_rename_display(display): + continue + new = None + for _ in range(30): + candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}" + if candidate not in existing: + new = candidate + existing.add(candidate) + break + if not new: + continue + if not set_displayname_admin(seeder_token, user_id, new): + continue PY diff --git a/services/comms/guest-register-configmap.yaml b/services/comms/guest-register-configmap.yaml index ded54ec..b5bc803 100644 --- a/services/comms/guest-register-configmap.yaml +++ b/services/comms/guest-register-configmap.yaml @@ -27,8 +27,16 @@ data: RATE_MAX = int(os.environ.get("RATE_MAX", "30")) _rate = {} # ip -> [window_start, count] - ADJ = 
["brisk", "calm", "eager", "gentle", "merry", "nifty", "rapid", "sunny", "witty", "zesty"] - NOUN = ["otter", "falcon", "comet", "ember", "grove", "harbor", "meadow", "raven", "river", "summit"] + ADJ = [ + "brisk","calm","eager","gentle","merry","nifty","rapid","sunny","witty","zesty", + "amber","bold","bright","crisp","daring","frosty","glad","jolly","lively","mellow", + "quiet","ripe","serene","spry","tidy","vivid","warm","wild","clever","kind", + ] + NOUN = [ + "otter","falcon","comet","ember","grove","harbor","meadow","raven","river","summit", + "breeze","cedar","cinder","cove","delta","forest","glade","lark","marsh","peak", + "pine","quartz","reef","ridge","sable","sage","shore","thunder","vale","zephyr", + ] def _json(method, url, *, headers=None, body=None, timeout=20): hdrs = {"Content-Type": "application/json"} diff --git a/services/comms/kustomization.yaml b/services/comms/kustomization.yaml index 206dda1..9f95958 100644 --- a/services/comms/kustomization.yaml +++ b/services/comms/kustomization.yaml @@ -4,41 +4,41 @@ kind: Kustomization namespace: comms resources: - namespace.yaml - - atlasbot-rbac.yaml - - synapse-rendered.yaml - - synapse-signingkey-ensure-job.yaml - - synapse-seeder-admin-ensure-job.yaml - mas-configmap.yaml - - mas-admin-client-secret-ensure-job.yaml - - mas-secrets-ensure-rbac.yaml - - comms-secrets-ensure-rbac.yaml - - mas-db-ensure-rbac.yaml - - mas-db-ensure-job.yaml - - comms-secrets-ensure-job.yaml - - synapse-user-seed-job.yaml - - mas-local-users-ensure-job.yaml - - mas-deployment.yaml - element-rendered.yaml - livekit-config.yaml - - livekit.yaml - - coturn.yaml - - livekit-token-deployment.yaml - - livekit-ingress.yaml - - livekit-middlewares.yaml - element-call-config.yaml - element-call-deployment.yaml - - reset-othrys-room-job.yaml - - bstein-force-leave-job.yaml - - pin-othrys-job.yaml - - guest-name-job.yaml - guest-register-configmap.yaml - guest-register-deployment.yaml - guest-register-service.yaml - - 
matrix-ingress.yaml - atlasbot-configmap.yaml - atlasbot-deployment.yaml - - seed-othrys-room.yaml - wellknown.yaml + - atlasbot-rbac.yaml + - mas-secrets-ensure-rbac.yaml + - comms-secrets-ensure-rbac.yaml + - mas-db-ensure-rbac.yaml + - mas-admin-client-secret-ensure-job.yaml + - mas-db-ensure-job.yaml + - comms-secrets-ensure-job.yaml + - synapse-signingkey-ensure-job.yaml + - synapse-seeder-admin-ensure-job.yaml + - synapse-user-seed-job.yaml + - mas-local-users-ensure-job.yaml + - synapse-rendered.yaml + - mas-deployment.yaml + - livekit-token-deployment.yaml + - livekit.yaml + - coturn.yaml + - seed-othrys-room.yaml + - guest-name-job.yaml + - pin-othrys-job.yaml + - reset-othrys-room-job.yaml + - bstein-force-leave-job.yaml + - livekit-ingress.yaml + - livekit-middlewares.yaml + - matrix-ingress.yaml patches: - path: synapse-deployment-strategy-patch.yaml diff --git a/services/comms/mas-db-secret.yaml b/services/comms/mas-db-secret.yaml deleted file mode 100644 index 21b408d..0000000 --- a/services/comms/mas-db-secret.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# services/comms/mas-db-secret.yaml -apiVersion: v1 -kind: Secret -metadata: - name: mas-db - namespace: comms -type: Opaque -- 2.47.2 From d3c6ddeead3a4868159d71a09ccbf83427bf0676 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 05:40:28 -0300 Subject: [PATCH 576/684] comms: re-run signing key and synapse oidc --- services/comms/synapse-signingkey-ensure-job.yaml | 8 ++++---- services/keycloak/synapse-oidc-secret-ensure-job.yaml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/services/comms/synapse-signingkey-ensure-job.yaml b/services/comms/synapse-signingkey-ensure-job.yaml index a76948d..5ebaeda 100644 --- a/services/comms/synapse-signingkey-ensure-job.yaml +++ b/services/comms/synapse-signingkey-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: othrys-synapse-signingkey-ensure-4 + name: othrys-synapse-signingkey-ensure-5 namespace: comms 
spec: backoffLimit: 2 @@ -34,9 +34,9 @@ spec: if kubectl -n comms get secret othrys-synapse-signingkey -o jsonpath='{.data.signing\.key}' 2>/dev/null | grep -q .; then exit 0 fi - signing_key_b64="$(base64 /work/signing.key | tr -d '\n')" - payload="$(printf '{"data":{"signing.key":"%s"}}' "${signing_key_b64}")" - kubectl -n comms patch secret othrys-synapse-signingkey --type=merge -p "${payload}" >/dev/null + kubectl -n comms create secret generic othrys-synapse-signingkey \ + --from-file=signing.key=/work/signing.key \ + --dry-run=client -o yaml | kubectl -n comms apply -f - >/dev/null volumeMounts: - name: work mountPath: /work diff --git a/services/keycloak/synapse-oidc-secret-ensure-job.yaml b/services/keycloak/synapse-oidc-secret-ensure-job.yaml index 16a7283..7486ced 100644 --- a/services/keycloak/synapse-oidc-secret-ensure-job.yaml +++ b/services/keycloak/synapse-oidc-secret-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: synapse-oidc-secret-ensure-3 + name: synapse-oidc-secret-ensure-4 namespace: sso spec: backoffLimit: 0 -- 2.47.2 From 0fc4b299da764d725ea1e8b498a07f01e4f318ce Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 05:43:33 -0300 Subject: [PATCH 577/684] keycloak: re-run mas secrets ensure --- services/keycloak/mas-secrets-ensure-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/keycloak/mas-secrets-ensure-job.yaml b/services/keycloak/mas-secrets-ensure-job.yaml index b949e2e..b0951cf 100644 --- a/services/keycloak/mas-secrets-ensure-job.yaml +++ b/services/keycloak/mas-secrets-ensure-job.yaml @@ -8,7 +8,7 @@ metadata: apiVersion: batch/v1 kind: Job metadata: - name: mas-secrets-ensure-12 + name: mas-secrets-ensure-13 namespace: sso spec: backoffLimit: 0 -- 2.47.2 From c909d45fda65a7dd820ded76f8a9f8f067f518b0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 05:47:21 -0300 Subject: [PATCH 578/684] comms: make guest renamer MAS-only --- 
services/comms/guest-name-job.yaml | 114 ++++++++++++----------------- 1 file changed, 45 insertions(+), 69 deletions(-) diff --git a/services/comms/guest-name-job.yaml b/services/comms/guest-name-job.yaml index f3ea00b..b209cf2 100644 --- a/services/comms/guest-name-job.yaml +++ b/services/comms/guest-name-job.yaml @@ -32,8 +32,6 @@ spec: env: - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 - - name: AUTH_BASE - value: http://matrix-authentication-service:8080 - name: MAS_ADMIN_CLIENT_ID value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM - name: MAS_ADMIN_CLIENT_SECRET_FILE @@ -44,11 +42,6 @@ spec: value: http://matrix-authentication-service:8080/oauth2/token - name: SEEDER_USER value: othrys-seeder - - name: SEEDER_PASS - valueFrom: - secretKeyRef: - name: atlasbot-credentials-runtime - key: seeder-password command: - /bin/sh - -c @@ -75,13 +68,11 @@ spec: ] BASE = os.environ["SYNAPSE_BASE"] - AUTH_BASE = os.environ.get("AUTH_BASE", BASE) MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"] MAS_ADMIN_CLIENT_SECRET_FILE = os.environ["MAS_ADMIN_CLIENT_SECRET_FILE"] MAS_ADMIN_API_BASE = os.environ["MAS_ADMIN_API_BASE"].rstrip("/") MAS_TOKEN_URL = os.environ["MAS_TOKEN_URL"] SEEDER_USER = os.environ["SEEDER_USER"] - SEEDER_PASS = os.environ["SEEDER_PASS"] ROOM_ALIAS = "#othrys:live.bstein.dev" def mas_admin_token(): @@ -137,19 +128,6 @@ spec: timeout=30, ) - def login(user, password): - r = requests.post( - f"{AUTH_BASE}/_matrix/client/v3/login", - json={ - "type": "m.login.password", - "identifier": {"type": "m.id.user", "user": user}, - "password": password, - }, - timeout=30, - ) - r.raise_for_status() - return r.json()["access_token"] - def resolve_alias(token, alias): headers = {"Authorization": f"Bearer {token}"} enc = urllib.parse.quote(alias) @@ -269,25 +247,48 @@ spec: try: room_id = resolve_alias(seeder_token, ROOM_ALIAS) members, existing = room_members(seeder_token, room_id) - finally: - mas_revoke_session(admin_token, seeder_session) + users = 
mas_list_users(admin_token) + mas_usernames = set() + for user in users: + attrs = user.get("attributes") or {} + username = attrs.get("username") or "" + if username: + mas_usernames.add(username) + legacy_guest = attrs.get("legacy_guest") + if not username: + continue + if not (legacy_guest or needs_rename_username(username)): + continue + user_id = user_id_for_username(username) + access_token, session_id = mas_personal_session(admin_token, user["id"]) + try: + display = get_displayname(access_token, user_id) + if display and not needs_rename_display(display): + continue + new = None + for _ in range(30): + candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}" + if candidate not in existing: + new = candidate + existing.add(candidate) + break + if not new: + continue + set_displayname(access_token, room_id, user_id, new, user_id in members) + finally: + mas_revoke_session(admin_token, session_id) - users = mas_list_users(admin_token) - mas_usernames = set() - for user in users: - attrs = user.get("attributes") or {} - username = attrs.get("username") or "" - if username: - mas_usernames.add(username) - legacy_guest = attrs.get("legacy_guest") - if not username: - continue - if not (legacy_guest or needs_rename_username(username)): - continue - user_id = user_id_for_username(username) - access_token, session_id = mas_personal_session(admin_token, user["id"]) - try: - display = get_displayname(access_token, user_id) + for entry in synapse_list_users(seeder_token): + user_id = entry.get("name") or "" + if not user_id.startswith("@"): + continue + localpart = user_id.split(":", 1)[0].lstrip("@") + if localpart in mas_usernames: + continue + is_guest = entry.get("is_guest") + if not (is_guest or needs_rename_username(localpart)): + continue + display = get_displayname_admin(seeder_token, user_id) if display and not needs_rename_display(display): continue new = None @@ -299,33 +300,8 @@ spec: break if not new: continue - set_displayname(access_token, room_id, 
user_id, new, user_id in members) - finally: - mas_revoke_session(admin_token, session_id) - - seeder_token = login(SEEDER_USER, SEEDER_PASS) - for entry in synapse_list_users(seeder_token): - user_id = entry.get("name") or "" - if not user_id.startswith("@"): - continue - localpart = user_id.split(":", 1)[0].lstrip("@") - if localpart in mas_usernames: - continue - is_guest = entry.get("is_guest") - if not (is_guest or needs_rename_username(localpart)): - continue - display = get_displayname_admin(seeder_token, user_id) - if display and not needs_rename_display(display): - continue - new = None - for _ in range(30): - candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}" - if candidate not in existing: - new = candidate - existing.add(candidate) - break - if not new: - continue - if not set_displayname_admin(seeder_token, user_id, new): - continue + if not set_displayname_admin(seeder_token, user_id, new): + continue + finally: + mas_revoke_session(admin_token, seeder_session) PY -- 2.47.2 From 835146bd5b1487e9020a41430921b109b2bdf381 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 05:51:43 -0300 Subject: [PATCH 579/684] comms: rerun MAS local user ensure --- services/comms/mas-local-users-ensure-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/comms/mas-local-users-ensure-job.yaml b/services/comms/mas-local-users-ensure-job.yaml index 6b7f6bf..3f06378 100644 --- a/services/comms/mas-local-users-ensure-job.yaml +++ b/services/comms/mas-local-users-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-local-users-ensure-2 + name: mas-local-users-ensure-3 namespace: comms spec: backoffLimit: 1 -- 2.47.2 From 4eb82811b522f7bfc8565ba8bbc5ad8b5bb33f5d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 06:01:45 -0300 Subject: [PATCH 580/684] comms: set MAS user passwords via set-password --- .../comms/mas-local-users-ensure-job.yaml | 25 ++----------------- 1 file changed, 
2 insertions(+), 23 deletions(-) diff --git a/services/comms/mas-local-users-ensure-job.yaml b/services/comms/mas-local-users-ensure-job.yaml index 3f06378..111810a 100644 --- a/services/comms/mas-local-users-ensure-job.yaml +++ b/services/comms/mas-local-users-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-local-users-ensure-3 + name: mas-local-users-ensure-4 namespace: comms spec: backoffLimit: 1 @@ -124,29 +124,8 @@ spec: return None def update_password(token, user_id, password): - payloads = [ - { - "data": { - "type": "user", - "id": user_id, - "attributes": { - "password": password, - }, - } - }, - {"password": password}, - ] - for payload in payloads: - r = requests.patch( - f"{MAS_ADMIN_API_BASE}/users/{urllib.parse.quote(user_id)}", - headers={"Authorization": f"Bearer {token}"}, - json=payload, - timeout=30, - ) - if r.status_code in (200, 204): - return True r = requests.post( - f"{MAS_ADMIN_API_BASE}/users/{urllib.parse.quote(user_id)}/password", + f"{MAS_ADMIN_API_BASE}/users/{urllib.parse.quote(user_id)}/set-password", headers={"Authorization": f"Bearer {token}"}, json={"password": password}, timeout=30, -- 2.47.2 From d870e97b38e891e3c74ac41b0a0182bd1fef2d5c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 06:05:20 -0300 Subject: [PATCH 581/684] comms: use full user IDs for MAS logins --- services/comms/atlasbot-configmap.yaml | 3 ++- services/comms/atlasbot-deployment.yaml | 2 +- .../comms/mas-local-users-ensure-job.yaml | 6 +++++- services/comms/pin-othrys-job.yaml | 11 +++++++++- services/comms/reset-othrys-room-job.yaml | 21 +++++++++++++------ services/comms/seed-othrys-room.yaml | 11 +++++++++- 6 files changed, 43 insertions(+), 11 deletions(-) diff --git a/services/comms/atlasbot-configmap.yaml b/services/comms/atlasbot-configmap.yaml index 14eb75c..be9640e 100644 --- a/services/comms/atlasbot-configmap.yaml +++ b/services/comms/atlasbot-configmap.yaml @@ -130,9 +130,10 @@ data: return 
json.loads(raw.decode()) if raw else {} def login() -> str: + login_user = normalize_user_id(USER) payload = { "type": "m.login.password", - "identifier": {"type": "m.id.user", "user": USER}, + "identifier": {"type": "m.id.user", "user": login_user}, "password": PASSWORD, } res = req("POST", "/_matrix/client/v3/login", body=payload, base=AUTH_BASE) diff --git a/services/comms/atlasbot-deployment.yaml b/services/comms/atlasbot-deployment.yaml index 0d45fe0..4d8bfc7 100644 --- a/services/comms/atlasbot-deployment.yaml +++ b/services/comms/atlasbot-deployment.yaml @@ -16,7 +16,7 @@ spec: labels: app: atlasbot annotations: - checksum/atlasbot-configmap: manual-atlasbot-2 + checksum/atlasbot-configmap: manual-atlasbot-3 spec: serviceAccountName: atlasbot nodeSelector: diff --git a/services/comms/mas-local-users-ensure-job.yaml b/services/comms/mas-local-users-ensure-job.yaml index 111810a..e17043b 100644 --- a/services/comms/mas-local-users-ensure-job.yaml +++ b/services/comms/mas-local-users-ensure-job.yaml @@ -65,6 +65,7 @@ spec: MAS_TOKEN_URL = os.environ["MAS_TOKEN_URL"] MAS_ADMIN_API_BASE = os.environ["MAS_ADMIN_API_BASE"].rstrip("/") AUTH_BASE = "http://matrix-authentication-service:8080" + SERVER_NAME = "live.bstein.dev" def admin_token(): with open(MAS_ADMIN_CLIENT_SECRET_FILE, "r", encoding="utf-8") as f: @@ -140,11 +141,14 @@ spec: if user is None: raise RuntimeError(f"failed to ensure user {username}") update_password(token, user["id"], password) + login_name = username + if not login_name.startswith("@"): + login_name = f"@{login_name}:{SERVER_NAME}" r = requests.post( f"{AUTH_BASE}/_matrix/client/v3/login", json={ "type": "m.login.password", - "identifier": {"type": "m.id.user", "user": username}, + "identifier": {"type": "m.id.user", "user": login_name}, "password": password, }, timeout=30, diff --git a/services/comms/pin-othrys-job.yaml b/services/comms/pin-othrys-job.yaml index c42c815..3639194 100644 --- a/services/comms/pin-othrys-job.yaml +++ 
b/services/comms/pin-othrys-job.yaml @@ -50,10 +50,19 @@ spec: def auth(token): return {"Authorization": f"Bearer {token}"} + def canon_user(user): + u = (user or "").strip() + if u.startswith("@") and ":" in u: + return u + u = u.lstrip("@") + if ":" in u: + return f"@{u}" + return f"@{u}:live.bstein.dev" + def login(user, password): r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={ "type": "m.login.password", - "identifier": {"type": "m.id.user", "user": user}, + "identifier": {"type": "m.id.user", "user": canon_user(user)}, "password": password, }) r.raise_for_status() diff --git a/services/comms/reset-othrys-room-job.yaml b/services/comms/reset-othrys-room-job.yaml index 7561313..9657626 100644 --- a/services/comms/reset-othrys-room-job.yaml +++ b/services/comms/reset-othrys-room-job.yaml @@ -78,12 +78,21 @@ spec: def auth(token): return {"Authorization": f"Bearer {token}"} - def login(user, password): - r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={ - "type": "m.login.password", - "identifier": {"type": "m.id.user", "user": user}, - "password": password, - }) + def canon_user(user): + u = (user or "").strip() + if u.startswith("@") and ":" in u: + return u + u = u.lstrip("@") + if ":" in u: + return f"@{u}" + return f"@{u}:live.bstein.dev" + + def login(user, password): + r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={ + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": canon_user(user)}, + "password": password, + }) if r.status_code != 200: raise SystemExit(f"login failed: {r.status_code} {r.text}") return r.json()["access_token"] diff --git a/services/comms/seed-othrys-room.yaml b/services/comms/seed-othrys-room.yaml index 5085aa3..901f14d 100644 --- a/services/comms/seed-othrys-room.yaml +++ b/services/comms/seed-othrys-room.yaml @@ -48,10 +48,19 @@ spec: BASE = os.environ["SYNAPSE_BASE"] AUTH_BASE = os.environ.get("AUTH_BASE", BASE) + def canon_user(user): + u = (user or "").strip() 
+ if u.startswith("@") and ":" in u: + return u + u = u.lstrip("@") + if ":" in u: + return f"@{u}" + return f"@{u}:live.bstein.dev" + def login(user, password): r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={ "type": "m.login.password", - "identifier": {"type": "m.id.user", "user": user}, + "identifier": {"type": "m.id.user", "user": canon_user(user)}, "password": password, }) if r.status_code != 200: -- 2.47.2 From ffddd71116c6055da4f69c0a7ca9e87ebfc9d1e7 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 06:09:34 -0300 Subject: [PATCH 582/684] comms: make room reset a suspended cronjob --- services/comms/NOTES.md | 3 +- services/comms/reset-othrys-room-job.yaml | 441 ++++++++++++---------- 2 files changed, 235 insertions(+), 209 deletions(-) diff --git a/services/comms/NOTES.md b/services/comms/NOTES.md index e6868fe..f3ba78f 100644 --- a/services/comms/NOTES.md +++ b/services/comms/NOTES.md @@ -17,13 +17,12 @@ Operational jobs - synapse-user-seed-job: seeds atlasbot + othrys-seeder users/passwords. - mas-local-users-ensure-job: ensures MAS local users exist (seeder/bot). - seed-othrys-room: (suspended) creates Othrys + joins locals. -- reset-othrys-room: one-off room reset + pin invite. +- reset-othrys-room: suspended CronJob for a manual room reset + pin invite. - pin-othrys-invite: (suspended) pin invite message if missing. - guest-name-randomizer: renames numeric/guest users to adj-noun names. - bstein-force-leave: one-off room leave cleanup. Manual re-runs -- Bump the job name suffix (e.g., reset-othrys-room-9) to re-run a one-off job. - Unsuspend a CronJob only when needed; re-suspend after completion. 
Ports diff --git a/services/comms/reset-othrys-room-job.yaml b/services/comms/reset-othrys-room-job.yaml index 9657626..dd056c3 100644 --- a/services/comms/reset-othrys-room-job.yaml +++ b/services/comms/reset-othrys-room-job.yaml @@ -1,82 +1,93 @@ # services/comms/reset-othrys-room-job.yaml apiVersion: batch/v1 -kind: Job +kind: CronJob metadata: - name: othrys-room-reset-8 + name: othrys-room-reset namespace: comms spec: - backoffLimit: 0 - template: + schedule: "0 0 1 1 *" + suspend: true + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 1 + jobTemplate: spec: - restartPolicy: Never - containers: - - name: reset - image: python:3.11-slim - env: - - name: SYNAPSE_BASE - value: http://matrix-authentication-service:8080 - - name: AUTH_BASE - value: http://matrix-authentication-service:8080 - - name: SERVER_NAME - value: live.bstein.dev - - name: ROOM_ALIAS - value: "#othrys:live.bstein.dev" - - name: ROOM_NAME - value: Othrys - - name: PIN_MESSAGE - value: "Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join and choose 'Continue' -> 'Join as guest'." 
- - name: SEEDER_USER - value: othrys-seeder - - name: SEEDER_PASS - valueFrom: - secretKeyRef: - name: atlasbot-credentials-runtime - key: seeder-password - - name: BOT_USER - value: atlasbot - command: - - /bin/sh - - -c - - | - set -euo pipefail - pip install --no-cache-dir requests >/dev/null - python - <<'PY' - import os, sys, time, urllib.parse, requests + backoffLimit: 0 + template: + spec: + restartPolicy: Never + containers: + - name: reset + image: python:3.11-slim + env: + - name: SYNAPSE_BASE + value: http://othrys-synapse-matrix-synapse:8008 + - name: AUTH_BASE + value: http://matrix-authentication-service:8080 + - name: SERVER_NAME + value: live.bstein.dev + - name: ROOM_ALIAS + value: "#othrys:live.bstein.dev" + - name: ROOM_NAME + value: Othrys + - name: PIN_MESSAGE + value: "Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join and choose 'Continue' -> 'Join as guest'." + - name: SEEDER_USER + value: othrys-seeder + - name: SEEDER_PASS + valueFrom: + secretKeyRef: + name: atlasbot-credentials-runtime + key: seeder-password + - name: BOT_USER + value: atlasbot + command: + - /bin/sh + - -c + - | + set -euo pipefail + pip install --no-cache-dir requests >/dev/null + python - <<'PY' + import os + import time + import urllib.parse + import requests - BASE = os.environ["SYNAPSE_BASE"] - AUTH_BASE = os.environ.get("AUTH_BASE", BASE) - SERVER_NAME = os.environ.get("SERVER_NAME", "live.bstein.dev") - ROOM_ALIAS = os.environ.get("ROOM_ALIAS", "#othrys:live.bstein.dev") - ROOM_NAME = os.environ.get("ROOM_NAME", "Othrys") - PIN_MESSAGE = os.environ["PIN_MESSAGE"] - SEEDER_USER = os.environ["SEEDER_USER"] - SEEDER_PASS = os.environ["SEEDER_PASS"] - BOT_USER = os.environ["BOT_USER"] + BASE = os.environ["SYNAPSE_BASE"] + AUTH_BASE = os.environ.get("AUTH_BASE", BASE) + SERVER_NAME = os.environ.get("SERVER_NAME", "live.bstein.dev") + ROOM_ALIAS = os.environ.get("ROOM_ALIAS", "#othrys:live.bstein.dev") + ROOM_NAME = 
os.environ.get("ROOM_NAME", "Othrys") + PIN_MESSAGE = os.environ["PIN_MESSAGE"] + SEEDER_USER = os.environ["SEEDER_USER"] + SEEDER_PASS = os.environ["SEEDER_PASS"] + BOT_USER = os.environ["BOT_USER"] - POWER_LEVELS = { - "ban": 50, - "events": { - "m.room.avatar": 50, - "m.room.canonical_alias": 50, - "m.room.encryption": 100, - "m.room.history_visibility": 100, - "m.room.name": 50, - "m.room.power_levels": 100, - "m.room.server_acl": 100, - "m.room.tombstone": 100, - }, - "events_default": 0, - "historical": 100, - "invite": 50, - "kick": 50, - "m.call.invite": 50, - "redact": 50, - "state_default": 50, - "users": {f"@{SEEDER_USER}:{SERVER_NAME}": 100}, - "users_default": 0, - } + POWER_LEVELS = { + "ban": 50, + "events": { + "m.room.avatar": 50, + "m.room.canonical_alias": 50, + "m.room.encryption": 100, + "m.room.history_visibility": 100, + "m.room.name": 50, + "m.room.power_levels": 100, + "m.room.server_acl": 100, + "m.room.tombstone": 100, + }, + "events_default": 0, + "historical": 100, + "invite": 50, + "kick": 50, + "m.call.invite": 50, + "redact": 50, + "state_default": 50, + "users": {f"@{SEEDER_USER}:{SERVER_NAME}": 100}, + "users_default": 0, + } - def auth(token): return {"Authorization": f"Bearer {token}"} + def auth(token): + return {"Authorization": f"Bearer {token}"} def canon_user(user): u = (user or "").strip() @@ -85,155 +96,171 @@ spec: u = u.lstrip("@") if ":" in u: return f"@{u}" - return f"@{u}:live.bstein.dev" + return f"@{u}:{SERVER_NAME}" def login(user, password): - r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={ - "type": "m.login.password", - "identifier": {"type": "m.id.user", "user": canon_user(user)}, - "password": password, - }) - if r.status_code != 200: - raise SystemExit(f"login failed: {r.status_code} {r.text}") - return r.json()["access_token"] + r = requests.post( + f"{AUTH_BASE}/_matrix/client/v3/login", + json={ + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": canon_user(user)}, 
+ "password": password, + }, + ) + if r.status_code != 200: + raise SystemExit(f"login failed: {r.status_code} {r.text}") + return r.json()["access_token"] - def resolve_alias(token, alias): - enc = urllib.parse.quote(alias) - r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token)) - if r.status_code == 404: - return None - r.raise_for_status() - return r.json()["room_id"] + def resolve_alias(token, alias): + enc = urllib.parse.quote(alias) + r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token)) + if r.status_code == 404: + return None + r.raise_for_status() + return r.json()["room_id"] - def create_room(token): - r = requests.post(f"{BASE}/_matrix/client/v3/createRoom", headers=auth(token), json={ - "preset": "public_chat", - "name": ROOM_NAME, - "room_version": "11", - }) - r.raise_for_status() - return r.json()["room_id"] + def create_room(token): + r = requests.post( + f"{BASE}/_matrix/client/v3/createRoom", + headers=auth(token), + json={ + "preset": "public_chat", + "name": ROOM_NAME, + "room_version": "11", + }, + ) + r.raise_for_status() + return r.json()["room_id"] - def put_state(token, room_id, ev_type, content): - r = requests.put( - f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/{ev_type}", - headers=auth(token), - json=content, - ) - r.raise_for_status() + def put_state(token, room_id, ev_type, content): + r = requests.put( + f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/{ev_type}", + headers=auth(token), + json=content, + ) + r.raise_for_status() - def set_directory_visibility(token, room_id, visibility): - r = requests.put( - f"{BASE}/_matrix/client/v3/directory/list/room/{urllib.parse.quote(room_id)}", - headers=auth(token), - json={"visibility": visibility}, - ) - r.raise_for_status() + def set_directory_visibility(token, room_id, visibility): + r = requests.put( + 
f"{BASE}/_matrix/client/v3/directory/list/room/{urllib.parse.quote(room_id)}", + headers=auth(token), + json={"visibility": visibility}, + ) + r.raise_for_status() - def delete_alias(token, alias): - enc = urllib.parse.quote(alias) - r = requests.delete(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token)) - if r.status_code in (200, 202, 404): - return - r.raise_for_status() + def delete_alias(token, alias): + enc = urllib.parse.quote(alias) + r = requests.delete(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token)) + if r.status_code in (200, 202, 404): + return + r.raise_for_status() - def put_alias(token, alias, room_id): - enc = urllib.parse.quote(alias) - r = requests.put( - f"{BASE}/_matrix/client/v3/directory/room/{enc}", - headers=auth(token), - json={"room_id": room_id}, - ) - r.raise_for_status() + def put_alias(token, alias, room_id): + enc = urllib.parse.quote(alias) + r = requests.put( + f"{BASE}/_matrix/client/v3/directory/room/{enc}", + headers=auth(token), + json={"room_id": room_id}, + ) + r.raise_for_status() - def list_joined_members(token, room_id): - r = requests.get( - f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members?membership=join", - headers=auth(token), - ) - r.raise_for_status() - members = [] - for ev in r.json().get("chunk", []): - if ev.get("type") != "m.room.member": + def list_joined_members(token, room_id): + r = requests.get( + f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members?membership=join", + headers=auth(token), + ) + r.raise_for_status() + members = [] + for ev in r.json().get("chunk", []): + if ev.get("type") != "m.room.member": + continue + uid = ev.get("state_key") + if not isinstance(uid, str) or not uid.startswith("@"): + continue + members.append(uid) + return members + + def invite_user(token, room_id, user_id): + r = requests.post( + f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/invite", + headers=auth(token), + 
json={"user_id": user_id}, + ) + if r.status_code in (200, 202): + return + r.raise_for_status() + + def send_message(token, room_id, body): + r = requests.post( + f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message", + headers=auth(token), + json={"msgtype": "m.text", "body": body}, + ) + r.raise_for_status() + return r.json()["event_id"] + + def login_with_retry(): + last = None + for attempt in range(1, 6): + try: + return login(SEEDER_USER, SEEDER_PASS) + except Exception as exc: # noqa: BLE001 + last = exc + time.sleep(attempt * 2) + raise last + + token = login_with_retry() + + old_room_id = resolve_alias(token, ROOM_ALIAS) + if not old_room_id: + raise SystemExit(f"alias {ROOM_ALIAS} not found; refusing to proceed") + + new_room_id = create_room(token) + + # Configure the new room. + put_state(token, new_room_id, "m.room.join_rules", {"join_rule": "public"}) + put_state(token, new_room_id, "m.room.guest_access", {"guest_access": "can_join"}) + put_state(token, new_room_id, "m.room.history_visibility", {"history_visibility": "shared"}) + put_state(token, new_room_id, "m.room.power_levels", POWER_LEVELS) + + # Move the alias. + delete_alias(token, ROOM_ALIAS) + put_alias(token, ROOM_ALIAS, new_room_id) + put_state(token, new_room_id, "m.room.canonical_alias", {"alias": ROOM_ALIAS}) + + set_directory_visibility(token, new_room_id, "public") + + # Invite the bot and all joined members of the old room. 
+ bot_user_id = f"@{BOT_USER}:{SERVER_NAME}" + invite_user(token, new_room_id, bot_user_id) + for uid in list_joined_members(token, old_room_id): + if uid == f"@{SEEDER_USER}:{SERVER_NAME}": continue - uid = ev.get("state_key") - if not isinstance(uid, str) or not uid.startswith("@"): + localpart = uid.split(":", 1)[0].lstrip("@") + if localpart.isdigit(): continue - members.append(uid) - return members + invite_user(token, new_room_id, uid) - def invite_user(token, room_id, user_id): - r = requests.post( - f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/invite", - headers=auth(token), - json={"user_id": user_id}, + # Pin the guest invite message in the new room. + event_id = send_message(token, new_room_id, PIN_MESSAGE) + put_state(token, new_room_id, "m.room.pinned_events", {"pinned": [event_id]}) + + # De-list and tombstone the old room. + set_directory_visibility(token, old_room_id, "private") + put_state(token, old_room_id, "m.room.join_rules", {"join_rule": "invite"}) + put_state(token, old_room_id, "m.room.guest_access", {"guest_access": "forbidden"}) + put_state( + token, + old_room_id, + "m.room.tombstone", + {"body": "Othrys has been reset. Please join the new room.", "replacement_room": new_room_id}, ) - if r.status_code in (200, 202): - return - r.raise_for_status() - - def send_message(token, room_id, body): - r = requests.post( - f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message", - headers=auth(token), - json={"msgtype": "m.text", "body": body}, + send_message( + token, + old_room_id, + "Othrys was reset. 
Join the new room at https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join", ) - r.raise_for_status() - return r.json()["event_id"] - def login_with_retry(): - last = None - for attempt in range(1, 6): - try: - return login(SEEDER_USER, SEEDER_PASS) - except Exception as exc: # noqa: BLE001 - last = exc - time.sleep(attempt * 2) - raise last - - token = login_with_retry() - - old_room_id = resolve_alias(token, ROOM_ALIAS) - if not old_room_id: - raise SystemExit(f"alias {ROOM_ALIAS} not found; refusing to proceed") - - new_room_id = create_room(token) - - # Configure the new room. - put_state(token, new_room_id, "m.room.join_rules", {"join_rule": "public"}) - put_state(token, new_room_id, "m.room.guest_access", {"guest_access": "can_join"}) - put_state(token, new_room_id, "m.room.history_visibility", {"history_visibility": "shared"}) - put_state(token, new_room_id, "m.room.power_levels", POWER_LEVELS) - - # Move the alias. - delete_alias(token, ROOM_ALIAS) - put_alias(token, ROOM_ALIAS, new_room_id) - put_state(token, new_room_id, "m.room.canonical_alias", {"alias": ROOM_ALIAS}) - - set_directory_visibility(token, new_room_id, "public") - - # Invite the bot and all joined members of the old room. - bot_user_id = f"@{BOT_USER}:{SERVER_NAME}" - invite_user(token, new_room_id, bot_user_id) - for uid in list_joined_members(token, old_room_id): - if uid == f"@{SEEDER_USER}:{SERVER_NAME}": - continue - localpart = uid.split(":", 1)[0].lstrip("@") - if localpart.isdigit(): - continue - invite_user(token, new_room_id, uid) - - # Pin the guest invite message in the new room. - event_id = send_message(token, new_room_id, PIN_MESSAGE) - put_state(token, new_room_id, "m.room.pinned_events", {"pinned": [event_id]}) - - # De-list and tombstone the old room. 
- set_directory_visibility(token, old_room_id, "private") - put_state(token, old_room_id, "m.room.join_rules", {"join_rule": "invite"}) - put_state(token, old_room_id, "m.room.guest_access", {"guest_access": "forbidden"}) - put_state(token, old_room_id, "m.room.tombstone", {"body": "Othrys has been reset. Please join the new room.", "replacement_room": new_room_id}) - send_message(token, old_room_id, "Othrys was reset. Join the new room at https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join") - - print(f"old_room_id={old_room_id}") - print(f"new_room_id={new_room_id}") - PY + print(f"old_room_id={old_room_id}") + print(f"new_room_id={new_room_id}") + PY -- 2.47.2 From 70a707872eda11bca114b9cdd8a8c168cc2c883c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 06:11:47 -0300 Subject: [PATCH 583/684] comms: rerun MAS local user ensure (v5) --- services/comms/mas-local-users-ensure-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/comms/mas-local-users-ensure-job.yaml b/services/comms/mas-local-users-ensure-job.yaml index e17043b..e462426 100644 --- a/services/comms/mas-local-users-ensure-job.yaml +++ b/services/comms/mas-local-users-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-local-users-ensure-4 + name: mas-local-users-ensure-5 namespace: comms spec: backoffLimit: 1 -- 2.47.2 From b86800cd6dc23ee22862dd5e905df35bbfc8b3f8 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 06:14:32 -0300 Subject: [PATCH 584/684] comms: skip synapse admin list on 403 --- services/comms/guest-name-job.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/services/comms/guest-name-job.yaml b/services/comms/guest-name-job.yaml index b209cf2..40b0a52 100644 --- a/services/comms/guest-name-job.yaml +++ b/services/comms/guest-name-job.yaml @@ -278,7 +278,12 @@ spec: finally: mas_revoke_session(admin_token, session_id) - for entry in 
synapse_list_users(seeder_token): + try: + entries = synapse_list_users(seeder_token) + except Exception as exc: # noqa: BLE001 + print(f"synapse admin list skipped: {exc}") + entries = [] + for entry in entries: user_id = entry.get("name") or "" if not user_id.startswith("@"): continue -- 2.47.2 From 59305ca27ce37ea9b327d409cf4d83dd82692167 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 11:56:35 -0300 Subject: [PATCH 585/684] comms: mint guest tokens via MAS login --- services/comms/NOTES.md | 2 +- services/comms/guest-register-configmap.yaml | 44 ++++++++++++------- services/comms/guest-register-deployment.yaml | 2 - 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/services/comms/NOTES.md b/services/comms/NOTES.md index f3ba78f..39898da 100644 --- a/services/comms/NOTES.md +++ b/services/comms/NOTES.md @@ -7,7 +7,7 @@ Core flow - Synapse is the homeserver; MAS fronts login, Synapse serves client/server APIs. - Element Web provides the main UI; Element Call embeds LiveKit for group video. - LiveKit handles SFU media; Coturn provides TURN for NAT traversal. -- matrix-guest-register provides guest accounts + guest sessions (no Keycloak). +- matrix-guest-register provisions MAS guest accounts and performs MAS password login to mint device-bound guest tokens (no Keycloak). Operational jobs - mas-db-ensure-job: ensures MAS database role/database + secret in comms. 
diff --git a/services/comms/guest-register-configmap.yaml b/services/comms/guest-register-configmap.yaml index b5bc803..a40d52c 100644 --- a/services/comms/guest-register-configmap.yaml +++ b/services/comms/guest-register-configmap.yaml @@ -21,8 +21,6 @@ data: MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"] MAS_ADMIN_CLIENT_SECRET_FILE = os.environ.get("MAS_ADMIN_CLIENT_SECRET_FILE", "/etc/mas/admin-client/client_secret") MAS_ADMIN_SCOPE = os.environ.get("MAS_ADMIN_SCOPE", "urn:mas:admin") - SESSION_TTL_SEC = int(os.environ.get("SESSION_TTL_SEC", "43200")) - RATE_WINDOW_SEC = int(os.environ.get("RATE_WINDOW_SEC", "60")) RATE_MAX = int(os.environ.get("RATE_MAX", "30")) _rate = {} # ip -> [window_start, count] @@ -126,21 +124,30 @@ data: user = payload.get("data") or {} return status, user.get("id") - def _create_session(admin_token, user_id, scope): - status, payload = _admin_api( + def _set_password(admin_token, user_id, password): + status, _payload = _admin_api( admin_token, "POST", - "/personal-sessions", - { - "actor_user_id": user_id, - "human_name": "guest session", - "scope": scope, - "expires_in": SESSION_TTL_SEC, - }, + f"/users/{parse.quote(user_id)}/set-password", + {"password": password}, ) - if status != 201: - return None - return (payload.get("data", {}).get("attributes", {}) or {}).get("access_token") + return status in (200, 204) + + def _login_password(username, password): + payload = { + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": f"@{username}:{SERVER_NAME}"}, + "password": password, + } + status, data = _json( + "POST", + f"{MAS_BASE}/_matrix/client/v3/login", + body=payload, + timeout=20, + ) + if status != 200: + return None, None + return data.get("access_token"), data.get("device_id") def _set_display_name(access_token, user_id, displayname): _json( @@ -235,9 +242,12 @@ data: if not mas_user_id or not localpart: raise RuntimeError("add_user_failed") - access_token = _create_session(admin_token, 
mas_user_id, "urn:matrix:client:api:*") + password = secrets.token_urlsafe(18) + if not _set_password(admin_token, mas_user_id, password): + raise RuntimeError("set_password_failed") + access_token, device_id = _login_password(localpart, password) if not access_token: - raise RuntimeError("session_failed") + raise RuntimeError("login_failed") try: _set_display_name(access_token, f"@{localpart}:{SERVER_NAME}", displayname) except Exception: @@ -248,7 +258,7 @@ data: resp = { "user_id": f"@{localpart}:{SERVER_NAME}", "access_token": access_token, - "device_id": "guest_device", + "device_id": device_id or "guest_device", "home_server": SERVER_NAME, } return self._send_json(200, resp) diff --git a/services/comms/guest-register-deployment.yaml b/services/comms/guest-register-deployment.yaml index a9dd675..284cc42 100644 --- a/services/comms/guest-register-deployment.yaml +++ b/services/comms/guest-register-deployment.yaml @@ -47,8 +47,6 @@ spec: value: http://matrix-authentication-service:8081/api/admin/v1 - name: SYNAPSE_BASE value: http://othrys-synapse-matrix-synapse:8008 - - name: SESSION_TTL_SEC - value: "43200" - name: MATRIX_SERVER_NAME value: live.bstein.dev - name: RATE_WINDOW_SEC -- 2.47.2 From 831f3684937727d94b58079a0117ed649b85a426 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 11:59:51 -0300 Subject: [PATCH 586/684] comms: rename numeric guests via db --- services/comms/guest-name-job.yaml | 63 +++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/services/comms/guest-name-job.yaml b/services/comms/guest-name-job.yaml index 40b0a52..d1466e5 100644 --- a/services/comms/guest-name-job.yaml +++ b/services/comms/guest-name-job.yaml @@ -42,12 +42,25 @@ spec: value: http://matrix-authentication-service:8080/oauth2/token - name: SEEDER_USER value: othrys-seeder + - name: PGHOST + value: postgres-service.postgres.svc.cluster.local + - name: PGPORT + value: "5432" + - name: PGDATABASE + value: synapse + - name: 
PGUSER + value: synapse + - name: PGPASSWORD + valueFrom: + secretKeyRef: + name: synapse-db + key: POSTGRES_PASSWORD command: - /bin/sh - -c - | set -euo pipefail - pip install --no-cache-dir requests >/dev/null + pip install --no-cache-dir requests psycopg2-binary >/dev/null python - <<'PY' import base64 import os @@ -55,6 +68,7 @@ spec: import requests import time import urllib.parse + import psycopg2 ADJ = [ "brisk","calm","eager","gentle","merry","nifty","rapid","sunny","witty","zesty", @@ -74,6 +88,7 @@ spec: MAS_TOKEN_URL = os.environ["MAS_TOKEN_URL"] SEEDER_USER = os.environ["SEEDER_USER"] ROOM_ALIAS = "#othrys:live.bstein.dev" + SERVER_NAME = "live.bstein.dev" def mas_admin_token(): with open(MAS_ADMIN_CLIENT_SECRET_FILE, "r", encoding="utf-8") as f: @@ -241,6 +256,51 @@ spec: def needs_rename_display(display): return not display or display.isdigit() or display.startswith("guest-") + def db_rename_numeric(existing_names): + user_ids = [] + profiles = {} + conn = psycopg2.connect( + host=os.environ["PGHOST"], + port=int(os.environ["PGPORT"]), + dbname=os.environ["PGDATABASE"], + user=os.environ["PGUSER"], + password=os.environ["PGPASSWORD"], + ) + try: + with conn: + with conn.cursor() as cur: + cur.execute( + "SELECT name FROM users WHERE name ~ %s", + (f\"^@\\\\d+:{SERVER_NAME}$\",), + ) + user_ids = [row[0] for row in cur.fetchall()] + if not user_ids: + return + cur.execute( + \"SELECT user_id, displayname FROM profiles WHERE user_id = ANY(%s)\", + (user_ids,), + ) + profiles = {row[0]: row[1] for row in cur.fetchall()} + for user_id in user_ids: + display = profiles.get(user_id) + if display and not needs_rename_display(display): + continue + new = None + for _ in range(30): + candidate = f\"{random.choice(ADJ)}-{random.choice(NOUN)}\" + if candidate not in existing_names: + new = candidate + existing_names.add(candidate) + break + if not new: + continue + cur.execute( + \"INSERT INTO profiles (user_id, displayname) VALUES (%s, %s) ON CONFLICT 
(user_id) DO UPDATE SET displayname = EXCLUDED.displayname\", + (user_id, new), + ) + finally: + conn.close() + admin_token = mas_admin_token() seeder_id = mas_user_id(admin_token, SEEDER_USER) seeder_token, seeder_session = mas_personal_session(admin_token, seeder_id) @@ -307,6 +367,7 @@ spec: continue if not set_displayname_admin(seeder_token, user_id, new): continue + db_rename_numeric(existing) finally: mas_revoke_session(admin_token, seeder_session) PY -- 2.47.2 From d8c3bb2f1b458675b8430990a1c67f49a0cb8a28 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 12:03:53 -0300 Subject: [PATCH 587/684] comms: fix guest renamer db sql quoting --- services/comms/guest-name-job.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/comms/guest-name-job.yaml b/services/comms/guest-name-job.yaml index d1466e5..f31f949 100644 --- a/services/comms/guest-name-job.yaml +++ b/services/comms/guest-name-job.yaml @@ -271,13 +271,13 @@ spec: with conn.cursor() as cur: cur.execute( "SELECT name FROM users WHERE name ~ %s", - (f\"^@\\\\d+:{SERVER_NAME}$\",), + (f"^@\\d+:{SERVER_NAME}$",), ) user_ids = [row[0] for row in cur.fetchall()] if not user_ids: return cur.execute( - \"SELECT user_id, displayname FROM profiles WHERE user_id = ANY(%s)\", + "SELECT user_id, displayname FROM profiles WHERE user_id = ANY(%s)", (user_ids,), ) profiles = {row[0]: row[1] for row in cur.fetchall()} @@ -287,7 +287,7 @@ spec: continue new = None for _ in range(30): - candidate = f\"{random.choice(ADJ)}-{random.choice(NOUN)}\" + candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}" if candidate not in existing_names: new = candidate existing_names.add(candidate) @@ -295,7 +295,7 @@ spec: if not new: continue cur.execute( - \"INSERT INTO profiles (user_id, displayname) VALUES (%s, %s) ON CONFLICT (user_id) DO UPDATE SET displayname = EXCLUDED.displayname\", + "INSERT INTO profiles (user_id, displayname) VALUES (%s, %s) ON CONFLICT (user_id) DO UPDATE SET 
displayname = EXCLUDED.displayname", (user_id, new), ) finally: -- 2.47.2 From 1cce3048723a7c11698441b8e40748856b5d35a1 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 12:07:46 -0300 Subject: [PATCH 588/684] comms: include full_user_id when renaming --- services/comms/guest-name-job.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/comms/guest-name-job.yaml b/services/comms/guest-name-job.yaml index f31f949..44d3090 100644 --- a/services/comms/guest-name-job.yaml +++ b/services/comms/guest-name-job.yaml @@ -295,8 +295,8 @@ spec: if not new: continue cur.execute( - "INSERT INTO profiles (user_id, displayname) VALUES (%s, %s) ON CONFLICT (user_id) DO UPDATE SET displayname = EXCLUDED.displayname", - (user_id, new), + "INSERT INTO profiles (user_id, displayname, full_user_id) VALUES (%s, %s, %s) ON CONFLICT (user_id) DO UPDATE SET displayname = EXCLUDED.displayname", + (user_id, new, user_id), ) finally: conn.close() -- 2.47.2 From 41a762d6a6f02f0733cd8c770ddc02e5846e3273 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 12:12:08 -0300 Subject: [PATCH 589/684] comms: update numeric guest rename logic --- services/comms/guest-name-job.yaml | 56 ++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/services/comms/guest-name-job.yaml b/services/comms/guest-name-job.yaml index 44d3090..6179967 100644 --- a/services/comms/guest-name-job.yaml +++ b/services/comms/guest-name-job.yaml @@ -257,8 +257,9 @@ spec: return not display or display.isdigit() or display.startswith("guest-") def db_rename_numeric(existing_names): - user_ids = [] - profiles = {} + profile_rows = [] + profile_index = {} + users = [] conn = psycopg2.connect( host=os.environ["PGHOST"], port=int(os.environ["PGPORT"]), @@ -270,19 +271,12 @@ spec: with conn: with conn.cursor() as cur: cur.execute( - "SELECT name FROM users WHERE name ~ %s", + "SELECT user_id, full_user_id, displayname FROM profiles WHERE 
full_user_id ~ %s", (f"^@\\d+:{SERVER_NAME}$",), ) - user_ids = [row[0] for row in cur.fetchall()] - if not user_ids: - return - cur.execute( - "SELECT user_id, displayname FROM profiles WHERE user_id = ANY(%s)", - (user_ids,), - ) - profiles = {row[0]: row[1] for row in cur.fetchall()} - for user_id in user_ids: - display = profiles.get(user_id) + profile_rows = cur.fetchall() + profile_index = {row[1]: row for row in profile_rows} + for user_id, full_user_id, display in profile_rows: if display and not needs_rename_display(display): continue new = None @@ -295,8 +289,40 @@ spec: if not new: continue cur.execute( - "INSERT INTO profiles (user_id, displayname, full_user_id) VALUES (%s, %s, %s) ON CONFLICT (user_id) DO UPDATE SET displayname = EXCLUDED.displayname", - (user_id, new, user_id), + "UPDATE profiles SET displayname = %s WHERE full_user_id = %s", + (new, full_user_id), + ) + + cur.execute( + "SELECT name FROM users WHERE name ~ %s", + (f"^@\\d+:{SERVER_NAME}$",), + ) + users = [row[0] for row in cur.fetchall()] + if not users: + return + cur.execute( + "SELECT user_id, full_user_id FROM profiles WHERE full_user_id = ANY(%s)", + (users,), + ) + for existing_full in cur.fetchall(): + profile_index.setdefault(existing_full[1], existing_full) + + for full_user_id in users: + if full_user_id in profile_index: + continue + localpart = full_user_id.split(":", 1)[0].lstrip("@") + new = None + for _ in range(30): + candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}" + if candidate not in existing_names: + new = candidate + existing_names.add(candidate) + break + if not new: + continue + cur.execute( + "INSERT INTO profiles (user_id, displayname, full_user_id) VALUES (%s, %s, %s)", + (localpart, new, full_user_id), ) finally: conn.close() -- 2.47.2 From a272a219a41995788c26719714d7675646380633 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 12:15:59 -0300 Subject: [PATCH 590/684] comms: serialize guest renamer inserts --- 
services/comms/guest-name-job.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/services/comms/guest-name-job.yaml b/services/comms/guest-name-job.yaml index 6179967..156617d 100644 --- a/services/comms/guest-name-job.yaml +++ b/services/comms/guest-name-job.yaml @@ -7,6 +7,7 @@ metadata: spec: schedule: "*/1 * * * *" suspend: false + concurrencyPolicy: Forbid successfulJobsHistoryLimit: 1 failedJobsHistoryLimit: 1 jobTemplate: @@ -321,7 +322,8 @@ spec: if not new: continue cur.execute( - "INSERT INTO profiles (user_id, displayname, full_user_id) VALUES (%s, %s, %s)", + "INSERT INTO profiles (user_id, displayname, full_user_id) VALUES (%s, %s, %s) " + "ON CONFLICT (full_user_id) DO UPDATE SET displayname = EXCLUDED.displayname", (localpart, new, full_user_id), ) finally: -- 2.47.2 From a57448f074a1cf874e03b4d417b6f7cc5db419db Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 12:44:00 -0300 Subject: [PATCH 591/684] comms: kick numeric members from Othrys --- services/comms/kustomization.yaml | 1 + services/comms/othrys-kick-numeric-job.yaml | 115 ++++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 services/comms/othrys-kick-numeric-job.yaml diff --git a/services/comms/kustomization.yaml b/services/comms/kustomization.yaml index 9f95958..6b69c1e 100644 --- a/services/comms/kustomization.yaml +++ b/services/comms/kustomization.yaml @@ -33,6 +33,7 @@ resources: - coturn.yaml - seed-othrys-room.yaml - guest-name-job.yaml + - othrys-kick-numeric-job.yaml - pin-othrys-job.yaml - reset-othrys-room-job.yaml - bstein-force-leave-job.yaml diff --git a/services/comms/othrys-kick-numeric-job.yaml b/services/comms/othrys-kick-numeric-job.yaml new file mode 100644 index 0000000..8f02bbb --- /dev/null +++ b/services/comms/othrys-kick-numeric-job.yaml @@ -0,0 +1,115 @@ +# services/comms/othrys-kick-numeric-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: othrys-kick-numeric-1 + namespace: comms +spec: + 
backoffLimit: 0 + template: + spec: + restartPolicy: Never + containers: + - name: kick + image: python:3.11-slim + env: + - name: SYNAPSE_BASE + value: http://othrys-synapse-matrix-synapse:8008 + - name: AUTH_BASE + value: http://matrix-authentication-service:8080 + - name: SERVER_NAME + value: live.bstein.dev + - name: ROOM_ALIAS + value: "#othrys:live.bstein.dev" + - name: SEEDER_USER + value: othrys-seeder + - name: SEEDER_PASS + valueFrom: + secretKeyRef: + name: atlasbot-credentials-runtime + key: seeder-password + command: + - /bin/sh + - -c + - | + set -euo pipefail + pip install --no-cache-dir requests >/dev/null + python - <<'PY' + import os + import urllib.parse + import requests + + BASE = os.environ["SYNAPSE_BASE"] + AUTH_BASE = os.environ.get("AUTH_BASE", BASE) + SERVER_NAME = os.environ.get("SERVER_NAME", "live.bstein.dev") + ROOM_ALIAS = os.environ.get("ROOM_ALIAS", "#othrys:live.bstein.dev") + SEEDER_USER = os.environ["SEEDER_USER"] + SEEDER_PASS = os.environ["SEEDER_PASS"] + + def canon_user(user): + u = (user or "").strip() + if u.startswith("@") and ":" in u: + return u + u = u.lstrip("@") + if ":" in u: + return f"@{u}" + return f"@{u}:{SERVER_NAME}" + + def auth(token): + return {"Authorization": f"Bearer {token}"} + + def login(user, password): + r = requests.post( + f"{AUTH_BASE}/_matrix/client/v3/login", + json={ + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": canon_user(user)}, + "password": password, + }, + timeout=30, + ) + r.raise_for_status() + return r.json()["access_token"] + + def resolve_alias(token, alias): + enc = urllib.parse.quote(alias) + r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token), timeout=30) + r.raise_for_status() + return r.json()["room_id"] + + def list_members(token, room_id): + r = requests.get( + f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members?membership=join", + headers=auth(token), + timeout=30, + ) + r.raise_for_status() + 
members = [] + for ev in r.json().get("chunk", []): + uid = ev.get("state_key") + if isinstance(uid, str) and uid.startswith("@"): + members.append(uid) + return members + + def is_numeric(user_id): + localpart = user_id.split(":", 1)[0].lstrip("@") + return localpart.isdigit() + + def kick(token, room_id, user_id): + r = requests.post( + f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/kick", + headers=auth(token), + json={"user_id": user_id, "reason": "cleanup numeric guest"}, + timeout=30, + ) + if r.status_code not in (200, 202): + raise SystemExit(f"kick {user_id} failed: {r.status_code} {r.text}") + + token = login(SEEDER_USER, SEEDER_PASS) + room_id = resolve_alias(token, ROOM_ALIAS) + for user_id in list_members(token, room_id): + if user_id == canon_user(SEEDER_USER): + continue + if is_numeric(user_id): + kick(token, room_id, user_id) + PY -- 2.47.2 From 1027fe5ce5b3a71e61fcc24913a84d6c6051c880 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 22:31:45 -0300 Subject: [PATCH 592/684] logging: add loki and fluent-bit --- .../atlas/applications/kustomization.yaml | 1 + .../flux-system/platform/kustomization.yaml | 1 + .../platform/logging/kustomization.yaml | 14 +++ infrastructure/sources/helm/fluent-bit.yaml | 9 ++ .../sources/helm/kustomization.yaml | 1 + services/logging/fluent-bit-helmrelease.yaml | 99 +++++++++++++++++ services/logging/ingress.yaml | 25 +++++ services/logging/kustomization.yaml | 9 ++ services/logging/loki-helmrelease.yaml | 59 ++++++++++ services/logging/namespace.yaml | 5 + services/logging/oauth2-proxy.yaml | 101 ++++++++++++++++++ services/monitoring/helmrelease.yaml | 7 ++ 12 files changed, 331 insertions(+) create mode 100644 clusters/atlas/flux-system/platform/logging/kustomization.yaml create mode 100644 infrastructure/sources/helm/fluent-bit.yaml create mode 100644 services/logging/fluent-bit-helmrelease.yaml create mode 100644 services/logging/ingress.yaml create mode 100644 
services/logging/kustomization.yaml create mode 100644 services/logging/loki-helmrelease.yaml create mode 100644 services/logging/namespace.yaml create mode 100644 services/logging/oauth2-proxy.yaml diff --git a/clusters/atlas/applications/kustomization.yaml b/clusters/atlas/applications/kustomization.yaml index f5c64e8..ed6d795 100644 --- a/clusters/atlas/applications/kustomization.yaml +++ b/clusters/atlas/applications/kustomization.yaml @@ -7,6 +7,7 @@ resources: - ../../services/jellyfin - ../../services/comms - ../../services/monitoring + - ../../services/logging - ../../services/pegasus - ../../services/vault - ../../services/bstein-dev-home diff --git a/clusters/atlas/flux-system/platform/kustomization.yaml b/clusters/atlas/flux-system/platform/kustomization.yaml index e1c5d23..df226e2 100644 --- a/clusters/atlas/flux-system/platform/kustomization.yaml +++ b/clusters/atlas/flux-system/platform/kustomization.yaml @@ -8,5 +8,6 @@ resources: - traefik/kustomization.yaml - gitops-ui/kustomization.yaml - monitoring/kustomization.yaml + - logging/kustomization.yaml - longhorn-ui/kustomization.yaml - ../platform/vault-csi/kustomization.yaml diff --git a/clusters/atlas/flux-system/platform/logging/kustomization.yaml b/clusters/atlas/flux-system/platform/logging/kustomization.yaml new file mode 100644 index 0000000..c51eb5e --- /dev/null +++ b/clusters/atlas/flux-system/platform/logging/kustomization.yaml @@ -0,0 +1,14 @@ +# clusters/atlas/flux-system/platform/logging/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: logging + namespace: flux-system +spec: + interval: 10m + path: ./services/logging + prune: true + sourceRef: + kind: GitRepository + name: flux-system + wait: false diff --git a/infrastructure/sources/helm/fluent-bit.yaml b/infrastructure/sources/helm/fluent-bit.yaml new file mode 100644 index 0000000..b4cb214 --- /dev/null +++ b/infrastructure/sources/helm/fluent-bit.yaml @@ -0,0 +1,9 @@ +# 
infrastructure/sources/helm/fluent-bit.yaml +apiVersion: source.toolkit.fluxcd.io/v1 +kind: HelmRepository +metadata: + name: fluent + namespace: flux-system +spec: + interval: 1h + url: https://fluent.github.io/helm-charts diff --git a/infrastructure/sources/helm/kustomization.yaml b/infrastructure/sources/helm/kustomization.yaml index 7b2163b..1cbf20e 100644 --- a/infrastructure/sources/helm/kustomization.yaml +++ b/infrastructure/sources/helm/kustomization.yaml @@ -2,6 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: + - fluent-bit.yaml - grafana.yaml - hashicorp.yaml - jetstack.yaml diff --git a/services/logging/fluent-bit-helmrelease.yaml b/services/logging/fluent-bit-helmrelease.yaml new file mode 100644 index 0000000..7e0614d --- /dev/null +++ b/services/logging/fluent-bit-helmrelease.yaml @@ -0,0 +1,99 @@ +# services/logging/fluent-bit-helmrelease.yaml +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: fluent-bit + namespace: logging +spec: + interval: 15m + chart: + spec: + chart: fluent-bit + version: "~0.46.0" + sourceRef: + kind: HelmRepository + name: fluent + namespace: flux-system + values: + serviceAccount: + create: true + rbac: + create: true + tolerations: + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + extraVolumes: + - name: runlogjournal + hostPath: + path: /run/log/journal + - name: varlogjournal + hostPath: + path: /var/log/journal + extraVolumeMounts: + - name: runlogjournal + mountPath: /run/log/journal + readOnly: true + - name: varlogjournal + mountPath: /var/log/journal + readOnly: true + config: + service: | + [SERVICE] + Flush 1 + Log_Level info + Daemon Off + Parsers_File parsers.conf + Parsers_File custom_parsers.conf + HTTP_Server On + HTTP_Listen 0.0.0.0 + HTTP_Port 2020 + inputs: | + [INPUT] + Name tail + Tag kube.* + Path /var/log/containers/*.log + 
Parser cri + Mem_Buf_Limit 50MB + Skip_Long_Lines On + Refresh_Interval 10 + Rotate_Wait 30 + storage.type memory + + [INPUT] + Name systemd + Tag journald.* + Read_From_Tail On + storage.type memory + filters: | + [FILTER] + Name kubernetes + Match kube.* + Kube_URL https://kubernetes.default.svc:443 + Kube_CA_File /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + Kube_Token_File /var/run/secrets/kubernetes.io/serviceaccount/token + Merge_Log On + Keep_Log Off + K8S-Logging.Parser On + K8S-Logging.Exclude On + outputs: | + [OUTPUT] + Name loki + Match kube.* + Host loki.logging.svc.cluster.local + Port 3100 + Labels {job="fluent-bit", namespace="$kubernetes['namespace_name']", pod="$kubernetes['pod_name']", container="$kubernetes['container_name']"} + LabelKeys stream + Line_Format json + + [OUTPUT] + Name loki + Match journald.* + Host loki.logging.svc.cluster.local + Port 3100 + Labels {job="systemd"} + LabelKeys _SYSTEMD_UNIT,_HOSTNAME,SYSLOG_IDENTIFIER + Line_Format json diff --git a/services/logging/ingress.yaml b/services/logging/ingress.yaml new file mode 100644 index 0000000..f3211b2 --- /dev/null +++ b/services/logging/ingress.yaml @@ -0,0 +1,25 @@ +# services/logging/ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: loki + namespace: logging + annotations: + cert-manager.io/cluster-issuer: letsencrypt +spec: + ingressClassName: traefik + tls: + - hosts: + - logs.bstein.dev + secretName: logs-tls + rules: + - host: logs.bstein.dev + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: oauth2-proxy-loki + port: + name: http diff --git a/services/logging/kustomization.yaml b/services/logging/kustomization.yaml new file mode 100644 index 0000000..476d88b --- /dev/null +++ b/services/logging/kustomization.yaml @@ -0,0 +1,9 @@ +# services/logging/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - namespace.yaml + - loki-helmrelease.yaml + - 
fluent-bit-helmrelease.yaml + - oauth2-proxy.yaml + - ingress.yaml diff --git a/services/logging/loki-helmrelease.yaml b/services/logging/loki-helmrelease.yaml new file mode 100644 index 0000000..a1ce519 --- /dev/null +++ b/services/logging/loki-helmrelease.yaml @@ -0,0 +1,59 @@ +# services/logging/loki-helmrelease.yaml +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: loki + namespace: logging +spec: + interval: 15m + chart: + spec: + chart: loki + version: "~6.6.0" + sourceRef: + kind: HelmRepository + name: grafana + namespace: flux-system + values: + fullnameOverride: loki + deploymentMode: SingleBinary + loki: + auth_enabled: false + commonConfig: + replication_factor: 1 + storage: + type: filesystem + storageConfig: + filesystem: + directory: /var/loki/chunks + tsdb_shipper: + active_index_directory: /var/loki/index + cache_location: /var/loki/index_cache + schemaConfig: + configs: + - from: 2024-01-01 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: loki_index_ + period: 24h + compactor: + working_directory: /var/loki/compactor + shared_store: filesystem + retention_enabled: true + delete_request_store: filesystem + limits_config: + retention_period: 4320h + reject_old_samples: true + reject_old_samples_max_age: 168h + singleBinary: + replicas: 1 + persistence: + enabled: true + size: 200Gi + storageClass: asteria + service: + type: ClusterIP + ingress: + enabled: false diff --git a/services/logging/namespace.yaml b/services/logging/namespace.yaml new file mode 100644 index 0000000..ac585d9 --- /dev/null +++ b/services/logging/namespace.yaml @@ -0,0 +1,5 @@ +# services/logging/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: logging diff --git a/services/logging/oauth2-proxy.yaml b/services/logging/oauth2-proxy.yaml new file mode 100644 index 0000000..ac6accc --- /dev/null +++ b/services/logging/oauth2-proxy.yaml @@ -0,0 +1,101 @@ +# services/logging/oauth2-proxy.yaml +apiVersion: v1 
+kind: Service +metadata: + name: oauth2-proxy-loki + namespace: logging + labels: + app: oauth2-proxy-loki +spec: + ports: + - name: http + port: 80 + targetPort: 4180 + selector: + app: oauth2-proxy-loki + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: oauth2-proxy-loki + namespace: logging + labels: + app: oauth2-proxy-loki +spec: + replicas: 2 + selector: + matchLabels: + app: oauth2-proxy-loki + template: + metadata: + labels: + app: oauth2-proxy-loki + spec: + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 90 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5","rpi4"] + containers: + - name: oauth2-proxy + image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 + imagePullPolicy: IfNotPresent + args: + - --provider=oidc + - --redirect-url=https://logs.bstein.dev/oauth2/callback + - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas + - --scope=openid profile email groups + - --email-domain=* + - --set-xauthrequest=true + - --pass-access-token=true + - --set-authorization-header=true + - --cookie-secure=true + - --cookie-samesite=lax + - --cookie-refresh=20m + - --cookie-expire=168h + - --insecure-oidc-allow-unverified-email=true + - --upstream=http://loki.logging.svc.cluster.local:3100 + - --http-address=0.0.0.0:4180 + - --skip-provider-button=true + - --skip-jwt-bearer-tokens=true + - --oidc-groups-claim=groups + - --cookie-domain=logs.bstein.dev + env: + - name: OAUTH2_PROXY_CLIENT_ID + valueFrom: + secretKeyRef: + name: oauth2-proxy-loki-oidc + key: client_id + - name: OAUTH2_PROXY_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: oauth2-proxy-loki-oidc + key: client_secret + - name: OAUTH2_PROXY_COOKIE_SECRET + valueFrom: + secretKeyRef: + name: oauth2-proxy-loki-oidc + key: cookie_secret + ports: + - containerPort: 4180 + name: http + readinessProbe: + httpGet: + path: /ping + port: 4180 + initialDelaySeconds: 
5 + periodSeconds: 10 + livenessProbe: + httpGet: + path: /ping + port: 4180 + initialDelaySeconds: 20 + periodSeconds: 20 diff --git a/services/monitoring/helmrelease.yaml b/services/monitoring/helmrelease.yaml index ddd24e5..873a323 100644 --- a/services/monitoring/helmrelease.yaml +++ b/services/monitoring/helmrelease.yaml @@ -320,6 +320,13 @@ spec: timeInterval: "15s" uid: atlas-vm orgId: 2 + - name: Loki + type: loki + access: proxy + url: http://loki.logging.svc.cluster.local:3100 + isDefault: false + uid: atlas-loki + orgId: 1 dashboardProviders: dashboardproviders.yaml: apiVersion: 1 -- 2.47.2 From 1fd4a426b4bc5ddd4bd274e00e284fbd0616c830 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 22:33:27 -0300 Subject: [PATCH 593/684] logging: fix loki single-binary mode --- services/logging/loki-helmrelease.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/services/logging/loki-helmrelease.yaml b/services/logging/loki-helmrelease.yaml index a1ce519..eb0298b 100644 --- a/services/logging/loki-helmrelease.yaml +++ b/services/logging/loki-helmrelease.yaml @@ -47,6 +47,12 @@ spec: retention_period: 4320h reject_old_samples: true reject_old_samples_max_age: 168h + read: + replicas: 0 + write: + replicas: 0 + backend: + replicas: 0 singleBinary: replicas: 1 persistence: -- 2.47.2 From e5d04f2bcf7548995fd829aefe76cca619462d78 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 22:40:02 -0300 Subject: [PATCH 594/684] logging: fix loki config and fluent-bit output --- services/logging/fluent-bit-helmrelease.yaml | 12 ++++++------ services/logging/loki-helmrelease.yaml | 10 +++++++++- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/services/logging/fluent-bit-helmrelease.yaml b/services/logging/fluent-bit-helmrelease.yaml index 7e0614d..e257d9a 100644 --- a/services/logging/fluent-bit-helmrelease.yaml +++ b/services/logging/fluent-bit-helmrelease.yaml @@ -85,15 +85,15 @@ spec: Match kube.* Host 
loki.logging.svc.cluster.local Port 3100 - Labels {job="fluent-bit", namespace="$kubernetes['namespace_name']", pod="$kubernetes['pod_name']", container="$kubernetes['container_name']"} - LabelKeys stream - Line_Format json + labels {job="fluent-bit", namespace="$kubernetes['namespace_name']", pod="$kubernetes['pod_name']", container="$kubernetes['container_name']"} + label_keys stream + line_format json [OUTPUT] Name loki Match journald.* Host loki.logging.svc.cluster.local Port 3100 - Labels {job="systemd"} - LabelKeys _SYSTEMD_UNIT,_HOSTNAME,SYSLOG_IDENTIFIER - Line_Format json + labels {job="systemd"} + label_keys _SYSTEMD_UNIT,_HOSTNAME,SYSLOG_IDENTIFIER + line_format json diff --git a/services/logging/loki-helmrelease.yaml b/services/logging/loki-helmrelease.yaml index eb0298b..e2a1235 100644 --- a/services/logging/loki-helmrelease.yaml +++ b/services/logging/loki-helmrelease.yaml @@ -31,7 +31,7 @@ spec: cache_location: /var/loki/index_cache schemaConfig: configs: - - from: 2024-01-01 + - from: "2024-01-01" store: tsdb object_store: filesystem schema: v13 @@ -59,6 +59,14 @@ spec: enabled: true size: 200Gi storageClass: asteria + gateway: + enabled: false + chunksCache: + enabled: false + resultsCache: + enabled: false + lokiCanary: + enabled: false service: type: ClusterIP ingress: -- 2.47.2 From 0463c2bf60a776d8cac3e23a3e7f71525638869c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 22:41:07 -0300 Subject: [PATCH 595/684] logging: drop fluent-bit label_keys --- services/logging/fluent-bit-helmrelease.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/services/logging/fluent-bit-helmrelease.yaml b/services/logging/fluent-bit-helmrelease.yaml index e257d9a..4a57425 100644 --- a/services/logging/fluent-bit-helmrelease.yaml +++ b/services/logging/fluent-bit-helmrelease.yaml @@ -86,7 +86,6 @@ spec: Host loki.logging.svc.cluster.local Port 3100 labels {job="fluent-bit", namespace="$kubernetes['namespace_name']", pod="$kubernetes['pod_name']", 
container="$kubernetes['container_name']"} - label_keys stream line_format json [OUTPUT] @@ -95,5 +94,4 @@ spec: Host loki.logging.svc.cluster.local Port 3100 labels {job="systemd"} - label_keys _SYSTEMD_UNIT,_HOSTNAME,SYSLOG_IDENTIFIER line_format json -- 2.47.2 From 19d236ab43ec9e104345b2e8a5c97c14025e7be6 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 23:02:34 -0300 Subject: [PATCH 596/684] logging: keep loki defaults for canary and gateway --- services/logging/loki-helmrelease.yaml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/services/logging/loki-helmrelease.yaml b/services/logging/loki-helmrelease.yaml index e2a1235..81f71c9 100644 --- a/services/logging/loki-helmrelease.yaml +++ b/services/logging/loki-helmrelease.yaml @@ -59,14 +59,6 @@ spec: enabled: true size: 200Gi storageClass: asteria - gateway: - enabled: false - chunksCache: - enabled: false - resultsCache: - enabled: false - lokiCanary: - enabled: false service: type: ClusterIP ingress: -- 2.47.2 From c87a34a0f95f5c9511e97dd09442fe7b07d085d2 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 23:11:00 -0300 Subject: [PATCH 597/684] logging: trim loki compactor config --- services/logging/loki-helmrelease.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/services/logging/loki-helmrelease.yaml b/services/logging/loki-helmrelease.yaml index 81f71c9..fdf8205 100644 --- a/services/logging/loki-helmrelease.yaml +++ b/services/logging/loki-helmrelease.yaml @@ -40,9 +40,7 @@ spec: period: 24h compactor: working_directory: /var/loki/compactor - shared_store: filesystem retention_enabled: true - delete_request_store: filesystem limits_config: retention_period: 4320h reject_old_samples: true -- 2.47.2 From e305d312b143fa796d12b0a8400dd66242d884c8 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 23:22:24 -0300 Subject: [PATCH 598/684] logging: add loki delete_request_store --- services/logging/loki-helmrelease.yaml | 1 + 1 file changed, 1 
insertion(+) diff --git a/services/logging/loki-helmrelease.yaml b/services/logging/loki-helmrelease.yaml index fdf8205..cba22e3 100644 --- a/services/logging/loki-helmrelease.yaml +++ b/services/logging/loki-helmrelease.yaml @@ -41,6 +41,7 @@ spec: compactor: working_directory: /var/loki/compactor retention_enabled: true + delete_request_store: filesystem limits_config: retention_period: 4320h reject_old_samples: true -- 2.47.2 From 19f1060b878aec930e4dda17ed28174a9d800fdf Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 23:44:48 -0300 Subject: [PATCH 599/684] logging: disable fluent-bit inotify watcher --- services/logging/fluent-bit-helmrelease.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/logging/fluent-bit-helmrelease.yaml b/services/logging/fluent-bit-helmrelease.yaml index 4a57425..611d17b 100644 --- a/services/logging/fluent-bit-helmrelease.yaml +++ b/services/logging/fluent-bit-helmrelease.yaml @@ -56,11 +56,13 @@ spec: Name tail Tag kube.* Path /var/log/containers/*.log + Exclude_Path /var/log/containers/*_POD_*.log Parser cri Mem_Buf_Limit 50MB Skip_Long_Lines On Refresh_Interval 10 Rotate_Wait 30 + Inotify_Watcher false storage.type memory [INPUT] -- 2.47.2 From f8fad8d5997132e5f45e96138d3297926d09265f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 23:47:52 -0300 Subject: [PATCH 600/684] logging: fix fluent-bit loki labels --- services/logging/fluent-bit-helmrelease.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/logging/fluent-bit-helmrelease.yaml b/services/logging/fluent-bit-helmrelease.yaml index 611d17b..ea7b815 100644 --- a/services/logging/fluent-bit-helmrelease.yaml +++ b/services/logging/fluent-bit-helmrelease.yaml @@ -87,7 +87,7 @@ spec: Match kube.* Host loki.logging.svc.cluster.local Port 3100 - labels {job="fluent-bit", namespace="$kubernetes['namespace_name']", pod="$kubernetes['pod_name']", container="$kubernetes['container_name']"} + labels 
job=fluent-bit,namespace=$kubernetes['namespace_name'],pod=$kubernetes['pod_name'],container=$kubernetes['container_name'] line_format json [OUTPUT] @@ -95,5 +95,5 @@ spec: Match journald.* Host loki.logging.svc.cluster.local Port 3100 - labels {job="systemd"} + labels job=systemd line_format json -- 2.47.2 From c4980b975c452075b72f8e8ebd60ed205ca07a5d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 23:54:04 -0300 Subject: [PATCH 601/684] logging: set systemd journal path --- services/logging/fluent-bit-helmrelease.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/logging/fluent-bit-helmrelease.yaml b/services/logging/fluent-bit-helmrelease.yaml index ea7b815..2f65e83 100644 --- a/services/logging/fluent-bit-helmrelease.yaml +++ b/services/logging/fluent-bit-helmrelease.yaml @@ -68,6 +68,7 @@ spec: [INPUT] Name systemd Tag journald.* + Path /run/log/journal Read_From_Tail On storage.type memory filters: | -- 2.47.2 From 3694b8f76e5e1c84b697dac85e562337d96ccee0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 8 Jan 2026 23:58:42 -0300 Subject: [PATCH 602/684] logging: point systemd input at /var/log/journal --- services/logging/fluent-bit-helmrelease.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/logging/fluent-bit-helmrelease.yaml b/services/logging/fluent-bit-helmrelease.yaml index 2f65e83..ca156e4 100644 --- a/services/logging/fluent-bit-helmrelease.yaml +++ b/services/logging/fluent-bit-helmrelease.yaml @@ -68,7 +68,7 @@ spec: [INPUT] Name systemd Tag journald.* - Path /run/log/journal + Path /var/log/journal Read_From_Tail On storage.type memory filters: | -- 2.47.2 From 9e496cb8d6189d3722d5653b41fcf3ec30b5b982 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 07:12:40 -0300 Subject: [PATCH 603/684] logging: fix oauth2 scope and pin loki to rpi --- services/logging/loki-helmrelease.yaml | 54 ++++++++++++++++++++++++++ services/logging/oauth2-proxy.yaml | 14 +++---- 2 files changed, 
61 insertions(+), 7 deletions(-) diff --git a/services/logging/loki-helmrelease.yaml b/services/logging/loki-helmrelease.yaml index cba22e3..136ae8c 100644 --- a/services/logging/loki-helmrelease.yaml +++ b/services/logging/loki-helmrelease.yaml @@ -54,10 +54,64 @@ spec: replicas: 0 singleBinary: replicas: 1 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + - rpi4 persistence: enabled: true size: 200Gi storageClass: asteria + gateway: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + - rpi4 + chunksCache: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + - rpi4 + resultsCache: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + - rpi4 + canary: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + - rpi4 service: type: ClusterIP ingress: diff --git a/services/logging/oauth2-proxy.yaml b/services/logging/oauth2-proxy.yaml index ac6accc..29d29d6 100644 --- a/services/logging/oauth2-proxy.yaml +++ b/services/logging/oauth2-proxy.yaml @@ -37,13 +37,14 @@ spec: node-role.kubernetes.io/worker: "true" affinity: nodeAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 90 - preference: - matchExpressions: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: - key: hardware operator: In - values: ["rpi5","rpi4"] + values: + - rpi5 + - rpi4 containers: - name: oauth2-proxy image: 
quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 @@ -52,7 +53,7 @@ spec: - --provider=oidc - --redirect-url=https://logs.bstein.dev/oauth2/callback - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas - - --scope=openid profile email groups + - --scope=openid profile email - --email-domain=* - --set-xauthrequest=true - --pass-access-token=true @@ -66,7 +67,6 @@ spec: - --http-address=0.0.0.0:4180 - --skip-provider-button=true - --skip-jwt-bearer-tokens=true - - --oidc-groups-claim=groups - --cookie-domain=logs.bstein.dev env: - name: OAUTH2_PROXY_CLIENT_ID -- 2.47.2 From 0b8caa4c7c2a1ac6695074e10179e38b13ebf1f0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 07:16:10 -0300 Subject: [PATCH 604/684] logging: shrink loki caches for rpi nodes --- services/logging/loki-helmrelease.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/logging/loki-helmrelease.yaml b/services/logging/loki-helmrelease.yaml index 136ae8c..a60f0eb 100644 --- a/services/logging/loki-helmrelease.yaml +++ b/services/logging/loki-helmrelease.yaml @@ -80,6 +80,7 @@ spec: - rpi5 - rpi4 chunksCache: + allocatedMemory: 512 affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: @@ -91,6 +92,7 @@ spec: - rpi5 - rpi4 resultsCache: + allocatedMemory: 256 affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: -- 2.47.2 From 5004bbd8ecb57ed0924ad77c4a1d853f64f558c5 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 07:19:59 -0300 Subject: [PATCH 605/684] logging: pin loki canary to rpi5 nodes --- services/logging/loki-helmrelease.yaml | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/services/logging/loki-helmrelease.yaml b/services/logging/loki-helmrelease.yaml index a60f0eb..4b4de23 100644 --- a/services/logging/loki-helmrelease.yaml +++ b/services/logging/loki-helmrelease.yaml @@ -103,17 +103,9 @@ spec: values: - rpi5 - rpi4 - canary: - affinity: - nodeAffinity: - 
requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: hardware - operator: In - values: - - rpi5 - - rpi4 + lokiCanary: + nodeSelector: + hardware: rpi5 service: type: ClusterIP ingress: -- 2.47.2 From afb7eb80f248412e4433236786a9d3ca883e1171 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 07:26:12 -0300 Subject: [PATCH 606/684] logging: keep loki canary on rpi5 workers --- services/logging/loki-helmrelease.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/logging/loki-helmrelease.yaml b/services/logging/loki-helmrelease.yaml index 4b4de23..f14d80d 100644 --- a/services/logging/loki-helmrelease.yaml +++ b/services/logging/loki-helmrelease.yaml @@ -106,6 +106,7 @@ spec: lokiCanary: nodeSelector: hardware: rpi5 + node-role.kubernetes.io/worker: "true" service: type: ClusterIP ingress: -- 2.47.2 From 719f16c4e30d35f5535e45f1b98994b1ca7f6cbb Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 08:07:46 -0300 Subject: [PATCH 607/684] logging: route oauth2-proxy via loki gateway --- services/logging/oauth2-proxy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/logging/oauth2-proxy.yaml b/services/logging/oauth2-proxy.yaml index 29d29d6..f9b9616 100644 --- a/services/logging/oauth2-proxy.yaml +++ b/services/logging/oauth2-proxy.yaml @@ -63,7 +63,7 @@ spec: - --cookie-refresh=20m - --cookie-expire=168h - --insecure-oidc-allow-unverified-email=true - - --upstream=http://loki.logging.svc.cluster.local:3100 + - --upstream=http://loki-gateway.logging.svc.cluster.local - --http-address=0.0.0.0:4180 - --skip-provider-button=true - --skip-jwt-bearer-tokens=true -- 2.47.2 From cac71e4a41437430b4d42131173fa5bde45e1dce Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 08:54:07 -0300 Subject: [PATCH 608/684] logging: add opensearch dashboards ui --- .../sources/helm/kustomization.yaml | 1 + infrastructure/sources/helm/opensearch.yaml | 9 ++ 
services/keycloak/kustomization.yaml | 1 + .../keycloak/logs-oidc-secret-ensure-job.yaml | 96 +++++++++++++++++++ services/logging/fluent-bit-helmrelease.yaml | 24 +++-- services/logging/ingress.yaml | 4 +- services/logging/kustomization.yaml | 5 +- services/logging/oauth2-proxy.yaml | 22 ++--- .../opensearch-dashboards-helmrelease.yaml | 46 +++++++++ services/logging/opensearch-helmrelease.yaml | 56 +++++++++++ services/logging/opensearch-ism-job.yaml | 47 +++++++++ 11 files changed, 287 insertions(+), 24 deletions(-) create mode 100644 infrastructure/sources/helm/opensearch.yaml create mode 100644 services/keycloak/logs-oidc-secret-ensure-job.yaml create mode 100644 services/logging/opensearch-dashboards-helmrelease.yaml create mode 100644 services/logging/opensearch-helmrelease.yaml create mode 100644 services/logging/opensearch-ism-job.yaml diff --git a/infrastructure/sources/helm/kustomization.yaml b/infrastructure/sources/helm/kustomization.yaml index 1cbf20e..97fd70e 100644 --- a/infrastructure/sources/helm/kustomization.yaml +++ b/infrastructure/sources/helm/kustomization.yaml @@ -8,6 +8,7 @@ resources: - jetstack.yaml - jenkins.yaml - mailu.yaml + - opensearch.yaml - harbor.yaml - prometheus.yaml - victoria-metrics.yaml diff --git a/infrastructure/sources/helm/opensearch.yaml b/infrastructure/sources/helm/opensearch.yaml new file mode 100644 index 0000000..e5b60c3 --- /dev/null +++ b/infrastructure/sources/helm/opensearch.yaml @@ -0,0 +1,9 @@ +# infrastructure/sources/helm/opensearch.yaml +apiVersion: source.toolkit.fluxcd.io/v1 +kind: HelmRepository +metadata: + name: opensearch + namespace: flux-system +spec: + interval: 1h + url: https://opensearch-project.github.io/helm-charts diff --git a/services/keycloak/kustomization.yaml b/services/keycloak/kustomization.yaml index e3d6513..05639e5 100644 --- a/services/keycloak/kustomization.yaml +++ b/services/keycloak/kustomization.yaml @@ -18,6 +18,7 @@ resources: - user-overrides-job.yaml - 
mas-secrets-ensure-job.yaml - synapse-oidc-secret-ensure-job.yaml + - logs-oidc-secret-ensure-job.yaml - service.yaml - ingress.yaml generatorOptions: diff --git a/services/keycloak/logs-oidc-secret-ensure-job.yaml b/services/keycloak/logs-oidc-secret-ensure-job.yaml new file mode 100644 index 0000000..9550d8b --- /dev/null +++ b/services/keycloak/logs-oidc-secret-ensure-job.yaml @@ -0,0 +1,96 @@ +# services/keycloak/logs-oidc-secret-ensure-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: logs-oidc-secret-ensure-1 + namespace: sso +spec: + backoffLimit: 0 + ttlSecondsAfterFinished: 3600 + template: + spec: + serviceAccountName: mas-secrets-ensure + restartPolicy: Never + containers: + - name: apply + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + apk add --no-cache curl jq kubectl openssl >/dev/null + + KC_URL="http://keycloak.sso.svc.cluster.local" + ACCESS_TOKEN="" + for attempt in 1 2 3 4 5; do + TOKEN_JSON="$(curl -sS -X POST "$KC_URL/realms/master/protocol/openid-connect/token" \ + -H 'Content-Type: application/x-www-form-urlencoded' \ + -d "grant_type=password" \ + -d "client_id=admin-cli" \ + -d "username=${KEYCLOAK_ADMIN}" \ + -d "password=${KEYCLOAK_ADMIN_PASSWORD}" || true)" + ACCESS_TOKEN="$(echo "$TOKEN_JSON" | jq -r '.access_token' 2>/dev/null || true)" + if [ -n "$ACCESS_TOKEN" ] && [ "$ACCESS_TOKEN" != "null" ]; then + break + fi + echo "Keycloak token request failed (attempt ${attempt})" >&2 + sleep $((attempt * 2)) + done + if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" = "null" ]; then + echo "Failed to fetch Keycloak admin token" >&2 + exit 1 + fi + + CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients?clientId=logs" || true)" + CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)" + + if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then + 
create_payload='{"clientId":"logs","enabled":true,"protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://logs.bstein.dev/oauth2/callback"],"webOrigins":["https://logs.bstein.dev"],"rootUrl":"https://logs.bstein.dev","baseUrl":"/"}' + status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H 'Content-Type: application/json' \ + -d "${create_payload}" \ + "$KC_URL/admin/realms/atlas/clients")" + if [ "$status" != "201" ] && [ "$status" != "204" ]; then + echo "Keycloak client create failed (status ${status})" >&2 + exit 1 + fi + CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients?clientId=logs" || true)" + CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)" + fi + + if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then + echo "Keycloak client logs not found" >&2 + exit 1 + fi + + CLIENT_SECRET="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/client-secret" | jq -r '.value' 2>/dev/null || true)" + if [ -z "$CLIENT_SECRET" ] || [ "$CLIENT_SECRET" = "null" ]; then + echo "Keycloak client secret not found" >&2 + exit 1 + fi + + if kubectl -n logging get secret oauth2-proxy-logs-oidc >/dev/null 2>&1; then + exit 0 + fi + + COOKIE_SECRET="$(openssl rand -base64 32 | tr -d '\n')" + kubectl -n logging create secret generic oauth2-proxy-logs-oidc \ + --from-literal=client_id="logs" \ + --from-literal=client_secret="${CLIENT_SECRET}" \ + --from-literal=cookie_secret="${COOKIE_SECRET}" \ + --dry-run=client -o yaml | kubectl -n logging apply -f - >/dev/null + env: + - name: KEYCLOAK_ADMIN + valueFrom: + secretKeyRef: + name: keycloak-admin + key: username + - name: KEYCLOAK_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: keycloak-admin + key: 
password diff --git a/services/logging/fluent-bit-helmrelease.yaml b/services/logging/fluent-bit-helmrelease.yaml index ca156e4..a3f1c26 100644 --- a/services/logging/fluent-bit-helmrelease.yaml +++ b/services/logging/fluent-bit-helmrelease.yaml @@ -84,17 +84,21 @@ spec: K8S-Logging.Exclude On outputs: | [OUTPUT] - Name loki + Name es Match kube.* - Host loki.logging.svc.cluster.local - Port 3100 - labels job=fluent-bit,namespace=$kubernetes['namespace_name'],pod=$kubernetes['pod_name'],container=$kubernetes['container_name'] - line_format json + Host opensearch-master.logging.svc.cluster.local + Port 9200 + Logstash_Format On + Logstash_Prefix kube + Replace_Dots On + Suppress_Type_Name On [OUTPUT] - Name loki + Name es Match journald.* - Host loki.logging.svc.cluster.local - Port 3100 - labels job=systemd - line_format json + Host opensearch-master.logging.svc.cluster.local + Port 9200 + Logstash_Format On + Logstash_Prefix journald + Replace_Dots On + Suppress_Type_Name On diff --git a/services/logging/ingress.yaml b/services/logging/ingress.yaml index f3211b2..7beeb9a 100644 --- a/services/logging/ingress.yaml +++ b/services/logging/ingress.yaml @@ -2,7 +2,7 @@ apiVersion: networking.k8s.io/v1 kind: Ingress metadata: - name: loki + name: logs namespace: logging annotations: cert-manager.io/cluster-issuer: letsencrypt @@ -20,6 +20,6 @@ spec: pathType: Prefix backend: service: - name: oauth2-proxy-loki + name: oauth2-proxy-logs port: name: http diff --git a/services/logging/kustomization.yaml b/services/logging/kustomization.yaml index 476d88b..9132b8e 100644 --- a/services/logging/kustomization.yaml +++ b/services/logging/kustomization.yaml @@ -3,7 +3,10 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - namespace.yaml - - loki-helmrelease.yaml + - opensearch-helmrelease.yaml + - opensearch-dashboards-helmrelease.yaml + - opensearch-ism-job.yaml - fluent-bit-helmrelease.yaml + - loki-helmrelease.yaml - oauth2-proxy.yaml - 
ingress.yaml diff --git a/services/logging/oauth2-proxy.yaml b/services/logging/oauth2-proxy.yaml index f9b9616..ef3621f 100644 --- a/services/logging/oauth2-proxy.yaml +++ b/services/logging/oauth2-proxy.yaml @@ -2,36 +2,36 @@ apiVersion: v1 kind: Service metadata: - name: oauth2-proxy-loki + name: oauth2-proxy-logs namespace: logging labels: - app: oauth2-proxy-loki + app: oauth2-proxy-logs spec: ports: - name: http port: 80 targetPort: 4180 selector: - app: oauth2-proxy-loki + app: oauth2-proxy-logs --- apiVersion: apps/v1 kind: Deployment metadata: - name: oauth2-proxy-loki + name: oauth2-proxy-logs namespace: logging labels: - app: oauth2-proxy-loki + app: oauth2-proxy-logs spec: replicas: 2 selector: matchLabels: - app: oauth2-proxy-loki + app: oauth2-proxy-logs template: metadata: labels: - app: oauth2-proxy-loki + app: oauth2-proxy-logs spec: nodeSelector: node-role.kubernetes.io/worker: "true" @@ -63,7 +63,7 @@ spec: - --cookie-refresh=20m - --cookie-expire=168h - --insecure-oidc-allow-unverified-email=true - - --upstream=http://loki-gateway.logging.svc.cluster.local + - --upstream=http://opensearch-dashboards.logging.svc.cluster.local:5601 - --http-address=0.0.0.0:4180 - --skip-provider-button=true - --skip-jwt-bearer-tokens=true @@ -72,17 +72,17 @@ spec: - name: OAUTH2_PROXY_CLIENT_ID valueFrom: secretKeyRef: - name: oauth2-proxy-loki-oidc + name: oauth2-proxy-logs-oidc key: client_id - name: OAUTH2_PROXY_CLIENT_SECRET valueFrom: secretKeyRef: - name: oauth2-proxy-loki-oidc + name: oauth2-proxy-logs-oidc key: client_secret - name: OAUTH2_PROXY_COOKIE_SECRET valueFrom: secretKeyRef: - name: oauth2-proxy-loki-oidc + name: oauth2-proxy-logs-oidc key: cookie_secret ports: - containerPort: 4180 diff --git a/services/logging/opensearch-dashboards-helmrelease.yaml b/services/logging/opensearch-dashboards-helmrelease.yaml new file mode 100644 index 0000000..4943200 --- /dev/null +++ b/services/logging/opensearch-dashboards-helmrelease.yaml @@ -0,0 +1,46 @@ +# 
services/logging/opensearch-dashboards-helmrelease.yaml +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: opensearch-dashboards + namespace: logging +spec: + interval: 15m + chart: + spec: + chart: opensearch-dashboards + version: "~2.32.0" + sourceRef: + kind: HelmRepository + name: opensearch + namespace: flux-system + values: + fullnameOverride: opensearch-dashboards + opensearchHosts: "http://opensearch-master.logging.svc.cluster.local:9200" + replicaCount: 1 + config: + opensearch_dashboards.yml: | + server.host: 0.0.0.0 + opensearch.hosts: ["http://opensearch-master.logging.svc.cluster.local:9200"] + opensearch_security.enabled: false + extraEnvs: + - name: NODE_OPTIONS + value: "--max-old-space-size=512" + resources: + requests: + cpu: "200m" + memory: "512Mi" + limits: + memory: "512Mi" + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + - rpi4 diff --git a/services/logging/opensearch-helmrelease.yaml b/services/logging/opensearch-helmrelease.yaml new file mode 100644 index 0000000..9ccbad0 --- /dev/null +++ b/services/logging/opensearch-helmrelease.yaml @@ -0,0 +1,56 @@ +# services/logging/opensearch-helmrelease.yaml +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: opensearch + namespace: logging +spec: + interval: 15m + chart: + spec: + chart: opensearch + version: "~2.36.0" + sourceRef: + kind: HelmRepository + name: opensearch + namespace: flux-system + values: + fullnameOverride: opensearch + clusterName: opensearch + nodeGroup: master + masterService: opensearch-master + singleNode: true + replicas: 1 + minimumMasterNodes: 1 + opensearchJavaOpts: "-Xms1g -Xmx1g" + resources: + requests: + cpu: "500m" + memory: "2Gi" + limits: + memory: "2Gi" + persistence: + enabled: true + storageClass: asteria + size: 500Gi + config: + 
opensearch.yml: | + cluster.name: opensearch + network.host: 0.0.0.0 + discovery.type: single-node + plugins.security.disabled: true + node.store.allow_mmap: false + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + - rpi4 + sysctlInit: + enabled: true diff --git a/services/logging/opensearch-ism-job.yaml b/services/logging/opensearch-ism-job.yaml new file mode 100644 index 0000000..c33a700 --- /dev/null +++ b/services/logging/opensearch-ism-job.yaml @@ -0,0 +1,47 @@ +# services/logging/opensearch-ism-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: opensearch-ism-setup-1 + namespace: logging +spec: + backoffLimit: 3 + ttlSecondsAfterFinished: 3600 + template: + spec: + restartPolicy: OnFailure + containers: + - name: apply + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + apk add --no-cache curl >/dev/null + + OS_URL="http://opensearch-master.logging.svc.cluster.local:9200" + for attempt in $(seq 1 60); do + if curl -s -o /dev/null -w "%{http_code}" "${OS_URL}" | grep -q "200"; then + break + fi + sleep 5 + done + if ! 
curl -s -o /dev/null -w "%{http_code}" "${OS_URL}" | grep -q "200"; then + echo "OpenSearch did not become ready in time" >&2 + exit 1 + fi + + policy='{"policy":{"description":"Delete logs after 180 days","schema_version":1,"default_state":"hot","states":[{"name":"hot","actions":[],"transitions":[{"state_name":"delete","conditions":{"min_index_age":"180d"}}]},{"name":"delete","actions":[{"delete":{}}],"transitions":[]}]}}' + curl -sS -X PUT "${OS_URL}/_plugins/_ism/policies/logging-180d" \ + -H 'Content-Type: application/json' \ + -d "${policy}" >/dev/null + + kube_template='{"index_patterns":["kube-*"],"priority":200,"template":{"settings":{"index.number_of_shards":1,"index.number_of_replicas":0,"index.refresh_interval":"30s","plugins.index_state_management.policy_id":"logging-180d"},"mappings":{"properties":{"@timestamp":{"type":"date"}}}}}' + curl -sS -X PUT "${OS_URL}/_index_template/kube-logs" \ + -H 'Content-Type: application/json' \ + -d "${kube_template}" >/dev/null + + journal_template='{"index_patterns":["journald-*"],"priority":200,"template":{"settings":{"index.number_of_shards":1,"index.number_of_replicas":0,"index.refresh_interval":"30s","plugins.index_state_management.policy_id":"logging-180d"},"mappings":{"properties":{"@timestamp":{"type":"date"}}}}}' + curl -sS -X PUT "${OS_URL}/_index_template/journald-logs" \ + -H 'Content-Type: application/json' \ + -d "${journal_template}" >/dev/null -- 2.47.2 From b9383c9709e5c806f47574f813932bd15ea2ba38 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 08:55:39 -0300 Subject: [PATCH 609/684] logging: fix dashboards cpu limits --- services/logging/opensearch-dashboards-helmrelease.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/logging/opensearch-dashboards-helmrelease.yaml b/services/logging/opensearch-dashboards-helmrelease.yaml index 4943200..2cef3dd 100644 --- a/services/logging/opensearch-dashboards-helmrelease.yaml +++ 
b/services/logging/opensearch-dashboards-helmrelease.yaml @@ -31,6 +31,7 @@ spec: cpu: "200m" memory: "512Mi" limits: + cpu: "200m" memory: "512Mi" nodeSelector: node-role.kubernetes.io/worker: "true" -- 2.47.2 From 7a9cf1df98b1f0b868ea1937d0bf497a7d42e6a2 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 08:57:13 -0300 Subject: [PATCH 610/684] keycloak: fix logs oauth2 cookie secret --- services/keycloak/logs-oidc-secret-ensure-job.yaml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/services/keycloak/logs-oidc-secret-ensure-job.yaml b/services/keycloak/logs-oidc-secret-ensure-job.yaml index 9550d8b..11d48f9 100644 --- a/services/keycloak/logs-oidc-secret-ensure-job.yaml +++ b/services/keycloak/logs-oidc-secret-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: logs-oidc-secret-ensure-1 + name: logs-oidc-secret-ensure-2 namespace: sso spec: backoffLimit: 0 @@ -74,10 +74,17 @@ spec: fi if kubectl -n logging get secret oauth2-proxy-logs-oidc >/dev/null 2>&1; then - exit 0 + current_cookie="$(kubectl -n logging get secret oauth2-proxy-logs-oidc -o jsonpath='{.data.cookie_secret}' 2>/dev/null || true)" + if [ -n "${current_cookie}" ]; then + decoded="$(printf '%s' "${current_cookie}" | base64 -d 2>/dev/null || true)" + length="$(printf '%s' "${decoded}" | wc -c | tr -d ' ')" + if [ "${length}" = "16" ] || [ "${length}" = "24" ] || [ "${length}" = "32" ]; then + exit 0 + fi + fi fi - COOKIE_SECRET="$(openssl rand -base64 32 | tr -d '\n')" + COOKIE_SECRET="$(openssl rand -hex 16 | tr -d '\n')" kubectl -n logging create secret generic oauth2-proxy-logs-oidc \ --from-literal=client_id="logs" \ --from-literal=client_secret="${CLIENT_SECRET}" \ -- 2.47.2 From b668e2d29e13c6d233d5d472f48b80bc8ef2bdcc Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 08:58:48 -0300 Subject: [PATCH 611/684] logging: pin opensearch ISM job to rpi --- services/logging/opensearch-ism-job.yaml | 14 
+++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/services/logging/opensearch-ism-job.yaml b/services/logging/opensearch-ism-job.yaml index c33a700..ef67ef4 100644 --- a/services/logging/opensearch-ism-job.yaml +++ b/services/logging/opensearch-ism-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: opensearch-ism-setup-1 + name: opensearch-ism-setup-2 namespace: logging spec: backoffLimit: 3 @@ -10,6 +10,18 @@ spec: template: spec: restartPolicy: OnFailure + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + - rpi4 containers: - name: apply image: alpine:3.20 -- 2.47.2 From 8e9403885850518e5ac36e4d60c83f8aebc47dda Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 09:00:25 -0300 Subject: [PATCH 612/684] logging: pin opensearch to rpi5 --- services/logging/opensearch-dashboards-helmrelease.yaml | 2 +- services/logging/opensearch-helmrelease.yaml | 2 +- services/logging/opensearch-ism-job.yaml | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/services/logging/opensearch-dashboards-helmrelease.yaml b/services/logging/opensearch-dashboards-helmrelease.yaml index 2cef3dd..9cfc3b3 100644 --- a/services/logging/opensearch-dashboards-helmrelease.yaml +++ b/services/logging/opensearch-dashboards-helmrelease.yaml @@ -35,6 +35,7 @@ spec: memory: "512Mi" nodeSelector: node-role.kubernetes.io/worker: "true" + hardware: rpi5 affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: @@ -44,4 +45,3 @@ spec: operator: In values: - rpi5 - - rpi4 diff --git a/services/logging/opensearch-helmrelease.yaml b/services/logging/opensearch-helmrelease.yaml index 9ccbad0..3d7dd6b 100644 --- a/services/logging/opensearch-helmrelease.yaml +++ b/services/logging/opensearch-helmrelease.yaml @@ -42,6 +42,7 @@ spec: 
node.store.allow_mmap: false nodeSelector: node-role.kubernetes.io/worker: "true" + hardware: rpi5 affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: @@ -51,6 +52,5 @@ spec: operator: In values: - rpi5 - - rpi4 sysctlInit: enabled: true diff --git a/services/logging/opensearch-ism-job.yaml b/services/logging/opensearch-ism-job.yaml index ef67ef4..426f64a 100644 --- a/services/logging/opensearch-ism-job.yaml +++ b/services/logging/opensearch-ism-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: opensearch-ism-setup-2 + name: opensearch-ism-setup-3 namespace: logging spec: backoffLimit: 3 @@ -12,6 +12,7 @@ spec: restartPolicy: OnFailure nodeSelector: node-role.kubernetes.io/worker: "true" + hardware: rpi5 affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: @@ -19,9 +20,8 @@ spec: - matchExpressions: - key: hardware operator: In - values: - - rpi5 - - rpi4 + values: + - rpi5 containers: - name: apply image: alpine:3.20 -- 2.47.2 From 0239f57a84cb757d6a6ba66963b818b8ccba855a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 09:01:15 -0300 Subject: [PATCH 613/684] logging: fix opensearch ism job yaml --- services/logging/opensearch-ism-job.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/logging/opensearch-ism-job.yaml b/services/logging/opensearch-ism-job.yaml index 426f64a..4b15285 100644 --- a/services/logging/opensearch-ism-job.yaml +++ b/services/logging/opensearch-ism-job.yaml @@ -18,8 +18,8 @@ spec: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: hardware - operator: In + - key: hardware + operator: In values: - rpi5 containers: -- 2.47.2 From 456677cfbbd77a5306c7509abbcfdbfd1ed266ee Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 09:07:40 -0300 Subject: [PATCH 614/684] logging: extend dashboards helm timeout --- services/logging/opensearch-dashboards-helmrelease.yaml | 1 + 1 file 
changed, 1 insertion(+) diff --git a/services/logging/opensearch-dashboards-helmrelease.yaml b/services/logging/opensearch-dashboards-helmrelease.yaml index 9cfc3b3..90547a1 100644 --- a/services/logging/opensearch-dashboards-helmrelease.yaml +++ b/services/logging/opensearch-dashboards-helmrelease.yaml @@ -6,6 +6,7 @@ metadata: namespace: logging spec: interval: 15m + timeout: 10m chart: spec: chart: opensearch-dashboards -- 2.47.2 From 0b78ec663d385a7227b947b5700dc3f5ff49eb54 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 18:08:39 -0300 Subject: [PATCH 615/684] logging: remove loki and backfill to opensearch --- services/logging/fluent-bit-helmrelease.yaml | 19 ++- services/logging/kustomization.yaml | 3 +- services/logging/loki-helmrelease.yaml | 113 --------------- services/logging/oauth2-proxy.yaml | 1 + .../opensearch-dashboards-setup-job.yaml | 63 +++++++++ services/logging/opensearch-helmrelease.yaml | 2 +- .../logging/opensearch-prune-cronjob.yaml | 132 ++++++++++++++++++ services/monitoring/helmrelease.yaml | 7 - 8 files changed, 215 insertions(+), 125 deletions(-) delete mode 100644 services/logging/loki-helmrelease.yaml create mode 100644 services/logging/opensearch-dashboards-setup-job.yaml create mode 100644 services/logging/opensearch-prune-cronjob.yaml diff --git a/services/logging/fluent-bit-helmrelease.yaml b/services/logging/fluent-bit-helmrelease.yaml index a3f1c26..df8051e 100644 --- a/services/logging/fluent-bit-helmrelease.yaml +++ b/services/logging/fluent-bit-helmrelease.yaml @@ -33,6 +33,10 @@ spec: - name: varlogjournal hostPath: path: /var/log/journal + - name: fluentbit-state + hostPath: + path: /var/lib/fluent-bit + type: DirectoryOrCreate extraVolumeMounts: - name: runlogjournal mountPath: /run/log/journal @@ -40,6 +44,8 @@ spec: - name: varlogjournal mountPath: /var/log/journal readOnly: true + - name: fluentbit-state + mountPath: /var/lib/fluent-bit config: service: | [SERVICE] @@ -51,6 +57,10 @@ spec: HTTP_Server 
On HTTP_Listen 0.0.0.0 HTTP_Port 2020 + storage.path /var/lib/fluent-bit/storage + storage.sync normal + storage.checksum on + storage.backlog.mem_limit 50M inputs: | [INPUT] Name tail @@ -63,14 +73,17 @@ spec: Refresh_Interval 10 Rotate_Wait 30 Inotify_Watcher false - storage.type memory + Read_from_Head On + DB /var/lib/fluent-bit/kube.db + storage.type filesystem [INPUT] Name systemd Tag journald.* Path /var/log/journal - Read_From_Tail On - storage.type memory + Read_From_Tail Off + DB /var/lib/fluent-bit/systemd.db + storage.type filesystem filters: | [FILTER] Name kubernetes diff --git a/services/logging/kustomization.yaml b/services/logging/kustomization.yaml index 9132b8e..d331308 100644 --- a/services/logging/kustomization.yaml +++ b/services/logging/kustomization.yaml @@ -6,7 +6,8 @@ resources: - opensearch-helmrelease.yaml - opensearch-dashboards-helmrelease.yaml - opensearch-ism-job.yaml + - opensearch-dashboards-setup-job.yaml + - opensearch-prune-cronjob.yaml - fluent-bit-helmrelease.yaml - - loki-helmrelease.yaml - oauth2-proxy.yaml - ingress.yaml diff --git a/services/logging/loki-helmrelease.yaml b/services/logging/loki-helmrelease.yaml deleted file mode 100644 index f14d80d..0000000 --- a/services/logging/loki-helmrelease.yaml +++ /dev/null @@ -1,113 +0,0 @@ -# services/logging/loki-helmrelease.yaml -apiVersion: helm.toolkit.fluxcd.io/v2 -kind: HelmRelease -metadata: - name: loki - namespace: logging -spec: - interval: 15m - chart: - spec: - chart: loki - version: "~6.6.0" - sourceRef: - kind: HelmRepository - name: grafana - namespace: flux-system - values: - fullnameOverride: loki - deploymentMode: SingleBinary - loki: - auth_enabled: false - commonConfig: - replication_factor: 1 - storage: - type: filesystem - storageConfig: - filesystem: - directory: /var/loki/chunks - tsdb_shipper: - active_index_directory: /var/loki/index - cache_location: /var/loki/index_cache - schemaConfig: - configs: - - from: "2024-01-01" - store: tsdb - object_store: 
filesystem - schema: v13 - index: - prefix: loki_index_ - period: 24h - compactor: - working_directory: /var/loki/compactor - retention_enabled: true - delete_request_store: filesystem - limits_config: - retention_period: 4320h - reject_old_samples: true - reject_old_samples_max_age: 168h - read: - replicas: 0 - write: - replicas: 0 - backend: - replicas: 0 - singleBinary: - replicas: 1 - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: hardware - operator: In - values: - - rpi5 - - rpi4 - persistence: - enabled: true - size: 200Gi - storageClass: asteria - gateway: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: hardware - operator: In - values: - - rpi5 - - rpi4 - chunksCache: - allocatedMemory: 512 - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: hardware - operator: In - values: - - rpi5 - - rpi4 - resultsCache: - allocatedMemory: 256 - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: hardware - operator: In - values: - - rpi5 - - rpi4 - lokiCanary: - nodeSelector: - hardware: rpi5 - node-role.kubernetes.io/worker: "true" - service: - type: ClusterIP - ingress: - enabled: false diff --git a/services/logging/oauth2-proxy.yaml b/services/logging/oauth2-proxy.yaml index ef3621f..ecebfa7 100644 --- a/services/logging/oauth2-proxy.yaml +++ b/services/logging/oauth2-proxy.yaml @@ -55,6 +55,7 @@ spec: - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas - --scope=openid profile email - --email-domain=* + - --code-challenge-method=S256 - --set-xauthrequest=true - --pass-access-token=true - --set-authorization-header=true diff --git a/services/logging/opensearch-dashboards-setup-job.yaml b/services/logging/opensearch-dashboards-setup-job.yaml new file 
mode 100644 index 0000000..fa308cc --- /dev/null +++ b/services/logging/opensearch-dashboards-setup-job.yaml @@ -0,0 +1,63 @@ +# services/logging/opensearch-dashboards-setup-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: opensearch-dashboards-setup-1 + namespace: logging +spec: + backoffLimit: 3 + ttlSecondsAfterFinished: 3600 + template: + spec: + restartPolicy: OnFailure + nodeSelector: + node-role.kubernetes.io/worker: "true" + hardware: rpi5 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + containers: + - name: setup + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + apk add --no-cache curl >/dev/null + + OSD_URL="http://opensearch-dashboards.logging.svc.cluster.local:5601" + for attempt in $(seq 1 60); do + code="$(curl -s -o /dev/null -w "%{http_code}" "${OSD_URL}/api/status" || true)" + if [ "${code}" = "200" ]; then + break + fi + sleep 5 + done + + if ! 
curl -s -o /dev/null -w "%{http_code}" "${OSD_URL}/api/status" | grep -q "200"; then + echo "OpenSearch Dashboards did not become ready in time" >&2 + exit 1 + fi + + create_view() { + view_id="$1" + title="$2" + curl -sS -X POST "${OSD_URL}/api/saved_objects/index-pattern/${view_id}?overwrite=true" \ + -H 'Content-Type: application/json' \ + -H 'osd-xsrf: true' \ + -d "{\"attributes\":{\"title\":\"${title}\",\"timeFieldName\":\"@timestamp\"}}" >/dev/null + } + + create_view kube-logs "kube-*" + create_view journald-logs "journald-*" + + curl -sS -X POST "${OSD_URL}/api/opensearch-dashboards/settings" \ + -H 'Content-Type: application/json' \ + -H 'osd-xsrf: true' \ + -d '{"changes":{"defaultIndex":"kube-logs"}}' >/dev/null diff --git a/services/logging/opensearch-helmrelease.yaml b/services/logging/opensearch-helmrelease.yaml index 3d7dd6b..627dee4 100644 --- a/services/logging/opensearch-helmrelease.yaml +++ b/services/logging/opensearch-helmrelease.yaml @@ -32,7 +32,7 @@ spec: persistence: enabled: true storageClass: asteria - size: 500Gi + size: 1024Gi config: opensearch.yml: | cluster.name: opensearch diff --git a/services/logging/opensearch-prune-cronjob.yaml b/services/logging/opensearch-prune-cronjob.yaml new file mode 100644 index 0000000..74e2837 --- /dev/null +++ b/services/logging/opensearch-prune-cronjob.yaml @@ -0,0 +1,132 @@ +# services/logging/opensearch-prune-cronjob.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: opensearch-prune-script + namespace: logging +data: + prune.py: | + import json + import os + import re + import sys + import urllib.error + import urllib.request + + os_url = os.environ.get("OPENSEARCH_URL", "http://opensearch-master.logging.svc.cluster.local:9200").rstrip("/") + limit_bytes = int(os.environ.get("LOG_LIMIT_BYTES", str(1024**4))) + patterns = [p.strip() for p in os.environ.get("LOG_INDEX_PATTERNS", "kube-*,journald-*").split(",") if p.strip()] + + UNITS = { + "b": 1, + "kb": 1024, + "mb": 1024**2, + "gb": 
1024**3, + "tb": 1024**4, + } + + def parse_size(value: str) -> int: + if not value: + return 0 + text = value.strip().lower() + if text in ("-", "0"): + return 0 + match = re.match(r"^([0-9.]+)([a-z]+)$", text) + if not match: + return 0 + number = float(match.group(1)) + unit = match.group(2) + if unit not in UNITS: + return 0 + return int(number * UNITS[unit]) + + def request_json(path: str): + url = f"{os_url}{path}" + with urllib.request.urlopen(url, timeout=30) as response: + payload = response.read().decode("utf-8") + return json.loads(payload) + + def delete_index(index: str) -> None: + url = f"{os_url}/{index}" + req = urllib.request.Request(url, method="DELETE") + with urllib.request.urlopen(req, timeout=30) as response: + _ = response.read() + print(f"deleted {index}") + + indices = [] + for pattern in patterns: + try: + data = request_json(f"/_cat/indices/{pattern}?format=json&h=index,store.size,creation.date") + except urllib.error.HTTPError as exc: + if exc.code == 404: + continue + raise + for item in data: + index = item.get("index") + if not index or index.startswith("."): + continue + size = parse_size(item.get("store.size", "")) + created = int(item.get("creation.date", "0") or 0) + indices.append({"index": index, "size": size, "created": created}) + + total = sum(item["size"] for item in indices) + print(f"total_log_bytes={total}") + if total <= limit_bytes: + print("within limit") + sys.exit(0) + + indices.sort(key=lambda item: item["created"]) + for item in indices: + if total <= limit_bytes: + break + delete_index(item["index"]) + total -= item["size"] + + print(f"remaining_log_bytes={total}") +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: opensearch-prune + namespace: logging +spec: + schedule: "23 3 * * *" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 3 + jobTemplate: + spec: + backoffLimit: 2 + template: + spec: + restartPolicy: OnFailure + nodeSelector: + 
node-role.kubernetes.io/worker: "true" + hardware: rpi5 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + containers: + - name: prune + image: python:3.11-alpine + command: ["python", "/scripts/prune.py"] + env: + - name: OPENSEARCH_URL + value: http://opensearch-master.logging.svc.cluster.local:9200 + - name: LOG_LIMIT_BYTES + value: "1099511627776" + - name: LOG_INDEX_PATTERNS + value: "kube-*,journald-*" + volumeMounts: + - name: scripts + mountPath: /scripts + volumes: + - name: scripts + configMap: + name: opensearch-prune-script diff --git a/services/monitoring/helmrelease.yaml b/services/monitoring/helmrelease.yaml index 873a323..ddd24e5 100644 --- a/services/monitoring/helmrelease.yaml +++ b/services/monitoring/helmrelease.yaml @@ -320,13 +320,6 @@ spec: timeInterval: "15s" uid: atlas-vm orgId: 2 - - name: Loki - type: loki - access: proxy - url: http://loki.logging.svc.cluster.local:3100 - isDefault: false - uid: atlas-loki - orgId: 1 dashboardProviders: dashboardproviders.yaml: apiVersion: 1 -- 2.47.2 From ecf28580b9028c17b6a2fd1e8dea9467062310d5 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 18:11:32 -0300 Subject: [PATCH 616/684] logging: manage opensearch pvc size --- services/logging/kustomization.yaml | 1 + services/logging/opensearch-helmrelease.yaml | 2 +- services/logging/opensearch-pvc.yaml | 14 ++++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 services/logging/opensearch-pvc.yaml diff --git a/services/logging/kustomization.yaml b/services/logging/kustomization.yaml index d331308..c7b520c 100644 --- a/services/logging/kustomization.yaml +++ b/services/logging/kustomization.yaml @@ -3,6 +3,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - namespace.yaml + - opensearch-pvc.yaml - opensearch-helmrelease.yaml - 
opensearch-dashboards-helmrelease.yaml - opensearch-ism-job.yaml diff --git a/services/logging/opensearch-helmrelease.yaml b/services/logging/opensearch-helmrelease.yaml index 627dee4..3d7dd6b 100644 --- a/services/logging/opensearch-helmrelease.yaml +++ b/services/logging/opensearch-helmrelease.yaml @@ -32,7 +32,7 @@ spec: persistence: enabled: true storageClass: asteria - size: 1024Gi + size: 500Gi config: opensearch.yml: | cluster.name: opensearch diff --git a/services/logging/opensearch-pvc.yaml b/services/logging/opensearch-pvc.yaml new file mode 100644 index 0000000..f537b99 --- /dev/null +++ b/services/logging/opensearch-pvc.yaml @@ -0,0 +1,14 @@ +# services/logging/opensearch-pvc.yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: opensearch-opensearch-0 + namespace: logging +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1024Gi + storageClassName: asteria + volumeMode: Filesystem -- 2.47.2 From b40a9952254d0a283ab2efb3607b444e34eccac4 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 18:17:02 -0300 Subject: [PATCH 617/684] logging: force opensearch replicas to 0 --- services/logging/opensearch-ism-job.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/logging/opensearch-ism-job.yaml b/services/logging/opensearch-ism-job.yaml index 4b15285..c800677 100644 --- a/services/logging/opensearch-ism-job.yaml +++ b/services/logging/opensearch-ism-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: opensearch-ism-setup-3 + name: opensearch-ism-setup-4 namespace: logging spec: backoffLimit: 3 @@ -57,3 +57,7 @@ spec: curl -sS -X PUT "${OS_URL}/_index_template/journald-logs" \ -H 'Content-Type: application/json' \ -d "${journal_template}" >/dev/null + + curl -sS -X PUT "${OS_URL}/_all/_settings" \ + -H 'Content-Type: application/json' \ + -d '{"index":{"number_of_replicas":0}}' >/dev/null -- 2.47.2 From 087026bd23f9967a23950a68a279f7cbc6f9a8cc Mon Sep 17 
00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 18:18:58 -0300 Subject: [PATCH 618/684] logging: throttle fluent-bit backfill --- services/logging/fluent-bit-helmrelease.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/services/logging/fluent-bit-helmrelease.yaml b/services/logging/fluent-bit-helmrelease.yaml index df8051e..30952c1 100644 --- a/services/logging/fluent-bit-helmrelease.yaml +++ b/services/logging/fluent-bit-helmrelease.yaml @@ -101,17 +101,21 @@ spec: Match kube.* Host opensearch-master.logging.svc.cluster.local Port 9200 + Workers 1 Logstash_Format On Logstash_Prefix kube Replace_Dots On Suppress_Type_Name On + Retry_Limit False [OUTPUT] Name es Match journald.* Host opensearch-master.logging.svc.cluster.local Port 9200 + Workers 1 Logstash_Format On Logstash_Prefix journald Replace_Dots On Suppress_Type_Name On + Retry_Limit False -- 2.47.2 From 64ddd73b5086174e806761662a4e124883df66cd Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 21:17:08 -0300 Subject: [PATCH 619/684] logging: force dark theme in dashboards --- services/logging/opensearch-dashboards-helmrelease.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/services/logging/opensearch-dashboards-helmrelease.yaml b/services/logging/opensearch-dashboards-helmrelease.yaml index 90547a1..85f859e 100644 --- a/services/logging/opensearch-dashboards-helmrelease.yaml +++ b/services/logging/opensearch-dashboards-helmrelease.yaml @@ -24,6 +24,9 @@ spec: server.host: 0.0.0.0 opensearch.hosts: ["http://opensearch-master.logging.svc.cluster.local:9200"] opensearch_security.enabled: false + uiSettings: + overrides: + "theme:darkMode": true extraEnvs: - name: NODE_OPTIONS value: "--max-old-space-size=512" -- 2.47.2 From a25ddd808237a029cdf83b40283e522486beaa29 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 22:20:36 -0300 Subject: [PATCH 620/684] logging: add OpenSearch dashboards generator --- scripts/dashboards_render_logs.py | 375 ++++++++++++++++++ 
services/logging/dashboards/logs.ndjson | 105 +++++ services/logging/kustomization.yaml | 1 + .../opensearch-dashboards-objects.yaml | 114 ++++++ .../opensearch-dashboards-setup-job.yaml | 27 +- 5 files changed, 611 insertions(+), 11 deletions(-) create mode 100755 scripts/dashboards_render_logs.py create mode 100644 services/logging/dashboards/logs.ndjson create mode 100644 services/logging/opensearch-dashboards-objects.yaml diff --git a/scripts/dashboards_render_logs.py b/scripts/dashboards_render_logs.py new file mode 100755 index 0000000..dc83f81 --- /dev/null +++ b/scripts/dashboards_render_logs.py @@ -0,0 +1,375 @@ +#!/usr/bin/env python3 +"""Generate OpenSearch Dashboards saved objects and render them into ConfigMaps. + +Usage: + scripts/dashboards_render_logs.py --build # rebuild NDJSON + ConfigMap + scripts/dashboards_render_logs.py # re-render ConfigMap from NDJSON +""" + +from __future__ import annotations + +import argparse +import json +import textwrap +from dataclasses import dataclass +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +DASHBOARD_DIR = ROOT / "services" / "logging" / "dashboards" +NDJSON_PATH = DASHBOARD_DIR / "logs.ndjson" +CONFIG_PATH = ROOT / "services" / "logging" / "opensearch-dashboards-objects.yaml" + +CONFIG_TEMPLATE = textwrap.dedent( + """# {relative_path} +# Generated by scripts/dashboards_render_logs.py --build +apiVersion: v1 +kind: ConfigMap +metadata: + name: opensearch-dashboards-objects + namespace: logging +data: + objects.ndjson: | +{payload} +""" +) + +DASHBOARD_VERSION = "7.10.0" + +ERROR_TERMS = ("*error*", "*exception*", "*fail*") + + +@dataclass(frozen=True) +class AppSpec: + slug: str + title: str + query: str + index_id: str = "kube-logs" + kind: str = "kube" + + +def error_query(base: str | None = None) -> str: + parts = [f'(log : "{term}" or message : "{term}")' for term in ERROR_TERMS] + expr = " or ".join(parts) + if base: + return f"({base}) and ({expr})" + return f"({expr})" + + 
def json_line(obj: dict) -> str:
    """Serialize *obj* as one compact JSON line (no spaces) for NDJSON output."""
    return json.dumps(obj, separators=(",", ":"))


def search_source(query: str) -> dict:
    """Build the searchSourceJSON payload for a saved object.

    The index pattern is referenced indirectly via ``indexRefName`` so Kibana
    resolves it through the object's ``references`` array on import.
    """
    return {
        "query": {"language": "kuery", "query": query},
        "filter": [],
        "indexRefName": "kibanaSavedObjectMeta.searchSourceJSON.index",
    }


def index_pattern(object_id: str, title: str, time_field: str = "@timestamp") -> dict:
    """Kibana index-pattern saved object; *title* is the index glob (e.g. ``kube-*``)."""
    return {
        "type": "index-pattern",
        "id": object_id,
        "attributes": {"title": title, "timeFieldName": time_field},
    }


def histogram_vis(object_id: str, title: str, query: str, index_id: str) -> dict:
    """Date-histogram visualization (doc count over @timestamp) scoped to *query*."""
    vis_state = {
        "title": title,
        "type": "histogram",
        "aggs": [
            {"id": "1", "enabled": True, "type": "count", "schema": "metric"},
            {
                "id": "2",
                "enabled": True,
                "type": "date_histogram",
                "schema": "segment",
                "params": {"field": "@timestamp", "interval": "auto", "min_doc_count": 1},
            },
        ],
        "params": {"addTooltip": True, "addLegend": False, "scale": "linear", "interpolate": "linear"},
    }
    return {
        "type": "visualization",
        "id": object_id,
        "attributes": {
            "title": title,
            # visState and searchSourceJSON are stringified JSON per the
            # saved-object schema; serialize compactly via json_line.
            "visState": json_line(vis_state),
            "uiStateJSON": "{}",
            "description": "",
            "version": 1,
            "kibanaSavedObjectMeta": {"searchSourceJSON": json_line(search_source(query))},
        },
        "references": [
            {
                "name": "kibanaSavedObjectMeta.searchSourceJSON.index",
                "type": "index-pattern",
                "id": index_id,
            }
        ],
    }


def table_vis(object_id: str, title: str, field: str, query: str, index_id: str) -> dict:
    """Top-10 terms table over *field* (count-ordered, descending), scoped to *query*."""
    vis_state = {
        "title": title,
        "type": "table",
        "aggs": [
            {"id": "1", "enabled": True, "type": "count", "schema": "metric"},
            {
                "id": "2",
                "enabled": True,
                "type": "terms",
                "schema": "bucket",
                "params": {"field": field, "size": 10, "order": "desc", "orderBy": "1"},
            },
        ],
        "params": {
            "perPage": 10,
            "showPartialRows": False,
            "showMetricsAtAllLevels": False,
            "sort": {"columnIndex": 1, "direction": "desc"},
        },
    }
    return {
        "type": "visualization",
        "id": object_id,
        "attributes": {
            "title": title,
            "visState": json_line(vis_state),
            "uiStateJSON": "{}",
            "description": "",
            "version": 1,
            "kibanaSavedObjectMeta": {"searchSourceJSON": json_line(search_source(query))},
        },
        "references": [
            {
                "name": "kibanaSavedObjectMeta.searchSourceJSON.index",
                "type": "index-pattern",
                "id": index_id,
            }
        ],
    }


def search_object(object_id: str, title: str, columns: list[str], query: str, index_id: str) -> dict:
    """Saved search (Discover) listing *columns*, newest-first on @timestamp."""
    return {
        "type": "search",
        "id": object_id,
        "attributes": {
            "title": title,
            "description": "",
            "columns": columns,
            "sort": [["@timestamp", "desc"]],
            "kibanaSavedObjectMeta": {"searchSourceJSON": json_line(search_source(query))},
        },
        "references": [
            {
                "name": "kibanaSavedObjectMeta.searchSourceJSON.index",
                "type": "index-pattern",
                "id": index_id,
            }
        ],
    }


def panel(panel_id: str, panel_type: str, grid: dict, index: int) -> dict:
    """Dashboard panel entry placing saved object *panel_id* at *grid* position.

    Callers pass a ``grid`` whose ``i`` matches ``str(index)``; ``DASHBOARD_VERSION``
    is a module-level constant defined earlier in the file.
    """
    return {
        "panelIndex": str(index),
        "gridData": grid,
        "id": panel_id,
        "type": panel_type,
        "version": DASHBOARD_VERSION,
        "embeddableConfig": {},
    }


def dashboard_object(object_id: str, title: str, panels: list[dict]) -> dict:
    """Dashboard saved object embedding *panels* (built via ``panel``)."""
    return {
        "type": "dashboard",
        "id": object_id,
        "attributes": {
            "title": title,
            "description": "",
            "hits": 0,
            "panelsJSON": json_line(panels),
            "optionsJSON": json_line({"useMargins": True, "hidePanelTitles": False}),
            "version": 1,
            "timeRestore": False,
            "kibanaSavedObjectMeta": {
                # Compact separators for consistency with every other serialized
                # payload in this file (was dumped with default spacing).
                "searchSourceJSON": json_line({"query": {"language": "kuery", "query": ""}, "filter": []})
            },
        },
    }


def app_dashboard_objects(app: "AppSpec") -> list[dict]:
    """Build the full saved-object set (visualizations, searches, dashboard) for one app.

    ``AppSpec`` is defined earlier in the file; the fields used here are
    ``slug``, ``title``, ``query``, ``index_id`` and ``kind``. Journald-sourced
    apps get unit-oriented panels; everything else gets pod/container panels.
    """
    prefix = f"logs-{app.slug}"
    objects = []

    if app.kind == "journald":
        # Journald records expose systemd metadata instead of k8s pod fields.
        columns = ["@timestamp", "_HOSTNAME", "_SYSTEMD_UNIT", "MESSAGE"]
        objects.append(histogram_vis(f"{prefix}-volume", f"{app.title} logs", app.query, app.index_id))
        objects.append(histogram_vis(f"{prefix}-errors", f"{app.title} errors", error_query(app.query), app.index_id))
        objects.append(table_vis(f"{prefix}-top-units", "Top units", "_SYSTEMD_UNIT.keyword", app.query, app.index_id))
        objects.append(search_object(f"{prefix}-recent", "Recent logs", columns, app.query, app.index_id))
        objects.append(
            search_object(
                f"{prefix}-recent-errors",
                "Recent errors",
                columns,
                error_query(app.query),
                app.index_id,
            )
        )
        panels = [
            panel(f"{prefix}-volume", "visualization", {"x": 0, "y": 0, "w": 24, "h": 8, "i": "1"}, 1),
            panel(f"{prefix}-errors", "visualization", {"x": 0, "y": 8, "w": 24, "h": 6, "i": "2"}, 2),
            panel(f"{prefix}-top-units", "visualization", {"x": 0, "y": 14, "w": 12, "h": 8, "i": "3"}, 3),
            panel(f"{prefix}-recent", "search", {"x": 12, "y": 14, "w": 12, "h": 8, "i": "4"}, 4),
            panel(f"{prefix}-recent-errors", "search", {"x": 0, "y": 22, "w": 24, "h": 8, "i": "5"}, 5),
        ]
        objects.append(dashboard_object(prefix, f"{app.title} Logs", panels))
        return objects

    # Kubernetes-sourced apps: pod/container oriented panels.
    columns = ["@timestamp", "kubernetes.pod_name", "kubernetes.container_name", "log", "message"]
    objects.append(histogram_vis(f"{prefix}-volume", f"{app.title} logs", app.query, app.index_id))
    objects.append(histogram_vis(f"{prefix}-errors", f"{app.title} errors", error_query(app.query), app.index_id))
    objects.append(table_vis(f"{prefix}-top-pods", "Top pods", "kubernetes.pod_name.keyword", app.query, app.index_id))
    objects.append(
        table_vis(f"{prefix}-top-containers", "Top containers", "kubernetes.container_name.keyword", app.query, app.index_id)
    )
    objects.append(search_object(f"{prefix}-recent", "Recent logs", columns, app.query, app.index_id))
    objects.append(
        search_object(
            f"{prefix}-recent-errors",
            "Recent errors",
            columns,
            error_query(app.query),
            app.index_id,
        )
    )
    panels = [
        panel(f"{prefix}-volume", "visualization", {"x": 0, "y": 0, "w": 24, "h": 8, "i": "1"}, 1),
        panel(f"{prefix}-errors", "visualization", {"x": 0, "y": 8, "w": 24, "h": 6, "i": "2"}, 2),
        panel(f"{prefix}-top-pods", "visualization", {"x": 0, "y": 14, "w": 12, "h": 8, "i": "3"}, 3),
        panel(
            f"{prefix}-top-containers",
            "visualization",
            {"x": 12, "y": 14, "w": 12, "h": 8, "i": "4"},
            4,
        ),
        panel(f"{prefix}-recent", "search", {"x": 0, "y": 22, "w": 12, "h": 8, "i": "5"}, 5),
        panel(f"{prefix}-recent-errors", "search", {"x": 12, "y": 22, "w": 12, "h": 8, "i": "6"}, 6),
    ]
    objects.append(dashboard_object(prefix, f"{app.title} Logs", panels))
    return objects


def overview_objects() -> list[dict]:
    """Cluster-wide overview dashboard over the ``kube-logs`` index pattern."""
    objects = []
    objects.append(histogram_vis("logs-overview-volume", "Logs per minute", "*", "kube-logs"))
    objects.append(histogram_vis("logs-overview-errors", "Errors per minute", error_query(), "kube-logs"))
    objects.append(
        table_vis(
            "logs-overview-top-ns",
            "Top namespaces",
            "kubernetes.namespace_name.keyword",
            "*",
            "kube-logs",
        )
    )
    objects.append(table_vis("logs-overview-top-pods", "Top pods", "kubernetes.pod_name.keyword", "*", "kube-logs"))
    objects.append(
        search_object(
            "logs-overview-recent-errors",
            "Recent errors",
            ["@timestamp", "kubernetes.namespace_name", "kubernetes.pod_name", "log", "message"],
            error_query(),
            "kube-logs",
        )
    )
    panels = [
        panel("logs-overview-volume", "visualization", {"x": 0, "y": 0, "w": 24, "h": 8, "i": "1"}, 1),
        panel("logs-overview-errors", "visualization", {"x": 0, "y": 8, "w": 24, "h": 6, "i": "2"}, 2),
        panel("logs-overview-top-ns", "visualization", {"x": 0, "y": 14, "w": 12, "h": 8, "i": "3"}, 3),
        panel("logs-overview-top-pods", "visualization", {"x": 12, "y": 14, "w": 12, "h": 8, "i": "4"}, 4),
        panel("logs-overview-recent-errors", "search", {"x": 0, "y": 22, "w": 24, "h": 8, "i": "5"}, 5),
    ]
    objects.append(dashboard_object("logs-overview", "Atlas Logs Overview", panels))
    return objects
def build_objects() -> list[dict]:
    """Assemble the full saved-object list: index patterns, the overview
    dashboard, then one dashboard set per application via app_dashboard_objects.

    AppSpec is defined earlier in the file; positional args here appear to be
    (slug, title, query) -- NOTE(review): confirm against the dataclass.
    """
    objects = [
        index_pattern("kube-logs", "kube-*"),
        index_pattern("journald-logs", "journald-*"),
    ]

    objects.extend(overview_objects())

    apps = [
        AppSpec("bstein-dev-home", "bstein-dev-home", 'kubernetes.namespace_name: "bstein-dev-home"'),
        # pegasus shares the jellyfin namespace, so it is narrowed by app label.
        AppSpec(
            "pegasus",
            "pegasus",
            'kubernetes.namespace_name: "jellyfin" and kubernetes.labels.app: "pegasus"',
        ),
        AppSpec("vaultwarden", "vaultwarden", 'kubernetes.namespace_name: "vaultwarden"'),
        AppSpec("mailu", "mailu", 'kubernetes.namespace_name: "mailu-mailserver"'),
        AppSpec("nextcloud", "nextcloud", 'kubernetes.namespace_name: "nextcloud"'),
        AppSpec("gitea", "gitea", 'kubernetes.namespace_name: "gitea"'),
        AppSpec("jenkins", "jenkins", 'kubernetes.namespace_name: "jenkins"'),
        AppSpec("harbor", "harbor", 'kubernetes.namespace_name: "harbor"'),
        AppSpec("vault", "vault", 'kubernetes.namespace_name: "vault"'),
        # keycloak runs in the "sso" namespace, not a "keycloak" one.
        AppSpec("keycloak", "keycloak", 'kubernetes.namespace_name: "sso"'),
        AppSpec("jellyfin", "jellyfin", 'kubernetes.namespace_name: "jellyfin"'),
        AppSpec("flux-system", "flux-system", 'kubernetes.namespace_name: "flux-system"'),
        AppSpec("comms", "comms", 'kubernetes.namespace_name: "comms"'),
        # lesavka is host-level (journald index), matched by hostname.
        AppSpec("lesavka", "lesavka", '_HOSTNAME: "titan-jh"', index_id="journald-logs", kind="journald"),
    ]

    for app in apps:
        objects.extend(app_dashboard_objects(app))

    return objects


def write_ndjson(objects: list[dict], path: Path) -> None:
    """Write *objects* as NDJSON (one compact JSON line each, trailing newline),
    creating parent directories as needed."""
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = "\n".join(json_line(obj) for obj in objects)
    path.write_text(payload + "\n")


def render_configmap(ndjson_path: Path, output_path: Path) -> None:
    """Embed the NDJSON payload into the ConfigMap template and write it out.

    Each payload line is indented so it nests correctly in the template's YAML
    block scalar; ROOT and CONFIG_TEMPLATE are defined earlier in the file.
    NOTE(review): indent width must match CONFIG_TEMPLATE's expectation -- confirm.
    """
    payload_lines = ndjson_path.read_text().splitlines()
    payload = "\n".join(" " + line for line in payload_lines)
    relative_path = output_path.relative_to(ROOT)
    output_path.write_text(CONFIG_TEMPLATE.format(relative_path=relative_path, payload=payload))


def main() -> None:
    """CLI entry point: optionally regenerate the NDJSON (--build), then always
    re-render the ConfigMap from the NDJSON on disk."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--build", action="store_true", help="Regenerate saved object NDJSON and ConfigMap")
    args = parser.parse_args()

    if args.build:
        objects = build_objects()
        write_ndjson(objects, NDJSON_PATH)

    # Without --build the NDJSON must already exist; fail loudly otherwise.
    if not NDJSON_PATH.exists():
        raise SystemExit(f"Missing NDJSON file: {NDJSON_PATH}. Run with --build first.")

    render_configmap(NDJSON_PATH, CONFIG_PATH)


if __name__ == "__main__":
    main()
diff --git a/services/logging/dashboards/logs.ndjson b/services/logging/dashboards/logs.ndjson
new file mode 100644
index 0000000..67466c1
--- /dev/null
+++ b/services/logging/dashboards/logs.ndjson
@@ -0,0 +1,105 @@
+{"type":"index-pattern","id":"kube-logs","attributes":{"title":"kube-*","timeFieldName":"@timestamp"}}
+{"type":"index-pattern","id":"journald-logs","attributes":{"title":"journald-*","timeFieldName":"@timestamp"}}
+{"type":"visualization","id":"logs-overview-volume","attributes":{"title":"Logs per minute","visState":"{\"title\":\"Logs per minute\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]}
+{"type":"visualization","id":"logs-overview-errors","attributes":{"title":"Errors per 
minute\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-overview-top-ns","attributes":{"title":"Top namespaces","visState":"{\"title\":\"Top namespaces\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.namespace_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-overview-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-overview-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.namespace_name","kubernetes.pod_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-overview","attributes":{"title":"Atlas Logs 
Overview","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-overview-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-overview-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-overview-top-ns\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-overview-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":24,\"h\":8,\"i\":\"5\"},\"id\":\"logs-overview-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-bstein-dev-home-volume","attributes":{"title":"bstein-dev-home logs","visState":"{\"title\":\"bstein-dev-home logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: 
\\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-bstein-dev-home-errors","attributes":{"title":"bstein-dev-home errors","visState":"{\"title\":\"bstein-dev-home errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"bstein-dev-home\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-bstein-dev-home-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-bstein-dev-home-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-bstein-dev-home-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-bstein-dev-home-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"bstein-dev-home\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-bstein-dev-home","attributes":{"title":"bstein-dev-home 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-bstein-dev-home-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-bstein-dev-home-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-bstein-dev-home-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-bstein-dev-home-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-bstein-dev-home-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-bstein-dev-home-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-pegasus-volume","attributes":{"title":"pegasus logs","visState":"{\"title\":\"pegasus 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-pegasus-errors","attributes":{"title":"pegasus errors","visState":"{\"title\":\"pegasus errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-pegasus-top-pods","attributes":{"title":"Top 
pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-pegasus-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-pegasus-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-pegasus-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-pegasus","attributes":{"title":"pegasus 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-pegasus-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-pegasus-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-pegasus-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-pegasus-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-pegasus-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-pegasus-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-vaultwarden-volume","attributes":{"title":"vaultwarden logs","visState":"{\"title\":\"vaultwarden 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-vaultwarden-errors","attributes":{"title":"vaultwarden errors","visState":"{\"title\":\"vaultwarden errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vaultwarden\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-vaultwarden-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-vaultwarden-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-vaultwarden-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-vaultwarden-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vaultwarden\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-vaultwarden","attributes":{"title":"vaultwarden 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-vaultwarden-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-vaultwarden-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-vaultwarden-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-vaultwarden-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-vaultwarden-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-vaultwarden-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-mailu-volume","attributes":{"title":"mailu logs","visState":"{\"title\":\"mailu 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-mailu-errors","attributes":{"title":"mailu errors","visState":"{\"title\":\"mailu errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"mailu-mailserver\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-mailu-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-mailu-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-mailu-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-mailu-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"mailu-mailserver\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-mailu","attributes":{"title":"mailu 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-mailu-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-mailu-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-mailu-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-mailu-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-mailu-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-mailu-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-nextcloud-volume","attributes":{"title":"nextcloud logs","visState":"{\"title\":\"nextcloud 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-nextcloud-errors","attributes":{"title":"nextcloud errors","visState":"{\"title\":\"nextcloud errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"nextcloud\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-nextcloud-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-nextcloud-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-nextcloud-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-nextcloud-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"nextcloud\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-nextcloud","attributes":{"title":"nextcloud 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-nextcloud-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-nextcloud-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-nextcloud-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-nextcloud-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-nextcloud-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-nextcloud-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-gitea-volume","attributes":{"title":"gitea logs","visState":"{\"title\":\"gitea 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-gitea-errors","attributes":{"title":"gitea errors","visState":"{\"title\":\"gitea errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"gitea\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-gitea-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-gitea-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-gitea-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-gitea-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"gitea\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-gitea","attributes":{"title":"gitea 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-gitea-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-gitea-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-gitea-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-gitea-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-gitea-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-gitea-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-jenkins-volume","attributes":{"title":"jenkins logs","visState":"{\"title\":\"jenkins 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-jenkins-errors","attributes":{"title":"jenkins errors","visState":"{\"title\":\"jenkins errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jenkins\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-jenkins-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-jenkins-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-jenkins-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-jenkins-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jenkins\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-jenkins","attributes":{"title":"jenkins 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-jenkins-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-jenkins-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-jenkins-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-jenkins-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-jenkins-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-jenkins-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-harbor-volume","attributes":{"title":"harbor logs","visState":"{\"title\":\"harbor 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-harbor-errors","attributes":{"title":"harbor errors","visState":"{\"title\":\"harbor errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"harbor\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-harbor-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-harbor-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-harbor-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-harbor-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"harbor\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-harbor","attributes":{"title":"harbor 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-harbor-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-harbor-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-harbor-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-harbor-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-harbor-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-harbor-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-vault-volume","attributes":{"title":"vault logs","visState":"{\"title\":\"vault 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-vault-errors","attributes":{"title":"vault errors","visState":"{\"title\":\"vault errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vault\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-vault-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-vault-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-vault-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-vault-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vault\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-vault","attributes":{"title":"vault 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-vault-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-vault-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-vault-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-vault-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-vault-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-vault-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-keycloak-volume","attributes":{"title":"keycloak logs","visState":"{\"title\":\"keycloak 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-keycloak-errors","attributes":{"title":"keycloak errors","visState":"{\"title\":\"keycloak errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"sso\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-keycloak-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-keycloak-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-keycloak-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-keycloak-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"sso\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-keycloak","attributes":{"title":"keycloak 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-keycloak-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-keycloak-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-keycloak-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-keycloak-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-keycloak-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-keycloak-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-jellyfin-volume","attributes":{"title":"jellyfin logs","visState":"{\"title\":\"jellyfin 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-jellyfin-errors","attributes":{"title":"jellyfin errors","visState":"{\"title\":\"jellyfin errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-jellyfin-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-jellyfin-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-jellyfin-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-jellyfin-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-jellyfin","attributes":{"title":"jellyfin 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-jellyfin-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-jellyfin-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-jellyfin-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-jellyfin-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-jellyfin-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-jellyfin-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-flux-system-volume","attributes":{"title":"flux-system logs","visState":"{\"title\":\"flux-system 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-flux-system-errors","attributes":{"title":"flux-system errors","visState":"{\"title\":\"flux-system errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"flux-system\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-flux-system-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-flux-system-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-flux-system-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-flux-system-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"flux-system\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-flux-system","attributes":{"title":"flux-system 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-flux-system-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-flux-system-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-flux-system-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-flux-system-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-flux-system-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-flux-system-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-comms-volume","attributes":{"title":"comms logs","visState":"{\"title\":\"comms 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-comms-errors","attributes":{"title":"comms errors","visState":"{\"title\":\"comms errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-comms-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-comms-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-comms-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-comms-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-comms","attributes":{"title":"comms 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-comms-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-comms-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-comms-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-comms-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-comms-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-comms-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-lesavka-volume","attributes":{"title":"lesavka logs","visState":"{\"title\":\"lesavka 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} +{"type":"visualization","id":"logs-lesavka-errors","attributes":{"title":"lesavka errors","visState":"{\"title\":\"lesavka errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(_HOSTNAME: \\\"titan-jh\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} +{"type":"visualization","id":"logs-lesavka-top-units","attributes":{"title":"Top units","visState":"{\"title\":\"Top 
units\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"_SYSTEMD_UNIT.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} +{"type":"search","id":"logs-lesavka-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","_HOSTNAME","_SYSTEMD_UNIT","MESSAGE"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} +{"type":"search","id":"logs-lesavka-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","_HOSTNAME","_SYSTEMD_UNIT","MESSAGE"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(_HOSTNAME: \\\"titan-jh\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : 
\\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} +{"type":"dashboard","id":"logs-lesavka","attributes":{"title":"lesavka Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-lesavka-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-lesavka-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-lesavka-top-units\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-lesavka-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":24,\"h\":8,\"i\":\"5\"},\"id\":\"logs-lesavka-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} diff --git a/services/logging/kustomization.yaml b/services/logging/kustomization.yaml index c7b520c..5043ccc 100644 --- a/services/logging/kustomization.yaml +++ b/services/logging/kustomization.yaml @@ -6,6 +6,7 @@ resources: - opensearch-pvc.yaml - opensearch-helmrelease.yaml - opensearch-dashboards-helmrelease.yaml + - opensearch-dashboards-objects.yaml - opensearch-ism-job.yaml - opensearch-dashboards-setup-job.yaml - opensearch-prune-cronjob.yaml diff --git 
a/services/logging/opensearch-dashboards-objects.yaml b/services/logging/opensearch-dashboards-objects.yaml new file mode 100644 index 0000000..24b49c8 --- /dev/null +++ b/services/logging/opensearch-dashboards-objects.yaml @@ -0,0 +1,114 @@ +# services/logging/opensearch-dashboards-objects.yaml +# Generated by scripts/dashboards_render_logs.py --build +apiVersion: v1 +kind: ConfigMap +metadata: + name: opensearch-dashboards-objects + namespace: logging +data: + objects.ndjson: | + {"type":"index-pattern","id":"kube-logs","attributes":{"title":"kube-*","timeFieldName":"@timestamp"}} + {"type":"index-pattern","id":"journald-logs","attributes":{"title":"journald-*","timeFieldName":"@timestamp"}} + {"type":"visualization","id":"logs-overview-volume","attributes":{"title":"Logs per minute","visState":"{\"title\":\"Logs per minute\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-overview-errors","attributes":{"title":"Errors per minute","visState":"{\"title\":\"Errors per 
minute\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-overview-top-ns","attributes":{"title":"Top namespaces","visState":"{\"title\":\"Top namespaces\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.namespace_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-overview-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-overview-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.namespace_name","kubernetes.pod_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-overview","attributes":{"title":"Atlas Logs 
Overview","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-overview-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-overview-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-overview-top-ns\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-overview-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":24,\"h\":8,\"i\":\"5\"},\"id\":\"logs-overview-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-bstein-dev-home-volume","attributes":{"title":"bstein-dev-home logs","visState":"{\"title\":\"bstein-dev-home logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: 
\\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-bstein-dev-home-errors","attributes":{"title":"bstein-dev-home errors","visState":"{\"title\":\"bstein-dev-home errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"bstein-dev-home\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-bstein-dev-home-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-bstein-dev-home-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-bstein-dev-home-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-bstein-dev-home-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"bstein-dev-home\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-bstein-dev-home","attributes":{"title":"bstein-dev-home 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-bstein-dev-home-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-bstein-dev-home-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-bstein-dev-home-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-bstein-dev-home-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-bstein-dev-home-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-bstein-dev-home-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-pegasus-volume","attributes":{"title":"pegasus logs","visState":"{\"title\":\"pegasus 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-pegasus-errors","attributes":{"title":"pegasus errors","visState":"{\"title\":\"pegasus errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-pegasus-top-pods","attributes":{"title":"Top 
pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-pegasus-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-pegasus-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-pegasus-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-pegasus","attributes":{"title":"pegasus 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-pegasus-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-pegasus-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-pegasus-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-pegasus-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-pegasus-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-pegasus-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-vaultwarden-volume","attributes":{"title":"vaultwarden logs","visState":"{\"title\":\"vaultwarden 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-vaultwarden-errors","attributes":{"title":"vaultwarden errors","visState":"{\"title\":\"vaultwarden errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vaultwarden\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-vaultwarden-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-vaultwarden-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-vaultwarden-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-vaultwarden-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vaultwarden\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-vaultwarden","attributes":{"title":"vaultwarden 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-vaultwarden-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-vaultwarden-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-vaultwarden-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-vaultwarden-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-vaultwarden-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-vaultwarden-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-mailu-volume","attributes":{"title":"mailu logs","visState":"{\"title\":\"mailu 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-mailu-errors","attributes":{"title":"mailu errors","visState":"{\"title\":\"mailu errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"mailu-mailserver\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-mailu-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-mailu-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-mailu-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-mailu-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"mailu-mailserver\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-mailu","attributes":{"title":"mailu 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-mailu-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-mailu-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-mailu-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-mailu-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-mailu-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-mailu-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-nextcloud-volume","attributes":{"title":"nextcloud logs","visState":"{\"title\":\"nextcloud 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-nextcloud-errors","attributes":{"title":"nextcloud errors","visState":"{\"title\":\"nextcloud errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"nextcloud\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-nextcloud-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-nextcloud-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-nextcloud-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-nextcloud-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"nextcloud\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-nextcloud","attributes":{"title":"nextcloud 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-nextcloud-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-nextcloud-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-nextcloud-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-nextcloud-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-nextcloud-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-nextcloud-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-gitea-volume","attributes":{"title":"gitea logs","visState":"{\"title\":\"gitea 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-gitea-errors","attributes":{"title":"gitea errors","visState":"{\"title\":\"gitea errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"gitea\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-gitea-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-gitea-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-gitea-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-gitea-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"gitea\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-gitea","attributes":{"title":"gitea 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-gitea-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-gitea-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-gitea-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-gitea-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-gitea-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-gitea-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-jenkins-volume","attributes":{"title":"jenkins logs","visState":"{\"title\":\"jenkins 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-jenkins-errors","attributes":{"title":"jenkins errors","visState":"{\"title\":\"jenkins errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jenkins\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-jenkins-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-jenkins-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-jenkins-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-jenkins-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jenkins\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-jenkins","attributes":{"title":"jenkins 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-jenkins-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-jenkins-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-jenkins-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-jenkins-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-jenkins-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-jenkins-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-harbor-volume","attributes":{"title":"harbor logs","visState":"{\"title\":\"harbor 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-harbor-errors","attributes":{"title":"harbor errors","visState":"{\"title\":\"harbor errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"harbor\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-harbor-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-harbor-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-harbor-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-harbor-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"harbor\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-harbor","attributes":{"title":"harbor 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-harbor-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-harbor-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-harbor-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-harbor-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-harbor-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-harbor-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-vault-volume","attributes":{"title":"vault logs","visState":"{\"title\":\"vault 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-vault-errors","attributes":{"title":"vault errors","visState":"{\"title\":\"vault errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vault\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-vault-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-vault-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-vault-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-vault-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vault\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-vault","attributes":{"title":"vault 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-vault-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-vault-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-vault-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-vault-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-vault-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-vault-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-keycloak-volume","attributes":{"title":"keycloak logs","visState":"{\"title\":\"keycloak 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-keycloak-errors","attributes":{"title":"keycloak errors","visState":"{\"title\":\"keycloak errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"sso\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-keycloak-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-keycloak-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-keycloak-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-keycloak-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"sso\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-keycloak","attributes":{"title":"keycloak 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-keycloak-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-keycloak-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-keycloak-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-keycloak-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-keycloak-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-keycloak-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-jellyfin-volume","attributes":{"title":"jellyfin logs","visState":"{\"title\":\"jellyfin 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-jellyfin-errors","attributes":{"title":"jellyfin errors","visState":"{\"title\":\"jellyfin errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-jellyfin-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-jellyfin-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-jellyfin-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-jellyfin-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-jellyfin","attributes":{"title":"jellyfin 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-jellyfin-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-jellyfin-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-jellyfin-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-jellyfin-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-jellyfin-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-jellyfin-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-flux-system-volume","attributes":{"title":"flux-system logs","visState":"{\"title\":\"flux-system 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-flux-system-errors","attributes":{"title":"flux-system errors","visState":"{\"title\":\"flux-system errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"flux-system\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-flux-system-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-flux-system-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-flux-system-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-flux-system-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"flux-system\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-flux-system","attributes":{"title":"flux-system 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-flux-system-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-flux-system-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-flux-system-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-flux-system-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-flux-system-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-flux-system-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-comms-volume","attributes":{"title":"comms logs","visState":"{\"title\":\"comms 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-comms-errors","attributes":{"title":"comms errors","visState":"{\"title\":\"comms errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-comms-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-comms-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-comms-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-comms-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-comms","attributes":{"title":"comms 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-comms-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-comms-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-comms-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-comms-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-comms-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-comms-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-lesavka-volume","attributes":{"title":"lesavka logs","visState":"{\"title\":\"lesavka 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} + {"type":"visualization","id":"logs-lesavka-errors","attributes":{"title":"lesavka errors","visState":"{\"title\":\"lesavka errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(_HOSTNAME: \\\"titan-jh\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} + {"type":"visualization","id":"logs-lesavka-top-units","attributes":{"title":"Top units","visState":"{\"title\":\"Top 
units\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"_SYSTEMD_UNIT.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} + {"type":"search","id":"logs-lesavka-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","_HOSTNAME","_SYSTEMD_UNIT","MESSAGE"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} + {"type":"search","id":"logs-lesavka-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","_HOSTNAME","_SYSTEMD_UNIT","MESSAGE"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(_HOSTNAME: \\\"titan-jh\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : 
\\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} + {"type":"dashboard","id":"logs-lesavka","attributes":{"title":"lesavka Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-lesavka-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-lesavka-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-lesavka-top-units\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-lesavka-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":24,\"h\":8,\"i\":\"5\"},\"id\":\"logs-lesavka-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} diff --git a/services/logging/opensearch-dashboards-setup-job.yaml b/services/logging/opensearch-dashboards-setup-job.yaml index fa308cc..91f396c 100644 --- a/services/logging/opensearch-dashboards-setup-job.yaml +++ b/services/logging/opensearch-dashboards-setup-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: opensearch-dashboards-setup-1 + name: opensearch-dashboards-setup-2 namespace: logging spec: backoffLimit: 3 @@ -45,19 +45,24 @@ spec: exit 1 fi - create_view() { 
- view_id="$1" - title="$2" - curl -sS -X POST "${OSD_URL}/api/saved_objects/index-pattern/${view_id}?overwrite=true" \ - -H 'Content-Type: application/json' \ - -H 'osd-xsrf: true' \ - -d "{\"attributes\":{\"title\":\"${title}\",\"timeFieldName\":\"@timestamp\"}}" >/dev/null - } + if [ ! -s /config/objects.ndjson ]; then + echo "Saved objects file not found at /config/objects.ndjson" >&2 + exit 1 + fi - create_view kube-logs "kube-*" - create_view journald-logs "journald-*" + curl -sS -X POST "${OSD_URL}/api/saved_objects/_import?overwrite=true" \ + -H 'osd-xsrf: true' \ + -F file=@/config/objects.ndjson >/dev/null curl -sS -X POST "${OSD_URL}/api/opensearch-dashboards/settings" \ -H 'Content-Type: application/json' \ -H 'osd-xsrf: true' \ -d '{"changes":{"defaultIndex":"kube-logs"}}' >/dev/null + volumeMounts: + - name: objects + mountPath: /config + readOnly: true + volumes: + - name: objects + configMap: + name: opensearch-dashboards-objects -- 2.47.2 From dd705aeb4a64d501615c5c929348328275ec0445 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 22:55:39 -0300 Subject: [PATCH 621/684] logging: expand OpenSearch dashboards --- scripts/dashboards_render_logs.py | 152 +++++++++++++++--- services/logging/dashboards/logs.ndjson | 79 ++++++--- .../opensearch-dashboards-objects.yaml | 79 ++++++--- .../opensearch-dashboards-setup-job.yaml | 24 ++- 4 files changed, 268 insertions(+), 66 deletions(-) diff --git a/scripts/dashboards_render_logs.py b/scripts/dashboards_render_logs.py index dc83f81..9284b93 100755 --- a/scripts/dashboards_render_logs.py +++ b/scripts/dashboards_render_logs.py @@ -34,6 +34,12 @@ data: ) DASHBOARD_VERSION = "7.10.0" +GRID_COLUMNS = 48 +GRID_HALF = GRID_COLUMNS // 2 +H_CHART = 10 +H_ERRORS = 8 +H_TABLE = 12 +H_SEARCH = 14 ERROR_TERMS = ("*error*", "*exception*", "*fail*") @@ -181,10 +187,14 @@ def search_object(object_id: str, title: str, columns: list[str], query: str, in } -def panel(panel_id: str, panel_type: str, grid: dict, 
index: int) -> dict: +def grid(x: int, y: int, w: int, h: int, i: int) -> dict: + return {"x": x, "y": y, "w": w, "h": h, "i": str(i)} + + +def panel(panel_id: str, panel_type: str, grid_data: dict, index: int) -> dict: return { "panelIndex": str(index), - "gridData": grid, + "gridData": grid_data, "id": panel_id, "type": panel_type, "version": DASHBOARD_VERSION, @@ -231,11 +241,26 @@ def app_dashboard_objects(app: AppSpec) -> list[dict]: ) ) panels = [ - panel(f"{prefix}-volume", "visualization", {"x": 0, "y": 0, "w": 24, "h": 8, "i": "1"}, 1), - panel(f"{prefix}-errors", "visualization", {"x": 0, "y": 8, "w": 24, "h": 6, "i": "2"}, 2), - panel(f"{prefix}-top-units", "visualization", {"x": 0, "y": 14, "w": 12, "h": 8, "i": "3"}, 3), - panel(f"{prefix}-recent", "search", {"x": 12, "y": 14, "w": 12, "h": 8, "i": "4"}, 4), - panel(f"{prefix}-recent-errors", "search", {"x": 0, "y": 22, "w": 24, "h": 8, "i": "5"}, 5), + panel(f"{prefix}-volume", "visualization", grid(0, 0, GRID_COLUMNS, H_CHART, 1), 1), + panel(f"{prefix}-errors", "visualization", grid(0, H_CHART, GRID_COLUMNS, H_ERRORS, 2), 2), + panel( + f"{prefix}-top-units", + "visualization", + grid(0, H_CHART + H_ERRORS, GRID_HALF, H_TABLE, 3), + 3, + ), + panel( + f"{prefix}-recent", + "search", + grid(GRID_HALF, H_CHART + H_ERRORS, GRID_HALF, H_TABLE, 4), + 4, + ), + panel( + f"{prefix}-recent-errors", + "search", + grid(0, H_CHART + H_ERRORS + H_TABLE, GRID_COLUMNS, H_SEARCH, 5), + 5, + ), ] objects.append(dashboard_object(prefix, f"{app.title} Logs", panels)) return objects @@ -258,17 +283,32 @@ def app_dashboard_objects(app: AppSpec) -> list[dict]: ) ) panels = [ - panel(f"{prefix}-volume", "visualization", {"x": 0, "y": 0, "w": 24, "h": 8, "i": "1"}, 1), - panel(f"{prefix}-errors", "visualization", {"x": 0, "y": 8, "w": 24, "h": 6, "i": "2"}, 2), - panel(f"{prefix}-top-pods", "visualization", {"x": 0, "y": 14, "w": 12, "h": 8, "i": "3"}, 3), + panel(f"{prefix}-volume", "visualization", grid(0, 0, 
GRID_COLUMNS, H_CHART, 1), 1), + panel(f"{prefix}-errors", "visualization", grid(0, H_CHART, GRID_COLUMNS, H_ERRORS, 2), 2), + panel( + f"{prefix}-top-pods", + "visualization", + grid(0, H_CHART + H_ERRORS, GRID_HALF, H_TABLE, 3), + 3, + ), panel( f"{prefix}-top-containers", "visualization", - {"x": 12, "y": 14, "w": 12, "h": 8, "i": "4"}, + grid(GRID_HALF, H_CHART + H_ERRORS, GRID_HALF, H_TABLE, 4), 4, ), - panel(f"{prefix}-recent", "search", {"x": 0, "y": 22, "w": 12, "h": 8, "i": "5"}, 5), - panel(f"{prefix}-recent-errors", "search", {"x": 12, "y": 22, "w": 12, "h": 8, "i": "6"}, 6), + panel( + f"{prefix}-recent", + "search", + grid(0, H_CHART + H_ERRORS + H_TABLE, GRID_HALF, H_SEARCH, 5), + 5, + ), + panel( + f"{prefix}-recent-errors", + "search", + grid(GRID_HALF, H_CHART + H_ERRORS + H_TABLE, GRID_HALF, H_SEARCH, 6), + 6, + ), ] objects.append(dashboard_object(prefix, f"{app.title} Logs", panels)) return objects @@ -287,7 +327,25 @@ def overview_objects() -> list[dict]: "kube-logs", ) ) + objects.append( + table_vis( + "logs-overview-top-error-ns", + "Top error namespaces", + "kubernetes.namespace_name.keyword", + error_query(), + "kube-logs", + ) + ) objects.append(table_vis("logs-overview-top-pods", "Top pods", "kubernetes.pod_name.keyword", "*", "kube-logs")) + objects.append( + table_vis( + "logs-overview-top-nodes", + "Top nodes", + "kubernetes.node_name.keyword", + "*", + "kube-logs", + ) + ) objects.append( search_object( "logs-overview-recent-errors", @@ -298,11 +356,38 @@ def overview_objects() -> list[dict]: ) ) panels = [ - panel("logs-overview-volume", "visualization", {"x": 0, "y": 0, "w": 24, "h": 8, "i": "1"}, 1), - panel("logs-overview-errors", "visualization", {"x": 0, "y": 8, "w": 24, "h": 6, "i": "2"}, 2), - panel("logs-overview-top-ns", "visualization", {"x": 0, "y": 14, "w": 12, "h": 8, "i": "3"}, 3), - panel("logs-overview-top-pods", "visualization", {"x": 12, "y": 14, "w": 12, "h": 8, "i": "4"}, 4), - 
panel("logs-overview-recent-errors", "search", {"x": 0, "y": 22, "w": 24, "h": 8, "i": "5"}, 5), + panel("logs-overview-volume", "visualization", grid(0, 0, GRID_COLUMNS, H_CHART, 1), 1), + panel("logs-overview-errors", "visualization", grid(0, H_CHART, GRID_COLUMNS, H_ERRORS, 2), 2), + panel( + "logs-overview-top-ns", + "visualization", + grid(0, H_CHART + H_ERRORS, GRID_HALF, H_TABLE, 3), + 3, + ), + panel( + "logs-overview-top-error-ns", + "visualization", + grid(GRID_HALF, H_CHART + H_ERRORS, GRID_HALF, H_TABLE, 4), + 4, + ), + panel( + "logs-overview-top-pods", + "visualization", + grid(0, H_CHART + H_ERRORS + H_TABLE, GRID_HALF, H_TABLE, 5), + 5, + ), + panel( + "logs-overview-top-nodes", + "visualization", + grid(GRID_HALF, H_CHART + H_ERRORS + H_TABLE, GRID_HALF, H_TABLE, 6), + 6, + ), + panel( + "logs-overview-recent-errors", + "search", + grid(0, H_CHART + H_ERRORS + (H_TABLE * 2), GRID_COLUMNS, H_SEARCH, 7), + 7, + ), ] objects.append(dashboard_object("logs-overview", "Atlas Logs Overview", panels)) return objects @@ -323,6 +408,11 @@ def build_objects() -> list[dict]: "pegasus", 'kubernetes.namespace_name: "jellyfin" and kubernetes.labels.app: "pegasus"', ), + AppSpec( + "jellyfin", + "jellyfin", + 'kubernetes.namespace_name: "jellyfin" and kubernetes.labels.app: "jellyfin"', + ), AppSpec("vaultwarden", "vaultwarden", 'kubernetes.namespace_name: "vaultwarden"'), AppSpec("mailu", "mailu", 'kubernetes.namespace_name: "mailu-mailserver"'), AppSpec("nextcloud", "nextcloud", 'kubernetes.namespace_name: "nextcloud"'), @@ -331,9 +421,33 @@ def build_objects() -> list[dict]: AppSpec("harbor", "harbor", 'kubernetes.namespace_name: "harbor"'), AppSpec("vault", "vault", 'kubernetes.namespace_name: "vault"'), AppSpec("keycloak", "keycloak", 'kubernetes.namespace_name: "sso"'), - AppSpec("jellyfin", "jellyfin", 'kubernetes.namespace_name: "jellyfin"'), AppSpec("flux-system", "flux-system", 'kubernetes.namespace_name: "flux-system"'), AppSpec("comms", "comms", 
'kubernetes.namespace_name: "comms"'), + AppSpec( + "element-web", + "element-web", + 'kubernetes.namespace_name: "comms" and kubernetes.container_name: "element-web"', + ), + AppSpec( + "element-call", + "element-call", + 'kubernetes.namespace_name: "comms" and kubernetes.labels.app: "element-call"', + ), + AppSpec( + "matrix-synapse", + "matrix-synapse", + 'kubernetes.namespace_name: "comms" and kubernetes.container_name: "synapse"', + ), + AppSpec( + "livekit", + "livekit", + 'kubernetes.namespace_name: "comms" and kubernetes.labels.app: "livekit"', + ), + AppSpec( + "coturn", + "coturn", + 'kubernetes.namespace_name: "comms" and kubernetes.labels.app: "coturn"', + ), AppSpec("lesavka", "lesavka", '_HOSTNAME: "titan-jh"', index_id="journald-logs", kind="journald"), ] diff --git a/services/logging/dashboards/logs.ndjson b/services/logging/dashboards/logs.ndjson index 67466c1..56f0e5e 100644 --- a/services/logging/dashboards/logs.ndjson +++ b/services/logging/dashboards/logs.ndjson @@ -3,103 +3,140 @@ {"type":"visualization","id":"logs-overview-volume","attributes":{"title":"Logs per minute","visState":"{\"title\":\"Logs per minute\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-overview-errors","attributes":{"title":"Errors per minute","visState":"{\"title\":\"Errors per 
minute\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-overview-top-ns","attributes":{"title":"Top namespaces","visState":"{\"title\":\"Top namespaces\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.namespace_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-overview-top-error-ns","attributes":{"title":"Top error namespaces","visState":"{\"title\":\"Top error 
namespaces\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.namespace_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-overview-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-overview-top-nodes","attributes":{"title":"Top nodes","visState":"{\"title\":\"Top 
nodes\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.node_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-overview-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.namespace_name","kubernetes.pod_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-overview","attributes":{"title":"Atlas Logs 
Overview","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-overview-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-overview-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-overview-top-ns\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-overview-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":24,\"h\":8,\"i\":\"5\"},\"id\":\"logs-overview-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-overview","attributes":{"title":"Atlas Logs 
Overview","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-overview-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-overview-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-overview-top-ns\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-overview-top-error-ns\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":12,\"i\":\"5\"},\"id\":\"logs-overview-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":12,\"i\":\"6\"},\"id\":\"logs-overview-top-nodes\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"7\",\"gridData\":{\"x\":0,\"y\":42,\"w\":48,\"h\":14,\"i\":\"7\"},\"id\":\"logs-overview-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-bstein-dev-home-volume","attributes":{"title":"bstein-dev-home logs","visState":"{\"title\":\"bstein-dev-home 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-bstein-dev-home-errors","attributes":{"title":"bstein-dev-home errors","visState":"{\"title\":\"bstein-dev-home errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"bstein-dev-home\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-bstein-dev-home-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-bstein-dev-home-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-bstein-dev-home-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-bstein-dev-home-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"bstein-dev-home\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-bstein-dev-home","attributes":{"title":"bstein-dev-home 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-bstein-dev-home-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-bstein-dev-home-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-bstein-dev-home-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-bstein-dev-home-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-bstein-dev-home-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-bstein-dev-home-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-bstein-dev-home","attributes":{"title":"bstein-dev-home 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-bstein-dev-home-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-bstein-dev-home-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-bstein-dev-home-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-bstein-dev-home-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-bstein-dev-home-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-bstein-dev-home-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-pegasus-volume","attributes":{"title":"pegasus logs","visState":"{\"title\":\"pegasus 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-pegasus-errors","attributes":{"title":"pegasus errors","visState":"{\"title\":\"pegasus errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-pegasus-top-pods","attributes":{"title":"Top 
pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-pegasus-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-pegasus-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-pegasus-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-pegasus","attributes":{"title":"pegasus 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-pegasus-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-pegasus-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-pegasus-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-pegasus-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-pegasus-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-pegasus-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-pegasus","attributes":{"title":"pegasus 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-pegasus-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-pegasus-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-pegasus-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-pegasus-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-pegasus-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-pegasus-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-jellyfin-volume","attributes":{"title":"jellyfin logs","visState":"{\"title\":\"jellyfin 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-jellyfin-errors","attributes":{"title":"jellyfin errors","visState":"{\"title\":\"jellyfin errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
+{"type":"visualization","id":"logs-jellyfin-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-jellyfin-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
+{"type":"search","id":"logs-jellyfin-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-jellyfin-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-jellyfin","attributes":{"title":"jellyfin 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-jellyfin-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-jellyfin-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-jellyfin-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-jellyfin-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-jellyfin-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-jellyfin-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-vaultwarden-volume","attributes":{"title":"vaultwarden logs","visState":"{\"title\":\"vaultwarden 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-vaultwarden-errors","attributes":{"title":"vaultwarden errors","visState":"{\"title\":\"vaultwarden errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vaultwarden\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-vaultwarden-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-vaultwarden-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-vaultwarden-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-vaultwarden-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vaultwarden\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-vaultwarden","attributes":{"title":"vaultwarden 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-vaultwarden-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-vaultwarden-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-vaultwarden-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-vaultwarden-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-vaultwarden-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-vaultwarden-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-vaultwarden","attributes":{"title":"vaultwarden 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-vaultwarden-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-vaultwarden-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-vaultwarden-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-vaultwarden-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-vaultwarden-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-vaultwarden-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-mailu-volume","attributes":{"title":"mailu logs","visState":"{\"title\":\"mailu 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-mailu-errors","attributes":{"title":"mailu errors","visState":"{\"title\":\"mailu errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"mailu-mailserver\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-mailu-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-mailu-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-mailu-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-mailu-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"mailu-mailserver\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-mailu","attributes":{"title":"mailu 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-mailu-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-mailu-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-mailu-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-mailu-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-mailu-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-mailu-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-mailu","attributes":{"title":"mailu 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-mailu-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-mailu-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-mailu-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-mailu-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-mailu-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-mailu-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-nextcloud-volume","attributes":{"title":"nextcloud logs","visState":"{\"title\":\"nextcloud 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-nextcloud-errors","attributes":{"title":"nextcloud errors","visState":"{\"title\":\"nextcloud errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"nextcloud\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-nextcloud-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-nextcloud-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-nextcloud-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-nextcloud-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"nextcloud\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-nextcloud","attributes":{"title":"nextcloud 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-nextcloud-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-nextcloud-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-nextcloud-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-nextcloud-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-nextcloud-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-nextcloud-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-nextcloud","attributes":{"title":"nextcloud 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-nextcloud-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-nextcloud-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-nextcloud-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-nextcloud-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-nextcloud-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-nextcloud-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-gitea-volume","attributes":{"title":"gitea logs","visState":"{\"title\":\"gitea 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-gitea-errors","attributes":{"title":"gitea errors","visState":"{\"title\":\"gitea errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"gitea\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-gitea-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-gitea-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-gitea-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-gitea-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"gitea\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-gitea","attributes":{"title":"gitea 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-gitea-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-gitea-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-gitea-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-gitea-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-gitea-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-gitea-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-gitea","attributes":{"title":"gitea 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-gitea-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-gitea-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-gitea-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-gitea-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-gitea-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-gitea-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-jenkins-volume","attributes":{"title":"jenkins logs","visState":"{\"title\":\"jenkins 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-jenkins-errors","attributes":{"title":"jenkins errors","visState":"{\"title\":\"jenkins errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jenkins\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-jenkins-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-jenkins-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-jenkins-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-jenkins-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jenkins\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-jenkins","attributes":{"title":"jenkins 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-jenkins-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-jenkins-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-jenkins-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-jenkins-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-jenkins-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-jenkins-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-jenkins","attributes":{"title":"jenkins 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-jenkins-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-jenkins-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-jenkins-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-jenkins-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-jenkins-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-jenkins-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-harbor-volume","attributes":{"title":"harbor logs","visState":"{\"title\":\"harbor 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-harbor-errors","attributes":{"title":"harbor errors","visState":"{\"title\":\"harbor errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"harbor\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-harbor-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-harbor-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-harbor-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-harbor-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"harbor\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-harbor","attributes":{"title":"harbor 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-harbor-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-harbor-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-harbor-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-harbor-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-harbor-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-harbor-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-harbor","attributes":{"title":"harbor 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-harbor-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-harbor-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-harbor-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-harbor-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-harbor-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-harbor-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-vault-volume","attributes":{"title":"vault logs","visState":"{\"title\":\"vault 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-vault-errors","attributes":{"title":"vault errors","visState":"{\"title\":\"vault errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vault\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-vault-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-vault-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-vault-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-vault-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vault\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-vault","attributes":{"title":"vault 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-vault-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-vault-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-vault-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-vault-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-vault-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-vault-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-vault","attributes":{"title":"vault 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-vault-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-vault-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-vault-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-vault-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-vault-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-vault-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-keycloak-volume","attributes":{"title":"keycloak logs","visState":"{\"title\":\"keycloak 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-keycloak-errors","attributes":{"title":"keycloak errors","visState":"{\"title\":\"keycloak errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"sso\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-keycloak-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-keycloak-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-keycloak-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-keycloak-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"sso\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-keycloak","attributes":{"title":"keycloak 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-keycloak-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-keycloak-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-keycloak-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-keycloak-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-keycloak-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-keycloak-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} -{"type":"visualization","id":"logs-jellyfin-volume","attributes":{"title":"jellyfin logs","visState":"{\"title\":\"jellyfin 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-jellyfin-errors","attributes":{"title":"jellyfin errors","visState":"{\"title\":\"jellyfin errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-jellyfin-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-jellyfin-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"search","id":"logs-jellyfin-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"search","id":"logs-jellyfin-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-jellyfin","attributes":{"title":"jellyfin 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-jellyfin-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-jellyfin-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-jellyfin-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-jellyfin-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-jellyfin-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-jellyfin-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-keycloak","attributes":{"title":"keycloak 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-keycloak-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-keycloak-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-keycloak-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-keycloak-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-keycloak-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-keycloak-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-flux-system-volume","attributes":{"title":"flux-system logs","visState":"{\"title\":\"flux-system 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-flux-system-errors","attributes":{"title":"flux-system errors","visState":"{\"title\":\"flux-system errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"flux-system\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-flux-system-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-flux-system-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-flux-system-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-flux-system-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"flux-system\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-flux-system","attributes":{"title":"flux-system 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-flux-system-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-flux-system-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-flux-system-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-flux-system-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-flux-system-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-flux-system-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-flux-system","attributes":{"title":"flux-system 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-flux-system-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-flux-system-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-flux-system-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-flux-system-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-flux-system-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-flux-system-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-comms-volume","attributes":{"title":"comms logs","visState":"{\"title\":\"comms 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-comms-errors","attributes":{"title":"comms errors","visState":"{\"title\":\"comms errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-comms-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-comms-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-comms-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-comms-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-comms","attributes":{"title":"comms 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-comms-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-comms-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-comms-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-comms-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-comms-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-comms-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-comms","attributes":{"title":"comms 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-comms-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-comms-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-comms-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-comms-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-comms-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-comms-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-element-web-volume","attributes":{"title":"element-web logs","visState":"{\"title\":\"element-web 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-element-web-errors","attributes":{"title":"element-web errors","visState":"{\"title\":\"element-web errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
+{"type":"visualization","id":"logs-element-web-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-element-web-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
+{"type":"search","id":"logs-element-web-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-element-web-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-element-web","attributes":{"title":"element-web 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-element-web-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-element-web-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-element-web-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-element-web-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-element-web-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-element-web-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-element-call-volume","attributes":{"title":"element-call logs","visState":"{\"title\":\"element-call 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-element-call-errors","attributes":{"title":"element-call errors","visState":"{\"title\":\"element-call errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
+{"type":"visualization","id":"logs-element-call-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-element-call-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
+{"type":"search","id":"logs-element-call-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-element-call-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-element-call","attributes":{"title":"element-call 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-element-call-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-element-call-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-element-call-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-element-call-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-element-call-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-element-call-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-matrix-synapse-volume","attributes":{"title":"matrix-synapse logs","visState":"{\"title\":\"matrix-synapse 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-matrix-synapse-errors","attributes":{"title":"matrix-synapse errors","visState":"{\"title\":\"matrix-synapse errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
+{"type":"visualization","id":"logs-matrix-synapse-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-matrix-synapse-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
+{"type":"search","id":"logs-matrix-synapse-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-matrix-synapse-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-matrix-synapse","attributes":{"title":"matrix-synapse 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-matrix-synapse-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-matrix-synapse-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-matrix-synapse-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-matrix-synapse-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-matrix-synapse-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-matrix-synapse-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-livekit-volume","attributes":{"title":"livekit logs","visState":"{\"title\":\"livekit 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-livekit-errors","attributes":{"title":"livekit errors","visState":"{\"title\":\"livekit errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-livekit-top-pods","attributes":{"title":"Top 
pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-livekit-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-livekit-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-livekit-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-livekit","attributes":{"title":"livekit 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-livekit-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-livekit-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-livekit-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-livekit-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-livekit-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-livekit-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"visualization","id":"logs-coturn-volume","attributes":{"title":"coturn logs","visState":"{\"title\":\"coturn 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-coturn-errors","attributes":{"title":"coturn errors","visState":"{\"title\":\"coturn errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-coturn-top-pods","attributes":{"title":"Top 
pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-coturn-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-coturn-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"search","id":"logs-coturn-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"dashboard","id":"logs-coturn","attributes":{"title":"coturn 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-coturn-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-coturn-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-coturn-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-coturn-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-coturn-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-coturn-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-lesavka-volume","attributes":{"title":"lesavka logs","visState":"{\"title\":\"lesavka 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} {"type":"visualization","id":"logs-lesavka-errors","attributes":{"title":"lesavka errors","visState":"{\"title\":\"lesavka errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(_HOSTNAME: \\\"titan-jh\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} {"type":"visualization","id":"logs-lesavka-top-units","attributes":{"title":"Top units","visState":"{\"title\":\"Top 
units\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"_SYSTEMD_UNIT.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} {"type":"search","id":"logs-lesavka-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","_HOSTNAME","_SYSTEMD_UNIT","MESSAGE"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} {"type":"search","id":"logs-lesavka-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","_HOSTNAME","_SYSTEMD_UNIT","MESSAGE"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(_HOSTNAME: \\\"titan-jh\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : 
\\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} -{"type":"dashboard","id":"logs-lesavka","attributes":{"title":"lesavka Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-lesavka-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-lesavka-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-lesavka-top-units\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-lesavka-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":24,\"h\":8,\"i\":\"5\"},\"id\":\"logs-lesavka-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-lesavka","attributes":{"title":"lesavka 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-lesavka-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-lesavka-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-lesavka-top-units\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-lesavka-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":48,\"h\":14,\"i\":\"5\"},\"id\":\"logs-lesavka-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} diff --git a/services/logging/opensearch-dashboards-objects.yaml b/services/logging/opensearch-dashboards-objects.yaml index 24b49c8..6cbef5b 100644 --- a/services/logging/opensearch-dashboards-objects.yaml +++ b/services/logging/opensearch-dashboards-objects.yaml @@ -12,103 +12,140 @@ data: {"type":"visualization","id":"logs-overview-volume","attributes":{"title":"Logs per minute","visState":"{\"title\":\"Logs per 
minute\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-overview-errors","attributes":{"title":"Errors per minute","visState":"{\"title\":\"Errors per minute\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-overview-top-ns","attributes":{"title":"Top namespaces","visState":"{\"title\":\"Top 
namespaces\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.namespace_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-overview-top-error-ns","attributes":{"title":"Top error namespaces","visState":"{\"title\":\"Top error namespaces\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.namespace_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-overview-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-overview-top-nodes","attributes":{"title":"Top nodes","visState":"{\"title\":\"Top nodes\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.node_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-overview-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.namespace_name","kubernetes.pod_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"((log : 
\\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-overview","attributes":{"title":"Atlas Logs Overview","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-overview-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-overview-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-overview-top-ns\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-overview-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":24,\"h\":8,\"i\":\"5\"},\"id\":\"logs-overview-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-overview","attributes":{"title":"Atlas Logs 
Overview","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-overview-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-overview-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-overview-top-ns\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-overview-top-error-ns\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":12,\"i\":\"5\"},\"id\":\"logs-overview-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":12,\"i\":\"6\"},\"id\":\"logs-overview-top-nodes\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"7\",\"gridData\":{\"x\":0,\"y\":42,\"w\":48,\"h\":14,\"i\":\"7\"},\"id\":\"logs-overview-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-bstein-dev-home-volume","attributes":{"title":"bstein-dev-home logs","visState":"{\"title\":\"bstein-dev-home 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-bstein-dev-home-errors","attributes":{"title":"bstein-dev-home errors","visState":"{\"title\":\"bstein-dev-home errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"bstein-dev-home\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-bstein-dev-home-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-bstein-dev-home-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-bstein-dev-home-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-bstein-dev-home-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"bstein-dev-home\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-bstein-dev-home","attributes":{"title":"bstein-dev-home 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-bstein-dev-home-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-bstein-dev-home-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-bstein-dev-home-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-bstein-dev-home-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-bstein-dev-home-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-bstein-dev-home-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-bstein-dev-home","attributes":{"title":"bstein-dev-home 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-bstein-dev-home-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-bstein-dev-home-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-bstein-dev-home-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-bstein-dev-home-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-bstein-dev-home-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-bstein-dev-home-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-pegasus-volume","attributes":{"title":"pegasus logs","visState":"{\"title\":\"pegasus 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-pegasus-errors","attributes":{"title":"pegasus errors","visState":"{\"title\":\"pegasus errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-pegasus-top-pods","attributes":{"title":"Top 
pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-pegasus-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-pegasus-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-pegasus-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-pegasus","attributes":{"title":"pegasus 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-pegasus-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-pegasus-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-pegasus-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-pegasus-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-pegasus-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-pegasus-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-pegasus","attributes":{"title":"pegasus 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-pegasus-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-pegasus-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-pegasus-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-pegasus-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-pegasus-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-pegasus-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-jellyfin-volume","attributes":{"title":"jellyfin logs","visState":"{\"title\":\"jellyfin 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-jellyfin-errors","attributes":{"title":"jellyfin errors","visState":"{\"title\":\"jellyfin errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + 
{"type":"visualization","id":"logs-jellyfin-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-jellyfin-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + 
{"type":"search","id":"logs-jellyfin-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-jellyfin-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-jellyfin","attributes":{"title":"jellyfin 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-jellyfin-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-jellyfin-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-jellyfin-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-jellyfin-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-jellyfin-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-jellyfin-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-vaultwarden-volume","attributes":{"title":"vaultwarden logs","visState":"{\"title\":\"vaultwarden 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-vaultwarden-errors","attributes":{"title":"vaultwarden errors","visState":"{\"title\":\"vaultwarden errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vaultwarden\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-vaultwarden-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-vaultwarden-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-vaultwarden-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-vaultwarden-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vaultwarden\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-vaultwarden","attributes":{"title":"vaultwarden 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-vaultwarden-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-vaultwarden-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-vaultwarden-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-vaultwarden-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-vaultwarden-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-vaultwarden-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-vaultwarden","attributes":{"title":"vaultwarden 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-vaultwarden-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-vaultwarden-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-vaultwarden-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-vaultwarden-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-vaultwarden-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-vaultwarden-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-mailu-volume","attributes":{"title":"mailu logs","visState":"{\"title\":\"mailu 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-mailu-errors","attributes":{"title":"mailu errors","visState":"{\"title\":\"mailu errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"mailu-mailserver\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-mailu-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-mailu-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-mailu-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-mailu-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"mailu-mailserver\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-mailu","attributes":{"title":"mailu 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-mailu-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-mailu-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-mailu-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-mailu-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-mailu-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-mailu-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-mailu","attributes":{"title":"mailu 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-mailu-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-mailu-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-mailu-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-mailu-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-mailu-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-mailu-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-nextcloud-volume","attributes":{"title":"nextcloud logs","visState":"{\"title\":\"nextcloud 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-nextcloud-errors","attributes":{"title":"nextcloud errors","visState":"{\"title\":\"nextcloud errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"nextcloud\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-nextcloud-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-nextcloud-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-nextcloud-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-nextcloud-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"nextcloud\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-nextcloud","attributes":{"title":"nextcloud 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-nextcloud-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-nextcloud-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-nextcloud-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-nextcloud-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-nextcloud-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-nextcloud-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-nextcloud","attributes":{"title":"nextcloud 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-nextcloud-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-nextcloud-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-nextcloud-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-nextcloud-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-nextcloud-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-nextcloud-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-gitea-volume","attributes":{"title":"gitea logs","visState":"{\"title\":\"gitea 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-gitea-errors","attributes":{"title":"gitea errors","visState":"{\"title\":\"gitea errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"gitea\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-gitea-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-gitea-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-gitea-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-gitea-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"gitea\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-gitea","attributes":{"title":"gitea 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-gitea-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-gitea-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-gitea-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-gitea-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-gitea-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-gitea-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-gitea","attributes":{"title":"gitea 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-gitea-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-gitea-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-gitea-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-gitea-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-gitea-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-gitea-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-jenkins-volume","attributes":{"title":"jenkins logs","visState":"{\"title\":\"jenkins 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-jenkins-errors","attributes":{"title":"jenkins errors","visState":"{\"title\":\"jenkins errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jenkins\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-jenkins-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-jenkins-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-jenkins-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-jenkins-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jenkins\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-jenkins","attributes":{"title":"jenkins 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-jenkins-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-jenkins-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-jenkins-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-jenkins-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-jenkins-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-jenkins-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-jenkins","attributes":{"title":"jenkins 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-jenkins-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-jenkins-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-jenkins-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-jenkins-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-jenkins-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-jenkins-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-harbor-volume","attributes":{"title":"harbor logs","visState":"{\"title\":\"harbor 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-harbor-errors","attributes":{"title":"harbor errors","visState":"{\"title\":\"harbor errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"harbor\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-harbor-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-harbor-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-harbor-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-harbor-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"harbor\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-harbor","attributes":{"title":"harbor 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-harbor-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-harbor-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-harbor-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-harbor-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-harbor-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-harbor-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-harbor","attributes":{"title":"harbor 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-harbor-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-harbor-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-harbor-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-harbor-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-harbor-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-harbor-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-vault-volume","attributes":{"title":"vault logs","visState":"{\"title\":\"vault 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-vault-errors","attributes":{"title":"vault errors","visState":"{\"title\":\"vault errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vault\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-vault-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-vault-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-vault-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-vault-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vault\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-vault","attributes":{"title":"vault 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-vault-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-vault-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-vault-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-vault-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-vault-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-vault-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-vault","attributes":{"title":"vault 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-vault-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-vault-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-vault-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-vault-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-vault-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-vault-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-keycloak-volume","attributes":{"title":"keycloak logs","visState":"{\"title\":\"keycloak 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-keycloak-errors","attributes":{"title":"keycloak errors","visState":"{\"title\":\"keycloak errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"sso\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-keycloak-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-keycloak-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-keycloak-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-keycloak-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"sso\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-keycloak","attributes":{"title":"keycloak 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-keycloak-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-keycloak-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-keycloak-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-keycloak-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-keycloak-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-keycloak-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} - {"type":"visualization","id":"logs-jellyfin-volume","attributes":{"title":"jellyfin logs","visState":"{\"title\":\"jellyfin 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-jellyfin-errors","attributes":{"title":"jellyfin errors","visState":"{\"title\":\"jellyfin errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-jellyfin-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-jellyfin-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"search","id":"logs-jellyfin-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"search","id":"logs-jellyfin-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-jellyfin","attributes":{"title":"jellyfin 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-jellyfin-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-jellyfin-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-jellyfin-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-jellyfin-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-jellyfin-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-jellyfin-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-keycloak","attributes":{"title":"keycloak 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-keycloak-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-keycloak-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-keycloak-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-keycloak-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-keycloak-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-keycloak-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-flux-system-volume","attributes":{"title":"flux-system logs","visState":"{\"title\":\"flux-system 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-flux-system-errors","attributes":{"title":"flux-system errors","visState":"{\"title\":\"flux-system errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"flux-system\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-flux-system-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-flux-system-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-flux-system-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-flux-system-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"flux-system\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-flux-system","attributes":{"title":"flux-system 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-flux-system-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-flux-system-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-flux-system-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-flux-system-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-flux-system-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-flux-system-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-flux-system","attributes":{"title":"flux-system 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-flux-system-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-flux-system-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-flux-system-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-flux-system-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-flux-system-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-flux-system-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-comms-volume","attributes":{"title":"comms logs","visState":"{\"title\":\"comms 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-comms-errors","attributes":{"title":"comms errors","visState":"{\"title\":\"comms errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-comms-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-comms-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-comms-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-comms-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-comms","attributes":{"title":"comms 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-comms-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-comms-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-comms-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-comms-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":12,\"h\":8,\"i\":\"5\"},\"id\":\"logs-comms-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":12,\"y\":22,\"w\":12,\"h\":8,\"i\":\"6\"},\"id\":\"logs-comms-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-comms","attributes":{"title":"comms 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-comms-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-comms-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-comms-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-comms-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-comms-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-comms-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-element-web-volume","attributes":{"title":"element-web logs","visState":"{\"title\":\"element-web 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-element-web-errors","attributes":{"title":"element-web errors","visState":"{\"title\":\"element-web errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + 
{"type":"visualization","id":"logs-element-web-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-element-web-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + 
{"type":"search","id":"logs-element-web-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-element-web-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-element-web","attributes":{"title":"element-web 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-element-web-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-element-web-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-element-web-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-element-web-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-element-web-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-element-web-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-element-call-volume","attributes":{"title":"element-call logs","visState":"{\"title\":\"element-call 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-element-call-errors","attributes":{"title":"element-call errors","visState":"{\"title\":\"element-call errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + 
{"type":"visualization","id":"logs-element-call-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-element-call-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + 
{"type":"search","id":"logs-element-call-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-element-call-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-element-call","attributes":{"title":"element-call 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-element-call-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-element-call-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-element-call-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-element-call-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-element-call-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-element-call-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-matrix-synapse-volume","attributes":{"title":"matrix-synapse logs","visState":"{\"title\":\"matrix-synapse 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-matrix-synapse-errors","attributes":{"title":"matrix-synapse errors","visState":"{\"title\":\"matrix-synapse errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + 
{"type":"visualization","id":"logs-matrix-synapse-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-matrix-synapse-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + 
{"type":"search","id":"logs-matrix-synapse-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-matrix-synapse-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-matrix-synapse","attributes":{"title":"matrix-synapse 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-matrix-synapse-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-matrix-synapse-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-matrix-synapse-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-matrix-synapse-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-matrix-synapse-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-matrix-synapse-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-livekit-volume","attributes":{"title":"livekit logs","visState":"{\"title\":\"livekit 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-livekit-errors","attributes":{"title":"livekit errors","visState":"{\"title\":\"livekit errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-livekit-top-pods","attributes":{"title":"Top 
pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-livekit-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-livekit-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-livekit-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-livekit","attributes":{"title":"livekit 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-livekit-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-livekit-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-livekit-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-livekit-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-livekit-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-livekit-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"visualization","id":"logs-coturn-volume","attributes":{"title":"coturn logs","visState":"{\"title\":\"coturn 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-coturn-errors","attributes":{"title":"coturn errors","visState":"{\"title\":\"coturn errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-coturn-top-pods","attributes":{"title":"Top 
pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-coturn-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-coturn-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"search","id":"logs-coturn-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"dashboard","id":"logs-coturn","attributes":{"title":"coturn 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-coturn-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-coturn-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-coturn-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-coturn-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-coturn-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-coturn-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-lesavka-volume","attributes":{"title":"lesavka logs","visState":"{\"title\":\"lesavka 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} {"type":"visualization","id":"logs-lesavka-errors","attributes":{"title":"lesavka errors","visState":"{\"title\":\"lesavka errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(_HOSTNAME: \\\"titan-jh\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} {"type":"visualization","id":"logs-lesavka-top-units","attributes":{"title":"Top units","visState":"{\"title\":\"Top 
units\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"_SYSTEMD_UNIT.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} {"type":"search","id":"logs-lesavka-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","_HOSTNAME","_SYSTEMD_UNIT","MESSAGE"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} {"type":"search","id":"logs-lesavka-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","_HOSTNAME","_SYSTEMD_UNIT","MESSAGE"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(_HOSTNAME: \\\"titan-jh\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : 
\\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} - {"type":"dashboard","id":"logs-lesavka","attributes":{"title":"lesavka Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":8,\"i\":\"1\"},\"id\":\"logs-lesavka-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":8,\"w\":24,\"h\":6,\"i\":\"2\"},\"id\":\"logs-lesavka-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":14,\"w\":12,\"h\":8,\"i\":\"3\"},\"id\":\"logs-lesavka-top-units\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":12,\"y\":14,\"w\":12,\"h\":8,\"i\":\"4\"},\"id\":\"logs-lesavka-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":22,\"w\":24,\"h\":8,\"i\":\"5\"},\"id\":\"logs-lesavka-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-lesavka","attributes":{"title":"lesavka 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-lesavka-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-lesavka-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-lesavka-top-units\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-lesavka-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":48,\"h\":14,\"i\":\"5\"},\"id\":\"logs-lesavka-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} diff --git a/services/logging/opensearch-dashboards-setup-job.yaml b/services/logging/opensearch-dashboards-setup-job.yaml index 91f396c..ee0bf7b 100644 --- a/services/logging/opensearch-dashboards-setup-job.yaml +++ b/services/logging/opensearch-dashboards-setup-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: opensearch-dashboards-setup-2 + name: opensearch-dashboards-setup-3 namespace: logging spec: backoffLimit: 3 @@ -50,14 +50,28 @@ spec: exit 1 fi - curl -sS -X POST "${OSD_URL}/api/saved_objects/_import?overwrite=true" \ + import_code="$(curl -sS -o /tmp/import.json -w "%{http_code}" -X POST \ + "${OSD_URL}/api/saved_objects/_import?overwrite=true" \ -H 'osd-xsrf: true' \ - -F file=@/config/objects.ndjson >/dev/null + -F 
file=@/config/objects.ndjson)" - curl -sS -X POST "${OSD_URL}/api/opensearch-dashboards/settings" \ + if [ "${import_code}" != "200" ]; then + echo "Saved object import failed with status ${import_code}:" >&2 + cat /tmp/import.json >&2 + exit 1 + fi + + settings_code="$(curl -sS -o /tmp/settings.json -w "%{http_code}" -X POST \ + "${OSD_URL}/api/opensearch-dashboards/settings" \ -H 'Content-Type: application/json' \ -H 'osd-xsrf: true' \ - -d '{"changes":{"defaultIndex":"kube-logs"}}' >/dev/null + -d '{"changes":{"defaultIndex":"kube-logs"}}')" + + if [ "${settings_code}" != "200" ]; then + echo "Default index update failed with status ${settings_code}:" >&2 + cat /tmp/settings.json >&2 + exit 1 + fi volumeMounts: - name: objects mountPath: /config -- 2.47.2 From 5072cd0b5a39098734b755466d56a7eaad9abe3b Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 10 Jan 2026 02:04:39 +0000 Subject: [PATCH 622/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 2a717a3..d8c31c1 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-84 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-86 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From c531c206c5292343addb1a27c40080c65436cb0f Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 10 Jan 2026 02:05:39 +0000 Subject: [PATCH 623/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 
services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 3a8ac2f..7b855ff 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-84 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-86 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 1c1e1a9..2d35f90 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-84 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-86 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From b3e03623bdee0bd1a8b7969e10b39fa0644c70fc Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 9 Jan 2026 23:27:07 -0300 Subject: [PATCH 624/684] logging: seed OpenSearch observability --- scripts/dashboards_render_logs.py | 134 +- scripts/logging_render_observability.py | 313 +++++ services/logging/dashboards/logs.ndjson | 122 +- services/logging/kustomization.yaml | 2 + .../logging/observability/applications.json | 135 +++ .../logging/observability/saved_queries.json | 758 ++++++++++++ .../observability/saved_visualizations.json | 173 
+++ .../opensearch-dashboards-objects.yaml | 122 +- .../opensearch-dashboards-setup-job.yaml | 2 +- .../opensearch-observability-objects.yaml | 1077 +++++++++++++++++ .../opensearch-observability-setup-job.yaml | 193 +++ 11 files changed, 2819 insertions(+), 212 deletions(-) create mode 100755 scripts/logging_render_observability.py create mode 100644 services/logging/observability/applications.json create mode 100644 services/logging/observability/saved_queries.json create mode 100644 services/logging/observability/saved_visualizations.json create mode 100644 services/logging/opensearch-observability-objects.yaml create mode 100644 services/logging/opensearch-observability-setup-job.yaml diff --git a/scripts/dashboards_render_logs.py b/scripts/dashboards_render_logs.py index 9284b93..48b592d 100755 --- a/scripts/dashboards_render_logs.py +++ b/scripts/dashboards_render_logs.py @@ -35,11 +35,12 @@ data: DASHBOARD_VERSION = "7.10.0" GRID_COLUMNS = 48 -GRID_HALF = GRID_COLUMNS // 2 H_CHART = 10 H_ERRORS = 8 -H_TABLE = 12 -H_SEARCH = 14 +H_TABLE = 16 +H_SEARCH = 18 +TABLE_SIZE = 15 +TABLE_PER_PAGE = 15 ERROR_TERMS = ("*error*", "*exception*", "*fail*") @@ -131,11 +132,11 @@ def table_vis(object_id: str, title: str, field: str, query: str, index_id: str) "enabled": True, "type": "terms", "schema": "bucket", - "params": {"field": field, "size": 10, "order": "desc", "orderBy": "1"}, + "params": {"field": field, "size": TABLE_SIZE, "order": "desc", "orderBy": "1"}, }, ], "params": { - "perPage": 10, + "perPage": TABLE_PER_PAGE, "showPartialRows": False, "showMetricsAtAllLevels": False, "sort": {"columnIndex": 1, "direction": "desc"}, @@ -202,6 +203,15 @@ def panel(panel_id: str, panel_type: str, grid_data: dict, index: int) -> dict: } +def full_width_panels(specs: list[tuple[str, str, int]]) -> list[dict]: + panels = [] + y = 0 + for index, (panel_id, panel_type, height) in enumerate(specs, start=1): + panels.append(panel(panel_id, panel_type, grid(0, y, GRID_COLUMNS, 
height, index), index)) + y += height + return panels + + def dashboard_object(object_id: str, title: str, panels: list[dict]) -> dict: return { "type": "dashboard", @@ -240,28 +250,15 @@ def app_dashboard_objects(app: AppSpec) -> list[dict]: app.index_id, ) ) - panels = [ - panel(f"{prefix}-volume", "visualization", grid(0, 0, GRID_COLUMNS, H_CHART, 1), 1), - panel(f"{prefix}-errors", "visualization", grid(0, H_CHART, GRID_COLUMNS, H_ERRORS, 2), 2), - panel( - f"{prefix}-top-units", - "visualization", - grid(0, H_CHART + H_ERRORS, GRID_HALF, H_TABLE, 3), - 3, - ), - panel( - f"{prefix}-recent", - "search", - grid(GRID_HALF, H_CHART + H_ERRORS, GRID_HALF, H_TABLE, 4), - 4, - ), - panel( - f"{prefix}-recent-errors", - "search", - grid(0, H_CHART + H_ERRORS + H_TABLE, GRID_COLUMNS, H_SEARCH, 5), - 5, - ), - ] + panels = full_width_panels( + [ + (f"{prefix}-volume", "visualization", H_CHART), + (f"{prefix}-errors", "visualization", H_ERRORS), + (f"{prefix}-top-units", "visualization", H_TABLE), + (f"{prefix}-recent", "search", H_SEARCH), + (f"{prefix}-recent-errors", "search", H_SEARCH), + ] + ) objects.append(dashboard_object(prefix, f"{app.title} Logs", panels)) return objects @@ -282,34 +279,16 @@ def app_dashboard_objects(app: AppSpec) -> list[dict]: app.index_id, ) ) - panels = [ - panel(f"{prefix}-volume", "visualization", grid(0, 0, GRID_COLUMNS, H_CHART, 1), 1), - panel(f"{prefix}-errors", "visualization", grid(0, H_CHART, GRID_COLUMNS, H_ERRORS, 2), 2), - panel( - f"{prefix}-top-pods", - "visualization", - grid(0, H_CHART + H_ERRORS, GRID_HALF, H_TABLE, 3), - 3, - ), - panel( - f"{prefix}-top-containers", - "visualization", - grid(GRID_HALF, H_CHART + H_ERRORS, GRID_HALF, H_TABLE, 4), - 4, - ), - panel( - f"{prefix}-recent", - "search", - grid(0, H_CHART + H_ERRORS + H_TABLE, GRID_HALF, H_SEARCH, 5), - 5, - ), - panel( - f"{prefix}-recent-errors", - "search", - grid(GRID_HALF, H_CHART + H_ERRORS + H_TABLE, GRID_HALF, H_SEARCH, 6), - 6, - ), - ] + panels = 
full_width_panels( + [ + (f"{prefix}-volume", "visualization", H_CHART), + (f"{prefix}-errors", "visualization", H_ERRORS), + (f"{prefix}-top-pods", "visualization", H_TABLE), + (f"{prefix}-top-containers", "visualization", H_TABLE), + (f"{prefix}-recent", "search", H_SEARCH), + (f"{prefix}-recent-errors", "search", H_SEARCH), + ] + ) objects.append(dashboard_object(prefix, f"{app.title} Logs", panels)) return objects @@ -355,40 +334,17 @@ def overview_objects() -> list[dict]: "kube-logs", ) ) - panels = [ - panel("logs-overview-volume", "visualization", grid(0, 0, GRID_COLUMNS, H_CHART, 1), 1), - panel("logs-overview-errors", "visualization", grid(0, H_CHART, GRID_COLUMNS, H_ERRORS, 2), 2), - panel( - "logs-overview-top-ns", - "visualization", - grid(0, H_CHART + H_ERRORS, GRID_HALF, H_TABLE, 3), - 3, - ), - panel( - "logs-overview-top-error-ns", - "visualization", - grid(GRID_HALF, H_CHART + H_ERRORS, GRID_HALF, H_TABLE, 4), - 4, - ), - panel( - "logs-overview-top-pods", - "visualization", - grid(0, H_CHART + H_ERRORS + H_TABLE, GRID_HALF, H_TABLE, 5), - 5, - ), - panel( - "logs-overview-top-nodes", - "visualization", - grid(GRID_HALF, H_CHART + H_ERRORS + H_TABLE, GRID_HALF, H_TABLE, 6), - 6, - ), - panel( - "logs-overview-recent-errors", - "search", - grid(0, H_CHART + H_ERRORS + (H_TABLE * 2), GRID_COLUMNS, H_SEARCH, 7), - 7, - ), - ] + panels = full_width_panels( + [ + ("logs-overview-volume", "visualization", H_CHART), + ("logs-overview-errors", "visualization", H_ERRORS), + ("logs-overview-top-ns", "visualization", H_TABLE), + ("logs-overview-top-error-ns", "visualization", H_TABLE), + ("logs-overview-top-pods", "visualization", H_TABLE), + ("logs-overview-top-nodes", "visualization", H_TABLE), + ("logs-overview-recent-errors", "search", H_SEARCH), + ] + ) objects.append(dashboard_object("logs-overview", "Atlas Logs Overview", panels)) return objects diff --git a/scripts/logging_render_observability.py b/scripts/logging_render_observability.py new file mode 
100755 index 0000000..bd3455d --- /dev/null +++ b/scripts/logging_render_observability.py @@ -0,0 +1,313 @@ +#!/usr/bin/env python3 +"""Generate OpenSearch Observability seed objects and render them into ConfigMaps. + +Usage: + scripts/logging_render_observability.py --build # rebuild JSON + ConfigMap + scripts/logging_render_observability.py # re-render ConfigMap from JSON +""" + +from __future__ import annotations + +import argparse +import json +import textwrap +from dataclasses import dataclass +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +OBS_DIR = ROOT / "services" / "logging" / "observability" +APPS_PATH = OBS_DIR / "applications.json" +QUERIES_PATH = OBS_DIR / "saved_queries.json" +VIS_PATH = OBS_DIR / "saved_visualizations.json" +CONFIG_PATH = ROOT / "services" / "logging" / "opensearch-observability-objects.yaml" + +CONFIG_TEMPLATE = textwrap.dedent( + """# {relative_path} +# Generated by scripts/logging_render_observability.py --build +apiVersion: v1 +kind: ConfigMap +metadata: + name: opensearch-observability-objects + namespace: logging +data: + applications.json: | +{applications} + saved_queries.json: | +{queries} + saved_visualizations.json: | +{visualizations} +""" +) + +DEFAULT_RANGE = {"start": "now-24h", "end": "now", "text": ""} +DEFAULT_TIMESTAMP = {"name": "@timestamp", "type": "timestamp"} +DEFAULT_FIELDS = {"text": "", "tokens": []} + + +@dataclass(frozen=True) +class AppSpec: + name: str + base_query: str + kind: str = "kube" + description: str = "" + + +@dataclass(frozen=True) +class QuerySpec: + name: str + query: str + description: str = "" + + +@dataclass(frozen=True) +class VisualizationSpec: + name: str + query: str + vis_type: str + description: str = "" + + +def source_query(index: str, where: str | None = None) -> str: + query = f"source = {index}" + if where: + query += f" | where {where}" + return query + + +def error_filter(fields: list[str]) -> str: + parts = [f"match({field}, 
'error|exception|fail')" for field in fields] + return " or ".join(parts) + + +def saved_query(spec: QuerySpec) -> dict: + return { + "name": spec.name, + "description": spec.description, + "query": spec.query, + "selected_date_range": DEFAULT_RANGE, + "selected_timestamp": DEFAULT_TIMESTAMP, + "selected_fields": DEFAULT_FIELDS, + } + + +def saved_visualization(spec: VisualizationSpec) -> dict: + return { + "name": spec.name, + "description": spec.description, + "query": spec.query, + "type": spec.vis_type, + "selected_date_range": DEFAULT_RANGE, + "selected_timestamp": DEFAULT_TIMESTAMP, + "selected_fields": DEFAULT_FIELDS, + } + + +def build_objects() -> tuple[list[dict], list[dict], list[dict]]: + kube_error = error_filter(["log", "message"]) + journald_error = error_filter(["MESSAGE"]) + + apps = [ + AppSpec("bstein-dev-home", source_query("kube-*", "kubernetes.namespace_name = 'bstein-dev-home'")), + AppSpec( + "pegasus", + source_query( + "kube-*", + "kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'pegasus'", + ), + ), + AppSpec( + "jellyfin", + source_query( + "kube-*", + "kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'jellyfin'", + ), + ), + AppSpec("vaultwarden", source_query("kube-*", "kubernetes.namespace_name = 'vaultwarden'")), + AppSpec("mailu", source_query("kube-*", "kubernetes.namespace_name = 'mailu-mailserver'")), + AppSpec("nextcloud", source_query("kube-*", "kubernetes.namespace_name = 'nextcloud'")), + AppSpec("gitea", source_query("kube-*", "kubernetes.namespace_name = 'gitea'")), + AppSpec("jenkins", source_query("kube-*", "kubernetes.namespace_name = 'jenkins'")), + AppSpec("harbor", source_query("kube-*", "kubernetes.namespace_name = 'harbor'")), + AppSpec("vault", source_query("kube-*", "kubernetes.namespace_name = 'vault'")), + AppSpec("keycloak", source_query("kube-*", "kubernetes.namespace_name = 'sso'")), + AppSpec("flux-system", source_query("kube-*", "kubernetes.namespace_name = 
'flux-system'")), + AppSpec("comms", source_query("kube-*", "kubernetes.namespace_name = 'comms'")), + AppSpec( + "element-web", + source_query( + "kube-*", + "kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'element-web'", + ), + ), + AppSpec( + "element-call", + source_query( + "kube-*", + "kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'element-call'", + ), + ), + AppSpec( + "matrix-synapse", + source_query( + "kube-*", + "kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'synapse'", + ), + ), + AppSpec( + "livekit", + source_query( + "kube-*", + "kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'livekit'", + ), + ), + AppSpec( + "coturn", + source_query( + "kube-*", + "kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'coturn'", + ), + ), + AppSpec( + "lesavka", + source_query("journald-*", "_HOSTNAME = 'titan-jh'"), + kind="journald", + ), + ] + + applications = [ + { + "name": app.name, + "description": app.description, + "baseQuery": app.base_query, + "servicesEntities": [], + "traceGroups": [], + } + for app in apps + ] + + queries = [ + saved_query(QuerySpec("kube logs", source_query("kube-*"))), + saved_query(QuerySpec("kube errors", f"{source_query('kube-*')} | where {kube_error}")), + saved_query(QuerySpec("journald logs", source_query("journald-*"))), + saved_query(QuerySpec("journald errors", f"{source_query('journald-*')} | where {journald_error}")), + ] + + for app in apps: + query_base = app.base_query + error_clause = journald_error if app.kind == "journald" else kube_error + queries.append(saved_query(QuerySpec(f"{app.name} logs", query_base))) + queries.append(saved_query(QuerySpec(f"{app.name} errors", f"{query_base} | where {error_clause}"))) + + visualizations = [ + saved_visualization( + VisualizationSpec( + "[Kube] Logs per hour", + "source = kube-* | stats count() as log_count by span(`@timestamp`, 1h)", + "line", + ) + ), + saved_visualization( + 
VisualizationSpec( + "[Kube] Errors per hour", + f"source = kube-* | where {kube_error} | stats count() as error_count by span(`@timestamp`, 1h)", + "line", + ) + ), + saved_visualization( + VisualizationSpec( + "[Kube] Top namespaces", + "source = kube-* | stats count() as log_count by kubernetes.namespace_name | sort - log_count", + "bar", + ) + ), + saved_visualization( + VisualizationSpec( + "[Kube] Top error namespaces", + f"source = kube-* | where {kube_error} | stats count() as error_count by kubernetes.namespace_name | sort - error_count", + "bar", + ) + ), + saved_visualization( + VisualizationSpec( + "[Kube] Top pods", + "source = kube-* | stats count() as log_count by kubernetes.pod_name | sort - log_count", + "bar", + ) + ), + saved_visualization( + VisualizationSpec( + "[Kube] Top error pods", + f"source = kube-* | where {kube_error} | stats count() as error_count by kubernetes.pod_name | sort - error_count", + "bar", + ) + ), + saved_visualization( + VisualizationSpec( + "[Kube] Top nodes", + "source = kube-* | stats count() as log_count by kubernetes.node_name | sort - log_count", + "bar", + ) + ), + saved_visualization( + VisualizationSpec( + "[Journald] Top units", + "source = journald-* | stats count() as log_count by _SYSTEMD_UNIT | sort - log_count", + "bar", + ) + ), + saved_visualization( + VisualizationSpec( + "[Journald] Top error units", + f"source = journald-* | where {journald_error} | stats count() as error_count by _SYSTEMD_UNIT | sort - error_count", + "bar", + ) + ), + ] + + return applications, queries, visualizations + + +def write_json(payload: list[dict], path: Path) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(payload, indent=2) + "\n") + + +def render_configmap(apps_path: Path, queries_path: Path, vis_path: Path, output_path: Path) -> None: + relative_path = output_path.relative_to(ROOT) + applications = indent_payload(apps_path) + queries = indent_payload(queries_path) + visualizations 
= indent_payload(vis_path) + output_path.write_text( + CONFIG_TEMPLATE.format( + relative_path=relative_path, + applications=applications, + queries=queries, + visualizations=visualizations, + ) + ) + + +def indent_payload(path: Path) -> str: + lines = path.read_text().splitlines() + return "\n".join(" " + line for line in lines) + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("--build", action="store_true", help="Regenerate JSON payloads and ConfigMap") + args = parser.parse_args() + + if args.build: + applications, queries, visualizations = build_objects() + write_json(applications, APPS_PATH) + write_json(queries, QUERIES_PATH) + write_json(visualizations, VIS_PATH) + + if not (APPS_PATH.exists() and QUERIES_PATH.exists() and VIS_PATH.exists()): + raise SystemExit("Missing observability JSON payloads. Run with --build first.") + + render_configmap(APPS_PATH, QUERIES_PATH, VIS_PATH, CONFIG_PATH) + + +if __name__ == "__main__": + main() diff --git a/services/logging/dashboards/logs.ndjson b/services/logging/dashboards/logs.ndjson index 56f0e5e..a3620c4 100644 --- a/services/logging/dashboards/logs.ndjson +++ b/services/logging/dashboards/logs.ndjson @@ -2,141 +2,141 @@ {"type":"index-pattern","id":"journald-logs","attributes":{"title":"journald-*","timeFieldName":"@timestamp"}} {"type":"visualization","id":"logs-overview-volume","attributes":{"title":"Logs per minute","visState":"{\"title\":\"Logs per 
minute\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-overview-errors","attributes":{"title":"Errors per minute","visState":"{\"title\":\"Errors per minute\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-overview-top-ns","attributes":{"title":"Top namespaces","visState":"{\"title\":\"Top 
namespaces\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.namespace_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-overview-top-error-ns","attributes":{"title":"Top error namespaces","visState":"{\"title\":\"Top error namespaces\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.namespace_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-overview-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-overview-top-nodes","attributes":{"title":"Top nodes","visState":"{\"title\":\"Top nodes\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.node_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-overview-top-ns","attributes":{"title":"Top namespaces","visState":"{\"title\":\"Top 
namespaces\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.namespace_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-overview-top-error-ns","attributes":{"title":"Top error namespaces","visState":"{\"title\":\"Top error namespaces\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.namespace_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-overview-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-overview-top-nodes","attributes":{"title":"Top nodes","visState":"{\"title\":\"Top nodes\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.node_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-overview-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.namespace_name","kubernetes.pod_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"((log : 
\\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-overview","attributes":{"title":"Atlas Logs Overview","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-overview-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-overview-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-overview-top-ns\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-overview-top-error-ns\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":12,\"i\":\"5\"},\"id\":\"logs-overview-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":12,\"i\":\"6\"},\"id\":\"logs-overview-top-nodes\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"7\",\"gridData\":{\"x\":0,\"y\":42,\"w\":48,\"h\":14,\"i\":\"7\"},\"id\":\"logs-overview-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": 
{\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-overview","attributes":{"title":"Atlas Logs Overview","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-overview-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-overview-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-overview-top-ns\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-overview-top-error-ns\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":16,\"i\":\"5\"},\"id\":\"logs-overview-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":66,\"w\":48,\"h\":16,\"i\":\"6\"},\"id\":\"logs-overview-top-nodes\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"7\",\"gridData\":{\"x\":0,\"y\":82,\"w\":48,\"h\":18,\"i\":\"7\"},\"id\":\"logs-overview-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-bstein-dev-home-volume","attributes":{"title":"bstein-dev-home logs","visState":"{\"title\":\"bstein-dev-home 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-bstein-dev-home-errors","attributes":{"title":"bstein-dev-home errors","visState":"{\"title\":\"bstein-dev-home errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"bstein-dev-home\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-bstein-dev-home-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-bstein-dev-home-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-bstein-dev-home-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-bstein-dev-home-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-bstein-dev-home-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-bstein-dev-home-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"bstein-dev-home\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-bstein-dev-home","attributes":{"title":"bstein-dev-home 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-bstein-dev-home-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-bstein-dev-home-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-bstein-dev-home-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-bstein-dev-home-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-bstein-dev-home-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-bstein-dev-home-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-bstein-dev-home","attributes":{"title":"bstein-dev-home 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-bstein-dev-home-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-bstein-dev-home-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-bstein-dev-home-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-bstein-dev-home-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-bstein-dev-home-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-bstein-dev-home-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-pegasus-volume","attributes":{"title":"pegasus logs","visState":"{\"title\":\"pegasus 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-pegasus-errors","attributes":{"title":"pegasus errors","visState":"{\"title\":\"pegasus errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-pegasus-top-pods","attributes":{"title":"Top 
pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-pegasus-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-pegasus-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-pegasus-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-pegasus-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-pegasus-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-pegasus","attributes":{"title":"pegasus 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-pegasus-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-pegasus-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-pegasus-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-pegasus-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-pegasus-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-pegasus-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-pegasus","attributes":{"title":"pegasus 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-pegasus-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-pegasus-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-pegasus-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-pegasus-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-pegasus-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-pegasus-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-jellyfin-volume","attributes":{"title":"jellyfin logs","visState":"{\"title\":\"jellyfin 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-jellyfin-errors","attributes":{"title":"jellyfin errors","visState":"{\"title\":\"jellyfin errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
-{"type":"visualization","id":"logs-jellyfin-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-jellyfin-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
+{"type":"visualization","id":"logs-jellyfin-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-jellyfin-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
{"type":"search","id":"logs-jellyfin-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-jellyfin-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-jellyfin","attributes":{"title":"jellyfin 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-jellyfin-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-jellyfin-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-jellyfin-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-jellyfin-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-jellyfin-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-jellyfin-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-jellyfin","attributes":{"title":"jellyfin 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-jellyfin-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-jellyfin-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-jellyfin-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-jellyfin-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-jellyfin-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-jellyfin-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-vaultwarden-volume","attributes":{"title":"vaultwarden logs","visState":"{\"title\":\"vaultwarden 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-vaultwarden-errors","attributes":{"title":"vaultwarden errors","visState":"{\"title\":\"vaultwarden errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vaultwarden\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-vaultwarden-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-vaultwarden-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-vaultwarden-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-vaultwarden-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-vaultwarden-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-vaultwarden-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vaultwarden\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-vaultwarden","attributes":{"title":"vaultwarden 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-vaultwarden-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-vaultwarden-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-vaultwarden-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-vaultwarden-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-vaultwarden-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-vaultwarden-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-vaultwarden","attributes":{"title":"vaultwarden 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-vaultwarden-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-vaultwarden-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-vaultwarden-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-vaultwarden-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-vaultwarden-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-vaultwarden-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-mailu-volume","attributes":{"title":"mailu logs","visState":"{\"title\":\"mailu 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-mailu-errors","attributes":{"title":"mailu errors","visState":"{\"title\":\"mailu errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"mailu-mailserver\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-mailu-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-mailu-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-mailu-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-mailu-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-mailu-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-mailu-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"mailu-mailserver\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-mailu","attributes":{"title":"mailu 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-mailu-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-mailu-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-mailu-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-mailu-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-mailu-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-mailu-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-mailu","attributes":{"title":"mailu 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-mailu-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-mailu-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-mailu-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-mailu-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-mailu-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-mailu-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-nextcloud-volume","attributes":{"title":"nextcloud logs","visState":"{\"title\":\"nextcloud 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-nextcloud-errors","attributes":{"title":"nextcloud errors","visState":"{\"title\":\"nextcloud errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"nextcloud\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-nextcloud-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-nextcloud-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-nextcloud-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-nextcloud-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-nextcloud-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-nextcloud-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"nextcloud\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-nextcloud","attributes":{"title":"nextcloud 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-nextcloud-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-nextcloud-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-nextcloud-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-nextcloud-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-nextcloud-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-nextcloud-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-nextcloud","attributes":{"title":"nextcloud 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-nextcloud-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-nextcloud-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-nextcloud-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-nextcloud-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-nextcloud-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-nextcloud-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-gitea-volume","attributes":{"title":"gitea logs","visState":"{\"title\":\"gitea 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-gitea-errors","attributes":{"title":"gitea errors","visState":"{\"title\":\"gitea errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"gitea\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-gitea-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-gitea-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-gitea-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-gitea-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-gitea-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-gitea-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"gitea\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-gitea","attributes":{"title":"gitea 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-gitea-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-gitea-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-gitea-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-gitea-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-gitea-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-gitea-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-gitea","attributes":{"title":"gitea 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-gitea-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-gitea-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-gitea-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-gitea-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-gitea-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-gitea-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-jenkins-volume","attributes":{"title":"jenkins logs","visState":"{\"title\":\"jenkins 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-jenkins-errors","attributes":{"title":"jenkins errors","visState":"{\"title\":\"jenkins errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jenkins\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-jenkins-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-jenkins-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-jenkins-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-jenkins-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-jenkins-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-jenkins-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jenkins\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-jenkins","attributes":{"title":"jenkins 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-jenkins-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-jenkins-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-jenkins-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-jenkins-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-jenkins-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-jenkins-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-jenkins","attributes":{"title":"jenkins 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-jenkins-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-jenkins-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-jenkins-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-jenkins-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-jenkins-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-jenkins-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-harbor-volume","attributes":{"title":"harbor logs","visState":"{\"title\":\"harbor 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-harbor-errors","attributes":{"title":"harbor errors","visState":"{\"title\":\"harbor errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"harbor\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-harbor-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-harbor-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-harbor-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-harbor-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-harbor-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-harbor-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"harbor\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-harbor","attributes":{"title":"harbor 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-harbor-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-harbor-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-harbor-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-harbor-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-harbor-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-harbor-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-harbor","attributes":{"title":"harbor 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-harbor-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-harbor-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-harbor-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-harbor-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-harbor-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-harbor-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-vault-volume","attributes":{"title":"vault logs","visState":"{\"title\":\"vault 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-vault-errors","attributes":{"title":"vault errors","visState":"{\"title\":\"vault errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vault\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-vault-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-vault-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-vault-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-vault-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-vault-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-vault-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vault\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-vault","attributes":{"title":"vault 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-vault-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-vault-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-vault-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-vault-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-vault-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-vault-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-vault","attributes":{"title":"vault 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-vault-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-vault-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-vault-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-vault-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-vault-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-vault-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-keycloak-volume","attributes":{"title":"keycloak logs","visState":"{\"title\":\"keycloak 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-keycloak-errors","attributes":{"title":"keycloak errors","visState":"{\"title\":\"keycloak errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"sso\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-keycloak-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-keycloak-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-keycloak-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-keycloak-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-keycloak-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-keycloak-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"sso\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-keycloak","attributes":{"title":"keycloak 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-keycloak-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-keycloak-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-keycloak-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-keycloak-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-keycloak-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-keycloak-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-keycloak","attributes":{"title":"keycloak 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-keycloak-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-keycloak-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-keycloak-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-keycloak-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-keycloak-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-keycloak-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-flux-system-volume","attributes":{"title":"flux-system logs","visState":"{\"title\":\"flux-system 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-flux-system-errors","attributes":{"title":"flux-system errors","visState":"{\"title\":\"flux-system errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"flux-system\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-flux-system-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-flux-system-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-flux-system-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-flux-system-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-flux-system-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-flux-system-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"flux-system\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-flux-system","attributes":{"title":"flux-system 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-flux-system-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-flux-system-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-flux-system-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-flux-system-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-flux-system-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-flux-system-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-flux-system","attributes":{"title":"flux-system 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-flux-system-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-flux-system-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-flux-system-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-flux-system-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-flux-system-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-flux-system-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-comms-volume","attributes":{"title":"comms logs","visState":"{\"title\":\"comms 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-comms-errors","attributes":{"title":"comms errors","visState":"{\"title\":\"comms errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-comms-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-comms-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-comms-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-comms-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-comms-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-comms-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-comms","attributes":{"title":"comms 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-comms-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-comms-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-comms-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-comms-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-comms-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-comms-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-comms","attributes":{"title":"comms 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-comms-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-comms-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-comms-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-comms-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-comms-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-comms-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-element-web-volume","attributes":{"title":"element-web logs","visState":"{\"title\":\"element-web 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-element-web-errors","attributes":{"title":"element-web errors","visState":"{\"title\":\"element-web errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
-{"type":"visualization","id":"logs-element-web-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-element-web-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
+{"type":"visualization","id":"logs-element-web-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-element-web-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
{"type":"search","id":"logs-element-web-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-element-web-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-element-web","attributes":{"title":"element-web 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-element-web-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-element-web-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-element-web-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-element-web-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-element-web-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-element-web-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-element-web","attributes":{"title":"element-web 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-element-web-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-element-web-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-element-web-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-element-web-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-element-web-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-element-web-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-element-call-volume","attributes":{"title":"element-call logs","visState":"{\"title\":\"element-call 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-element-call-errors","attributes":{"title":"element-call errors","visState":"{\"title\":\"element-call errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
-{"type":"visualization","id":"logs-element-call-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-element-call-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
+{"type":"visualization","id":"logs-element-call-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-element-call-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
{"type":"search","id":"logs-element-call-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-element-call-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-element-call","attributes":{"title":"element-call 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-element-call-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-element-call-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-element-call-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-element-call-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-element-call-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-element-call-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-element-call","attributes":{"title":"element-call 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-element-call-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-element-call-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-element-call-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-element-call-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-element-call-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-element-call-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-matrix-synapse-volume","attributes":{"title":"matrix-synapse logs","visState":"{\"title\":\"matrix-synapse 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-matrix-synapse-errors","attributes":{"title":"matrix-synapse errors","visState":"{\"title\":\"matrix-synapse errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
-{"type":"visualization","id":"logs-matrix-synapse-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-matrix-synapse-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
+{"type":"visualization","id":"logs-matrix-synapse-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-matrix-synapse-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
{"type":"search","id":"logs-matrix-synapse-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-matrix-synapse-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-matrix-synapse","attributes":{"title":"matrix-synapse 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-matrix-synapse-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-matrix-synapse-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-matrix-synapse-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-matrix-synapse-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-matrix-synapse-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-matrix-synapse-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-matrix-synapse","attributes":{"title":"matrix-synapse 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-matrix-synapse-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-matrix-synapse-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-matrix-synapse-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-matrix-synapse-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-matrix-synapse-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-matrix-synapse-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-livekit-volume","attributes":{"title":"livekit logs","visState":"{\"title\":\"livekit 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-livekit-errors","attributes":{"title":"livekit errors","visState":"{\"title\":\"livekit errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-livekit-top-pods","attributes":{"title":"Top 
pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-livekit-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-livekit-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-livekit-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-livekit-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-livekit-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-livekit","attributes":{"title":"livekit 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-livekit-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-livekit-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-livekit-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-livekit-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-livekit-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-livekit-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-livekit","attributes":{"title":"livekit 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-livekit-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-livekit-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-livekit-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-livekit-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-livekit-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-livekit-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-coturn-volume","attributes":{"title":"coturn logs","visState":"{\"title\":\"coturn 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-coturn-errors","attributes":{"title":"coturn errors","visState":"{\"title\":\"coturn errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-coturn-top-pods","attributes":{"title":"Top 
pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"visualization","id":"logs-coturn-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-coturn-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} +{"type":"visualization","id":"logs-coturn-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-coturn-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-coturn-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} -{"type":"dashboard","id":"logs-coturn","attributes":{"title":"coturn 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-coturn-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-coturn-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-coturn-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-coturn-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-coturn-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-coturn-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-coturn","attributes":{"title":"coturn 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-coturn-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-coturn-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-coturn-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-coturn-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-coturn-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-coturn-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-lesavka-volume","attributes":{"title":"lesavka logs","visState":"{\"title\":\"lesavka 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} {"type":"visualization","id":"logs-lesavka-errors","attributes":{"title":"lesavka errors","visState":"{\"title\":\"lesavka errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(_HOSTNAME: \\\"titan-jh\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} -{"type":"visualization","id":"logs-lesavka-top-units","attributes":{"title":"Top units","visState":"{\"title\":\"Top 
units\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"_SYSTEMD_UNIT.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} +{"type":"visualization","id":"logs-lesavka-top-units","attributes":{"title":"Top units","visState":"{\"title\":\"Top units\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"_SYSTEMD_UNIT.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} {"type":"search","id":"logs-lesavka-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","_HOSTNAME","_SYSTEMD_UNIT","MESSAGE"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: 
\\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} {"type":"search","id":"logs-lesavka-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","_HOSTNAME","_SYSTEMD_UNIT","MESSAGE"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(_HOSTNAME: \\\"titan-jh\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} -{"type":"dashboard","id":"logs-lesavka","attributes":{"title":"lesavka Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-lesavka-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-lesavka-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-lesavka-top-units\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-lesavka-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":48,\"h\":14,\"i\":\"5\"},\"id\":\"logs-lesavka-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON
":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} +{"type":"dashboard","id":"logs-lesavka","attributes":{"title":"lesavka Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-lesavka-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-lesavka-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-lesavka-top-units\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":18,\"i\":\"4\"},\"id\":\"logs-lesavka-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":52,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-lesavka-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} diff --git a/services/logging/kustomization.yaml b/services/logging/kustomization.yaml index 5043ccc..94fc1a8 100644 --- a/services/logging/kustomization.yaml +++ b/services/logging/kustomization.yaml @@ -7,8 +7,10 @@ resources: - opensearch-helmrelease.yaml - opensearch-dashboards-helmrelease.yaml - opensearch-dashboards-objects.yaml + - opensearch-observability-objects.yaml - opensearch-ism-job.yaml - opensearch-dashboards-setup-job.yaml + - opensearch-observability-setup-job.yaml - 
opensearch-prune-cronjob.yaml - fluent-bit-helmrelease.yaml - oauth2-proxy.yaml diff --git a/services/logging/observability/applications.json b/services/logging/observability/applications.json new file mode 100644 index 0000000..fc29e5a --- /dev/null +++ b/services/logging/observability/applications.json @@ -0,0 +1,135 @@ +[ + { + "name": "bstein-dev-home", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'bstein-dev-home'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "pegasus", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'pegasus'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "jellyfin", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'jellyfin'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "vaultwarden", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'vaultwarden'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "mailu", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'mailu-mailserver'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "nextcloud", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'nextcloud'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "gitea", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'gitea'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "jenkins", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'jenkins'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "harbor", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'harbor'", + "servicesEntities": 
[], + "traceGroups": [] + }, + { + "name": "vault", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'vault'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "keycloak", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'sso'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "flux-system", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'flux-system'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "comms", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "element-web", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'element-web'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "element-call", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'element-call'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "matrix-synapse", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'synapse'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "livekit", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'livekit'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "coturn", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'coturn'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "lesavka", + "description": "", + "baseQuery": "source = journald-* | where _HOSTNAME = 'titan-jh'", + "servicesEntities": [], + "traceGroups": [] + } +] 
diff --git a/services/logging/observability/saved_queries.json b/services/logging/observability/saved_queries.json new file mode 100644 index 0000000..072e92b --- /dev/null +++ b/services/logging/observability/saved_queries.json @@ -0,0 +1,758 @@ +[ + { + "name": "kube logs", + "description": "", + "query": "source = kube-*", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "kube errors", + "description": "", + "query": "source = kube-* | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "journald logs", + "description": "", + "query": "source = journald-*", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "journald errors", + "description": "", + "query": "source = journald-* | where match(MESSAGE, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "bstein-dev-home logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'bstein-dev-home'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "bstein-dev-home 
errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'bstein-dev-home' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "pegasus logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'pegasus'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "pegasus errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'pegasus' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "jellyfin logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'jellyfin'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "jellyfin errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'jellyfin' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": 
"" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "vaultwarden logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'vaultwarden'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "vaultwarden errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'vaultwarden' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "mailu logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'mailu-mailserver'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "mailu errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'mailu-mailserver' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "nextcloud logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'nextcloud'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": 
"@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "nextcloud errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'nextcloud' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "gitea logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'gitea'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "gitea errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'gitea' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "jenkins logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'jenkins'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "jenkins errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'jenkins' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": 
"@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "harbor logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'harbor'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "harbor errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'harbor' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "vault logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'vault'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "vault errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'vault' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "keycloak logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'sso'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + 
}, + { + "name": "keycloak errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'sso' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "flux-system logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'flux-system'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "flux-system errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'flux-system' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "comms logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "comms errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] 
+ } + }, + { + "name": "element-web logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'element-web'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "element-web errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'element-web' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "element-call logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'element-call'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "element-call errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'element-call' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "matrix-synapse logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'synapse'", + "selected_date_range": { + "start": "now-24h", + "end": 
"now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "matrix-synapse errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'synapse' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "livekit logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'livekit'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "livekit errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'livekit' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "coturn logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'coturn'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "coturn errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 
'comms' and kubernetes.labels.app = 'coturn' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "lesavka logs", + "description": "", + "query": "source = journald-* | where _HOSTNAME = 'titan-jh'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "lesavka errors", + "description": "", + "query": "source = journald-* | where _HOSTNAME = 'titan-jh' | where match(MESSAGE, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + } +] diff --git a/services/logging/observability/saved_visualizations.json b/services/logging/observability/saved_visualizations.json new file mode 100644 index 0000000..8a0171d --- /dev/null +++ b/services/logging/observability/saved_visualizations.json @@ -0,0 +1,173 @@ +[ + { + "name": "[Kube] Logs per hour", + "description": "", + "query": "source = kube-* | stats count() as log_count by span(`@timestamp`, 1h)", + "type": "line", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "[Kube] Errors per hour", + "description": "", + "query": "source = kube-* | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail') | stats count() as error_count by span(`@timestamp`, 1h)", + "type": "line", + 
"selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "[Kube] Top namespaces", + "description": "", + "query": "source = kube-* | stats count() as log_count by kubernetes.namespace_name | sort - log_count", + "type": "bar", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "[Kube] Top error namespaces", + "description": "", + "query": "source = kube-* | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail') | stats count() as error_count by kubernetes.namespace_name | sort - error_count", + "type": "bar", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "[Kube] Top pods", + "description": "", + "query": "source = kube-* | stats count() as log_count by kubernetes.pod_name | sort - log_count", + "type": "bar", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "[Kube] Top error pods", + "description": "", + "query": "source = kube-* | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail') | stats count() as error_count by kubernetes.pod_name | sort - error_count", + "type": "bar", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } 
+ }, + { + "name": "[Kube] Top nodes", + "description": "", + "query": "source = kube-* | stats count() as log_count by kubernetes.node_name | sort - log_count", + "type": "bar", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "[Journald] Top units", + "description": "", + "query": "source = journald-* | stats count() as log_count by _SYSTEMD_UNIT | sort - log_count", + "type": "bar", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "[Journald] Top error units", + "description": "", + "query": "source = journald-* | where match(MESSAGE, 'error|exception|fail') | stats count() as error_count by _SYSTEMD_UNIT | sort - error_count", + "type": "bar", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + } +] diff --git a/services/logging/opensearch-dashboards-objects.yaml b/services/logging/opensearch-dashboards-objects.yaml index 6cbef5b..fafcd33 100644 --- a/services/logging/opensearch-dashboards-objects.yaml +++ b/services/logging/opensearch-dashboards-objects.yaml @@ -11,141 +11,141 @@ data: {"type":"index-pattern","id":"journald-logs","attributes":{"title":"journald-*","timeFieldName":"@timestamp"}} {"type":"visualization","id":"logs-overview-volume","attributes":{"title":"Logs per minute","visState":"{\"title\":\"Logs per 
minute\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-overview-errors","attributes":{"title":"Errors per minute","visState":"{\"title\":\"Errors per minute\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-overview-top-ns","attributes":{"title":"Top namespaces","visState":"{\"title\":\"Top 
namespaces\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.namespace_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-overview-top-error-ns","attributes":{"title":"Top error namespaces","visState":"{\"title\":\"Top error namespaces\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.namespace_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-overview-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-overview-top-nodes","attributes":{"title":"Top nodes","visState":"{\"title\":\"Top nodes\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.node_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-overview-top-ns","attributes":{"title":"Top namespaces","visState":"{\"title\":\"Top 
namespaces\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.namespace_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-overview-top-error-ns","attributes":{"title":"Top error namespaces","visState":"{\"title\":\"Top error namespaces\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.namespace_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-overview-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-overview-top-nodes","attributes":{"title":"Top nodes","visState":"{\"title\":\"Top nodes\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.node_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"*\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-overview-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.namespace_name","kubernetes.pod_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"((log : 
\\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-overview","attributes":{"title":"Atlas Logs Overview","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-overview-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-overview-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-overview-top-ns\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-overview-top-error-ns\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":12,\"i\":\"5\"},\"id\":\"logs-overview-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":12,\"i\":\"6\"},\"id\":\"logs-overview-top-nodes\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"7\",\"gridData\":{\"x\":0,\"y\":42,\"w\":48,\"h\":14,\"i\":\"7\"},\"id\":\"logs-overview-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": 
{\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-overview","attributes":{"title":"Atlas Logs Overview","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-overview-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-overview-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-overview-top-ns\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-overview-top-error-ns\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":16,\"i\":\"5\"},\"id\":\"logs-overview-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":66,\"w\":48,\"h\":16,\"i\":\"6\"},\"id\":\"logs-overview-top-nodes\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"7\",\"gridData\":{\"x\":0,\"y\":82,\"w\":48,\"h\":18,\"i\":\"7\"},\"id\":\"logs-overview-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-bstein-dev-home-volume","attributes":{"title":"bstein-dev-home logs","visState":"{\"title\":\"bstein-dev-home 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-bstein-dev-home-errors","attributes":{"title":"bstein-dev-home errors","visState":"{\"title\":\"bstein-dev-home errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"bstein-dev-home\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-bstein-dev-home-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-bstein-dev-home-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-bstein-dev-home-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-bstein-dev-home-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-bstein-dev-home-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"bstein-dev-home\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-bstein-dev-home-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"bstein-dev-home\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-bstein-dev-home","attributes":{"title":"bstein-dev-home 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-bstein-dev-home-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-bstein-dev-home-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-bstein-dev-home-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-bstein-dev-home-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-bstein-dev-home-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-bstein-dev-home-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-bstein-dev-home","attributes":{"title":"bstein-dev-home 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-bstein-dev-home-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-bstein-dev-home-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-bstein-dev-home-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-bstein-dev-home-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-bstein-dev-home-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-bstein-dev-home-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-pegasus-volume","attributes":{"title":"pegasus logs","visState":"{\"title\":\"pegasus 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-pegasus-errors","attributes":{"title":"pegasus errors","visState":"{\"title\":\"pegasus errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-pegasus-top-pods","attributes":{"title":"Top 
pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-pegasus-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-pegasus-top-pods","attributes":{"title":"Top 
pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-pegasus-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-pegasus-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-pegasus-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"pegasus\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-pegasus","attributes":{"title":"pegasus 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-pegasus-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-pegasus-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-pegasus-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-pegasus-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-pegasus-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-pegasus-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-pegasus","attributes":{"title":"pegasus 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-pegasus-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-pegasus-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-pegasus-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-pegasus-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-pegasus-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-pegasus-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-jellyfin-volume","attributes":{"title":"jellyfin logs","visState":"{\"title\":\"jellyfin 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-jellyfin-errors","attributes":{"title":"jellyfin errors","visState":"{\"title\":\"jellyfin errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - 
{"type":"visualization","id":"logs-jellyfin-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-jellyfin-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + 
{"type":"visualization","id":"logs-jellyfin-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-jellyfin-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
{"type":"search","id":"logs-jellyfin-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-jellyfin-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jellyfin\\\" and kubernetes.labels.app: \\\"jellyfin\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-jellyfin","attributes":{"title":"jellyfin 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-jellyfin-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-jellyfin-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-jellyfin-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-jellyfin-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-jellyfin-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-jellyfin-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-jellyfin","attributes":{"title":"jellyfin 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-jellyfin-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-jellyfin-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-jellyfin-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-jellyfin-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-jellyfin-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-jellyfin-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-vaultwarden-volume","attributes":{"title":"vaultwarden logs","visState":"{\"title\":\"vaultwarden 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-vaultwarden-errors","attributes":{"title":"vaultwarden errors","visState":"{\"title\":\"vaultwarden errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vaultwarden\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-vaultwarden-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-vaultwarden-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-vaultwarden-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-vaultwarden-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-vaultwarden-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vaultwarden\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-vaultwarden-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vaultwarden\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-vaultwarden","attributes":{"title":"vaultwarden 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-vaultwarden-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-vaultwarden-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-vaultwarden-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-vaultwarden-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-vaultwarden-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-vaultwarden-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-vaultwarden","attributes":{"title":"vaultwarden 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-vaultwarden-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-vaultwarden-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-vaultwarden-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-vaultwarden-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-vaultwarden-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-vaultwarden-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-mailu-volume","attributes":{"title":"mailu logs","visState":"{\"title\":\"mailu 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-mailu-errors","attributes":{"title":"mailu errors","visState":"{\"title\":\"mailu errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"mailu-mailserver\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-mailu-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-mailu-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-mailu-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-mailu-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-mailu-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"mailu-mailserver\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-mailu-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"mailu-mailserver\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-mailu","attributes":{"title":"mailu 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-mailu-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-mailu-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-mailu-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-mailu-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-mailu-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-mailu-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-mailu","attributes":{"title":"mailu 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-mailu-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-mailu-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-mailu-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-mailu-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-mailu-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-mailu-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-nextcloud-volume","attributes":{"title":"nextcloud logs","visState":"{\"title\":\"nextcloud 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-nextcloud-errors","attributes":{"title":"nextcloud errors","visState":"{\"title\":\"nextcloud errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"nextcloud\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-nextcloud-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-nextcloud-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-nextcloud-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-nextcloud-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-nextcloud-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"nextcloud\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-nextcloud-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"nextcloud\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-nextcloud","attributes":{"title":"nextcloud 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-nextcloud-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-nextcloud-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-nextcloud-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-nextcloud-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-nextcloud-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-nextcloud-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-nextcloud","attributes":{"title":"nextcloud 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-nextcloud-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-nextcloud-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-nextcloud-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-nextcloud-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-nextcloud-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-nextcloud-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-gitea-volume","attributes":{"title":"gitea logs","visState":"{\"title\":\"gitea 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-gitea-errors","attributes":{"title":"gitea errors","visState":"{\"title\":\"gitea errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"gitea\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-gitea-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-gitea-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-gitea-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-gitea-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-gitea-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"gitea\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-gitea-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"gitea\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-gitea","attributes":{"title":"gitea 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-gitea-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-gitea-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-gitea-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-gitea-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-gitea-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-gitea-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-gitea","attributes":{"title":"gitea 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-gitea-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-gitea-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-gitea-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-gitea-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-gitea-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-gitea-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-jenkins-volume","attributes":{"title":"jenkins logs","visState":"{\"title\":\"jenkins 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-jenkins-errors","attributes":{"title":"jenkins errors","visState":"{\"title\":\"jenkins errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jenkins\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-jenkins-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-jenkins-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-jenkins-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-jenkins-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-jenkins-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"jenkins\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-jenkins-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"jenkins\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-jenkins","attributes":{"title":"jenkins 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-jenkins-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-jenkins-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-jenkins-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-jenkins-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-jenkins-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-jenkins-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-jenkins","attributes":{"title":"jenkins 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-jenkins-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-jenkins-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-jenkins-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-jenkins-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-jenkins-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-jenkins-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-harbor-volume","attributes":{"title":"harbor logs","visState":"{\"title\":\"harbor 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-harbor-errors","attributes":{"title":"harbor errors","visState":"{\"title\":\"harbor errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"harbor\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-harbor-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-harbor-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-harbor-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-harbor-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-harbor-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"harbor\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-harbor-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"harbor\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-harbor","attributes":{"title":"harbor 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-harbor-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-harbor-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-harbor-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-harbor-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-harbor-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-harbor-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-harbor","attributes":{"title":"harbor 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-harbor-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-harbor-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-harbor-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-harbor-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-harbor-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-harbor-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-vault-volume","attributes":{"title":"vault logs","visState":"{\"title\":\"vault 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-vault-errors","attributes":{"title":"vault errors","visState":"{\"title\":\"vault errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vault\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-vault-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-vault-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-vault-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-vault-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-vault-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"vault\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-vault-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"vault\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-vault","attributes":{"title":"vault 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-vault-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-vault-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-vault-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-vault-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-vault-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-vault-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-vault","attributes":{"title":"vault 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-vault-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-vault-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-vault-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-vault-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-vault-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-vault-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-keycloak-volume","attributes":{"title":"keycloak logs","visState":"{\"title\":\"keycloak 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-keycloak-errors","attributes":{"title":"keycloak errors","visState":"{\"title\":\"keycloak errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"sso\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-keycloak-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-keycloak-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-keycloak-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-keycloak-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-keycloak-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"sso\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-keycloak-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"sso\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-keycloak","attributes":{"title":"keycloak 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-keycloak-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-keycloak-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-keycloak-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-keycloak-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-keycloak-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-keycloak-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-keycloak","attributes":{"title":"keycloak 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-keycloak-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-keycloak-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-keycloak-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-keycloak-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-keycloak-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-keycloak-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-flux-system-volume","attributes":{"title":"flux-system logs","visState":"{\"title\":\"flux-system 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-flux-system-errors","attributes":{"title":"flux-system errors","visState":"{\"title\":\"flux-system errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"flux-system\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-flux-system-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-flux-system-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-flux-system-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-flux-system-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-flux-system-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"flux-system\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-flux-system-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"flux-system\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-flux-system","attributes":{"title":"flux-system 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-flux-system-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-flux-system-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-flux-system-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-flux-system-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-flux-system-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-flux-system-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-flux-system","attributes":{"title":"flux-system 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-flux-system-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-flux-system-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-flux-system-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-flux-system-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-flux-system-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-flux-system-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-comms-volume","attributes":{"title":"comms logs","visState":"{\"title\":\"comms 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-comms-errors","attributes":{"title":"comms errors","visState":"{\"title\":\"comms errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-comms-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-comms-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-comms-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-comms-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-comms-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-comms-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-comms","attributes":{"title":"comms 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-comms-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-comms-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-comms-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-comms-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-comms-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-comms-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-comms","attributes":{"title":"comms 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-comms-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-comms-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-comms-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-comms-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-comms-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-comms-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-element-web-volume","attributes":{"title":"element-web logs","visState":"{\"title\":\"element-web 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-element-web-errors","attributes":{"title":"element-web errors","visState":"{\"title\":\"element-web errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - 
{"type":"visualization","id":"logs-element-web-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-element-web-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + 
{"type":"visualization","id":"logs-element-web-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-element-web-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
{"type":"search","id":"logs-element-web-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-element-web-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"element-web\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-element-web","attributes":{"title":"element-web 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-element-web-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-element-web-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-element-web-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-element-web-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-element-web-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-element-web-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-element-web","attributes":{"title":"element-web 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-element-web-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-element-web-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-element-web-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-element-web-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-element-web-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-element-web-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-element-call-volume","attributes":{"title":"element-call logs","visState":"{\"title\":\"element-call 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-element-call-errors","attributes":{"title":"element-call errors","visState":"{\"title\":\"element-call errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - 
{"type":"visualization","id":"logs-element-call-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-element-call-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + 
{"type":"visualization","id":"logs-element-call-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-element-call-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
{"type":"search","id":"logs-element-call-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-element-call-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"element-call\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-element-call","attributes":{"title":"element-call 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-element-call-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-element-call-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-element-call-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-element-call-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-element-call-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-element-call-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-element-call","attributes":{"title":"element-call 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-element-call-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-element-call-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-element-call-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-element-call-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-element-call-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-element-call-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-matrix-synapse-volume","attributes":{"title":"matrix-synapse logs","visState":"{\"title\":\"matrix-synapse 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-matrix-synapse-errors","attributes":{"title":"matrix-synapse errors","visState":"{\"title\":\"matrix-synapse errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - 
{"type":"visualization","id":"logs-matrix-synapse-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-matrix-synapse-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + 
{"type":"visualization","id":"logs-matrix-synapse-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-matrix-synapse-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} 
{"type":"search","id":"logs-matrix-synapse-recent","attributes":{"title":"Recent logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-matrix-synapse-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.container_name: \\\"synapse\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-matrix-synapse","attributes":{"title":"matrix-synapse 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-matrix-synapse-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-matrix-synapse-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-matrix-synapse-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-matrix-synapse-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-matrix-synapse-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-matrix-synapse-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-matrix-synapse","attributes":{"title":"matrix-synapse 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-matrix-synapse-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-matrix-synapse-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-matrix-synapse-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-matrix-synapse-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-matrix-synapse-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-matrix-synapse-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-livekit-volume","attributes":{"title":"livekit logs","visState":"{\"title\":\"livekit 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-livekit-errors","attributes":{"title":"livekit errors","visState":"{\"title\":\"livekit errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-livekit-top-pods","attributes":{"title":"Top 
pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-livekit-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-livekit-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-livekit-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-livekit-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-livekit-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"livekit\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-livekit","attributes":{"title":"livekit 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-livekit-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-livekit-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-livekit-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-livekit-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-livekit-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-livekit-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-livekit","attributes":{"title":"livekit 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-livekit-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-livekit-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-livekit-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-livekit-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-livekit-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-livekit-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-coturn-volume","attributes":{"title":"coturn logs","visState":"{\"title\":\"coturn 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"visualization","id":"logs-coturn-errors","attributes":{"title":"coturn errors","visState":"{\"title\":\"coturn errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-coturn-top-pods","attributes":{"title":"Top 
pods","visState":"{\"title\":\"Top pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"visualization","id":"logs-coturn-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-coturn-top-pods","attributes":{"title":"Top pods","visState":"{\"title\":\"Top 
pods\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.pod_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} + {"type":"visualization","id":"logs-coturn-top-containers","attributes":{"title":"Top containers","visState":"{\"title\":\"Top containers\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"kubernetes.container_name.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-coturn-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} {"type":"search","id":"logs-coturn-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","kubernetes.pod_name","kubernetes.container_name","log","message"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(kubernetes.namespace_name: \\\"comms\\\" and kubernetes.labels.app: \\\"coturn\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"kube-logs"}]} - {"type":"dashboard","id":"logs-coturn","attributes":{"title":"coturn 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-coturn-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-coturn-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-coturn-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-coturn-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":24,\"h\":14,\"i\":\"5\"},\"id\":\"logs-coturn-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":24,\"y\":30,\"w\":24,\"h\":14,\"i\":\"6\"},\"id\":\"logs-coturn-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-coturn","attributes":{"title":"coturn 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-coturn-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-coturn-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-coturn-top-pods\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":16,\"i\":\"4\"},\"id\":\"logs-coturn-top-containers\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":50,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-coturn-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":68,\"w\":48,\"h\":18,\"i\":\"6\"},\"id\":\"logs-coturn-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} {"type":"visualization","id":"logs-lesavka-volume","attributes":{"title":"lesavka logs","visState":"{\"title\":\"lesavka 
logs\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} {"type":"visualization","id":"logs-lesavka-errors","attributes":{"title":"lesavka errors","visState":"{\"title\":\"lesavka errors\",\"type\":\"histogram\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"@timestamp\",\"interval\":\"auto\",\"min_doc_count\":1}}],\"params\":{\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"interpolate\":\"linear\"}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(_HOSTNAME: \\\"titan-jh\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} - {"type":"visualization","id":"logs-lesavka-top-units","attributes":{"title":"Top units","visState":"{\"title\":\"Top 
units\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"_SYSTEMD_UNIT.keyword\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} + {"type":"visualization","id":"logs-lesavka-top-units","attributes":{"title":"Top units","visState":"{\"title\":\"Top units\",\"type\":\"table\",\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"_SYSTEMD_UNIT.keyword\",\"size\":15,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"params\":{\"perPage\":15,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":1,\"direction\":\"desc\"}}}","uiStateJSON":"{}","description":"","version":1,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} {"type":"search","id":"logs-lesavka-recent","attributes":{"title":"Recent 
logs","description":"","columns":["@timestamp","_HOSTNAME","_SYSTEMD_UNIT","MESSAGE"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"_HOSTNAME: \\\"titan-jh\\\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} {"type":"search","id":"logs-lesavka-recent-errors","attributes":{"title":"Recent errors","description":"","columns":["@timestamp","_HOSTNAME","_SYSTEMD_UNIT","MESSAGE"],"sort":[["@timestamp","desc"]],"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"(_HOSTNAME: \\\"titan-jh\\\") and ((log : \\\"*error*\\\" or message : \\\"*error*\\\") or (log : \\\"*exception*\\\" or message : \\\"*exception*\\\") or (log : \\\"*fail*\\\" or message : \\\"*fail*\\\"))\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"}},"references":[{"name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern","id":"journald-logs"}]} - {"type":"dashboard","id":"logs-lesavka","attributes":{"title":"lesavka 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-lesavka-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-lesavka-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":24,\"h\":12,\"i\":\"3\"},\"id\":\"logs-lesavka-top-units\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":24,\"y\":18,\"w\":24,\"h\":12,\"i\":\"4\"},\"id\":\"logs-lesavka-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":30,\"w\":48,\"h\":14,\"i\":\"5\"},\"id\":\"logs-lesavka-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} + {"type":"dashboard","id":"logs-lesavka","attributes":{"title":"lesavka 
Logs","description":"","hits":0,"panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":48,\"h\":10,\"i\":\"1\"},\"id\":\"logs-lesavka-volume\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"2\",\"gridData\":{\"x\":0,\"y\":10,\"w\":48,\"h\":8,\"i\":\"2\"},\"id\":\"logs-lesavka-errors\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":18,\"w\":48,\"h\":16,\"i\":\"3\"},\"id\":\"logs-lesavka-top-units\",\"type\":\"visualization\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":34,\"w\":48,\"h\":18,\"i\":\"4\"},\"id\":\"logs-lesavka-recent\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":52,\"w\":48,\"h\":18,\"i\":\"5\"},\"id\":\"logs-lesavka-recent-errors\",\"type\":\"search\",\"version\":\"7.10.0\",\"embeddableConfig\":{}}]","optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","version":1,"timeRestore":false,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\": {\"language\": \"kuery\", \"query\": \"\"}, \"filter\": []}"}}} diff --git a/services/logging/opensearch-dashboards-setup-job.yaml b/services/logging/opensearch-dashboards-setup-job.yaml index ee0bf7b..06149d7 100644 --- a/services/logging/opensearch-dashboards-setup-job.yaml +++ b/services/logging/opensearch-dashboards-setup-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: opensearch-dashboards-setup-3 + name: opensearch-dashboards-setup-4 namespace: logging spec: backoffLimit: 3 diff --git a/services/logging/opensearch-observability-objects.yaml b/services/logging/opensearch-observability-objects.yaml new file mode 100644 index 0000000..0fd31c5 --- /dev/null +++ b/services/logging/opensearch-observability-objects.yaml @@ -0,0 +1,1077 @@ +# services/logging/opensearch-observability-objects.yaml +# Generated by 
scripts/logging_render_observability.py --build +apiVersion: v1 +kind: ConfigMap +metadata: + name: opensearch-observability-objects + namespace: logging +data: + applications.json: | + [ + { + "name": "bstein-dev-home", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'bstein-dev-home'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "pegasus", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'pegasus'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "jellyfin", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'jellyfin'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "vaultwarden", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'vaultwarden'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "mailu", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'mailu-mailserver'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "nextcloud", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'nextcloud'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "gitea", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'gitea'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "jenkins", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'jenkins'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "harbor", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'harbor'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "vault", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name 
= 'vault'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "keycloak", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'sso'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "flux-system", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'flux-system'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "comms", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "element-web", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'element-web'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "element-call", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'element-call'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "matrix-synapse", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'synapse'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "livekit", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'livekit'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "coturn", + "description": "", + "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'coturn'", + "servicesEntities": [], + "traceGroups": [] + }, + { + "name": "lesavka", + "description": "", + "baseQuery": "source = journald-* | where _HOSTNAME = 'titan-jh'", + "servicesEntities": [], + "traceGroups": [] + } + ] + saved_queries.json: | + [ + { + "name": "kube logs", + "description": "", + "query": "source = kube-*", + "selected_date_range": { + 
"start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "kube errors", + "description": "", + "query": "source = kube-* | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "journald logs", + "description": "", + "query": "source = journald-*", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "journald errors", + "description": "", + "query": "source = journald-* | where match(MESSAGE, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "bstein-dev-home logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'bstein-dev-home'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "bstein-dev-home errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'bstein-dev-home' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + 
"selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "pegasus logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'pegasus'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "pegasus errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'pegasus' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "jellyfin logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'jellyfin'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "jellyfin errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'jellyfin' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "vaultwarden logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'vaultwarden'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + 
}, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "vaultwarden errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'vaultwarden' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "mailu logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'mailu-mailserver'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "mailu errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'mailu-mailserver' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "nextcloud logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'nextcloud'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "nextcloud errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'nextcloud' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", 
+ "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "gitea logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'gitea'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "gitea errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'gitea' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "jenkins logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'jenkins'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "jenkins errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'jenkins' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "harbor logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'harbor'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": 
"timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "harbor errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'harbor' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "vault logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'vault'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "vault errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'vault' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "keycloak logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'sso'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "keycloak errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'sso' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + 
}, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "flux-system logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'flux-system'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "flux-system errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'flux-system' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "comms logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "comms errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "element-web logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'element-web'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + 
"text": "", + "tokens": [] + } + }, + { + "name": "element-web errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'element-web' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "element-call logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'element-call'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "element-call errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'element-call' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "matrix-synapse logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'synapse'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "matrix-synapse errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'synapse' | where match(log, 
'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "livekit logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'livekit'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "livekit errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'livekit' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "coturn logs", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'coturn'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "coturn errors", + "description": "", + "query": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'coturn' | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { 
+ "name": "lesavka logs", + "description": "", + "query": "source = journald-* | where _HOSTNAME = 'titan-jh'", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "lesavka errors", + "description": "", + "query": "source = journald-* | where _HOSTNAME = 'titan-jh' | where match(MESSAGE, 'error|exception|fail')", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + } + ] + saved_visualizations.json: | + [ + { + "name": "[Kube] Logs per hour", + "description": "", + "query": "source = kube-* | stats count() as log_count by span(`@timestamp`, 1h)", + "type": "line", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "[Kube] Errors per hour", + "description": "", + "query": "source = kube-* | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail') | stats count() as error_count by span(`@timestamp`, 1h)", + "type": "line", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "[Kube] Top namespaces", + "description": "", + "query": "source = kube-* | stats count() as log_count by kubernetes.namespace_name | sort - log_count", + "type": "bar", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + 
"tokens": [] + } + }, + { + "name": "[Kube] Top error namespaces", + "description": "", + "query": "source = kube-* | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail') | stats count() as error_count by kubernetes.namespace_name | sort - error_count", + "type": "bar", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "[Kube] Top pods", + "description": "", + "query": "source = kube-* | stats count() as log_count by kubernetes.pod_name | sort - log_count", + "type": "bar", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "[Kube] Top error pods", + "description": "", + "query": "source = kube-* | where match(log, 'error|exception|fail') or match(message, 'error|exception|fail') | stats count() as error_count by kubernetes.pod_name | sort - error_count", + "type": "bar", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "[Kube] Top nodes", + "description": "", + "query": "source = kube-* | stats count() as log_count by kubernetes.node_name | sort - log_count", + "type": "bar", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "[Journald] Top units", + "description": "", + "query": "source = journald-* | stats count() as log_count by _SYSTEMD_UNIT | sort - log_count", + "type": "bar", + "selected_date_range": { 
+ "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + }, + { + "name": "[Journald] Top error units", + "description": "", + "query": "source = journald-* | where match(MESSAGE, 'error|exception|fail') | stats count() as error_count by _SYSTEMD_UNIT | sort - error_count", + "type": "bar", + "selected_date_range": { + "start": "now-24h", + "end": "now", + "text": "" + }, + "selected_timestamp": { + "name": "@timestamp", + "type": "timestamp" + }, + "selected_fields": { + "text": "", + "tokens": [] + } + } + ] diff --git a/services/logging/opensearch-observability-setup-job.yaml b/services/logging/opensearch-observability-setup-job.yaml new file mode 100644 index 0000000..cf96878 --- /dev/null +++ b/services/logging/opensearch-observability-setup-job.yaml @@ -0,0 +1,193 @@ +# services/logging/opensearch-observability-setup-job.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: opensearch-observability-script + namespace: logging +data: + seed.py: | + import json + import os + import time + import urllib.error + import urllib.request + + OSD_URL = os.environ.get( + "OSD_URL", + "http://opensearch-dashboards.logging.svc.cluster.local:5601", + ).rstrip("/") + OBJECT_DIR = "/config" + + def request_json(method, path, payload=None): + url = f"{OSD_URL}{path}" + data = None + headers = {"osd-xsrf": "true"} + if payload is not None: + data = json.dumps(payload).encode("utf-8") + headers["Content-Type"] = "application/json" + + req = urllib.request.Request(url, data=data, method=method) + for key, value in headers.items(): + req.add_header(key, value) + + try: + with urllib.request.urlopen(req, timeout=30) as response: + body = response.read().decode("utf-8") + except urllib.error.HTTPError as exc: + detail = exc.read().decode("utf-8") + raise SystemExit(f"{method} {path} failed: {exc.code} {detail}") + + if not body: + return {} + 
return json.loads(body) + + + def wait_ready(): + for _ in range(60): + try: + request_json("GET", "/api/status") + return + except Exception: + time.sleep(5) + raise SystemExit("OpenSearch Dashboards did not become ready in time") + + + def load_payload(name): + path = os.path.join(OBJECT_DIR, name) + with open(path, "r", encoding="utf-8") as handle: + return json.load(handle) + + + def index_by_name(items, key): + lookup = {} + for item in items: + obj = item.get(key, {}) + name = obj.get("name") + if not name: + continue + lookup.setdefault(name, item) + return lookup + + + def ensure_applications(apps): + existing = request_json("GET", "/api/observability/application/").get("data", []) + existing_by_name = {app.get("name"): app for app in existing if app.get("name")} + + for app in apps: + name = app.get("name") + if not name: + continue + current = existing_by_name.get(name) + if not current: + request_json("POST", "/api/observability/application/", app) + print(f"created application: {name}") + continue + + if app.get("baseQuery") != current.get("baseQuery"): + print(f"baseQuery differs for {name}; skipping update") + + update_body = {} + for key in ("description", "servicesEntities", "traceGroups"): + if app.get(key, "") != current.get(key, ""): + update_body[key] = app.get(key, "") + + if update_body: + request_json( + "PUT", + "/api/observability/application/", + {"appId": current["id"], "updateBody": update_body}, + ) + print(f"updated application: {name}") + + + def ensure_saved_objects(objects, object_type, endpoint): + existing = request_json( + "GET", + f"/api/observability/event_analytics/saved_objects?objectType={object_type}", + ).get("observabilityObjectList", []) + key = "savedQuery" if object_type == "savedQuery" else "savedVisualization" + existing_by_name = index_by_name(existing, key) + + for obj in objects: + name = obj.get("name") + if not name: + continue + current = existing_by_name.get(name) + if not current: + request_json("POST", 
endpoint, {"object": obj}) + print(f"created {object_type}: {name}") + continue + + current_body = current.get(key, {}) + if current_body != obj: + request_json( + "PUT", + endpoint, + {"object_id": current["objectId"], "object": obj}, + ) + print(f"updated {object_type}: {name}") + + + def main(): + wait_ready() + + applications = load_payload("applications.json") + queries = load_payload("saved_queries.json") + visualizations = load_payload("saved_visualizations.json") + + ensure_applications(applications) + ensure_saved_objects(queries, "savedQuery", "/api/observability/event_analytics/saved_objects/query") + ensure_saved_objects( + visualizations, + "savedVisualization", + "/api/observability/event_analytics/saved_objects/vis", + ) + + + if __name__ == "__main__": + main() +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: opensearch-observability-setup-1 + namespace: logging +spec: + backoffLimit: 3 + ttlSecondsAfterFinished: 3600 + template: + spec: + restartPolicy: OnFailure + nodeSelector: + node-role.kubernetes.io/worker: "true" + hardware: rpi5 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + containers: + - name: setup + image: python:3.11-alpine + command: ["python", "/scripts/seed.py"] + env: + - name: OSD_URL + value: http://opensearch-dashboards.logging.svc.cluster.local:5601 + volumeMounts: + - name: scripts + mountPath: /scripts + readOnly: true + - name: objects + mountPath: /config + readOnly: true + volumes: + - name: scripts + configMap: + name: opensearch-observability-script + - name: objects + configMap: + name: opensearch-observability-objects -- 2.47.2 From c60f40584622ec8c3b75912c2a0c02f0d28319c9 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 10 Jan 2026 03:03:44 +0000 Subject: [PATCH 625/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index d8c31c1..d9a50bd 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-86 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-87 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 67415e665cf6e1512f6e4cb91a7631dc5c0b5efe Mon Sep 17 00:00:00 2001 From: flux-bot Date: Sat, 10 Jan 2026 03:05:43 +0000 Subject: [PATCH 626/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 7b855ff..f3b3ef6 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-86 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-87 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 2d35f90..f88cf41 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ 
b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-86 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-87 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From af9ab30849620f8fa9ba5e01874e33bc872ce799 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 10 Jan 2026 00:12:55 -0300 Subject: [PATCH 627/684] logging: add trace analytics ingestion --- .../sources/helm/kustomization.yaml | 1 + .../sources/helm/opentelemetry.yaml | 9 ++ scripts/logging_render_observability.py | 2 +- .../logging/data-prepper-helmrelease.yaml | 75 ++++++++++++++++ services/logging/kustomization.yaml | 6 +- .../logging/observability/applications.json | 76 ++++++++++++---- services/logging/opensearch-ism-job.yaml | 12 ++- .../opensearch-observability-objects.yaml | 76 ++++++++++++---- .../opensearch-observability-setup-job.yaml | 2 +- .../logging/opensearch-prune-cronjob.yaml | 2 +- .../logging/otel-collector-helmrelease.yaml | 87 +++++++++++++++++++ 11 files changed, 304 insertions(+), 44 deletions(-) create mode 100644 infrastructure/sources/helm/opentelemetry.yaml create mode 100644 services/logging/data-prepper-helmrelease.yaml create mode 100644 services/logging/otel-collector-helmrelease.yaml diff --git a/infrastructure/sources/helm/kustomization.yaml b/infrastructure/sources/helm/kustomization.yaml index 97fd70e..c8d20bb 100644 --- a/infrastructure/sources/helm/kustomization.yaml +++ b/infrastructure/sources/helm/kustomization.yaml @@ -8,6 +8,7 @@ resources: - jetstack.yaml - jenkins.yaml - mailu.yaml + - opentelemetry.yaml - opensearch.yaml - harbor.yaml - prometheus.yaml diff --git a/infrastructure/sources/helm/opentelemetry.yaml b/infrastructure/sources/helm/opentelemetry.yaml new file mode 
100644 index 0000000..03d0b00 --- /dev/null +++ b/infrastructure/sources/helm/opentelemetry.yaml @@ -0,0 +1,9 @@ +# infrastructure/sources/helm/opentelemetry.yaml +apiVersion: source.toolkit.fluxcd.io/v1 +kind: HelmRepository +metadata: + name: opentelemetry + namespace: flux-system +spec: + interval: 1h + url: https://open-telemetry.github.io/opentelemetry-helm-charts diff --git a/scripts/logging_render_observability.py b/scripts/logging_render_observability.py index bd3455d..679e340 100755 --- a/scripts/logging_render_observability.py +++ b/scripts/logging_render_observability.py @@ -180,7 +180,7 @@ def build_objects() -> tuple[list[dict], list[dict], list[dict]]: "description": app.description, "baseQuery": app.base_query, "servicesEntities": [], - "traceGroups": [], + "traceGroups": [app.name], } for app in apps ] diff --git a/services/logging/data-prepper-helmrelease.yaml b/services/logging/data-prepper-helmrelease.yaml new file mode 100644 index 0000000..900b9ca --- /dev/null +++ b/services/logging/data-prepper-helmrelease.yaml @@ -0,0 +1,75 @@ +# services/logging/data-prepper-helmrelease.yaml +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: data-prepper + namespace: logging +spec: + interval: 15m + timeout: 10m + chart: + spec: + chart: data-prepper + version: "~0.3.1" + sourceRef: + kind: HelmRepository + name: opensearch + namespace: flux-system + values: + fullnameOverride: data-prepper + replicaCount: 1 + config: + data-prepper-config.yaml: | + ssl: false + pipelineConfig: + enabled: true + config: + entry-pipeline: + delay: "100" + source: + otel_trace_source: + ssl: false + sink: + - pipeline: + name: "raw-pipeline" + - pipeline: + name: "service-map-pipeline" + raw-pipeline: + source: + pipeline: + name: "entry-pipeline" + processor: + - otel_traces: + sink: + - opensearch: + hosts: ["http://opensearch-master.logging.svc.cluster.local:9200"] + index_type: trace-analytics-raw + service-map-pipeline: + delay: "100" + source: 
+ pipeline: + name: "entry-pipeline" + processor: + - service_map: + sink: + - opensearch: + hosts: ["http://opensearch-master.logging.svc.cluster.local:9200"] + index_type: trace-analytics-service-map + resources: + requests: + cpu: "200m" + memory: "512Mi" + limits: + memory: "1Gi" + nodeSelector: + node-role.kubernetes.io/worker: "true" + hardware: rpi5 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi5 diff --git a/services/logging/kustomization.yaml b/services/logging/kustomization.yaml index 94fc1a8..a4e0bab 100644 --- a/services/logging/kustomization.yaml +++ b/services/logging/kustomization.yaml @@ -3,11 +3,13 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - namespace.yaml + - opensearch-dashboards-objects.yaml + - opensearch-observability-objects.yaml - opensearch-pvc.yaml - opensearch-helmrelease.yaml - opensearch-dashboards-helmrelease.yaml - - opensearch-dashboards-objects.yaml - - opensearch-observability-objects.yaml + - data-prepper-helmrelease.yaml + - otel-collector-helmrelease.yaml - opensearch-ism-job.yaml - opensearch-dashboards-setup-job.yaml - opensearch-observability-setup-job.yaml diff --git a/services/logging/observability/applications.json b/services/logging/observability/applications.json index fc29e5a..8a0b397 100644 --- a/services/logging/observability/applications.json +++ b/services/logging/observability/applications.json @@ -4,132 +4,170 @@ "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'bstein-dev-home'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "bstein-dev-home" + ] }, { "name": "pegasus", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'pegasus'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "pegasus" + ] }, { "name": 
"jellyfin", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'jellyfin'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "jellyfin" + ] }, { "name": "vaultwarden", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'vaultwarden'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "vaultwarden" + ] }, { "name": "mailu", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'mailu-mailserver'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "mailu" + ] }, { "name": "nextcloud", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'nextcloud'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "nextcloud" + ] }, { "name": "gitea", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'gitea'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "gitea" + ] }, { "name": "jenkins", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'jenkins'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "jenkins" + ] }, { "name": "harbor", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'harbor'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "harbor" + ] }, { "name": "vault", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'vault'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "vault" + ] }, { "name": "keycloak", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'sso'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "keycloak" + ] }, { "name": "flux-system", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'flux-system'", "servicesEntities": [], - 
"traceGroups": [] + "traceGroups": [ + "flux-system" + ] }, { "name": "comms", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "comms" + ] }, { "name": "element-web", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'element-web'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "element-web" + ] }, { "name": "element-call", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'element-call'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "element-call" + ] }, { "name": "matrix-synapse", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'synapse'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "matrix-synapse" + ] }, { "name": "livekit", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'livekit'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "livekit" + ] }, { "name": "coturn", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'coturn'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "coturn" + ] }, { "name": "lesavka", "description": "", "baseQuery": "source = journald-* | where _HOSTNAME = 'titan-jh'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "lesavka" + ] } ] diff --git a/services/logging/opensearch-ism-job.yaml b/services/logging/opensearch-ism-job.yaml index c800677..3313571 100644 --- a/services/logging/opensearch-ism-job.yaml +++ b/services/logging/opensearch-ism-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: opensearch-ism-setup-4 + name: 
opensearch-ism-setup-5 namespace: logging spec: backoffLimit: 3 @@ -48,6 +48,11 @@ spec: -H 'Content-Type: application/json' \ -d "${policy}" >/dev/null + trace_policy='{"policy":{"description":"Delete trace analytics after 30 days","schema_version":1,"default_state":"hot","states":[{"name":"hot","actions":[],"transitions":[{"state_name":"delete","conditions":{"min_index_age":"30d"}}]},{"name":"delete","actions":[{"delete":{}}],"transitions":[]}]}}' + curl -sS -X PUT "${OS_URL}/_plugins/_ism/policies/trace-analytics-30d" \ + -H 'Content-Type: application/json' \ + -d "${trace_policy}" >/dev/null + kube_template='{"index_patterns":["kube-*"],"priority":200,"template":{"settings":{"index.number_of_shards":1,"index.number_of_replicas":0,"index.refresh_interval":"30s","plugins.index_state_management.policy_id":"logging-180d"},"mappings":{"properties":{"@timestamp":{"type":"date"}}}}}' curl -sS -X PUT "${OS_URL}/_index_template/kube-logs" \ -H 'Content-Type: application/json' \ @@ -58,6 +63,11 @@ spec: -H 'Content-Type: application/json' \ -d "${journal_template}" >/dev/null + trace_template='{"index_patterns":["trace-analytics-*"],"priority":200,"template":{"settings":{"index.number_of_shards":1,"index.number_of_replicas":0,"index.refresh_interval":"30s","plugins.index_state_management.policy_id":"trace-analytics-30d"}}}' + curl -sS -X PUT "${OS_URL}/_index_template/trace-analytics" \ + -H 'Content-Type: application/json' \ + -d "${trace_template}" >/dev/null + curl -sS -X PUT "${OS_URL}/_all/_settings" \ -H 'Content-Type: application/json' \ -d '{"index":{"number_of_replicas":0}}' >/dev/null diff --git a/services/logging/opensearch-observability-objects.yaml b/services/logging/opensearch-observability-objects.yaml index 0fd31c5..19ed195 100644 --- a/services/logging/opensearch-observability-objects.yaml +++ b/services/logging/opensearch-observability-objects.yaml @@ -13,133 +13,171 @@ data: "description": "", "baseQuery": "source = kube-* | where 
kubernetes.namespace_name = 'bstein-dev-home'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "bstein-dev-home" + ] }, { "name": "pegasus", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'pegasus'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "pegasus" + ] }, { "name": "jellyfin", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'jellyfin'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "jellyfin" + ] }, { "name": "vaultwarden", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'vaultwarden'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "vaultwarden" + ] }, { "name": "mailu", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'mailu-mailserver'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "mailu" + ] }, { "name": "nextcloud", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'nextcloud'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "nextcloud" + ] }, { "name": "gitea", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'gitea'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "gitea" + ] }, { "name": "jenkins", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'jenkins'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "jenkins" + ] }, { "name": "harbor", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'harbor'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "harbor" + ] }, { "name": "vault", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'vault'", "servicesEntities": [], - "traceGroups": [] + 
"traceGroups": [ + "vault" + ] }, { "name": "keycloak", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'sso'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "keycloak" + ] }, { "name": "flux-system", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'flux-system'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "flux-system" + ] }, { "name": "comms", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "comms" + ] }, { "name": "element-web", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'element-web'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "element-web" + ] }, { "name": "element-call", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'element-call'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "element-call" + ] }, { "name": "matrix-synapse", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'synapse'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "matrix-synapse" + ] }, { "name": "livekit", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'livekit'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "livekit" + ] }, { "name": "coturn", "description": "", "baseQuery": "source = kube-* | where kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'coturn'", "servicesEntities": [], - "traceGroups": [] + "traceGroups": [ + "coturn" + ] }, { "name": "lesavka", "description": "", "baseQuery": "source = journald-* | where _HOSTNAME = 'titan-jh'", "servicesEntities": [], - 
"traceGroups": [] + "traceGroups": [ + "lesavka" + ] } ] saved_queries.json: | diff --git a/services/logging/opensearch-observability-setup-job.yaml b/services/logging/opensearch-observability-setup-job.yaml index cf96878..75e65b2 100644 --- a/services/logging/opensearch-observability-setup-job.yaml +++ b/services/logging/opensearch-observability-setup-job.yaml @@ -150,7 +150,7 @@ data: apiVersion: batch/v1 kind: Job metadata: - name: opensearch-observability-setup-1 + name: opensearch-observability-setup-2 namespace: logging spec: backoffLimit: 3 diff --git a/services/logging/opensearch-prune-cronjob.yaml b/services/logging/opensearch-prune-cronjob.yaml index 74e2837..83aee1a 100644 --- a/services/logging/opensearch-prune-cronjob.yaml +++ b/services/logging/opensearch-prune-cronjob.yaml @@ -122,7 +122,7 @@ spec: - name: LOG_LIMIT_BYTES value: "1099511627776" - name: LOG_INDEX_PATTERNS - value: "kube-*,journald-*" + value: "kube-*,journald-*,trace-analytics-*" volumeMounts: - name: scripts mountPath: /scripts diff --git a/services/logging/otel-collector-helmrelease.yaml b/services/logging/otel-collector-helmrelease.yaml new file mode 100644 index 0000000..b6346cc --- /dev/null +++ b/services/logging/otel-collector-helmrelease.yaml @@ -0,0 +1,87 @@ +# services/logging/otel-collector-helmrelease.yaml +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: otel-collector + namespace: logging +spec: + interval: 15m + timeout: 10m + chart: + spec: + chart: opentelemetry-collector + version: "~0.143.0" + sourceRef: + kind: HelmRepository + name: opentelemetry + namespace: flux-system + values: + fullnameOverride: otel-collector + mode: deployment + replicaCount: 1 + ports: + otlp: + enabled: true + containerPort: 4317 + servicePort: 4317 + protocol: TCP + otlp-http: + enabled: true + containerPort: 4318 + servicePort: 4318 + protocol: TCP + jaeger-compact: + enabled: false + jaeger-thrift: + enabled: false + jaeger-grpc: + enabled: false + zipkin: + 
enabled: false + metrics: + enabled: false + config: + receivers: + otlp: + protocols: + grpc: + endpoint: ${env:MY_POD_IP}:4317 + http: + endpoint: ${env:MY_POD_IP}:4318 + processors: + memory_limiter: + check_interval: 5s + limit_percentage: 80 + spike_limit_percentage: 25 + batch: {} + exporters: + otlp/data-prepper: + endpoint: data-prepper.logging.svc.cluster.local:21890 + tls: + insecure: true + service: + extensions: + - health_check + pipelines: + traces: + receivers: [otlp] + processors: [memory_limiter, batch] + exporters: [otlp/data-prepper] + resources: + requests: + cpu: "100m" + memory: "256Mi" + limits: + memory: "512Mi" + nodeSelector: + node-role.kubernetes.io/worker: "true" + hardware: rpi5 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi5 -- 2.47.2 From a870aa691650deb06e63727db3e6319c0f4d9d84 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 10 Jan 2026 00:16:41 -0300 Subject: [PATCH 628/684] logging: pin otel collector image --- services/logging/otel-collector-helmrelease.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/services/logging/otel-collector-helmrelease.yaml b/services/logging/otel-collector-helmrelease.yaml index b6346cc..c24682f 100644 --- a/services/logging/otel-collector-helmrelease.yaml +++ b/services/logging/otel-collector-helmrelease.yaml @@ -17,6 +17,9 @@ spec: namespace: flux-system values: fullnameOverride: otel-collector + image: + repository: otel/opentelemetry-collector + tag: "0.143.0" mode: deployment replicaCount: 1 ports: -- 2.47.2 From c6c7259a71243c00d81937615b192dbe55dd28e2 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 10 Jan 2026 05:01:17 -0300 Subject: [PATCH 629/684] logging: add Jenkins build for data-prepper --- dockerfiles/Dockerfile.data-prepper | 16 ++++ services/jenkins/configmap-jcasc.yaml | 19 +++++ services/logging/Jenkinsfile.data-prepper | 83 +++++++++++++++++++ 
.../logging/data-prepper-helmrelease.yaml | 3 + 4 files changed, 121 insertions(+) create mode 100644 dockerfiles/Dockerfile.data-prepper create mode 100644 services/logging/Jenkinsfile.data-prepper diff --git a/dockerfiles/Dockerfile.data-prepper b/dockerfiles/Dockerfile.data-prepper new file mode 100644 index 0000000..b33c18c --- /dev/null +++ b/dockerfiles/Dockerfile.data-prepper @@ -0,0 +1,16 @@ +FROM --platform=$BUILDPLATFORM opensearchproject/data-prepper:2.8.0 AS source + +FROM --platform=$TARGETPLATFORM eclipse-temurin:17-jre + +ENV DATA_PREPPER_PATH=/usr/share/data-prepper + +RUN useradd -u 10001 -M -U -d / -s /usr/sbin/nologin data_prepper \ + && mkdir -p /var/log/data-prepper + +COPY --from=source /usr/share/data-prepper /usr/share/data-prepper + +RUN chown -R 10001:10001 /usr/share/data-prepper /var/log/data-prepper + +USER 10001 +WORKDIR /usr/share/data-prepper +CMD ["bin/data-prepper"] diff --git a/services/jenkins/configmap-jcasc.yaml b/services/jenkins/configmap-jcasc.yaml index f72f6aa..2c188db 100644 --- a/services/jenkins/configmap-jcasc.yaml +++ b/services/jenkins/configmap-jcasc.yaml @@ -120,6 +120,25 @@ data: } } } + pipelineJob('data-prepper') { + triggers { + scm('H/5 * * * *') + } + definition { + cpsScm { + scm { + git { + remote { + url('https://scm.bstein.dev/bstein/titan-iac.git') + credentials('gitea-pat') + } + branches('*/feature/sso-hardening') + } + } + scriptPath('services/logging/Jenkinsfile.data-prepper') + } + } + } base.yaml: | jenkins: disableRememberMe: false diff --git a/services/logging/Jenkinsfile.data-prepper b/services/logging/Jenkinsfile.data-prepper new file mode 100644 index 0000000..ce7e3bd --- /dev/null +++ b/services/logging/Jenkinsfile.data-prepper @@ -0,0 +1,83 @@ +pipeline { + agent { + kubernetes { + yaml """ +apiVersion: v1 +kind: Pod +spec: + restartPolicy: Never + serviceAccountName: jenkins + nodeSelector: + hardware: rpi5 + node-role.kubernetes.io/worker: "true" + containers: + - name: git + image: 
alpine/git:2.47.1 + command: + - cat + tty: true + - name: kaniko + image: gcr.io/kaniko-project/executor:v1.23.2 + command: + - /busybox/cat + tty: true + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1500m" + memory: "2Gi" +""" + } + } + options { + timestamps() + } + parameters { + string(name: 'HARBOR_REPO', defaultValue: 'registry.bstein.dev/monitoring/data-prepper', description: 'Docker repository for Data Prepper') + string(name: 'IMAGE_TAG', defaultValue: '2.8.0', description: 'Image tag to publish') + booleanParam(name: 'PUSH_LATEST', defaultValue: true, description: 'Also push the latest tag') + } + stages { + stage('Checkout') { + steps { + container('git') { + checkout scm + } + } + } + stage('Build & Push') { + steps { + container('kaniko') { + withCredentials([usernamePassword(credentialsId: 'harbor-robot', usernameVariable: 'HARBOR_USERNAME', passwordVariable: 'HARBOR_PASSWORD')]) { + sh ''' + set -euo pipefail + mkdir -p /kaniko/.docker + ref_host="$(echo "${HARBOR_REPO}" | cut -d/ -f1)" + auth="$(printf "%s:%s" "${HARBOR_USERNAME}" "${HARBOR_PASSWORD}" | base64 | tr -d '\\n')" + cat > /kaniko/.docker/config.json < Date: Sat, 10 Jan 2026 05:06:34 -0300 Subject: [PATCH 630/684] logging: add rpi5 log retention tuning --- services/logging/fluent-bit-helmrelease.yaml | 6 +- services/logging/kustomization.yaml | 3 + .../logging/node-log-rotation-daemonset.yaml | 49 ++++++++++++++++ .../logging/node-log-rotation-script.yaml | 56 +++++++++++++++++++ .../node-log-rotation-serviceaccount.yaml | 6 ++ 5 files changed, 117 insertions(+), 3 deletions(-) create mode 100644 services/logging/node-log-rotation-daemonset.yaml create mode 100644 services/logging/node-log-rotation-script.yaml create mode 100644 services/logging/node-log-rotation-serviceaccount.yaml diff --git a/services/logging/fluent-bit-helmrelease.yaml b/services/logging/fluent-bit-helmrelease.yaml index 30952c1..72fa958 100644 --- 
a/services/logging/fluent-bit-helmrelease.yaml +++ b/services/logging/fluent-bit-helmrelease.yaml @@ -34,9 +34,9 @@ spec: hostPath: path: /var/log/journal - name: fluentbit-state - hostPath: - path: /var/lib/fluent-bit - type: DirectoryOrCreate + emptyDir: + medium: Memory + sizeLimit: 64Mi extraVolumeMounts: - name: runlogjournal mountPath: /run/log/journal diff --git a/services/logging/kustomization.yaml b/services/logging/kustomization.yaml index a4e0bab..3f3a25e 100644 --- a/services/logging/kustomization.yaml +++ b/services/logging/kustomization.yaml @@ -5,6 +5,8 @@ resources: - namespace.yaml - opensearch-dashboards-objects.yaml - opensearch-observability-objects.yaml + - node-log-rotation-serviceaccount.yaml + - node-log-rotation-script.yaml - opensearch-pvc.yaml - opensearch-helmrelease.yaml - opensearch-dashboards-helmrelease.yaml @@ -15,5 +17,6 @@ resources: - opensearch-observability-setup-job.yaml - opensearch-prune-cronjob.yaml - fluent-bit-helmrelease.yaml + - node-log-rotation-daemonset.yaml - oauth2-proxy.yaml - ingress.yaml diff --git a/services/logging/node-log-rotation-daemonset.yaml b/services/logging/node-log-rotation-daemonset.yaml new file mode 100644 index 0000000..f6a672c --- /dev/null +++ b/services/logging/node-log-rotation-daemonset.yaml @@ -0,0 +1,49 @@ +# services/logging/node-log-rotation-daemonset.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: node-log-rotation + namespace: logging +spec: + selector: + matchLabels: + app: node-log-rotation + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + app: node-log-rotation + spec: + serviceAccountName: node-log-rotation + tolerations: + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + nodeSelector: + hardware: rpi5 + containers: + - name: node-log-rotation + image: 
bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 + command: ["/usr/bin/env", "bash"] + args: ["/scripts/node_log_rotation.sh"] + securityContext: + privileged: true + runAsUser: 0 + volumeMounts: + - name: host-root + mountPath: /host + - name: script + mountPath: /scripts + readOnly: true + volumes: + - name: host-root + hostPath: + path: / + - name: script + configMap: + name: node-log-rotation-script + defaultMode: 0555 diff --git a/services/logging/node-log-rotation-script.yaml b/services/logging/node-log-rotation-script.yaml new file mode 100644 index 0000000..14fac87 --- /dev/null +++ b/services/logging/node-log-rotation-script.yaml @@ -0,0 +1,56 @@ +# services/logging/node-log-rotation-script.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: node-log-rotation-script + namespace: logging +data: + node_log_rotation.sh: | + #!/usr/bin/env bash + set -euo pipefail + + changed=0 + journald_changed=0 + k3s_changed=0 + k3s_agent_changed=0 + + journald_dropin="/host/etc/systemd/journald.conf.d/99-logging.conf" + k3s_dropin="/host/etc/systemd/system/k3s.service.d/99-logging.conf" + k3s_agent_dropin="/host/etc/systemd/system/k3s-agent.service.d/99-logging.conf" + + if [ ! -f "${journald_dropin}" ]; then + mkdir -p "$(dirname "${journald_dropin}")" + printf "[Journal]\nStorage=volatile\nRuntimeMaxUse=200M\nRuntimeKeepFree=512M\nMaxFileSec=1h\n" > "${journald_dropin}" + changed=1 + journald_changed=1 + fi + + if [ -f "/host/etc/systemd/system/k3s.service" ] && [ ! -f "${k3s_dropin}" ]; then + mkdir -p "$(dirname "${k3s_dropin}")" + printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=container-log-max-size=10Mi\"\nEnvironment=\"K3S_KUBELET_ARG=container-log-max-files=2\"\n" > "${k3s_dropin}" + changed=1 + k3s_changed=1 + fi + + if [ -f "/host/etc/systemd/system/k3s-agent.service" ] && [ ! 
-f "${k3s_agent_dropin}" ]; then + mkdir -p "$(dirname "${k3s_agent_dropin}")" + printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=container-log-max-size=10Mi\"\nEnvironment=\"K3S_KUBELET_ARG=container-log-max-files=2\"\n" > "${k3s_agent_dropin}" + changed=1 + k3s_agent_changed=1 + fi + + if [ "${changed}" -eq 1 ]; then + sleep "$(( (RANDOM % 300) + 10 ))" + chroot /host /bin/systemctl daemon-reload + if [ "${journald_changed}" -eq 1 ]; then + chroot /host /bin/systemctl restart systemd-journald + fi + if [ "${k3s_changed}" -eq 1 ]; then + chroot /host /bin/systemctl restart k3s + fi + if [ "${k3s_agent_changed}" -eq 1 ]; then + chroot /host /bin/systemctl restart k3s-agent + fi + fi + + sleep infinity diff --git a/services/logging/node-log-rotation-serviceaccount.yaml b/services/logging/node-log-rotation-serviceaccount.yaml new file mode 100644 index 0000000..68fc463 --- /dev/null +++ b/services/logging/node-log-rotation-serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/logging/node-log-rotation-serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: node-log-rotation + namespace: logging -- 2.47.2 From 1595898947ef016e29b76deb36668e3ea1873e16 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 10 Jan 2026 05:15:19 -0300 Subject: [PATCH 631/684] logging: drop timestamps option from data-prepper job --- services/logging/Jenkinsfile.data-prepper | 3 --- 1 file changed, 3 deletions(-) diff --git a/services/logging/Jenkinsfile.data-prepper b/services/logging/Jenkinsfile.data-prepper index ce7e3bd..35dda35 100644 --- a/services/logging/Jenkinsfile.data-prepper +++ b/services/logging/Jenkinsfile.data-prepper @@ -31,9 +31,6 @@ spec: """ } } - options { - timestamps() - } parameters { string(name: 'HARBOR_REPO', defaultValue: 'registry.bstein.dev/monitoring/data-prepper', description: 'Docker repository for Data Prepper') string(name: 'IMAGE_TAG', defaultValue: '2.8.0', description: 'Image tag to publish') -- 2.47.2 From 
de8709bc2edc11157ca6e0c706880e5da8fc6849 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 10 Jan 2026 05:22:27 -0300 Subject: [PATCH 632/684] logging: use kaniko debug image --- services/logging/Jenkinsfile.data-prepper | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/logging/Jenkinsfile.data-prepper b/services/logging/Jenkinsfile.data-prepper index 35dda35..d90715a 100644 --- a/services/logging/Jenkinsfile.data-prepper +++ b/services/logging/Jenkinsfile.data-prepper @@ -17,7 +17,7 @@ spec: - cat tty: true - name: kaniko - image: gcr.io/kaniko-project/executor:v1.23.2 + image: gcr.io/kaniko-project/executor:v1.23.2-debug command: - /busybox/cat tty: true -- 2.47.2 From 5e245caf45a2ecd334fd14ea0f6f73c20f191e13 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 10 Jan 2026 05:28:03 -0300 Subject: [PATCH 633/684] logging: use streaming repo for data-prepper --- services/logging/Jenkinsfile.data-prepper | 2 +- services/logging/data-prepper-helmrelease.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/logging/Jenkinsfile.data-prepper b/services/logging/Jenkinsfile.data-prepper index d90715a..611b26b 100644 --- a/services/logging/Jenkinsfile.data-prepper +++ b/services/logging/Jenkinsfile.data-prepper @@ -32,7 +32,7 @@ spec: } } parameters { - string(name: 'HARBOR_REPO', defaultValue: 'registry.bstein.dev/monitoring/data-prepper', description: 'Docker repository for Data Prepper') + string(name: 'HARBOR_REPO', defaultValue: 'registry.bstein.dev/streaming/data-prepper', description: 'Docker repository for Data Prepper') string(name: 'IMAGE_TAG', defaultValue: '2.8.0', description: 'Image tag to publish') booleanParam(name: 'PUSH_LATEST', defaultValue: true, description: 'Also push the latest tag') } diff --git a/services/logging/data-prepper-helmrelease.yaml b/services/logging/data-prepper-helmrelease.yaml index d0889c7..ee7c5f6 100644 --- a/services/logging/data-prepper-helmrelease.yaml +++ 
b/services/logging/data-prepper-helmrelease.yaml @@ -19,7 +19,7 @@ spec: fullnameOverride: data-prepper replicaCount: 1 image: - repository: registry.bstein.dev/monitoring/data-prepper + repository: registry.bstein.dev/streaming/data-prepper tag: "2.8.0" config: data-prepper-config.yaml: | -- 2.47.2 From 76f3b3f4ea8d4ed1d41634415d93a9ae42a82045 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 10 Jan 2026 05:42:39 -0300 Subject: [PATCH 634/684] logging: force data-prepper repo override --- services/logging/Jenkinsfile.data-prepper | 3 +++ 1 file changed, 3 insertions(+) diff --git a/services/logging/Jenkinsfile.data-prepper b/services/logging/Jenkinsfile.data-prepper index 611b26b..4f7c6a7 100644 --- a/services/logging/Jenkinsfile.data-prepper +++ b/services/logging/Jenkinsfile.data-prepper @@ -50,6 +50,9 @@ spec: withCredentials([usernamePassword(credentialsId: 'harbor-robot', usernameVariable: 'HARBOR_USERNAME', passwordVariable: 'HARBOR_PASSWORD')]) { sh ''' set -euo pipefail + if [ -z "${HARBOR_REPO:-}" ] || [ "${HARBOR_REPO}" = "registry.bstein.dev/monitoring/data-prepper" ]; then + HARBOR_REPO="registry.bstein.dev/streaming/data-prepper" + fi mkdir -p /kaniko/.docker ref_host="$(echo "${HARBOR_REPO}" | cut -d/ -f1)" auth="$(printf "%s:%s" "${HARBOR_USERNAME}" "${HARBOR_PASSWORD}" | base64 | tr -d '\\n')" -- 2.47.2 From 63f1d902b6976a332b2002394b978738d09e4689 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 10 Jan 2026 05:52:16 -0300 Subject: [PATCH 635/684] logging: add data-prepper pull secret --- services/logging/data-prepper-helmrelease.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/logging/data-prepper-helmrelease.yaml b/services/logging/data-prepper-helmrelease.yaml index ee7c5f6..8b27052 100644 --- a/services/logging/data-prepper-helmrelease.yaml +++ b/services/logging/data-prepper-helmrelease.yaml @@ -21,6 +21,8 @@ spec: image: repository: registry.bstein.dev/streaming/data-prepper tag: "2.8.0" + imagePullSecrets: + - name: 
harbor-robot-pipeline config: data-prepper-config.yaml: | ssl: false -- 2.47.2 From 753cc5900a1896007b25cb1aef485fdf7e90ee57 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 10 Jan 2026 05:55:45 -0300 Subject: [PATCH 636/684] logging: extend fluent-bit helm timeout --- services/logging/fluent-bit-helmrelease.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/logging/fluent-bit-helmrelease.yaml b/services/logging/fluent-bit-helmrelease.yaml index 72fa958..e16890c 100644 --- a/services/logging/fluent-bit-helmrelease.yaml +++ b/services/logging/fluent-bit-helmrelease.yaml @@ -6,6 +6,7 @@ metadata: namespace: logging spec: interval: 15m + timeout: 10m chart: spec: chart: fluent-bit -- 2.47.2 From 6f8696eb0d031d6674c0795419205cfeb131bbc0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 10 Jan 2026 06:22:56 -0300 Subject: [PATCH 637/684] logging: tune kubelet image GC on rpi5 --- services/logging/node-log-rotation-script.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/services/logging/node-log-rotation-script.yaml b/services/logging/node-log-rotation-script.yaml index 14fac87..7926e0d 100644 --- a/services/logging/node-log-rotation-script.yaml +++ b/services/logging/node-log-rotation-script.yaml @@ -17,6 +17,8 @@ data: journald_dropin="/host/etc/systemd/journald.conf.d/99-logging.conf" k3s_dropin="/host/etc/systemd/system/k3s.service.d/99-logging.conf" k3s_agent_dropin="/host/etc/systemd/system/k3s-agent.service.d/99-logging.conf" + k3s_image_gc_dropin="/host/etc/systemd/system/k3s.service.d/98-image-gc.conf" + k3s_agent_image_gc_dropin="/host/etc/systemd/system/k3s-agent.service.d/98-image-gc.conf" if [ ! -f "${journald_dropin}" ]; then mkdir -p "$(dirname "${journald_dropin}")" @@ -32,6 +34,13 @@ data: k3s_changed=1 fi + if [ -f "/host/etc/systemd/system/k3s.service" ] && [ ! 
-f "${k3s_image_gc_dropin}" ]; then + mkdir -p "$(dirname "${k3s_image_gc_dropin}")" + printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=image-gc-high-threshold=70\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-low-threshold=60\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-minimum-available=5Gi\"\n" > "${k3s_image_gc_dropin}" + changed=1 + k3s_changed=1 + fi + if [ -f "/host/etc/systemd/system/k3s-agent.service" ] && [ ! -f "${k3s_agent_dropin}" ]; then mkdir -p "$(dirname "${k3s_agent_dropin}")" printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=container-log-max-size=10Mi\"\nEnvironment=\"K3S_KUBELET_ARG=container-log-max-files=2\"\n" > "${k3s_agent_dropin}" @@ -39,6 +48,13 @@ data: k3s_agent_changed=1 fi + if [ -f "/host/etc/systemd/system/k3s-agent.service" ] && [ ! -f "${k3s_agent_image_gc_dropin}" ]; then + mkdir -p "$(dirname "${k3s_agent_image_gc_dropin}")" + printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=image-gc-high-threshold=70\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-low-threshold=60\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-minimum-available=5Gi\"\n" > "${k3s_agent_image_gc_dropin}" + changed=1 + k3s_agent_changed=1 + fi + if [ "${changed}" -eq 1 ]; then sleep "$(( (RANDOM % 300) + 10 ))" chroot /host /bin/systemctl daemon-reload -- 2.47.2 From e2efeeacba4416b9a36b58a60627b614cb543a68 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 10 Jan 2026 06:57:07 -0300 Subject: [PATCH 638/684] logging: tune rpi4 image gc and rpi5 prune --- services/logging/kustomization.yaml | 6 +++ .../logging/node-image-gc-rpi4-daemonset.yaml | 49 +++++++++++++++++++ .../logging/node-image-gc-rpi4-script.yaml | 44 +++++++++++++++++ .../node-image-gc-rpi4-serviceaccount.yaml | 6 +++ .../node-image-prune-rpi5-daemonset.yaml | 49 +++++++++++++++++++ .../logging/node-image-prune-rpi5-script.yaml | 34 +++++++++++++ .../node-image-prune-rpi5-serviceaccount.yaml | 6 +++ 7 files changed, 194 insertions(+) create mode 100644 services/logging/node-image-gc-rpi4-daemonset.yaml create mode 
100644 services/logging/node-image-gc-rpi4-script.yaml create mode 100644 services/logging/node-image-gc-rpi4-serviceaccount.yaml create mode 100644 services/logging/node-image-prune-rpi5-daemonset.yaml create mode 100644 services/logging/node-image-prune-rpi5-script.yaml create mode 100644 services/logging/node-image-prune-rpi5-serviceaccount.yaml diff --git a/services/logging/kustomization.yaml b/services/logging/kustomization.yaml index 3f3a25e..3c40da2 100644 --- a/services/logging/kustomization.yaml +++ b/services/logging/kustomization.yaml @@ -7,6 +7,10 @@ resources: - opensearch-observability-objects.yaml - node-log-rotation-serviceaccount.yaml - node-log-rotation-script.yaml + - node-image-gc-rpi4-serviceaccount.yaml + - node-image-gc-rpi4-script.yaml + - node-image-prune-rpi5-serviceaccount.yaml + - node-image-prune-rpi5-script.yaml - opensearch-pvc.yaml - opensearch-helmrelease.yaml - opensearch-dashboards-helmrelease.yaml @@ -18,5 +22,7 @@ resources: - opensearch-prune-cronjob.yaml - fluent-bit-helmrelease.yaml - node-log-rotation-daemonset.yaml + - node-image-gc-rpi4-daemonset.yaml + - node-image-prune-rpi5-daemonset.yaml - oauth2-proxy.yaml - ingress.yaml diff --git a/services/logging/node-image-gc-rpi4-daemonset.yaml b/services/logging/node-image-gc-rpi4-daemonset.yaml new file mode 100644 index 0000000..70bace5 --- /dev/null +++ b/services/logging/node-image-gc-rpi4-daemonset.yaml @@ -0,0 +1,49 @@ +# services/logging/node-image-gc-rpi4-daemonset.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: node-image-gc-rpi4 + namespace: logging +spec: + selector: + matchLabels: + app: node-image-gc-rpi4 + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + app: node-image-gc-rpi4 + spec: + serviceAccountName: node-image-gc-rpi4 + tolerations: + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + nodeSelector: + 
hardware: rpi4 + containers: + - name: node-image-gc-rpi4 + image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 + command: ["/usr/bin/env", "bash"] + args: ["/scripts/node_image_gc_rpi4.sh"] + securityContext: + privileged: true + runAsUser: 0 + volumeMounts: + - name: host-root + mountPath: /host + - name: script + mountPath: /scripts + readOnly: true + volumes: + - name: host-root + hostPath: + path: / + - name: script + configMap: + name: node-image-gc-rpi4-script + defaultMode: 0555 diff --git a/services/logging/node-image-gc-rpi4-script.yaml b/services/logging/node-image-gc-rpi4-script.yaml new file mode 100644 index 0000000..44c4c16 --- /dev/null +++ b/services/logging/node-image-gc-rpi4-script.yaml @@ -0,0 +1,44 @@ +# services/logging/node-image-gc-rpi4-script.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: node-image-gc-rpi4-script + namespace: logging +data: + node_image_gc_rpi4.sh: | + #!/usr/bin/env bash + set -euo pipefail + + changed=0 + k3s_changed=0 + k3s_agent_changed=0 + + k3s_dropin="/host/etc/systemd/system/k3s.service.d/98-image-gc.conf" + k3s_agent_dropin="/host/etc/systemd/system/k3s-agent.service.d/98-image-gc.conf" + + if [ -f "/host/etc/systemd/system/k3s.service" ] && [ ! -f "${k3s_dropin}" ]; then + mkdir -p "$(dirname "${k3s_dropin}")" + printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=image-gc-high-threshold=70\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-low-threshold=60\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-minimum-available=5Gi\"\n" > "${k3s_dropin}" + changed=1 + k3s_changed=1 + fi + + if [ -f "/host/etc/systemd/system/k3s-agent.service" ] && [ ! 
-f "${k3s_agent_dropin}" ]; then + mkdir -p "$(dirname "${k3s_agent_dropin}")" + printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=image-gc-high-threshold=70\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-low-threshold=60\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-minimum-available=5Gi\"\n" > "${k3s_agent_dropin}" + changed=1 + k3s_agent_changed=1 + fi + + if [ "${changed}" -eq 1 ]; then + sleep "$(( (RANDOM % 300) + 10 ))" + chroot /host /bin/systemctl daemon-reload + if [ "${k3s_changed}" -eq 1 ]; then + chroot /host /bin/systemctl restart k3s + fi + if [ "${k3s_agent_changed}" -eq 1 ]; then + chroot /host /bin/systemctl restart k3s-agent + fi + fi + + sleep infinity diff --git a/services/logging/node-image-gc-rpi4-serviceaccount.yaml b/services/logging/node-image-gc-rpi4-serviceaccount.yaml new file mode 100644 index 0000000..ec1eb41 --- /dev/null +++ b/services/logging/node-image-gc-rpi4-serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/logging/node-image-gc-rpi4-serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: node-image-gc-rpi4 + namespace: logging diff --git a/services/logging/node-image-prune-rpi5-daemonset.yaml b/services/logging/node-image-prune-rpi5-daemonset.yaml new file mode 100644 index 0000000..63fa471 --- /dev/null +++ b/services/logging/node-image-prune-rpi5-daemonset.yaml @@ -0,0 +1,49 @@ +# services/logging/node-image-prune-rpi5-daemonset.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: node-image-prune-rpi5 + namespace: logging +spec: + selector: + matchLabels: + app: node-image-prune-rpi5 + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + app: node-image-prune-rpi5 + spec: + serviceAccountName: node-image-prune-rpi5 + tolerations: + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + nodeSelector: + hardware: rpi5 + containers: + - name: node-image-prune-rpi5 + image: 
bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 + command: ["/usr/bin/env", "bash"] + args: ["/scripts/node_image_prune_rpi5.sh"] + securityContext: + privileged: true + runAsUser: 0 + volumeMounts: + - name: host-root + mountPath: /host + - name: script + mountPath: /scripts + readOnly: true + volumes: + - name: host-root + hostPath: + path: / + - name: script + configMap: + name: node-image-prune-rpi5-script + defaultMode: 0555 diff --git a/services/logging/node-image-prune-rpi5-script.yaml b/services/logging/node-image-prune-rpi5-script.yaml new file mode 100644 index 0000000..ae79ce3 --- /dev/null +++ b/services/logging/node-image-prune-rpi5-script.yaml @@ -0,0 +1,34 @@ +# services/logging/node-image-prune-rpi5-script.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: node-image-prune-rpi5-script + namespace: logging +data: + node_image_prune_rpi5.sh: | + #!/usr/bin/env bash + set -euo pipefail + + threshold=70 + + sleep "$(( (RANDOM % 300) + 10 ))" + + while true; do + usage=$(df -P /host | awk 'NR==2 {gsub(/%/,"",$5); print $5}') + if [ -z "${usage}" ]; then + sleep 1800 + continue + fi + + if [ "${usage}" -ge "${threshold}" ]; then + chroot /host /bin/sh -c ' + if command -v crictl >/dev/null 2>&1; then + crictl --runtime-endpoint=unix:///run/k3s/containerd/containerd.sock rmi --prune || true + elif [ -x /usr/local/bin/crictl ]; then + /usr/local/bin/crictl --runtime-endpoint=unix:///run/k3s/containerd/containerd.sock rmi --prune || true + fi + ' + fi + + sleep 21600 + done diff --git a/services/logging/node-image-prune-rpi5-serviceaccount.yaml b/services/logging/node-image-prune-rpi5-serviceaccount.yaml new file mode 100644 index 0000000..938c78a --- /dev/null +++ b/services/logging/node-image-prune-rpi5-serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/logging/node-image-prune-rpi5-serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: node-image-prune-rpi5 + namespace: logging -- 2.47.2 
From 99a6b4c054ba04ea5dbe1141ca0241717b52a043 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 10 Jan 2026 20:20:54 -0300 Subject: [PATCH 639/684] maintenance: sweep unused images on arm workers --- .../platform/maintenance/kustomization.yaml | 14 +++ .../maintenance/image-sweeper-cronjob.yaml | 44 +++++++++ services/maintenance/kustomization.yaml | 15 +++ services/maintenance/namespace.yaml | 5 + .../node-image-sweeper-daemonset.yaml | 42 ++++++++ .../node-image-sweeper-script.yaml | 96 +++++++++++++++++++ .../node-image-sweeper-serviceaccount.yaml | 6 ++ .../maintenance/node-nofile-daemonset.yaml | 47 +++++++++ services/maintenance/node-nofile-script.yaml | 38 ++++++++ .../node-nofile-serviceaccount.yaml | 6 ++ services/maintenance/pod-cleaner-cronjob.yaml | 32 +++++++ services/maintenance/pod-cleaner-rbac.yaml | 32 +++++++ services/maintenance/pod-cleaner-script.yaml | 20 ++++ 13 files changed, 397 insertions(+) create mode 100644 clusters/atlas/flux-system/platform/maintenance/kustomization.yaml create mode 100644 services/maintenance/image-sweeper-cronjob.yaml create mode 100644 services/maintenance/kustomization.yaml create mode 100644 services/maintenance/namespace.yaml create mode 100644 services/maintenance/node-image-sweeper-daemonset.yaml create mode 100644 services/maintenance/node-image-sweeper-script.yaml create mode 100644 services/maintenance/node-image-sweeper-serviceaccount.yaml create mode 100644 services/maintenance/node-nofile-daemonset.yaml create mode 100644 services/maintenance/node-nofile-script.yaml create mode 100644 services/maintenance/node-nofile-serviceaccount.yaml create mode 100644 services/maintenance/pod-cleaner-cronjob.yaml create mode 100644 services/maintenance/pod-cleaner-rbac.yaml create mode 100644 services/maintenance/pod-cleaner-script.yaml diff --git a/clusters/atlas/flux-system/platform/maintenance/kustomization.yaml b/clusters/atlas/flux-system/platform/maintenance/kustomization.yaml new file mode 100644 index 
0000000..fc655a4 --- /dev/null +++ b/clusters/atlas/flux-system/platform/maintenance/kustomization.yaml @@ -0,0 +1,14 @@ +# clusters/atlas/flux-system/platform/maintenance/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: maintenance + namespace: flux-system +spec: + interval: 10m + path: ./services/maintenance + prune: true + sourceRef: + kind: GitRepository + name: flux-system + wait: false diff --git a/services/maintenance/image-sweeper-cronjob.yaml b/services/maintenance/image-sweeper-cronjob.yaml new file mode 100644 index 0000000..c1bd132 --- /dev/null +++ b/services/maintenance/image-sweeper-cronjob.yaml @@ -0,0 +1,44 @@ +# services/maintenance/image-sweeper-cronjob.yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: image-sweeper + namespace: maintenance +spec: + schedule: "30 4 * * 0" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 2 + failedJobsHistoryLimit: 2 + jobTemplate: + spec: + template: + spec: + serviceAccountName: node-image-sweeper + restartPolicy: OnFailure + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" + containers: + - name: image-sweeper + image: python:3.12.9-alpine3.20 + command: ["/bin/sh", "/scripts/node_image_sweeper.sh"] + env: + - name: ONE_SHOT + value: "true" + securityContext: + privileged: true + runAsUser: 0 + volumeMounts: + - name: host-root + mountPath: /host + - name: script + mountPath: /scripts + readOnly: true + volumes: + - name: host-root + hostPath: + path: / + - name: script + configMap: + name: node-image-sweeper-script + defaultMode: 0555 diff --git a/services/maintenance/kustomization.yaml b/services/maintenance/kustomization.yaml new file mode 100644 index 0000000..ccb5e7e --- /dev/null +++ b/services/maintenance/kustomization.yaml @@ -0,0 +1,15 @@ +# services/maintenance/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - namespace.yaml + - 
node-nofile-serviceaccount.yaml + - pod-cleaner-rbac.yaml + - node-nofile-script.yaml + - pod-cleaner-script.yaml + - node-nofile-daemonset.yaml + - pod-cleaner-cronjob.yaml + - node-image-sweeper-serviceaccount.yaml + - node-image-sweeper-script.yaml + - node-image-sweeper-daemonset.yaml + - image-sweeper-cronjob.yaml diff --git a/services/maintenance/namespace.yaml b/services/maintenance/namespace.yaml new file mode 100644 index 0000000..dce28b0 --- /dev/null +++ b/services/maintenance/namespace.yaml @@ -0,0 +1,5 @@ +# services/maintenance/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: maintenance diff --git a/services/maintenance/node-image-sweeper-daemonset.yaml b/services/maintenance/node-image-sweeper-daemonset.yaml new file mode 100644 index 0000000..03e46e8 --- /dev/null +++ b/services/maintenance/node-image-sweeper-daemonset.yaml @@ -0,0 +1,42 @@ +# services/maintenance/node-image-sweeper-daemonset.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: node-image-sweeper + namespace: maintenance +spec: + selector: + matchLabels: + app: node-image-sweeper + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + app: node-image-sweeper + spec: + serviceAccountName: node-image-sweeper + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" + containers: + - name: node-image-sweeper + image: python:3.12.9-alpine3.20 + command: ["/bin/sh", "/scripts/node_image_sweeper.sh"] + securityContext: + privileged: true + runAsUser: 0 + volumeMounts: + - name: host-root + mountPath: /host + - name: script + mountPath: /scripts + readOnly: true + volumes: + - name: host-root + hostPath: + path: / + - name: script + configMap: + name: node-image-sweeper-script + defaultMode: 0555 diff --git a/services/maintenance/node-image-sweeper-script.yaml b/services/maintenance/node-image-sweeper-script.yaml new file mode 100644 index 0000000..76553a1 --- /dev/null +++ 
b/services/maintenance/node-image-sweeper-script.yaml @@ -0,0 +1,96 @@ +# services/maintenance/node-image-sweeper-script.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: node-image-sweeper-script + namespace: maintenance +data: + node_image_sweeper.sh: | + #!/bin/sh + set -eu + + ONE_SHOT=${ONE_SHOT:-false} + THRESHOLD_DAYS=14 + + usage=$(df -P /host | awk 'NR==2 {gsub(/%/,"",$5); print $5}') || usage="" + if [ -n "${usage}" ] && [ "${usage}" -ge 70 ]; then + THRESHOLD_DAYS=3 + fi + + cutoff=$(date -d "${THRESHOLD_DAYS} days ago" +%s 2>/dev/null || date -v -"${THRESHOLD_DAYS}"d +%s) + + RUNNING=$(chroot /host /bin/sh -c "crictl ps -a --quiet 2>/dev/null" | tr -s ' ' '\n' | sort -u | tr '\n' ' ') + IMAGES_JSON=$(chroot /host /bin/sh -c "crictl images -o json 2>/dev/null" || echo '{}') + + SKIP="registry.k8s.io/pause k8s.gcr.io/pause rancher/mirrored-pause" + + prune_list=$(printf "%s" "${IMAGES_JSON}" | CUTOFF="${cutoff}" RUNNING="${RUNNING}" SKIP="${SKIP}" python3 - <<'PY' + import json, os, sys, time + + try: + data = json.load(sys.stdin) + except Exception: + print("", end="") + sys.exit(0) + + cutoff = int(os.environ.get("CUTOFF", "0")) + running = set(os.environ.get("RUNNING", "").split()) + skip = os.environ.get("SKIP", "").split() + now = int(time.time()) + prune = [] + + + def is_skip(tags): + if not tags: + return False + for t in tags: + for prefix in skip: + if prefix and t.startswith(prefix): + return True + return False + + + for img in data.get("images", []): + image_id = img.get("id", "") + if not image_id: + continue + if image_id in running: + continue + tags = img.get("repoTags") or [] + if is_skip(tags): + continue + created = img.get("createdAt") or 0 + try: + created = int(str(created)) // 1000000000 + except Exception: + created = 0 + if created and created > now: + created = now + if cutoff and created and created < cutoff: + prune.append(image_id) + + seen = set() + for p in prune: + if p in seen: + continue + seen.add(p) + print(p) + 
PY + ) + + if [ -n "${prune_list}" ]; then + printf "%s" "${prune_list}" | while read -r image_id; do + if [ -n "${image_id}" ]; then + chroot /host /bin/sh -c "crictl rmi --prune ${image_id}" || true + fi + done + fi + + find /host/var/lib/rancher/k3s/agent/images -type f -name "*.tar" -mtime +7 -print -delete 2>/dev/null || true + find /host/var/lib/rancher/k3s/agent/containerd -maxdepth 1 -type f -mtime +7 -print -delete 2>/dev/null || true + + if [ "${ONE_SHOT}" = "true" ]; then + exit 0 + fi + + sleep infinity diff --git a/services/maintenance/node-image-sweeper-serviceaccount.yaml b/services/maintenance/node-image-sweeper-serviceaccount.yaml new file mode 100644 index 0000000..854f041 --- /dev/null +++ b/services/maintenance/node-image-sweeper-serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/maintenance/node-image-sweeper-serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: node-image-sweeper + namespace: maintenance diff --git a/services/maintenance/node-nofile-daemonset.yaml b/services/maintenance/node-nofile-daemonset.yaml new file mode 100644 index 0000000..392753d --- /dev/null +++ b/services/maintenance/node-nofile-daemonset.yaml @@ -0,0 +1,47 @@ +# services/maintenance/node-nofile-daemonset.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: node-nofile + namespace: maintenance +spec: + selector: + matchLabels: + app: node-nofile + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + app: node-nofile + spec: + serviceAccountName: node-nofile + tolerations: + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + containers: + - name: node-nofile + image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 + command: ["/usr/bin/env", "bash"] + args: ["/scripts/node_nofile.sh"] + securityContext: + privileged: true + runAsUser: 0 + volumeMounts: + - name: 
host-root + mountPath: /host + - name: script + mountPath: /scripts + readOnly: true + volumes: + - name: host-root + hostPath: + path: / + - name: script + configMap: + name: node-nofile-script + defaultMode: 0555 diff --git a/services/maintenance/node-nofile-script.yaml b/services/maintenance/node-nofile-script.yaml new file mode 100644 index 0000000..2e2b440 --- /dev/null +++ b/services/maintenance/node-nofile-script.yaml @@ -0,0 +1,38 @@ +# services/maintenance/node-nofile-script.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: node-nofile-script + namespace: maintenance +data: + node_nofile.sh: | + #!/usr/bin/env bash + set -euo pipefail + + limit_line="LimitNOFILE=1048576" + changed=0 + + for unit in k3s k3s-agent; do + unit_file="/host/etc/systemd/system/${unit}.service" + if [ -f "${unit_file}" ]; then + dropin_dir="/host/etc/systemd/system/${unit}.service.d" + dropin_file="${dropin_dir}/99-nofile.conf" + if [ ! -f "${dropin_file}" ] || ! grep -q "${limit_line}" "${dropin_file}"; then + mkdir -p "${dropin_dir}" + printf "[Service]\n%s\n" "${limit_line}" > "${dropin_file}" + changed=1 + fi + fi + done + + if [ "${changed}" -eq 1 ]; then + sleep "$(( (RANDOM % 300) + 10 ))" + chroot /host /bin/systemctl daemon-reload + for unit in k3s k3s-agent; do + if [ -f "/host/etc/systemd/system/${unit}.service" ]; then + chroot /host /bin/systemctl restart "${unit}" + fi + done + fi + + sleep infinity diff --git a/services/maintenance/node-nofile-serviceaccount.yaml b/services/maintenance/node-nofile-serviceaccount.yaml new file mode 100644 index 0000000..1cc0499 --- /dev/null +++ b/services/maintenance/node-nofile-serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/maintenance/node-nofile-serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: node-nofile + namespace: maintenance diff --git a/services/maintenance/pod-cleaner-cronjob.yaml b/services/maintenance/pod-cleaner-cronjob.yaml new file mode 100644 index 0000000..ffca7dd --- /dev/null 
+++ b/services/maintenance/pod-cleaner-cronjob.yaml @@ -0,0 +1,32 @@ +# services/maintenance/pod-cleaner-cronjob.yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: pod-cleaner + namespace: maintenance +spec: + schedule: "0 * * * *" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 3 + jobTemplate: + spec: + backoffLimit: 1 + template: + spec: + serviceAccountName: pod-cleaner + restartPolicy: Never + containers: + - name: cleaner + image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 + command: ["/usr/bin/env", "bash"] + args: ["/scripts/pod_cleaner.sh"] + volumeMounts: + - name: script + mountPath: /scripts + readOnly: true + volumes: + - name: script + configMap: + name: pod-cleaner-script + defaultMode: 0555 diff --git a/services/maintenance/pod-cleaner-rbac.yaml b/services/maintenance/pod-cleaner-rbac.yaml new file mode 100644 index 0000000..26bb035 --- /dev/null +++ b/services/maintenance/pod-cleaner-rbac.yaml @@ -0,0 +1,32 @@ +# services/maintenance/pod-cleaner-rbac.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: pod-cleaner + namespace: maintenance + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: pod-cleaner +rules: + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "delete"] + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: pod-cleaner +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: pod-cleaner +subjects: + - kind: ServiceAccount + name: pod-cleaner + namespace: maintenance diff --git a/services/maintenance/pod-cleaner-script.yaml b/services/maintenance/pod-cleaner-script.yaml new file mode 100644 index 0000000..909a37c --- /dev/null +++ b/services/maintenance/pod-cleaner-script.yaml @@ -0,0 +1,20 @@ +# services/maintenance/pod-cleaner-script.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: pod-cleaner-script + namespace: 
maintenance +data: + pod_cleaner.sh: | + #!/usr/bin/env bash + set -euo pipefail + + for phase in Succeeded Failed; do + kubectl get pods -A --field-selector="status.phase=${phase}" \ + -o jsonpath='{range .items[*]}{.metadata.namespace}{" "}{.metadata.name}{"\n"}{end}' \ + | while read -r namespace name; do + if [ -n "${namespace}" ] && [ -n "${name}" ]; then + kubectl delete pod -n "${namespace}" "${name}" --ignore-not-found --grace-period=0 --wait=false + fi + done + done -- 2.47.2 From b7e5a042651e7bd043adb6f50b30a86362740d15 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 10 Jan 2026 20:26:46 -0300 Subject: [PATCH 640/684] maintenance: fix image sweeper script indentation --- services/maintenance/node-image-sweeper-script.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/maintenance/node-image-sweeper-script.yaml b/services/maintenance/node-image-sweeper-script.yaml index 76553a1..6e3b02c 100644 --- a/services/maintenance/node-image-sweeper-script.yaml +++ b/services/maintenance/node-image-sweeper-script.yaml @@ -17,7 +17,11 @@ data: THRESHOLD_DAYS=3 fi - cutoff=$(date -d "${THRESHOLD_DAYS} days ago" +%s 2>/dev/null || date -v -"${THRESHOLD_DAYS}"d +%s) + cutoff=$(python3 - <<'PY' + import time, os + print(int(time.time()) - int(os.environ.get("THRESHOLD_DAYS", "14")) * 86400) + PY + ) RUNNING=$(chroot /host /bin/sh -c "crictl ps -a --quiet 2>/dev/null" | tr -s ' ' '\n' | sort -u | tr '\n' ' ') IMAGES_JSON=$(chroot /host /bin/sh -c "crictl images -o json 2>/dev/null" || echo '{}') -- 2.47.2 From 1517dec30b431978153a17e49bb74a1d7186a795 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 10 Jan 2026 23:57:26 -0300 Subject: [PATCH 641/684] maintenance: run image sweeper on all nodes --- services/maintenance/image-sweeper-cronjob.yaml | 10 ++++++++-- services/maintenance/node-image-sweeper-daemonset.yaml | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git 
a/services/maintenance/image-sweeper-cronjob.yaml b/services/maintenance/image-sweeper-cronjob.yaml index c1bd132..08127bc 100644 --- a/services/maintenance/image-sweeper-cronjob.yaml +++ b/services/maintenance/image-sweeper-cronjob.yaml @@ -16,8 +16,14 @@ spec: serviceAccountName: node-image-sweeper restartPolicy: OnFailure nodeSelector: - kubernetes.io/arch: arm64 - node-role.kubernetes.io/worker: "true" + kubernetes.io/os: linux + tolerations: + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule containers: - name: image-sweeper image: python:3.12.9-alpine3.20 diff --git a/services/maintenance/node-image-sweeper-daemonset.yaml b/services/maintenance/node-image-sweeper-daemonset.yaml index 03e46e8..c3cb24d 100644 --- a/services/maintenance/node-image-sweeper-daemonset.yaml +++ b/services/maintenance/node-image-sweeper-daemonset.yaml @@ -16,9 +16,15 @@ spec: app: node-image-sweeper spec: serviceAccountName: node-image-sweeper + tolerations: + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule nodeSelector: - kubernetes.io/arch: arm64 - node-role.kubernetes.io/worker: "true" + kubernetes.io/os: linux containers: - name: node-image-sweeper image: python:3.12.9-alpine3.20 -- 2.47.2 From b53c7d4a1c7cdad6edd4d9824be201c04cb2257c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 11 Jan 2026 00:29:20 -0300 Subject: [PATCH 642/684] monitoring: wire grafana smtp sync and alerting provisioning --- .../monitoring/grafana-alerting-config.yaml | 26 ++++++++++ .../monitoring/grafana-smtp-sync-cronjob.yaml | 44 +++++++++++++++++ .../monitoring/grafana-smtp-sync-rbac.yaml | 49 +++++++++++++++++++ .../monitoring/grafana-smtp-sync-script.yaml | 39 +++++++++++++++ .../grafana-smtp-sync-serviceaccount.yaml | 6 +++ services/monitoring/helmrelease.yaml | 18 
+++++++ services/monitoring/kustomization.yaml | 5 ++ 7 files changed, 187 insertions(+) create mode 100644 services/monitoring/grafana-alerting-config.yaml create mode 100644 services/monitoring/grafana-smtp-sync-cronjob.yaml create mode 100644 services/monitoring/grafana-smtp-sync-rbac.yaml create mode 100644 services/monitoring/grafana-smtp-sync-script.yaml create mode 100644 services/monitoring/grafana-smtp-sync-serviceaccount.yaml diff --git a/services/monitoring/grafana-alerting-config.yaml b/services/monitoring/grafana-alerting-config.yaml new file mode 100644 index 0000000..b805a25 --- /dev/null +++ b/services/monitoring/grafana-alerting-config.yaml @@ -0,0 +1,26 @@ +# services/monitoring/grafana-alerting-config.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-alerting + namespace: monitoring + labels: + grafana_alerting: "1" +data: + alerting.yaml: | + apiVersion: 1 + contactPoints: + - orgId: 1 + name: email-admins + receivers: + - uid: email-admins + type: email + settings: + addresses: ${GRAFANA_ALERT_EMAILS} + singleEmail: true + policies: + - orgId: 1 + receiver: email-admins + group_by: + - alertname + continue: true diff --git a/services/monitoring/grafana-smtp-sync-cronjob.yaml b/services/monitoring/grafana-smtp-sync-cronjob.yaml new file mode 100644 index 0000000..8922942 --- /dev/null +++ b/services/monitoring/grafana-smtp-sync-cronjob.yaml @@ -0,0 +1,44 @@ +# services/monitoring/grafana-smtp-sync-cronjob.yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: grafana-smtp-sync + namespace: monitoring +spec: + schedule: "15 3 * * *" + concurrencyPolicy: Forbid + jobTemplate: + spec: + template: + spec: + serviceAccountName: grafana-smtp-sync + restartPolicy: OnFailure + containers: + - name: sync + image: bitnami/kubectl:1.31 + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + if ! 
command -v jq >/dev/null 2>&1; then + apt-get update >/dev/null && apt-get install -y jq >/dev/null + fi + exec /scripts/sync.sh + env: + - name: SOURCE_NS + value: mailu-mailserver + - name: SOURCE_SECRET + value: mailu-postmark-relay + - name: TARGET_NS + value: monitoring + - name: TARGET_SECRET + value: grafana-smtp + volumeMounts: + - name: script + mountPath: /scripts + readOnly: true + volumes: + - name: script + configMap: + name: grafana-smtp-sync-script + defaultMode: 0555 diff --git a/services/monitoring/grafana-smtp-sync-rbac.yaml b/services/monitoring/grafana-smtp-sync-rbac.yaml new file mode 100644 index 0000000..aa5388e --- /dev/null +++ b/services/monitoring/grafana-smtp-sync-rbac.yaml @@ -0,0 +1,49 @@ +# services/monitoring/grafana-smtp-sync-rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: grafana-smtp-sync +rules: + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get"] + resourceNames: + - mailu-postmark-relay +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: grafana-smtp-sync +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-smtp-sync +subjects: + - kind: ServiceAccount + name: grafana-smtp-sync + namespace: monitoring + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: grafana-smtp-sync + namespace: monitoring +rules: + - apiGroups: [""] + resources: ["secrets"] + verbs: ["create", "update", "patch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: grafana-smtp-sync + namespace: monitoring +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: grafana-smtp-sync +subjects: + - kind: ServiceAccount + name: grafana-smtp-sync + namespace: monitoring diff --git a/services/monitoring/grafana-smtp-sync-script.yaml b/services/monitoring/grafana-smtp-sync-script.yaml new file mode 100644 index 0000000..cccfd48 --- /dev/null +++ 
b/services/monitoring/grafana-smtp-sync-script.yaml @@ -0,0 +1,39 @@ +# services/monitoring/grafana-smtp-sync-script.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-smtp-sync-script + namespace: monitoring +data: + sync.sh: | + #!/bin/bash + set -euo pipefail + + SOURCE_NS=${SOURCE_NS:-mailu-mailserver} + SOURCE_SECRET=${SOURCE_SECRET:-mailu-postmark-relay} + TARGET_NS=${TARGET_NS:-monitoring} + TARGET_SECRET=${TARGET_SECRET:-grafana-smtp} + + tmp=$(mktemp) + cleanup() { rm -f "$tmp"; } + trap cleanup EXIT + + kubectl -n "$SOURCE_NS" get secret "$SOURCE_SECRET" -o json > "$tmp" + + user=$(jq -r '.data["relay-username"]' "$tmp") + pass=$(jq -r '.data["relay-password"]' "$tmp") + + if [ -z "$user" ] || [ -z "$pass" ] || [ "$user" = "null" ] || [ "$pass" = "null" ]; then + echo "missing credentials from $SOURCE_NS/$SOURCE_SECRET" >&2 + exit 1 + fi + + cat < Date: Sun, 11 Jan 2026 00:30:45 -0300 Subject: [PATCH 643/684] monitoring: fix smtp sync image reference --- services/monitoring/grafana-smtp-sync-cronjob.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/monitoring/grafana-smtp-sync-cronjob.yaml b/services/monitoring/grafana-smtp-sync-cronjob.yaml index 8922942..3b92d4c 100644 --- a/services/monitoring/grafana-smtp-sync-cronjob.yaml +++ b/services/monitoring/grafana-smtp-sync-cronjob.yaml @@ -15,7 +15,7 @@ spec: restartPolicy: OnFailure containers: - name: sync - image: bitnami/kubectl:1.31 + image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 command: ["/bin/sh", "-c"] args: - | -- 2.47.2 From 95c3e2de37f96b4534931378d18b8010894afda3 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 11 Jan 2026 00:32:41 -0300 Subject: [PATCH 644/684] monitoring: allow smtp sync to get target secret --- services/monitoring/grafana-smtp-sync-rbac.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/monitoring/grafana-smtp-sync-rbac.yaml
b/services/monitoring/grafana-smtp-sync-rbac.yaml index aa5388e..532d622 100644 --- a/services/monitoring/grafana-smtp-sync-rbac.yaml +++ b/services/monitoring/grafana-smtp-sync-rbac.yaml @@ -32,7 +32,7 @@ metadata: rules: - apiGroups: [""] resources: ["secrets"] - verbs: ["create", "update", "patch"] + verbs: ["get", "create", "update", "patch"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding -- 2.47.2 From 1ffcb28be5507e807aa78e28e6733fb994d159e8 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 11 Jan 2026 01:40:07 -0300 Subject: [PATCH 645/684] monitoring: fix grafana alerting root policy --- services/monitoring/grafana-alerting-config.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/services/monitoring/grafana-alerting-config.yaml b/services/monitoring/grafana-alerting-config.yaml index b805a25..8ebc8d9 100644 --- a/services/monitoring/grafana-alerting-config.yaml +++ b/services/monitoring/grafana-alerting-config.yaml @@ -23,4 +23,3 @@ data: receiver: email-admins group_by: - alertname - continue: true -- 2.47.2 From f533443c4201500c9e630255723b413dbabace8b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 11 Jan 2026 01:57:20 -0300 Subject: [PATCH 646/684] Fix Jetson device plugin args --- .../profiles/components/device-plugin-jetson/daemonset.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml b/infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml index 0fa8376..2e29134 100644 --- a/infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml +++ b/infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml @@ -30,7 +30,7 @@ spec: imagePullPolicy: IfNotPresent args: - "--fail-on-init-error=false" - - "--device-list-strategy=envvar,cdi" + - "--device-list-strategy=envvar" - "--config-file=/config/config.yaml" securityContext: privileged: true -- 2.47.2 From 
734a537a28c17ee3dc104740859e4ac70472a57a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 11 Jan 2026 02:02:47 -0300 Subject: [PATCH 647/684] monitoring: add alert rules and include titan-20/21 in dashboards --- scripts/dashboards_render_atlas.py | 2 + .../monitoring/dashboards/atlas-nodes.json | 4 +- .../monitoring/dashboards/atlas-overview.json | 14 +- .../monitoring/dashboards/atlas-pods.json | 2 +- .../monitoring/grafana-alerting-config.yaml | 206 ++++++++++++++++++ .../monitoring/grafana-dashboard-nodes.yaml | 4 +- .../grafana-dashboard-overview.yaml | 14 +- .../monitoring/grafana-dashboard-pods.yaml | 2 +- 8 files changed, 228 insertions(+), 20 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 4476773..1dd47a1 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -64,6 +64,8 @@ WORKER_NODES = [ "titan-09", "titan-10", "titan-11", + "titan-20", + "titan-21", "titan-12", "titan-13", "titan-14", diff --git a/services/monitoring/dashboards/atlas-nodes.json b/services/monitoring/dashboards/atlas-nodes.json index 9be3adb..a46fc56 100644 --- a/services/monitoring/dashboards/atlas-nodes.json +++ b/services/monitoring/dashboards/atlas-nodes.json @@ -20,7 +20,7 @@ }, "targets": [ { - "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})", + "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})", "refId": "A" } ], @@ -46,7 +46,7 @@ "unit": "none", "custom": { "displayMode": "auto", - "valueSuffix": "/18" + "valueSuffix": "/20" } }, "overrides": [] diff --git 
a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 0382199..df84d1c 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -449,14 +449,14 @@ }, "targets": [ { - "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})", + "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})", "refId": "A" } ], "fieldConfig": { "defaults": { "min": 0, - "max": 18, + "max": 20, "thresholds": { "mode": "absolute", "steps": [ @@ -466,15 +466,15 @@ }, { "color": "orange", - "value": 16 + "value": 18 }, { "color": "yellow", - "value": 17 + "value": 19 }, { "color": "green", - "value": 18 + "value": 20 } ] } @@ -1617,7 +1617,7 @@ }, "targets": [ { - "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() 
label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } @@ -1664,7 +1664,7 @@ }, "targets": [ { - "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } diff --git a/services/monitoring/dashboards/atlas-pods.json b/services/monitoring/dashboards/atlas-pods.json index 0d46f90..1c5c311 100644 --- a/services/monitoring/dashboards/atlas-pods.json +++ b/services/monitoring/dashboards/atlas-pods.json @@ -520,7 +520,7 @@ }, "targets": [ { - "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum 
by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.023)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) 
group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.023)))))", + "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum 
by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.024) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.025)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) 
(kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.024) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.025)))))", "refId": "A", "instant": true, "format": "table" diff --git a/services/monitoring/grafana-alerting-config.yaml b/services/monitoring/grafana-alerting-config.yaml index 8ebc8d9..ea30d33 100644 --- a/services/monitoring/grafana-alerting-config.yaml +++ 
b/services/monitoring/grafana-alerting-config.yaml @@ -23,3 +23,209 @@ data: receiver: email-admins group_by: - alertname + rules.yaml: | + apiVersion: 1 + groups: + - orgId: 1 + name: atlas-disk + folder: Alerts + interval: 1m + rules: + - uid: disk-pressure-root + title: "Node rootfs high (>80%)" + condition: C + data: + - refId: A + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: atlas-vm + model: + intervalMs: 60000 + maxDataPoints: 43200 + expr: avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint="/",fstype!~"tmpfs|overlay"} / node_filesystem_size_bytes{mountpoint="/",fstype!~"tmpfs|overlay"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=""}, "node", "$1", "nodename", "(.*)")) + legendFormat: '{{node}}' + datasource: + type: prometheus + uid: atlas-vm + - refId: B + datasourceUid: __expr__ + model: + expression: A + intervalMs: 60000 + maxDataPoints: 43200 + reducer: last + type: reduce + - refId: C + datasourceUid: __expr__ + model: + expression: B + intervalMs: 60000 + maxDataPoints: 43200 + type: threshold + conditions: + - evaluator: + params: [80] + type: gt + operator: + type: and + reducer: + type: last + type: query + noDataState: NoData + execErrState: Error + annotations: + summary: "{{ $labels.node }} rootfs >80% for 10m" + labels: + severity: warning + - orgId: 1 + name: maintenance + folder: Alerts + interval: 1m + rules: + - uid: maint-sweeper + title: "Maintenance sweeper not ready" + condition: C + data: + - refId: A + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: atlas-vm + model: + intervalMs: 60000 + maxDataPoints: 43200 + expr: kube_daemonset_status_number_ready{namespace="maintenance",daemonset="node-image-sweeper"} / on(namespace,daemonset) kube_daemonset_status_desired_number_scheduled{namespace="maintenance",daemonset="node-image-sweeper"} + legendFormat: '{{daemonset}}' + datasource: + type: prometheus + uid: atlas-vm + - refId: B + datasourceUid: 
__expr__ + model: + expression: A + intervalMs: 60000 + maxDataPoints: 43200 + reducer: last + type: reduce + - refId: C + datasourceUid: __expr__ + model: + expression: B + intervalMs: 60000 + maxDataPoints: 43200 + type: threshold + conditions: + - evaluator: + params: [1] + type: lt + operator: + type: and + reducer: + type: last + type: query + noDataState: NoData + execErrState: Error + annotations: + summary: "node-image-sweeper not fully ready" + labels: + severity: warning + - orgId: 1 + name: postmark + folder: Alerts + interval: 1m + rules: + - uid: postmark-bounce + title: "Postmark bounce rate high" + condition: C + data: + - refId: A + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: atlas-vm + model: + intervalMs: 60000 + maxDataPoints: 43200 + expr: POSTMARK_OUTBOUND_BOUNCE_RATE{window="1d"} + legendFormat: bounce 1d + datasource: + type: prometheus + uid: atlas-vm + - refId: B + datasourceUid: __expr__ + model: + expression: A + intervalMs: 60000 + maxDataPoints: 43200 + reducer: last + type: reduce + - refId: C + datasourceUid: __expr__ + model: + expression: B + intervalMs: 60000 + maxDataPoints: 43200 + type: threshold + conditions: + - evaluator: + params: [5] + type: gt + operator: + type: and + reducer: + type: last + type: query + noDataState: NoData + execErrState: Error + annotations: + summary: "Postmark 1d bounce rate >5%" + labels: + severity: warning + - uid: postmark-api-down + title: "Postmark exporter down" + condition: C + data: + - refId: A + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: atlas-vm + model: + intervalMs: 60000 + maxDataPoints: 43200 + expr: POSTMARK_API_UP + legendFormat: api up + datasource: + type: prometheus + uid: atlas-vm + - refId: B + datasourceUid: __expr__ + model: + expression: A + intervalMs: 60000 + maxDataPoints: 43200 + reducer: last + type: reduce + - refId: C + datasourceUid: __expr__ + model: + expression: B + intervalMs: 60000 + maxDataPoints: 43200 + type: threshold + conditions: + 
- evaluator: + params: [1] + type: lt + operator: + type: and + reducer: + type: last + type: query + noDataState: NoData + execErrState: Error + annotations: + summary: "Postmark exporter reports API down" + labels: + severity: critical diff --git a/services/monitoring/grafana-dashboard-nodes.yaml b/services/monitoring/grafana-dashboard-nodes.yaml index 1b87c60..7a8a89a 100644 --- a/services/monitoring/grafana-dashboard-nodes.yaml +++ b/services/monitoring/grafana-dashboard-nodes.yaml @@ -29,7 +29,7 @@ data: }, "targets": [ { - "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})", + "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})", "refId": "A" } ], @@ -55,7 +55,7 @@ data: "unit": "none", "custom": { "displayMode": "auto", - "valueSuffix": "/18" + "valueSuffix": "/20" } }, "overrides": [] diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index af69a39..ac4db04 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -458,14 +458,14 @@ data: }, "targets": [ { - "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})", + "expr": 
"sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})", "refId": "A" } ], "fieldConfig": { "defaults": { "min": 0, - "max": 18, + "max": 20, "thresholds": { "mode": "absolute", "steps": [ @@ -475,15 +475,15 @@ data: }, { "color": "orange", - "value": 16 + "value": 18 }, { "color": "yellow", - "value": 17 + "value": 19 }, { "color": "green", - "value": 18 + "value": 20 } ] } @@ -1626,7 +1626,7 @@ data: }, "targets": [ { - "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } @@ -1673,7 +1673,7 @@ data: }, "targets": [ { - "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", 
\"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } diff --git a/services/monitoring/grafana-dashboard-pods.yaml b/services/monitoring/grafana-dashboard-pods.yaml index dda1a41..5020d8a 100644 --- a/services/monitoring/grafana-dashboard-pods.yaml +++ b/services/monitoring/grafana-dashboard-pods.yaml @@ -529,7 +529,7 @@ data: }, "targets": [ { - "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or 
(sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.023)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) 
(kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.023)))))", + "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) 
(kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.024) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.025)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 
0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.024) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.025)))))", "refId": "A", "instant": true, "format": "table" -- 2.47.2 From 33b89c7dc20181bfbe7b1375888b169143e7fce8 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 11 Jan 2026 02:20:47 -0300 Subject: [PATCH 648/684] monitoring: remove titan-16 and add titan-20/21 to worker dashboards --- scripts/dashboards_render_atlas.py | 1 - services/monitoring/dashboards/atlas-nodes.json | 4 ++-- services/monitoring/dashboards/atlas-overview.json | 14 +++++++------- services/monitoring/dashboards/atlas-pods.json | 2 +- services/monitoring/grafana-dashboard-nodes.yaml | 4 ++-- .../monitoring/grafana-dashboard-overview.yaml | 14 +++++++------- services/monitoring/grafana-dashboard-pods.yaml | 2 +- 7 files changed, 20 insertions(+), 21 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 1dd47a1..8c5563b 100644 --- 
a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -70,7 +70,6 @@ WORKER_NODES = [ "titan-13", "titan-14", "titan-15", - "titan-16", "titan-17", "titan-18", "titan-19", diff --git a/services/monitoring/dashboards/atlas-nodes.json b/services/monitoring/dashboards/atlas-nodes.json index a46fc56..256bc18 100644 --- a/services/monitoring/dashboards/atlas-nodes.json +++ b/services/monitoring/dashboards/atlas-nodes.json @@ -20,7 +20,7 @@ }, "targets": [ { - "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})", + "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"})", "refId": "A" } ], @@ -46,7 +46,7 @@ "unit": "none", "custom": { "displayMode": "auto", - "valueSuffix": "/20" + "valueSuffix": "/19" } }, "overrides": [] diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index df84d1c..33bd27c 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -449,14 +449,14 @@ }, "targets": [ { - "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})", + "expr": 
"sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"})", "refId": "A" } ], "fieldConfig": { "defaults": { "min": 0, - "max": 20, + "max": 19, "thresholds": { "mode": "absolute", "steps": [ @@ -466,15 +466,15 @@ }, { "color": "orange", - "value": 18 + "value": 17 }, { "color": "yellow", - "value": 19 + "value": 18 }, { "color": "green", - "value": 20 + "value": 19 } ] } @@ -1617,7 +1617,7 @@ }, "targets": [ { - "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } @@ -1664,7 +1664,7 @@ }, "targets": [ { - "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", 
\"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } diff --git a/services/monitoring/dashboards/atlas-pods.json b/services/monitoring/dashboards/atlas-pods.json index 1c5c311..68429ec 100644 --- a/services/monitoring/dashboards/atlas-pods.json +++ b/services/monitoring/dashboards/atlas-pods.json @@ -520,7 +520,7 @@ }, "targets": [ { - "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) 
(kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.024) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.025)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 
0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.024) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.025)))))", + "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by 
(node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.024)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) 
(kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.024)))))", "refId": "A", "instant": true, "format": "table" diff --git a/services/monitoring/grafana-dashboard-nodes.yaml b/services/monitoring/grafana-dashboard-nodes.yaml index 7a8a89a..368f80f 100644 --- a/services/monitoring/grafana-dashboard-nodes.yaml +++ b/services/monitoring/grafana-dashboard-nodes.yaml @@ -29,7 +29,7 @@ data: }, "targets": [ { - "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})", + "expr": 
"sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"})", "refId": "A" } ], @@ -55,7 +55,7 @@ data: "unit": "none", "custom": { "displayMode": "auto", - "valueSuffix": "/20" + "valueSuffix": "/19" } }, "overrides": [] diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index ac4db04..bad39c5 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -458,14 +458,14 @@ data: }, "targets": [ { - "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})", + "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"})", "refId": "A" } ], "fieldConfig": { "defaults": { "min": 0, - "max": 20, + "max": 19, "thresholds": { "mode": "absolute", "steps": [ @@ -475,15 +475,15 @@ data: }, { "color": "orange", - "value": 18 + "value": 17 }, { "color": "yellow", - "value": 19 + "value": 18 }, { "color": "green", - "value": 20 + "value": 19 } ] } @@ -1626,7 +1626,7 @@ data: }, "targets": [ { - "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() 
label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } @@ -1673,7 +1673,7 @@ data: }, "targets": [ { - "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", 
"refId": "A", "legendFormat": "{{node}}" } diff --git a/services/monitoring/grafana-dashboard-pods.yaml b/services/monitoring/grafana-dashboard-pods.yaml index 5020d8a..0f43a05 100644 --- a/services/monitoring/grafana-dashboard-pods.yaml +++ b/services/monitoring/grafana-dashboard-pods.yaml @@ -529,7 +529,7 @@ data: }, "targets": [ { - "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) 
(kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.024) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.025)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 
0.019) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.024) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.025)))))", + "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) 
(kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.024)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) 
(kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.024)))))", "refId": "A", "instant": true, "format": "table" -- 2.47.2 From 54358df56956be8864076be16f5b622adfbfe138 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 11 Jan 2026 02:28:39 -0300 Subject: [PATCH 649/684] monitoring: maintenance panels, extra alerts, update overview --- scripts/dashboards_render_atlas.py | 44 ++++ .../monitoring/dashboards/atlas-overview.json | 196 ++++++++++++++++++ .../monitoring/grafana-alerting-config.yaml | 94 +++++++++ .../grafana-dashboard-overview.yaml | 196 ++++++++++++++++++ 4 files changed, 530 insertions(+) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 8c5563b..5474298 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -1232,6 +1232,50 @@ def build_overview(): links=link_to("atlas-storage"), ) ) + panels.append( + stat_panel( + 30, + "Maintenance Sweepers Ready", + 'kube_daemonset_status_number_ready{namespace="maintenance",daemonset="node-image-sweeper"} / on(namespace,daemonset) kube_daemonset_status_desired_number_scheduled{namespace="maintenance",daemonset="node-image-sweeper"} * 100', + {"h": 6, "w": 8, "x": 0, "y": 80}, + unit="percent", + thresholds=PERCENT_THRESHOLDS, + ) + ) + panels.append( + stat_panel( + 31, + "Maintenance Cron Freshness (s)", + 'time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace="maintenance",cronjob=~"image-sweeper|grafana-smtp-sync"})', + {"h": 6, "w": 8, "x": 8, "y": 80}, + unit="s", + thresholds={ + "mode": "absolute", + "steps": [ + {"color": "green", "value": None}, + {"color": "yellow", "value": 3600}, 
+ {"color": "red", "value": 10800}, + ], + }, + ) + ) + panels.append( + stat_panel( + 32, + "Postmark Bounce Rate (1d)", + 'POSTMARK_OUTBOUND_BOUNCE_RATE{window="1d"}', + {"h": 6, "w": 8, "x": 16, "y": 80}, + unit="percent", + thresholds={ + "mode": "absolute", + "steps": [ + {"color": "green", "value": None}, + {"color": "yellow", "value": 2}, + {"color": "red", "value": 5}, + ], + }, + ) + ) return { "uid": "atlas-overview", diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 33bd27c..44403fb 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -2160,6 +2160,202 @@ } } ] + }, + { + "id": 30, + "type": "stat", + "title": "Maintenance Sweepers Ready", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 80 + }, + "targets": [ + { + "expr": "kube_daemonset_status_number_ready{namespace=\"maintenance\",daemonset=\"node-image-sweeper\"} / on(namespace,daemonset) kube_daemonset_status_desired_number_scheduled{namespace=\"maintenance\",daemonset=\"node-image-sweeper\"} * 100", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "orange", + "value": 75 + }, + { + "color": "red", + "value": 91.5 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 31, + "type": "stat", + "title": "Maintenance Cron Freshness (s)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + 
}, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 80 + }, + "targets": [ + { + "expr": "time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace=\"maintenance\",cronjob=~\"image-sweeper|grafana-smtp-sync\"})", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 3600 + }, + { + "color": "red", + "value": 10800 + } + ] + }, + "unit": "s", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 32, + "type": "stat", + "title": "Postmark Bounce Rate (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 80 + }, + "targets": [ + { + "expr": "POSTMARK_OUTBOUND_BOUNCE_RATE{window=\"1d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 2 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } } ], "schemaVersion": 39, diff --git a/services/monitoring/grafana-alerting-config.yaml b/services/monitoring/grafana-alerting-config.yaml index ea30d33..474f490 100644 --- a/services/monitoring/grafana-alerting-config.yaml +++ 
b/services/monitoring/grafana-alerting-config.yaml @@ -78,6 +78,53 @@ data: summary: "{{ $labels.node }} rootfs >80% for 10m" labels: severity: warning + - uid: disk-growth-1h + title: "Node rootfs growing fast (>1Gi in 1h)" + condition: C + data: + - refId: A + relativeTimeRange: + from: 3600 + to: 0 + datasourceUid: atlas-vm + model: + intervalMs: 60000 + maxDataPoints: 43200 + expr: increase((node_filesystem_size_bytes{mountpoint="/",fstype!~"tmpfs|overlay"} - node_filesystem_free_bytes{mountpoint="/",fstype!~"tmpfs|overlay"})[1h]) / 1024 / 1024 / 1024 + legendFormat: '{{instance}}' + datasource: + type: prometheus + uid: atlas-vm + - refId: B + datasourceUid: __expr__ + model: + expression: A + intervalMs: 60000 + maxDataPoints: 43200 + reducer: last + type: reduce + - refId: C + datasourceUid: __expr__ + model: + expression: B + intervalMs: 60000 + maxDataPoints: 43200 + type: threshold + conditions: + - evaluator: + params: [1] + type: gt + operator: + type: and + reducer: + type: last + type: query + noDataState: NoData + execErrState: Error + annotations: + summary: "{{ $labels.instance }} rootfs grew >1Gi in the last hour" + labels: + severity: warning - orgId: 1 name: maintenance folder: Alerts @@ -130,6 +177,53 @@ data: summary: "node-image-sweeper not fully ready" labels: severity: warning + - uid: maint-cron-stale + title: "Maintenance CronJobs stale (>3h since success)" + condition: C + data: + - refId: A + relativeTimeRange: + from: 0 + to: 0 + datasourceUid: atlas-vm + model: + expr: time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace="maintenance",cronjob=~"image-sweeper|grafana-smtp-sync"}) + intervalMs: 60000 + maxDataPoints: 43200 + legendFormat: '{{cronjob}}' + datasource: + type: prometheus + uid: atlas-vm + - refId: B + datasourceUid: __expr__ + model: + expression: A + intervalMs: 60000 + maxDataPoints: 43200 + reducer: last + type: reduce + - refId: C + datasourceUid: __expr__ + model: + expression: B + intervalMs: 
60000 + maxDataPoints: 43200 + type: threshold + conditions: + - evaluator: + params: [10800] + type: gt + operator: + type: and + reducer: + type: last + type: query + noDataState: NoData + execErrState: Error + annotations: + summary: "Maintenance cronjob stale >3h since last success" + labels: + severity: warning - orgId: 1 name: postmark folder: Alerts diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index bad39c5..062310c 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -2169,6 +2169,202 @@ data: } } ] + }, + { + "id": 30, + "type": "stat", + "title": "Maintenance Sweepers Ready", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 80 + }, + "targets": [ + { + "expr": "kube_daemonset_status_number_ready{namespace=\"maintenance\",daemonset=\"node-image-sweeper\"} / on(namespace,daemonset) kube_daemonset_status_desired_number_scheduled{namespace=\"maintenance\",daemonset=\"node-image-sweeper\"} * 100", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "orange", + "value": 75 + }, + { + "color": "red", + "value": 91.5 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 31, + "type": "stat", + "title": "Maintenance Cron Freshness (s)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 80 + }, 
+ "targets": [ + { + "expr": "time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace=\"maintenance\",cronjob=~\"image-sweeper|grafana-smtp-sync\"})", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 3600 + }, + { + "color": "red", + "value": 10800 + } + ] + }, + "unit": "s", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 32, + "type": "stat", + "title": "Postmark Bounce Rate (1d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 80 + }, + "targets": [ + { + "expr": "POSTMARK_OUTBOUND_BOUNCE_RATE{window=\"1d\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 2 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } } ], "schemaVersion": 39, -- 2.47.2 From cac8506929c41af942822d4ba972cfa79b6baaa9 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 11 Jan 2026 02:32:20 -0300 Subject: [PATCH 650/684] knowledge: add metis recovery notes --- knowledge/metis.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 
100644 knowledge/metis.md diff --git a/knowledge/metis.md b/knowledge/metis.md new file mode 100644 index 0000000..5b0d06b --- /dev/null +++ b/knowledge/metis.md @@ -0,0 +1,26 @@ +# Metis (node recovery) + +## Node classes (current map) +- rpi5 Ubuntu workers: titan-04,05,06,07,08,09,10,11,20,21 (Ubuntu 24.04.3, k3s agent) +- rpi5 control-plane: titan-0a/0b/0c (Ubuntu 24.04.1, k3s server, control-plane taint) +- rpi4 Armbian longhorn: titan-13/15/17/19 (Armbian 6.6.x, k3s agent, longhorn disks) +- rpi4 Armbian standard: titan-12/14/18 (Armbian 6.6.x, k3s agent) +- amd64 agents: titan-22/24 (Debian 13, k3s agent) +- External/non-cluster: tethys, titan-db, titan-jh, oceanus/titan-23, future titan-20/21 (when added), plus any newcomers. + +## Longhorn disk UUIDs (critical nodes) +- titan-13: /mnt/astreae UUID=6031fa8b-f28c-45c3-b7bc-6133300e07c6 (ext4); /mnt/asteria UUID=cbd4989d-62b5-4741-8b2a-28fdae259cae (ext4) +- titan-15: /mnt/astreae UUID=f3362f14-5822-449f-944b-ac570b5cd615 (ext4); /mnt/asteria UUID=9c5316e6-f847-4884-b502-11f2d0d15d6f (ext4) +- titan-17: /mnt/astreae UUID=1fecdade-08b0-49cb-9ae3-be6c188b0a96 (ext4); /mnt/asteria UUID=2fe9f613-d372-47ca-b84f-82084e4edda0 (ext4) +- titan-19: /mnt/astreae UUID=4890abb9-dda2-4f4f-9c0f-081ee82849cf (ext4); /mnt/asteria UUID=2b4ea28d-b0e6-4fa3-841b-cd7067ae9153 (ext4) + +## Metis repo (~/Development/metis) +- CLI skeleton in Go (`cmd/metis`), inventory loader (`pkg/inventory`), plan builder (`pkg/plan`). +- `inventory.example.yaml` shows expected schema (classes + per-node overlay, Longhorn disks, labels, taints). +- `AGENTS.md` in repo is untracked and holds raw notes. + +## Next implementation steps +- Add per-class golden image refs and checksums (Harbor or file://) when ready. +- Implement burn execution: download with checksum, write via dd/etcher-equivalent, mount boot/root to inject hostname/IP/k3s tokens/labels/taints, journald/GC drop-ins, and Longhorn fstab entries. 
Add Windows writer (diskpart + wmic) and Linux writer (dd + sgdisk) paths. +- Add Keycloak/SSH bootstrap: ensure ssh user, authorized keys, and k3s token/URL injection for agents; control-plane restore path with etcd snapshot selection. +- Add per-host inventory entries for tethys, titan-db, titan-jh, oceanus/titan-23, future 20/21 once audited. -- 2.47.2 From c13b161171d78c7d3f4f3cf1dc95accdf210d32b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 11 Jan 2026 08:59:51 -0300 Subject: [PATCH 651/684] knowledge: relocate metis doc; monitoring: add cpu high alert --- .gitignore | 2 + knowledge/software/metis.md | 26 ++++++++++ .../monitoring/grafana-alerting-config.yaml | 52 +++++++++++++++++++ 3 files changed, 80 insertions(+) create mode 100644 knowledge/software/metis.md diff --git a/.gitignore b/.gitignore index 2fcd3f0..8e09aa9 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ !services/comms/knowledge/**/*.md __pycache__/ *.py[cod] +.pytest_cache +.venv diff --git a/knowledge/software/metis.md b/knowledge/software/metis.md new file mode 100644 index 0000000..5b0d06b --- /dev/null +++ b/knowledge/software/metis.md @@ -0,0 +1,26 @@ +# Metis (node recovery) + +## Node classes (current map) +- rpi5 Ubuntu workers: titan-04,05,06,07,08,09,10,11,20,21 (Ubuntu 24.04.3, k3s agent) +- rpi5 control-plane: titan-0a/0b/0c (Ubuntu 24.04.1, k3s server, control-plane taint) +- rpi4 Armbian longhorn: titan-13/15/17/19 (Armbian 6.6.x, k3s agent, longhorn disks) +- rpi4 Armbian standard: titan-12/14/18 (Armbian 6.6.x, k3s agent) +- amd64 agents: titan-22/24 (Debian 13, k3s agent) +- External/non-cluster: tethys, titan-db, titan-jh, oceanus/titan-23, future titan-20/21 (when added), plus any newcomers. 
+ +## Longhorn disk UUIDs (critical nodes) +- titan-13: /mnt/astreae UUID=6031fa8b-f28c-45c3-b7bc-6133300e07c6 (ext4); /mnt/asteria UUID=cbd4989d-62b5-4741-8b2a-28fdae259cae (ext4) +- titan-15: /mnt/astreae UUID=f3362f14-5822-449f-944b-ac570b5cd615 (ext4); /mnt/asteria UUID=9c5316e6-f847-4884-b502-11f2d0d15d6f (ext4) +- titan-17: /mnt/astreae UUID=1fecdade-08b0-49cb-9ae3-be6c188b0a96 (ext4); /mnt/asteria UUID=2fe9f613-d372-47ca-b84f-82084e4edda0 (ext4) +- titan-19: /mnt/astreae UUID=4890abb9-dda2-4f4f-9c0f-081ee82849cf (ext4); /mnt/asteria UUID=2b4ea28d-b0e6-4fa3-841b-cd7067ae9153 (ext4) + +## Metis repo (~/Development/metis) +- CLI skeleton in Go (`cmd/metis`), inventory loader (`pkg/inventory`), plan builder (`pkg/plan`). +- `inventory.example.yaml` shows expected schema (classes + per-node overlay, Longhorn disks, labels, taints). +- `AGENTS.md` in repo is untracked and holds raw notes. + +## Next implementation steps +- Add per-class golden image refs and checksums (Harbor or file://) when ready. +- Implement burn execution: download with checksum, write via dd/etcher-equivalent, mount boot/root to inject hostname/IP/k3s tokens/labels/taints, journald/GC drop-ins, and Longhorn fstab entries. Add Windows writer (diskpart + wmic) and Linux writer (dd + sgdisk) paths. +- Add Keycloak/SSH bootstrap: ensure ssh user, authorized keys, and k3s token/URL injection for agents; control-plane restore path with etcd snapshot selection. +- Add per-host inventory entries for tethys, titan-db, titan-jh, oceanus/titan-23, future 20/21 once audited. 
diff --git a/services/monitoring/grafana-alerting-config.yaml b/services/monitoring/grafana-alerting-config.yaml index 474f490..7800d8d 100644 --- a/services/monitoring/grafana-alerting-config.yaml +++ b/services/monitoring/grafana-alerting-config.yaml @@ -125,6 +125,58 @@ data: summary: "{{ $labels.instance }} rootfs grew >1Gi in the last hour" labels: severity: warning + - orgId: 1 + name: atlas-cpu + folder: Alerts + interval: 1m + rules: + - uid: cpu-high-10m + title: "Node CPU high (>90% for 10m)" + condition: C + data: + - refId: A + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: atlas-vm + model: + intervalMs: 60000 + maxDataPoints: 43200 + expr: avg_over_time(((1 - avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m]))) * 100)[10m:1m]) + legendFormat: '{{instance}}' + datasource: + type: prometheus + uid: atlas-vm + - refId: B + datasourceUid: __expr__ + model: + expression: A + intervalMs: 60000 + maxDataPoints: 43200 + reducer: last + type: reduce + - refId: C + datasourceUid: __expr__ + model: + expression: B + intervalMs: 60000 + maxDataPoints: 43200 + type: threshold + conditions: + - evaluator: + params: [90] + type: gt + operator: + type: and + reducer: + type: last + type: query + noDataState: NoData + execErrState: Error + annotations: + summary: "{{ $labels.instance }} CPU >90% for 10m" + labels: + severity: warning - orgId: 1 name: maintenance folder: Alerts -- 2.47.2 From c8a2e8caf4ef99470b62096d486905d8a20f2f2d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 11 Jan 2026 09:44:40 -0300 Subject: [PATCH 652/684] knowledge: add jetson (titan-20/21) details --- knowledge/software/metis.md | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/knowledge/software/metis.md b/knowledge/software/metis.md index 5b0d06b..6f7f66e 100644 --- a/knowledge/software/metis.md +++ b/knowledge/software/metis.md @@ -6,7 +6,12 @@ - rpi4 Armbian longhorn: titan-13/15/17/19 (Armbian 6.6.x, k3s agent, 
longhorn disks) - rpi4 Armbian standard: titan-12/14/18 (Armbian 6.6.x, k3s agent) - amd64 agents: titan-22/24 (Debian 13, k3s agent) -- External/non-cluster: tethys, titan-db, titan-jh, oceanus/titan-23, future titan-20/21 (when added), plus any newcomers. +- External/non-cluster: tethys, titan-db, titan-jh, oceanus/titan-23, plus any newcomers. + +### Jetson nodes (titan-20/21) +- Ubuntu 20.04.6 (Focal), kernel 5.10.104-tegra, CRI containerd 2.0.5-k3s2, arch arm64. +- Storage: NVMe 232G at / (ext4); onboard mmc partitions present but root on NVMe; 1.9T sda present (unused). +- k3s agent with drop-in 99-nofile.conf. ## Longhorn disk UUIDs (critical nodes) - titan-13: /mnt/astreae UUID=6031fa8b-f28c-45c3-b7bc-6133300e07c6 (ext4); /mnt/asteria UUID=cbd4989d-62b5-4741-8b2a-28fdae259cae (ext4) @@ -24,3 +29,27 @@ - Implement burn execution: download with checksum, write via dd/etcher-equivalent, mount boot/root to inject hostname/IP/k3s tokens/labels/taints, journald/GC drop-ins, and Longhorn fstab entries. Add Windows writer (diskpart + wmic) and Linux writer (dd + sgdisk) paths. - Add Keycloak/SSH bootstrap: ensure ssh user, authorized keys, and k3s token/URL injection for agents; control-plane restore path with etcd snapshot selection. - Add per-host inventory entries for tethys, titan-db, titan-jh, oceanus/titan-23, future 20/21 once audited. 
+ +## Node OS/Kernel/CRI snapshot (Jan 2026) +- titan-04: Ubuntu 24.04.3 LTS, kernel 6.8.0-1031-raspi, CRI containerd://2.0.5-k3s2, arch arm64 +- titan-05: Ubuntu 24.04.3 LTS, kernel 6.8.0-1039-raspi, CRI containerd://2.0.5-k3s2, arch arm64 +- titan-06: Ubuntu 24.04.3 LTS, kernel 6.8.0-1039-raspi, CRI containerd://2.0.5-k3s2, arch arm64 +- titan-07: Ubuntu 24.04.3 LTS, kernel 6.8.0-1039-raspi, CRI containerd://2.0.5-k3s2, arch arm64 +- titan-08: Ubuntu 24.04.3 LTS, kernel 6.8.0-1039-raspi, CRI containerd://2.0.5-k3s2, arch arm64 +- titan-09: Ubuntu 24.04.3 LTS, kernel 6.8.0-1031-raspi, CRI containerd://2.0.5-k3s2, arch arm64 +- titan-0a: Ubuntu 24.04.1 LTS, kernel 6.8.0-1038-raspi, CRI containerd://2.0.5-k3s2, arch arm64 +- titan-0b: Ubuntu 24.04.1 LTS, kernel 6.8.0-1038-raspi, CRI containerd://2.0.5-k3s2, arch arm64 +- titan-0c: Ubuntu 24.04.1 LTS, kernel 6.8.0-1038-raspi, CRI containerd://2.0.5-k3s2, arch arm64 +- titan-10: Ubuntu 24.04.3 LTS, kernel 6.8.0-1039-raspi, CRI containerd://2.0.5-k3s2, arch arm64 +- titan-11: Ubuntu 24.04.3 LTS, kernel 6.8.0-1039-raspi, CRI containerd://2.0.5-k3s2, arch arm64 +- titan-12: Armbian 24.11.1 noble, kernel 6.6.60-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64 +- titan-13: Armbian 25.2.1 noble, kernel 6.6.63-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64 +- titan-14: Armbian 24.11.1 noble, kernel 6.6.60-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64 +- titan-15: Armbian 25.2.1 noble, kernel 6.6.63-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64 +- titan-17: Armbian 25.2.1 noble, kernel 6.6.63-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64 +- titan-18: Armbian 24.11.1 noble, kernel 6.6.60-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64 +- titan-19: Armbian 25.2.1 noble, kernel 6.6.63-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64 +- titan-20: Ubuntu 20.04.6 LTS, kernel 5.10.104-tegra, CRI containerd://2.0.5-k3s2, arch arm64 +- titan-21: Ubuntu 20.04.6 
LTS, kernel 5.10.104-tegra, CRI containerd://2.0.5-k3s2, arch arm64 +- titan-22: Debian 13 (trixie), kernel 6.12.41+deb13-amd64, CRI containerd://2.0.5-k3s2, arch amd64 +- titan-24: Debian 13 (trixie), kernel 6.12.57+deb13-amd64, CRI containerd://2.0.5-k3s2, arch amd64 -- 2.47.2 From 0c0b328a1a2c34a4aa0b9ba46f5407e83a0192e6 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 11 Jan 2026 09:54:11 -0300 Subject: [PATCH 653/684] knowledge: add titan-db and titan-jh details --- knowledge/software/metis.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/knowledge/software/metis.md b/knowledge/software/metis.md index 6f7f66e..a6eaca0 100644 --- a/knowledge/software/metis.md +++ b/knowledge/software/metis.md @@ -53,3 +53,9 @@ - titan-21: Ubuntu 20.04.6 LTS, kernel 5.10.104-tegra, CRI containerd://2.0.5-k3s2, arch arm64 - titan-22: Debian 13 (trixie), kernel 6.12.41+deb13-amd64, CRI containerd://2.0.5-k3s2, arch amd64 - titan-24: Debian 13 (trixie), kernel 6.12.57+deb13-amd64, CRI containerd://2.0.5-k3s2, arch amd64 + + +### External hosts +- titan-db: Ubuntu 24.10, kernel 6.11.0-1015-raspi, root on /dev/sda2 ext4 (465G), boot vfat /dev/sda1; PostgreSQL service enabled. +- titan-jh: Arch Linux ARM (rolling), kernel 6.18.4-2-rpi, NVMe root ext4 238G (/), boot vfat 512M; ~495 packages installed (pacman -Q). +- titan-23/oceanus: TODO audit (future). -- 2.47.2 From 35dca130266f572286519ab83ed0e9a6197886d6 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 11 Jan 2026 10:06:35 -0300 Subject: [PATCH 654/684] knowledge: add control-plane storage details --- knowledge/software/metis.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/knowledge/software/metis.md b/knowledge/software/metis.md index a6eaca0..46ca773 100644 --- a/knowledge/software/metis.md +++ b/knowledge/software/metis.md @@ -59,3 +59,9 @@ - titan-db: Ubuntu 24.10, kernel 6.11.0-1015-raspi, root on /dev/sda2 ext4 (465G), boot vfat /dev/sda1; PostgreSQL service enabled. 
- titan-jh: Arch Linux ARM (rolling), kernel 6.18.4-2-rpi, NVMe root ext4 238G (/), boot vfat 512M; ~495 packages installed (pacman -Q). - titan-23/oceanus: TODO audit (future). + + +### Control plane Pis (titan-0a/0b/0c) +- Ubuntu 24.04.1 LTS, kernel 6.8.0-1038-raspi, containerd 2.0.5-k3s2. +- Storage: 477G SSD root (/dev/sda2 ext4), /boot/firmware vfat (/dev/sda1). fstab uses LABEL=writable and LABEL=system-boot. +- k3s server (control-plane taint expected); etcd snapshots not yet cataloged (TODO). -- 2.47.2 From a8e6b575af37e44e3b2a656941464532c215b72a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 11 Jan 2026 10:15:55 -0300 Subject: [PATCH 655/684] knowledge: record k3s versions across node classes --- knowledge/software/metis.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/knowledge/software/metis.md b/knowledge/software/metis.md index 46ca773..7ca3b39 100644 --- a/knowledge/software/metis.md +++ b/knowledge/software/metis.md @@ -65,3 +65,9 @@ - Ubuntu 24.04.1 LTS, kernel 6.8.0-1038-raspi, containerd 2.0.5-k3s2. - Storage: 477G SSD root (/dev/sda2 ext4), /boot/firmware vfat (/dev/sda1). fstab uses LABEL=writable and LABEL=system-boot. - k3s server (control-plane taint expected); etcd snapshots not yet cataloged (TODO). 
+ + +## k3s versions +- rpi5 workers/control-plane: k3s v1.33.3+k3s1 (crictl v1.31.0-k3s2) +- rpi4 nodes: k3s v1.31.5+k3s1 (crictl v1.31.0-k3s2) +- Jetson titan-20/21: k3s v1.33.3+k3s1 (per node info), crictl v1.31.0-k3s2 -- 2.47.2 From 3a798ae3b143bced2967c00834643f1430b6723f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 11 Jan 2026 19:01:31 -0300 Subject: [PATCH 656/684] mailu: use postmark token for relay auth --- services/mailu/helmrelease.yaml | 2 +- services/monitoring/grafana-smtp-sync-script.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/mailu/helmrelease.yaml b/services/mailu/helmrelease.yaml index 63b76bc..e675961 100644 --- a/services/mailu/helmrelease.yaml +++ b/services/mailu/helmrelease.yaml @@ -35,7 +35,7 @@ spec: externalRelay: host: "[smtp.postmarkapp.com]:587" existingSecret: mailu-postmark-relay - usernameKey: relay-username + usernameKey: relay-password passwordKey: relay-password timezone: Etc/UTC subnet: 10.42.0.0/16 diff --git a/services/monitoring/grafana-smtp-sync-script.yaml b/services/monitoring/grafana-smtp-sync-script.yaml index cccfd48..0a58a3c 100644 --- a/services/monitoring/grafana-smtp-sync-script.yaml +++ b/services/monitoring/grafana-smtp-sync-script.yaml @@ -20,8 +20,8 @@ data: kubectl -n "$SOURCE_NS" get secret "$SOURCE_SECRET" -o json > "$tmp" - user=$(jq -r '.data["relay-username"]' "$tmp") pass=$(jq -r '.data["relay-password"]' "$tmp") + user=$pass if [ -z "$user" ] || [ -z "$pass" ] || [ "$user" = "null" ] || [ "$pass" = "null" ]; then echo "missing credentials from $SOURCE_NS/$SOURCE_SECRET" >&2 -- 2.47.2 From fcc0a493699501bace78430ff27d24ff6ce6f53a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 11 Jan 2026 23:46:24 -0300 Subject: [PATCH 657/684] monitoring: fix infra scopes and add jetson metrics --- .../flux-system/platform/kustomization.yaml | 1 + scripts/dashboards_render_atlas.py | 89 +++---- services/monitoring/dashboards/atlas-gpu.json | 28 +-- 
.../monitoring/dashboards/atlas-nodes.json | 2 +- .../monitoring/dashboards/atlas-overview.json | 234 ++---------------- .../monitoring/dashboards/atlas-pods.json | 2 +- .../monitoring/dashboards/atlas-storage.json | 132 ++++++++++ services/monitoring/dcgm-exporter.yaml | 11 +- .../monitoring/grafana-alerting-config.yaml | 9 +- .../monitoring/grafana-dashboard-gpu.yaml | 28 +-- .../monitoring/grafana-dashboard-nodes.yaml | 2 +- .../grafana-dashboard-overview.yaml | 234 ++---------------- .../monitoring/grafana-dashboard-pods.yaml | 2 +- .../monitoring/grafana-dashboard-storage.yaml | 132 ++++++++++ .../jetson-tegrastats-exporter.yaml | 168 +++++++++++++ services/monitoring/kustomization.yaml | 1 + 16 files changed, 559 insertions(+), 516 deletions(-) create mode 100644 services/monitoring/jetson-tegrastats-exporter.yaml diff --git a/clusters/atlas/flux-system/platform/kustomization.yaml b/clusters/atlas/flux-system/platform/kustomization.yaml index df226e2..7da2ca3 100644 --- a/clusters/atlas/flux-system/platform/kustomization.yaml +++ b/clusters/atlas/flux-system/platform/kustomization.yaml @@ -9,5 +9,6 @@ resources: - gitops-ui/kustomization.yaml - monitoring/kustomization.yaml - logging/kustomization.yaml + - maintenance/kustomization.yaml - longhorn-ui/kustomization.yaml - ../platform/vault-csi/kustomization.yaml diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 5474298..7cbb386 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -84,7 +84,18 @@ CONTROL_TOTAL = len(CONTROL_PLANE_NODES) WORKER_TOTAL = len(WORKER_NODES) CONTROL_SUFFIX = f"/{CONTROL_TOTAL}" WORKER_SUFFIX = f"/{WORKER_TOTAL}" -CP_ALLOWED_NS = "(^kube.*|.*-system$|^traefik$|^monitoring$)" +# Namespaces considered infrastructure (excluded from workload counts) +INFRA_NAMESPACES = [ + "kube-system", + "longhorn-system", + "metallb-system", + "monitoring", + "flux-system", + "traefik", +] +INFRA_REGEX = 
f"^({'|'.join(INFRA_NAMESPACES)})$" +# Namespaces allowed on control plane without counting as workloads +CP_ALLOWED_NS = INFRA_REGEX LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]" GAUGE_WIDTHS = [4, 3, 3, 4, 3, 3, 4] CONTROL_WORKLOADS_EXPR = ( @@ -300,9 +311,9 @@ STUCK_TABLE_EXPR = ( ")" ) -NAMESPACE_SCOPE_WORKLOAD = 'namespace!~"(^kube.*|.*-system$|^traefik$|^monitoring$)"' +NAMESPACE_SCOPE_WORKLOAD = f'namespace!~"{INFRA_REGEX}"' NAMESPACE_SCOPE_ALL = 'namespace=~".*"' -NAMESPACE_SCOPE_INFRA = 'namespace=~"(^kube.*|.*-system$|^traefik$|^monitoring$)"' +NAMESPACE_SCOPE_INFRA = f'namespace=~"{INFRA_REGEX}"' NAMESPACE_SCOPE_VARS = ["namespace_scope_cpu", "namespace_scope_gpu", "namespace_scope_ram"] GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"] GPU_NODE_REGEX = "|".join(GPU_NODES) @@ -1232,51 +1243,6 @@ def build_overview(): links=link_to("atlas-storage"), ) ) - panels.append( - stat_panel( - 30, - "Maintenance Sweepers Ready", - 'kube_daemonset_status_number_ready{namespace="maintenance",daemonset="node-image-sweeper"} / on(namespace,daemonset) kube_daemonset_status_desired_number_scheduled{namespace="maintenance",daemonset="node-image-sweeper"} * 100', - {"h": 6, "w": 8, "x": 0, "y": 80}, - unit="percent", - thresholds=PERCENT_THRESHOLDS, - ) - ) - panels.append( - stat_panel( - 31, - "Maintenance Cron Freshness (s)", - 'time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace="maintenance",cronjob=~"image-sweeper|grafana-smtp-sync"})', - {"h": 6, "w": 8, "x": 8, "y": 80}, - unit="s", - thresholds={ - "mode": "absolute", - "steps": [ - {"color": "green", "value": None}, - {"color": "yellow", "value": 3600}, - {"color": "red", "value": 10800}, - ], - }, - ) - ) - panels.append( - stat_panel( - 32, - "Postmark Bounce Rate (1d)", - 'POSTMARK_OUTBOUND_BOUNCE_RATE{window="1d"}', - {"h": 6, "w": 8, "x": 16, "y": 80}, - unit="percent", - thresholds={ - "mode": "absolute", - "steps": [ - {"color": "green", "value": None}, - {"color": 
"yellow", "value": 2}, - {"color": "red", "value": 5}, - ], - }, - ) - ) - return { "uid": "atlas-overview", "title": "Atlas Overview", @@ -1743,6 +1709,33 @@ def build_storage_dashboard(): time_from="90d", ) ) + panels.append( + stat_panel( + 30, + "Maintenance Sweepers Ready", + 'kube_daemonset_status_number_ready{namespace="maintenance",daemonset="node-image-sweeper"} / on(namespace,daemonset) kube_daemonset_status_desired_number_scheduled{namespace="maintenance",daemonset="node-image-sweeper"} * 100', + {"h": 4, "w": 12, "x": 0, "y": 44}, + unit="percent", + thresholds=PERCENT_THRESHOLDS, + ) + ) + panels.append( + stat_panel( + 31, + "Maintenance Cron Freshness (s)", + 'time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace="maintenance",cronjob=~"image-sweeper|grafana-smtp-sync"})', + {"h": 4, "w": 12, "x": 12, "y": 44}, + unit="s", + thresholds={ + "mode": "absolute", + "steps": [ + {"color": "green", "value": None}, + {"color": "yellow", "value": 3600}, + {"color": "red", "value": 10800}, + ], + }, + ) + ) return { "uid": "atlas-storage", "title": "Atlas Storage", diff --git a/services/monitoring/dashboards/atlas-gpu.json b/services/monitoring/dashboards/atlas-gpu.json index 2e71045..9460177 100644 --- a/services/monitoring/dashboards/atlas-gpu.json +++ b/services/monitoring/dashboards/atlas-gpu.json @@ -57,7 +57,7 @@ "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -67,7 +67,7 @@ }, { "title": "Infrastructure namespaces only", - "url": 
"?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], @@ -207,16 +207,16 @@ "name": "namespace_scope_cpu", "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, { @@ -226,7 +226,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": false } ], @@ -241,16 +241,16 @@ "name": "namespace_scope_gpu", "label": "GPU namespace filter", "type": "custom", - "query": 
"workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, { @@ -260,7 +260,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": false } ], @@ -275,16 +275,16 @@ "name": "namespace_scope_ram", "label": "RAM namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "current": { "text": "workload namespaces only", - "value": 
"namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, { @@ -294,7 +294,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": false } ], diff --git a/services/monitoring/dashboards/atlas-nodes.json b/services/monitoring/dashboards/atlas-nodes.json index 256bc18..499e14e 100644 --- a/services/monitoring/dashboards/atlas-nodes.json +++ b/services/monitoring/dashboards/atlas-nodes.json @@ -142,7 +142,7 @@ }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"})", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"})", "refId": "A" } ], diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 44403fb..bef23e2 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -76,7 +76,7 @@ }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"}) or on() vector(0)", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"}) or on() vector(0)", "refId": "A" } ], @@ -1447,7 +1447,7 @@ "links": [ { "title": 
"Workload namespaces only", - "url": "?var-namespace_scope_cpu=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -1457,7 +1457,7 @@ }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], @@ -1516,7 +1516,7 @@ "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -1526,7 +1526,7 @@ }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": 
"?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], @@ -1585,7 +1585,7 @@ "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22", "targetBlank": false }, { @@ -1595,7 +1595,7 @@ }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22", "targetBlank": false } ], @@ -2160,202 +2160,6 @@ } } ] - }, - { - "id": 30, - "type": "stat", - "title": "Maintenance Sweepers Ready", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 0, - "y": 80 - }, - "targets": [ - { - "expr": "kube_daemonset_status_number_ready{namespace=\"maintenance\",daemonset=\"node-image-sweeper\"} / on(namespace,daemonset) kube_daemonset_status_desired_number_scheduled{namespace=\"maintenance\",daemonset=\"node-image-sweeper\"} * 100", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - 
"mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 50 - }, - { - "color": "orange", - "value": 75 - }, - { - "color": "red", - "value": 91.5 - } - ] - }, - "unit": "percent", - "custom": { - "displayMode": "auto" - } - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 31, - "type": "stat", - "title": "Maintenance Cron Freshness (s)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 8, - "y": 80 - }, - "targets": [ - { - "expr": "time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace=\"maintenance\",cronjob=~\"image-sweeper|grafana-smtp-sync\"})", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 3600 - }, - { - "color": "red", - "value": 10800 - } - ] - }, - "unit": "s", - "custom": { - "displayMode": "auto" - } - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 32, - "type": "stat", - "title": "Postmark Bounce Rate (1d)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 16, - "y": 80 - }, - "targets": [ - { - "expr": "POSTMARK_OUTBOUND_BOUNCE_RATE{window=\"1d\"}", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ 
- { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 2 - }, - { - "color": "red", - "value": 5 - } - ] - }, - "unit": "percent", - "custom": { - "displayMode": "auto" - } - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } } ], "schemaVersion": 39, @@ -2370,16 +2174,16 @@ "name": "namespace_scope_cpu", "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, { @@ -2389,7 +2193,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": false } ], @@ -2404,16 +2208,16 @@ "name": "namespace_scope_gpu", "label": "GPU namespace filter", "type": "custom", - "query": "workload 
namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, { @@ -2423,7 +2227,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": false } ], @@ -2438,16 +2242,16 @@ "name": "namespace_scope_ram", "label": "RAM namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "current": { "text": "workload namespaces only", - "value": 
"namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, { @@ -2457,7 +2261,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": false } ], diff --git a/services/monitoring/dashboards/atlas-pods.json b/services/monitoring/dashboards/atlas-pods.json index 68429ec..a0f9e1c 100644 --- a/services/monitoring/dashboards/atlas-pods.json +++ b/services/monitoring/dashboards/atlas-pods.json @@ -200,7 +200,7 @@ }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"})", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"})", "refId": "A" } ], diff --git a/services/monitoring/dashboards/atlas-storage.json b/services/monitoring/dashboards/atlas-storage.json index 2e548b2..d93a941 100644 --- a/services/monitoring/dashboards/atlas-storage.json +++ b/services/monitoring/dashboards/atlas-storage.json @@ -409,6 +409,138 @@ } }, "timeFrom": "90d" + }, + { + "id": 30, + "type": "stat", + "title": "Maintenance Sweepers Ready", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 12, + "x": 0, + "y": 44 + }, + "targets": [ + { + "expr": "kube_daemonset_status_number_ready{namespace=\"maintenance\",daemonset=\"node-image-sweeper\"} / on(namespace,daemonset) 
kube_daemonset_status_desired_number_scheduled{namespace=\"maintenance\",daemonset=\"node-image-sweeper\"} * 100", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "orange", + "value": 75 + }, + { + "color": "red", + "value": 91.5 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 31, + "type": "stat", + "title": "Maintenance Cron Freshness (s)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 12, + "x": 12, + "y": 44 + }, + "targets": [ + { + "expr": "time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace=\"maintenance\",cronjob=~\"image-sweeper|grafana-smtp-sync\"})", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 3600 + }, + { + "color": "red", + "value": 10800 + } + ] + }, + "unit": "s", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } } ], "time": { diff --git a/services/monitoring/dcgm-exporter.yaml b/services/monitoring/dcgm-exporter.yaml index cd37b7b..7627420 100644 --- a/services/monitoring/dcgm-exporter.yaml +++ b/services/monitoring/dcgm-exporter.yaml @@ 
-28,13 +28,14 @@ spec: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: kubernetes.io/hostname + - key: kubernetes.io/arch operator: In values: - - titan-20 - - titan-21 - - titan-22 - - titan-24 + - amd64 + - key: jetson + operator: NotIn + values: + - "true" tolerations: - operator: Exists containers: diff --git a/services/monitoring/grafana-alerting-config.yaml b/services/monitoring/grafana-alerting-config.yaml index 7800d8d..c679bff 100644 --- a/services/monitoring/grafana-alerting-config.yaml +++ b/services/monitoring/grafana-alerting-config.yaml @@ -34,6 +34,7 @@ data: - uid: disk-pressure-root title: "Node rootfs high (>80%)" condition: C + for: "10m" data: - refId: A relativeTimeRange: @@ -81,6 +82,7 @@ data: - uid: disk-growth-1h title: "Node rootfs growing fast (>1Gi in 1h)" condition: C + for: "10m" data: - refId: A relativeTimeRange: @@ -133,6 +135,7 @@ data: - uid: cpu-high-10m title: "Node CPU high (>90% for 10m)" condition: C + for: 10m data: - refId: A relativeTimeRange: @@ -185,6 +188,7 @@ data: - uid: maint-sweeper title: "Maintenance sweeper not ready" condition: C + for: "5m" data: - refId: A relativeTimeRange: @@ -232,10 +236,11 @@ data: - uid: maint-cron-stale title: "Maintenance CronJobs stale (>3h since success)" condition: C + for: "5m" data: - refId: A relativeTimeRange: - from: 0 + from: 300 to: 0 datasourceUid: atlas-vm model: @@ -284,6 +289,7 @@ data: - uid: postmark-bounce title: "Postmark bounce rate high" condition: C + for: "10m" data: - refId: A relativeTimeRange: @@ -331,6 +337,7 @@ data: - uid: postmark-api-down title: "Postmark exporter down" condition: C + for: "5m" data: - refId: A relativeTimeRange: diff --git a/services/monitoring/grafana-dashboard-gpu.yaml b/services/monitoring/grafana-dashboard-gpu.yaml index 56965eb..3f7bbec 100644 --- a/services/monitoring/grafana-dashboard-gpu.yaml +++ b/services/monitoring/grafana-dashboard-gpu.yaml @@ -66,7 +66,7 @@ data: "links": [ { 
"title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -76,7 +76,7 @@ data: }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], @@ -216,16 +216,16 @@ data: "name": "namespace_scope_cpu", "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, "options": [ { "text": 
"workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, { @@ -235,7 +235,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": false } ], @@ -250,16 +250,16 @@ data: "name": "namespace_scope_gpu", "label": "GPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, { @@ -269,7 +269,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": false } ], @@ -284,16 +284,16 @@ data: "name": "namespace_scope_ram", "label": "RAM namespace 
filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, { @@ -303,7 +303,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": false } ], diff --git a/services/monitoring/grafana-dashboard-nodes.yaml b/services/monitoring/grafana-dashboard-nodes.yaml index 368f80f..42d2c3f 100644 --- a/services/monitoring/grafana-dashboard-nodes.yaml +++ b/services/monitoring/grafana-dashboard-nodes.yaml @@ -151,7 +151,7 @@ data: }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"})", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"})", "refId": "A" } ], diff --git a/services/monitoring/grafana-dashboard-overview.yaml 
b/services/monitoring/grafana-dashboard-overview.yaml index 062310c..d89255c 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -85,7 +85,7 @@ data: }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"}) or on() vector(0)", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"}) or on() vector(0)", "refId": "A" } ], @@ -1456,7 +1456,7 @@ data: "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -1466,7 +1466,7 @@ data: }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], @@ -1525,7 +1525,7 @@ data: "links": [ { "title": "Workload namespaces only", - "url": 
"?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -1535,7 +1535,7 @@ data: }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], @@ -1594,7 +1594,7 @@ data: "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22", "targetBlank": false }, { @@ -1604,7 +1604,7 @@ data: }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22", + "url": 
"?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22", "targetBlank": false } ], @@ -2169,202 +2169,6 @@ data: } } ] - }, - { - "id": 30, - "type": "stat", - "title": "Maintenance Sweepers Ready", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 0, - "y": 80 - }, - "targets": [ - { - "expr": "kube_daemonset_status_number_ready{namespace=\"maintenance\",daemonset=\"node-image-sweeper\"} / on(namespace,daemonset) kube_daemonset_status_desired_number_scheduled{namespace=\"maintenance\",daemonset=\"node-image-sweeper\"} * 100", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 50 - }, - { - "color": "orange", - "value": 75 - }, - { - "color": "red", - "value": 91.5 - } - ] - }, - "unit": "percent", - "custom": { - "displayMode": "auto" - } - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 31, - "type": "stat", - "title": "Maintenance Cron Freshness (s)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 8, - "y": 80 - }, - "targets": [ - { - "expr": "time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace=\"maintenance\",cronjob=~\"image-sweeper|grafana-smtp-sync\"})", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": 
"green", - "value": null - }, - { - "color": "yellow", - "value": 3600 - }, - { - "color": "red", - "value": 10800 - } - ] - }, - "unit": "s", - "custom": { - "displayMode": "auto" - } - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 32, - "type": "stat", - "title": "Postmark Bounce Rate (1d)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 16, - "y": 80 - }, - "targets": [ - { - "expr": "POSTMARK_OUTBOUND_BOUNCE_RATE{window=\"1d\"}", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 2 - }, - { - "color": "red", - "value": 5 - } - ] - }, - "unit": "percent", - "custom": { - "displayMode": "auto" - } - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } } ], "schemaVersion": 39, @@ -2379,16 +2183,16 @@ data: "name": "namespace_scope_cpu", "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "current": { "text": 
"workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, { @@ -2398,7 +2202,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": false } ], @@ -2413,16 +2217,16 @@ data: "name": "namespace_scope_gpu", "label": "GPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, { @@ -2432,7 +2236,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": 
"namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": false } ], @@ -2447,16 +2251,16 @@ data: "name": "namespace_scope_ram", "label": "RAM namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": true }, { @@ -2466,7 +2270,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", "selected": false } ], diff --git a/services/monitoring/grafana-dashboard-pods.yaml b/services/monitoring/grafana-dashboard-pods.yaml index 0f43a05..1ca5afb 100644 --- a/services/monitoring/grafana-dashboard-pods.yaml +++ b/services/monitoring/grafana-dashboard-pods.yaml @@ -209,7 +209,7 @@ data: }, "targets": [ { - "expr": 
"sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"})", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"})", "refId": "A" } ], diff --git a/services/monitoring/grafana-dashboard-storage.yaml b/services/monitoring/grafana-dashboard-storage.yaml index 8aef820..5ce4186 100644 --- a/services/monitoring/grafana-dashboard-storage.yaml +++ b/services/monitoring/grafana-dashboard-storage.yaml @@ -418,6 +418,138 @@ data: } }, "timeFrom": "90d" + }, + { + "id": 30, + "type": "stat", + "title": "Maintenance Sweepers Ready", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 12, + "x": 0, + "y": 44 + }, + "targets": [ + { + "expr": "kube_daemonset_status_number_ready{namespace=\"maintenance\",daemonset=\"node-image-sweeper\"} / on(namespace,daemonset) kube_daemonset_status_desired_number_scheduled{namespace=\"maintenance\",daemonset=\"node-image-sweeper\"} * 100", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "orange", + "value": 75 + }, + { + "color": "red", + "value": 91.5 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 31, + "type": "stat", + "title": "Maintenance Cron Freshness (s)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 12, + "x": 12, + "y": 44 + }, + "targets": [ + { + "expr": "time() - max by 
(cronjob) (kube_cronjob_status_last_successful_time{namespace=\"maintenance\",cronjob=~\"image-sweeper|grafana-smtp-sync\"})", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 3600 + }, + { + "color": "red", + "value": 10800 + } + ] + }, + "unit": "s", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } } ], "time": { diff --git a/services/monitoring/jetson-tegrastats-exporter.yaml b/services/monitoring/jetson-tegrastats-exporter.yaml new file mode 100644 index 0000000..32a4455 --- /dev/null +++ b/services/monitoring/jetson-tegrastats-exporter.yaml @@ -0,0 +1,168 @@ +# services/monitoring/jetson-tegrastats-exporter.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: jetson-tegrastats-exporter + namespace: monitoring + labels: + app: jetson-tegrastats-exporter +spec: + selector: + matchLabels: + app: jetson-tegrastats-exporter + template: + metadata: + labels: + app: jetson-tegrastats-exporter + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9100" + spec: + serviceAccountName: default + hostPID: true + tolerations: + - operator: Exists + nodeSelector: + jetson: "true" + containers: + - name: exporter + # Exposes tegrastats output as Prometheus metrics for Jetson devices. 
+ image: python:3.10-slim + imagePullPolicy: IfNotPresent + securityContext: + privileged: true + ports: + - name: metrics + containerPort: 9100 + resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 200m + memory: 256Mi + env: + - name: JETSON_EXPORTER_PORT + value: "9100" + volumeMounts: + - name: script + mountPath: /etc/tegrastats-exporter + readOnly: true + - name: tegrastats-bin + mountPath: /host/usr/bin/tegrastats + readOnly: true + command: + - python + - /etc/tegrastats-exporter/exporter.py + volumes: + - name: script + configMap: + name: jetson-tegrastats-exporter-script + defaultMode: 0555 + - name: tegrastats-bin + hostPath: + path: /usr/bin/tegrastats + type: File +--- +apiVersion: v1 +kind: Service +metadata: + name: jetson-tegrastats-exporter + namespace: monitoring + labels: + app: jetson-tegrastats-exporter +spec: + selector: + app: jetson-tegrastats-exporter + ports: + - name: metrics + port: 9100 + targetPort: metrics +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: jetson-tegrastats-exporter-script + namespace: monitoring +data: + exporter.py: | + import http.server + import os + import re + import socketserver + import subprocess + import threading + from time import time + + PORT = int(os.environ.get("JETSON_EXPORTER_PORT", "9100")) + METRICS = { + "gr3d_freq_percent": 0.0, + "gpu_temp_c": 0.0, + "cpu_temp_c": 0.0, + "ram_used_mb": 0.0, + "ram_total_mb": 0.0, + "power_5v_in_mw": 0.0, + "last_scrape_ts": 0.0, + } + LOCK = threading.Lock() + + def parse_line(line: str): + updates = {} + m = re.search(r"GR3D_FREQ\\s+(\\d+)%", line) + if m: + updates["gr3d_freq_percent"] = float(m.group(1)) + m = re.search(r"GPU@(\\d+(?:\\.\\d+)?)C", line) + if m: + updates["gpu_temp_c"] = float(m.group(1)) + m = re.search(r"CPU@(\\d+(?:\\.\\d+)?)C", line) + if m: + updates["cpu_temp_c"] = float(m.group(1)) + m = re.search(r"RAM\\s+(\\d+)/(\\d+)MB", line) + if m: + updates["ram_used_mb"] = float(m.group(1)) + updates["ram_total_mb"] = 
float(m.group(2)) + m = re.search(r"POM_5V_IN\\s+(\\d+)/(\\d+)", line) + if m: + updates["power_5v_in_mw"] = float(m.group(1)) + with LOCK: + METRICS.update(updates) + METRICS["last_scrape_ts"] = time() + + def run_tegrastats(): + proc = subprocess.Popen( + ["/host/usr/bin/tegrastats", "--interval", "1000"], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + ) + for line in proc.stdout: + parse_line(line) + + class Handler(http.server.BaseHTTPRequestHandler): + def do_GET(self): + if self.path != "/metrics": + self.send_response(404) + self.end_headers() + return + with LOCK: + metrics = METRICS.copy() + out = [] + for k, v in metrics.items(): + out.append(f"# TYPE jetson_{k} gauge") + out.append(f"jetson_{k} {v}") + body = "\\n".join(out) + "\\n" + self.send_response(200) + self.send_header("Content-Type", "text/plain; version=0.0.4") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body.encode("utf-8")) + + def log_message(self, fmt, *args): + return + + if __name__ == "__main__": + t = threading.Thread(target=run_tegrastats, daemon=True) + t.start() + with socketserver.TCPServer(("", PORT), Handler) as httpd: + httpd.serve_forever() diff --git a/services/monitoring/kustomization.yaml b/services/monitoring/kustomization.yaml index 7198ffc..9f0e8ca 100644 --- a/services/monitoring/kustomization.yaml +++ b/services/monitoring/kustomization.yaml @@ -14,6 +14,7 @@ resources: - grafana-dashboard-gpu.yaml - grafana-dashboard-mail.yaml - dcgm-exporter.yaml + - jetson-tegrastats-exporter.yaml - postmark-exporter-service.yaml - postmark-exporter-deployment.yaml - grafana-alerting-config.yaml -- 2.47.2 From 21b9129abf024ca470afa3624bd312684ff4fa6f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 11 Jan 2026 23:52:40 -0300 Subject: [PATCH 658/684] monitoring: classify logging/postgres/maintenance as infra --- scripts/dashboards_render_atlas.py | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 7cbb386..95397a1 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -90,8 +90,11 @@ INFRA_NAMESPACES = [ "longhorn-system", "metallb-system", "monitoring", + "logging", "flux-system", "traefik", + "maintenance", + "postgres", ] INFRA_REGEX = f"^({'|'.join(INFRA_NAMESPACES)})$" # Namespaces allowed on control plane without counting as workloads -- 2.47.2 From fb2c7b22d5238a18b81c2b6e522b0219af2e5bc3 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 11 Jan 2026 23:55:43 -0300 Subject: [PATCH 659/684] monitoring: regenerate dashboards with expanded infra namespaces --- services/monitoring/dashboards/atlas-gpu.json | 28 +++++++------- .../monitoring/dashboards/atlas-nodes.json | 2 +- .../monitoring/dashboards/atlas-overview.json | 38 +++++++++---------- .../monitoring/dashboards/atlas-pods.json | 2 +- .../monitoring/grafana-dashboard-gpu.yaml | 28 +++++++------- .../monitoring/grafana-dashboard-nodes.yaml | 2 +- .../grafana-dashboard-overview.yaml | 38 +++++++++---------- .../monitoring/grafana-dashboard-pods.yaml | 2 +- 8 files changed, 70 insertions(+), 70 deletions(-) diff --git a/services/monitoring/dashboards/atlas-gpu.json b/services/monitoring/dashboards/atlas-gpu.json index 9460177..095b329 100644 --- a/services/monitoring/dashboards/atlas-gpu.json +++ b/services/monitoring/dashboards/atlas-gpu.json @@ -57,7 +57,7 @@ "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": 
"?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -67,7 +67,7 @@ }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], @@ -207,16 +207,16 @@ "name": "namespace_scope_cpu", "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { 
"text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -226,7 +226,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], @@ -241,16 +241,16 @@ "name": "namespace_scope_gpu", "label": "GPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", 
"selected": true }, { @@ -260,7 +260,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], @@ -275,16 +275,16 @@ "name": "namespace_scope_ram", "label": "RAM namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -294,7 +294,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": 
"namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], diff --git a/services/monitoring/dashboards/atlas-nodes.json b/services/monitoring/dashboards/atlas-nodes.json index 499e14e..735a88a 100644 --- a/services/monitoring/dashboards/atlas-nodes.json +++ b/services/monitoring/dashboards/atlas-nodes.json @@ -142,7 +142,7 @@ }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"})", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"})", "refId": "A" } ], diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index bef23e2..ceb1c50 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -76,7 +76,7 @@ }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"}) or on() vector(0)", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"}) or on() vector(0)", "refId": "A" } ], @@ -1447,7 +1447,7 @@ "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "url": 
"?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -1457,7 +1457,7 @@ }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], @@ -1516,7 +1516,7 @@ "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -1526,7 +1526,7 @@ }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": 
"?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], @@ -1585,7 +1585,7 @@ "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22", "targetBlank": false }, { @@ -1595,7 +1595,7 @@ }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22", "targetBlank": false } ], @@ -2174,16 +2174,16 @@ "name": "namespace_scope_cpu", "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "query": "workload namespaces only : 
namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -2193,7 +2193,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], @@ -2208,16 +2208,16 @@ "name": "namespace_scope_gpu", "label": "GPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : 
namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -2227,7 +2227,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], @@ -2242,16 +2242,16 @@ "name": "namespace_scope_ram", "label": "RAM namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + 
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -2261,7 +2261,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], diff --git a/services/monitoring/dashboards/atlas-pods.json b/services/monitoring/dashboards/atlas-pods.json index a0f9e1c..456780a 100644 --- a/services/monitoring/dashboards/atlas-pods.json +++ b/services/monitoring/dashboards/atlas-pods.json @@ -200,7 +200,7 @@ }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"})", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"})", "refId": "A" } ], diff --git a/services/monitoring/grafana-dashboard-gpu.yaml b/services/monitoring/grafana-dashboard-gpu.yaml index 3f7bbec..74578ff 100644 --- a/services/monitoring/grafana-dashboard-gpu.yaml +++ b/services/monitoring/grafana-dashboard-gpu.yaml @@ -66,7 +66,7 @@ data: "links": [ { "title": "Workload namespaces only", - "url": 
"?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -76,7 +76,7 @@ data: }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], @@ -216,16 +216,16 @@ data: "name": "namespace_scope_cpu", "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": 
"namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -235,7 +235,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], @@ -250,16 +250,16 @@ data: "name": "namespace_scope_gpu", "label": "GPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": 
"workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -269,7 +269,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], @@ -284,16 +284,16 @@ data: "name": "namespace_scope_ram", "label": "RAM namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", 
"selected": true }, { @@ -303,7 +303,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], diff --git a/services/monitoring/grafana-dashboard-nodes.yaml b/services/monitoring/grafana-dashboard-nodes.yaml index 42d2c3f..5234004 100644 --- a/services/monitoring/grafana-dashboard-nodes.yaml +++ b/services/monitoring/grafana-dashboard-nodes.yaml @@ -151,7 +151,7 @@ data: }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"})", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"})", "refId": "A" } ], diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index d89255c..ff4bcc0 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -85,7 +85,7 @@ data: }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"}) or on() vector(0)", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"}) or on() vector(0)", "refId": "A" } ], @@ -1456,7 +1456,7 @@ data: "links": [ { "title": "Workload namespaces only", - "url": 
"?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -1466,7 +1466,7 @@ data: }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], @@ -1525,7 +1525,7 @@ data: "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -1535,7 +1535,7 @@ data: }, { "title": "Infrastructure namespaces only", - "url": 
"?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], @@ -1594,7 +1594,7 @@ data: "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22", "targetBlank": false }, { @@ -1604,7 +1604,7 @@ data: }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Cflux-system%7Ctraefik%29%24%22", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22", "targetBlank": false } ], @@ -2183,16 +2183,16 @@ data: "name": "namespace_scope_cpu", "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : 
namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -2202,7 +2202,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], @@ -2217,16 +2217,16 @@ data: "name": "namespace_scope_gpu", "label": "GPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "query": 
"workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -2236,7 +2236,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], @@ -2251,16 +2251,16 @@ data: "name": "namespace_scope_ram", "label": "RAM namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : 
namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -2270,7 +2270,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], diff --git a/services/monitoring/grafana-dashboard-pods.yaml b/services/monitoring/grafana-dashboard-pods.yaml index 1ca5afb..24b3277 100644 --- a/services/monitoring/grafana-dashboard-pods.yaml +++ b/services/monitoring/grafana-dashboard-pods.yaml @@ -209,7 +209,7 @@ data: }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|flux-system|traefik)$\"})", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"})", "refId": "A" } ], -- 2.47.2 From 13df82e07aab53c5297a7b10a137b4a7f1c71ab6 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 12 Jan 2026 00:26:46 -0300 Subject: [PATCH 660/684] monitoring: treat cert-manager as infrastructure --- 
scripts/dashboards_render_atlas.py | 1 + services/monitoring/dashboards/atlas-gpu.json | 28 +++++++------- .../monitoring/dashboards/atlas-nodes.json | 2 +- .../monitoring/dashboards/atlas-overview.json | 38 +++++++++---------- .../monitoring/dashboards/atlas-pods.json | 2 +- .../monitoring/grafana-dashboard-gpu.yaml | 28 +++++++------- .../monitoring/grafana-dashboard-nodes.yaml | 2 +- .../grafana-dashboard-overview.yaml | 38 +++++++++---------- .../monitoring/grafana-dashboard-pods.yaml | 2 +- 9 files changed, 71 insertions(+), 70 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 95397a1..01fe9c7 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -91,6 +91,7 @@ INFRA_NAMESPACES = [ "metallb-system", "monitoring", "logging", + "cert-manager", "flux-system", "traefik", "maintenance", diff --git a/services/monitoring/dashboards/atlas-gpu.json b/services/monitoring/dashboards/atlas-gpu.json index 095b329..fb1b216 100644 --- a/services/monitoring/dashboards/atlas-gpu.json +++ b/services/monitoring/dashboards/atlas-gpu.json @@ -57,7 +57,7 @@ "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -67,7 +67,7 @@ }, { "title": "Infrastructure namespaces only", - "url": 
"?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], @@ -207,16 +207,16 @@ "name": "namespace_scope_cpu", "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": 
"namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -226,7 +226,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], @@ -241,16 +241,16 @@ "name": "namespace_scope_gpu", "label": "GPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": 
"namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -260,7 +260,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], @@ -275,16 +275,16 @@ "name": "namespace_scope_ram", "label": "RAM namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": 
"namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -294,7 +294,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], diff --git a/services/monitoring/dashboards/atlas-nodes.json b/services/monitoring/dashboards/atlas-nodes.json index 735a88a..0bfd639 100644 --- a/services/monitoring/dashboards/atlas-nodes.json +++ b/services/monitoring/dashboards/atlas-nodes.json @@ -142,7 +142,7 @@ }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"})", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"})", "refId": "A" } ], diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index ceb1c50..a113d22 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -76,7 +76,7 @@ }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"}) or on() vector(0)", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"}) or on() vector(0)", "refId": "A" } ], @@ -1447,7 
+1447,7 @@ "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -1457,7 +1457,7 @@ }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], @@ -1516,7 +1516,7 @@ "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", 
"targetBlank": false }, { @@ -1526,7 +1526,7 @@ }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], @@ -1585,7 +1585,7 @@ "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22", "targetBlank": false }, { @@ -1595,7 +1595,7 @@ }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22", + "url": 
"?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22", "targetBlank": false } ], @@ -2174,16 +2174,16 @@ "name": "namespace_scope_cpu", "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -2193,7 +2193,7 @@ }, { "text": "infrastructure namespaces only", - "value": 
"namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], @@ -2208,16 +2208,16 @@ "name": "namespace_scope_gpu", "label": "GPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -2227,7 +2227,7 @@ }, { "text": "infrastructure namespaces only", - "value": 
"namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], @@ -2242,16 +2242,16 @@ "name": "namespace_scope_ram", "label": "RAM namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -2261,7 +2261,7 @@ }, { "text": "infrastructure namespaces only", - "value": 
"namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], diff --git a/services/monitoring/dashboards/atlas-pods.json b/services/monitoring/dashboards/atlas-pods.json index 456780a..ff2dbdd 100644 --- a/services/monitoring/dashboards/atlas-pods.json +++ b/services/monitoring/dashboards/atlas-pods.json @@ -200,7 +200,7 @@ }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"})", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"})", "refId": "A" } ], diff --git a/services/monitoring/grafana-dashboard-gpu.yaml b/services/monitoring/grafana-dashboard-gpu.yaml index 74578ff..49b5d39 100644 --- a/services/monitoring/grafana-dashboard-gpu.yaml +++ b/services/monitoring/grafana-dashboard-gpu.yaml @@ -66,7 +66,7 @@ data: "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -76,7 +76,7 @@ data: }, { "title": "Infrastructure namespaces only", - "url": 
"?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], @@ -216,16 +216,16 @@ data: "name": "namespace_scope_cpu", "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": 
"namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -235,7 +235,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], @@ -250,16 +250,16 @@ data: "name": "namespace_scope_gpu", "label": "GPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": 
"namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -269,7 +269,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], @@ -284,16 +284,16 @@ data: "name": "namespace_scope_ram", "label": "RAM namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": 
"namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -303,7 +303,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], diff --git a/services/monitoring/grafana-dashboard-nodes.yaml b/services/monitoring/grafana-dashboard-nodes.yaml index 5234004..5e02c18 100644 --- a/services/monitoring/grafana-dashboard-nodes.yaml +++ b/services/monitoring/grafana-dashboard-nodes.yaml @@ -151,7 +151,7 @@ data: }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"})", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"})", "refId": "A" } ], diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index ff4bcc0..e627658 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -85,7 +85,7 @@ data: }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"}) or on() vector(0)", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"}) or on() vector(0)", 
"refId": "A" } ], @@ -1456,7 +1456,7 @@ data: "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -1466,7 +1466,7 @@ data: }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], @@ -1525,7 +1525,7 @@ data: "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": 
"?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -1535,7 +1535,7 @@ data: }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], @@ -1594,7 +1594,7 @@ data: "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22", "targetBlank": false }, { @@ -1604,7 +1604,7 @@ data: }, { "title": "Infrastructure namespaces only", - "url": 
"?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22", "targetBlank": false } ], @@ -2183,16 +2183,16 @@ data: "name": "namespace_scope_cpu", "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": 
"namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -2202,7 +2202,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], @@ -2217,16 +2217,16 @@ data: "name": "namespace_scope_gpu", "label": "GPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": 
"namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -2236,7 +2236,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], @@ -2251,16 +2251,16 @@ data: "name": "namespace_scope_ram", "label": "RAM namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": 
"namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": true }, { @@ -2270,7 +2270,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", "selected": false } ], diff --git a/services/monitoring/grafana-dashboard-pods.yaml b/services/monitoring/grafana-dashboard-pods.yaml index 24b3277..5ea8343 100644 --- a/services/monitoring/grafana-dashboard-pods.yaml +++ b/services/monitoring/grafana-dashboard-pods.yaml @@ -209,7 +209,7 @@ data: }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|flux-system|traefik|maintenance|postgres)$\"})", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"})", "refId": "A" } ], -- 2.47.2 From 5f4d9b498ef2768d1af42e59f061f94c46f66b6d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 12 Jan 2026 00:46:16 -0300 Subject: [PATCH 661/684] chore: remove ci-demo workload --- .../ci-demo/image-automation.yaml | 26 ---------------- .../applications/ci-demo/kustomization.yaml | 17 ---------- .../applications/kustomization.yaml | 2 -- knowledge/catalog/atlas.yaml | 13 -------- services/ci-demo/deployment.yaml | 31 ------------------- services/ci-demo/image.yaml | 24 -------------- services/ci-demo/kustomization.yaml | 11 ------- services/ci-demo/namespace.yaml | 6 ---- services/ci-demo/service.yaml | 14 --------- 9 files changed, 144 deletions(-) delete mode 100644 
clusters/atlas/flux-system/applications/ci-demo/image-automation.yaml delete mode 100644 clusters/atlas/flux-system/applications/ci-demo/kustomization.yaml delete mode 100644 services/ci-demo/deployment.yaml delete mode 100644 services/ci-demo/image.yaml delete mode 100644 services/ci-demo/kustomization.yaml delete mode 100644 services/ci-demo/namespace.yaml delete mode 100644 services/ci-demo/service.yaml diff --git a/clusters/atlas/flux-system/applications/ci-demo/image-automation.yaml b/clusters/atlas/flux-system/applications/ci-demo/image-automation.yaml deleted file mode 100644 index dd3e85e..0000000 --- a/clusters/atlas/flux-system/applications/ci-demo/image-automation.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# clusters/atlas/flux-system/applications/ci-demo/image-automation.yaml -apiVersion: image.toolkit.fluxcd.io/v1 -kind: ImageUpdateAutomation -metadata: - name: ci-demo - namespace: flux-system -spec: - interval: 1m0s - sourceRef: - kind: GitRepository - name: flux-system - namespace: flux-system - git: - checkout: - ref: - branch: feature/ci-gitops - commit: - author: - email: ops@bstein.dev - name: flux-bot - messageTemplate: "chore(ci-demo): apply image updates" - push: - branch: feature/ci-gitops - update: - strategy: Setters - path: services/ci-demo diff --git a/clusters/atlas/flux-system/applications/ci-demo/kustomization.yaml b/clusters/atlas/flux-system/applications/ci-demo/kustomization.yaml deleted file mode 100644 index 09f598d..0000000 --- a/clusters/atlas/flux-system/applications/ci-demo/kustomization.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# clusters/atlas/flux-system/applications/ci-demo/kustomization.yaml -apiVersion: kustomize.toolkit.fluxcd.io/v1 -kind: Kustomization -metadata: - name: ci-demo - namespace: flux-system -spec: - interval: 10m - path: ./services/ci-demo - prune: true - sourceRef: - kind: GitRepository - name: flux-system - namespace: flux-system - dependsOn: - - name: core - wait: false diff --git 
a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index 4c9fb58..5876064 100644 --- a/clusters/atlas/flux-system/applications/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/kustomization.yaml @@ -22,8 +22,6 @@ resources: - oauth2-proxy/kustomization.yaml - mailu/kustomization.yaml - jenkins/kustomization.yaml - - ci-demo/kustomization.yaml - - ci-demo/image-automation.yaml - ai-llm/kustomization.yaml - nextcloud/kustomization.yaml - nextcloud-mail-sync/kustomization.yaml diff --git a/knowledge/catalog/atlas.yaml b/knowledge/catalog/atlas.yaml index 06e2469..d628b7b 100644 --- a/knowledge/catalog/atlas.yaml +++ b/knowledge/catalog/atlas.yaml @@ -7,9 +7,6 @@ sources: - name: bstein-dev-home path: services/bstein-dev-home targetNamespace: bstein-dev-home -- name: ci-demo - path: services/ci-demo - targetNamespace: null - name: communication path: services/comms targetNamespace: comms @@ -134,16 +131,6 @@ workloads: node-role.kubernetes.io/worker: 'true' images: - python:3.11-slim -- kind: Deployment - namespace: ci-demo - name: ci-demo - labels: - app.kubernetes.io/name: ci-demo - serviceAccountName: null - nodeSelector: - hardware: rpi4 - images: - - registry.bstein.dev/infra/ci-demo:v0.0.0-3 - kind: Deployment namespace: comms name: atlasbot diff --git a/services/ci-demo/deployment.yaml b/services/ci-demo/deployment.yaml deleted file mode 100644 index df882f5..0000000 --- a/services/ci-demo/deployment.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# services/ci-demo/deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: ci-demo - namespace: ci-demo -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: ci-demo - template: - metadata: - labels: - app.kubernetes.io/name: ci-demo - spec: - nodeSelector: - hardware: rpi4 - containers: - - name: ci-demo - image: registry.bstein.dev/infra/ci-demo:latest - ports: - - name: http - containerPort: 8080 - 
readinessProbe: - httpGet: - path: / - port: http - initialDelaySeconds: 2 - periodSeconds: 5 - diff --git a/services/ci-demo/image.yaml b/services/ci-demo/image.yaml deleted file mode 100644 index 333fa0a..0000000 --- a/services/ci-demo/image.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# services/ci-demo/image.yaml -apiVersion: image.toolkit.fluxcd.io/v1 -kind: ImageRepository -metadata: - name: ci-demo - namespace: flux-system -spec: - image: registry.bstein.dev/infra/ci-demo - interval: 1m0s ---- -apiVersion: image.toolkit.fluxcd.io/v1 -kind: ImagePolicy -metadata: - name: ci-demo - namespace: flux-system -spec: - imageRepositoryRef: - name: ci-demo - filterTags: - pattern: '^v(?P0\.0\.0-\d+)$' - extract: '$version' - policy: - semver: - range: ">=0.0.0-0" diff --git a/services/ci-demo/kustomization.yaml b/services/ci-demo/kustomization.yaml deleted file mode 100644 index 3d68ef1..0000000 --- a/services/ci-demo/kustomization.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# services/ci-demo/kustomization.yaml -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -resources: - - namespace.yaml - - image.yaml - - deployment.yaml - - service.yaml -images: - - name: registry.bstein.dev/infra/ci-demo - newTag: v0.0.0-3 # {"$imagepolicy": "flux-system:ci-demo"} diff --git a/services/ci-demo/namespace.yaml b/services/ci-demo/namespace.yaml deleted file mode 100644 index e661fc1..0000000 --- a/services/ci-demo/namespace.yaml +++ /dev/null @@ -1,6 +0,0 @@ -# services/ci-demo/namespace.yaml -apiVersion: v1 -kind: Namespace -metadata: - name: ci-demo - diff --git a/services/ci-demo/service.yaml b/services/ci-demo/service.yaml deleted file mode 100644 index c094387..0000000 --- a/services/ci-demo/service.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# services/ci-demo/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: ci-demo - namespace: ci-demo -spec: - selector: - app.kubernetes.io/name: ci-demo - ports: - - name: http - port: 80 - targetPort: http - -- 2.47.2 From 
29192b9e7f6a73bfe19d34baec892c30a5d3d522 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 12 Jan 2026 21:22:54 -0300 Subject: [PATCH 662/684] services: add minio, outline, planka --- .../applications/kustomization.yaml | 3 + .../applications/minio/kustomization.yaml | 24 ++++ .../applications/outline/kustomization.yaml | 29 +++++ .../applications/planka/kustomization.yaml | 28 +++++ services/minio/bucket-job.yaml | 50 ++++++++ services/minio/deployment.yaml | 68 +++++++++++ services/minio/kustomization.yaml | 10 ++ services/minio/namespace.yaml | 5 + services/minio/pvc.yaml | 12 ++ services/minio/service.yaml | 18 +++ services/outline/app-pvc.yaml | 12 ++ services/outline/deployment.yaml | 111 ++++++++++++++++++ services/outline/ingress.yaml | 26 ++++ services/outline/kustomization.yaml | 12 ++ services/outline/namespace.yaml | 5 + services/outline/redis-deployment.yaml | 47 ++++++++ services/outline/redis-service.yaml | 15 +++ services/outline/service.yaml | 15 +++ services/planka/app-pvc.yaml | 12 ++ services/planka/deployment.yaml | 95 +++++++++++++++ services/planka/ingress.yaml | 26 ++++ services/planka/kustomization.yaml | 11 ++ services/planka/namespace.yaml | 5 + services/planka/service.yaml | 15 +++ services/planka/user-data-pvc.yaml | 12 ++ 25 files changed, 666 insertions(+) create mode 100644 clusters/atlas/flux-system/applications/minio/kustomization.yaml create mode 100644 clusters/atlas/flux-system/applications/outline/kustomization.yaml create mode 100644 clusters/atlas/flux-system/applications/planka/kustomization.yaml create mode 100644 services/minio/bucket-job.yaml create mode 100644 services/minio/deployment.yaml create mode 100644 services/minio/kustomization.yaml create mode 100644 services/minio/namespace.yaml create mode 100644 services/minio/pvc.yaml create mode 100644 services/minio/service.yaml create mode 100644 services/outline/app-pvc.yaml create mode 100644 services/outline/deployment.yaml create mode 100644 
services/outline/ingress.yaml create mode 100644 services/outline/kustomization.yaml create mode 100644 services/outline/namespace.yaml create mode 100644 services/outline/redis-deployment.yaml create mode 100644 services/outline/redis-service.yaml create mode 100644 services/outline/service.yaml create mode 100644 services/planka/app-pvc.yaml create mode 100644 services/planka/deployment.yaml create mode 100644 services/planka/ingress.yaml create mode 100644 services/planka/kustomization.yaml create mode 100644 services/planka/namespace.yaml create mode 100644 services/planka/service.yaml create mode 100644 services/planka/user-data-pvc.yaml diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index 5876064..cc98f6b 100644 --- a/clusters/atlas/flux-system/applications/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/kustomization.yaml @@ -25,3 +25,6 @@ resources: - ai-llm/kustomization.yaml - nextcloud/kustomization.yaml - nextcloud-mail-sync/kustomization.yaml + - minio/kustomization.yaml + - outline/kustomization.yaml + - planka/kustomization.yaml diff --git a/clusters/atlas/flux-system/applications/minio/kustomization.yaml b/clusters/atlas/flux-system/applications/minio/kustomization.yaml new file mode 100644 index 0000000..e776a3e --- /dev/null +++ b/clusters/atlas/flux-system/applications/minio/kustomization.yaml @@ -0,0 +1,24 @@ +# clusters/atlas/flux-system/applications/minio/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: minio + namespace: flux-system +spec: + interval: 10m + path: ./services/minio + prune: true + sourceRef: + kind: GitRepository + name: flux-system + targetNamespace: minio + healthChecks: + - apiVersion: apps/v1 + kind: Deployment + name: minio + namespace: minio + - apiVersion: v1 + kind: Service + name: minio + namespace: minio + wait: false diff --git 
a/clusters/atlas/flux-system/applications/outline/kustomization.yaml b/clusters/atlas/flux-system/applications/outline/kustomization.yaml new file mode 100644 index 0000000..e01449b --- /dev/null +++ b/clusters/atlas/flux-system/applications/outline/kustomization.yaml @@ -0,0 +1,29 @@ +# clusters/atlas/flux-system/applications/outline/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: outline + namespace: flux-system +spec: + interval: 10m + path: ./services/outline + prune: true + sourceRef: + kind: GitRepository + name: flux-system + targetNamespace: outline + dependsOn: + - name: keycloak + - name: mailu + - name: minio + - name: traefik + healthChecks: + - apiVersion: apps/v1 + kind: Deployment + name: outline + namespace: outline + - apiVersion: v1 + kind: Service + name: outline + namespace: outline + wait: false diff --git a/clusters/atlas/flux-system/applications/planka/kustomization.yaml b/clusters/atlas/flux-system/applications/planka/kustomization.yaml new file mode 100644 index 0000000..5219a5d --- /dev/null +++ b/clusters/atlas/flux-system/applications/planka/kustomization.yaml @@ -0,0 +1,28 @@ +# clusters/atlas/flux-system/applications/planka/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: planka + namespace: flux-system +spec: + interval: 10m + path: ./services/planka + prune: true + sourceRef: + kind: GitRepository + name: flux-system + targetNamespace: planka + dependsOn: + - name: keycloak + - name: mailu + - name: traefik + healthChecks: + - apiVersion: apps/v1 + kind: Deployment + name: planka + namespace: planka + - apiVersion: v1 + kind: Service + name: planka + namespace: planka + wait: false diff --git a/services/minio/bucket-job.yaml b/services/minio/bucket-job.yaml new file mode 100644 index 0000000..a17193d --- /dev/null +++ b/services/minio/bucket-job.yaml @@ -0,0 +1,50 @@ +# services/minio/bucket-job.yaml +apiVersion: batch/v1 
+kind: Job +metadata: + name: minio-bucket-bootstrap-1 + namespace: minio +spec: + backoffLimit: 1 + ttlSecondsAfterFinished: 3600 + template: + spec: + restartPolicy: Never + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: ["rpi4", "rpi5"] + containers: + - name: mc + image: minio/mc:RELEASE.2025-08-13T08-35-41Z + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + mc alias set local http://minio.minio.svc.cluster.local:9000 "${MINIO_ROOT_USER}" "${MINIO_ROOT_PASSWORD}" + mc mb -p local/outline || true + mc mb -p local/planka || true + env: + - name: MINIO_ROOT_USER + valueFrom: + secretKeyRef: + name: minio-credentials + key: rootUser + - name: MINIO_ROOT_PASSWORD + valueFrom: + secretKeyRef: + name: minio-credentials + key: rootPassword + resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 200m + memory: 128Mi diff --git a/services/minio/deployment.yaml b/services/minio/deployment.yaml new file mode 100644 index 0000000..4b6d72a --- /dev/null +++ b/services/minio/deployment.yaml @@ -0,0 +1,68 @@ +# services/minio/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: minio + namespace: minio + labels: + app: minio +spec: + replicas: 1 + selector: + matchLabels: + app: minio + strategy: + type: Recreate + template: + metadata: + labels: + app: minio + spec: + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: ["rpi4", "rpi5"] + containers: + - name: minio + image: minio/minio:RELEASE.2025-09-07T16-13-09Z + args: + - server + - /data + - --console-address + - ":9001" + env: + - name: MINIO_ROOT_USER + valueFrom: + secretKeyRef: + name: minio-credentials + key: rootUser + - name: 
MINIO_ROOT_PASSWORD + valueFrom: + secretKeyRef: + name: minio-credentials + key: rootPassword + ports: + - name: api + containerPort: 9000 + - name: console + containerPort: 9001 + volumeMounts: + - name: data + mountPath: /data + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: "1" + memory: 2Gi + volumes: + - name: data + persistentVolumeClaim: + claimName: minio-data diff --git a/services/minio/kustomization.yaml b/services/minio/kustomization.yaml new file mode 100644 index 0000000..0565e31 --- /dev/null +++ b/services/minio/kustomization.yaml @@ -0,0 +1,10 @@ +# services/minio/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: minio +resources: + - namespace.yaml + - pvc.yaml + - deployment.yaml + - bucket-job.yaml + - service.yaml diff --git a/services/minio/namespace.yaml b/services/minio/namespace.yaml new file mode 100644 index 0000000..3465488 --- /dev/null +++ b/services/minio/namespace.yaml @@ -0,0 +1,5 @@ +# services/minio/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: minio diff --git a/services/minio/pvc.yaml b/services/minio/pvc.yaml new file mode 100644 index 0000000..5c3828b --- /dev/null +++ b/services/minio/pvc.yaml @@ -0,0 +1,12 @@ +# services/minio/pvc.yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: minio-data + namespace: minio +spec: + accessModes: ["ReadWriteOnce"] + storageClassName: asteria + resources: + requests: + storage: 100Gi diff --git a/services/minio/service.yaml b/services/minio/service.yaml new file mode 100644 index 0000000..d2edec5 --- /dev/null +++ b/services/minio/service.yaml @@ -0,0 +1,18 @@ +# services/minio/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: minio + namespace: minio + labels: + app: minio +spec: + selector: + app: minio + ports: + - name: api + port: 9000 + targetPort: api + - name: console + port: 9001 + targetPort: console diff --git a/services/outline/app-pvc.yaml 
b/services/outline/app-pvc.yaml new file mode 100644 index 0000000..8ac702c --- /dev/null +++ b/services/outline/app-pvc.yaml @@ -0,0 +1,12 @@ +# services/outline/app-pvc.yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: outline-app + namespace: outline +spec: + accessModes: ["ReadWriteOnce"] + storageClassName: astreae + resources: + requests: + storage: 5Gi diff --git a/services/outline/deployment.yaml b/services/outline/deployment.yaml new file mode 100644 index 0000000..74f3f35 --- /dev/null +++ b/services/outline/deployment.yaml @@ -0,0 +1,111 @@ +# services/outline/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: outline + namespace: outline + labels: + app: outline +spec: + replicas: 1 + selector: + matchLabels: + app: outline + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + template: + metadata: + labels: + app: outline + spec: + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: ["rpi4", "rpi5"] + containers: + - name: outline + image: outlinewiki/outline:1.2.0 + ports: + - name: http + containerPort: 3000 + env: + - name: NODE_ENV + value: production + - name: URL + value: https://notes.bstein.dev + - name: PORT + value: "3000" + - name: REDIS_URL + value: redis://outline-redis:6379 + - name: FILE_STORAGE + value: s3 + - name: AWS_REGION + value: us-east-1 + - name: AWS_S3_FORCE_PATH_STYLE + value: "true" + - name: AWS_S3_ACL + value: private + - name: FORCE_HTTPS + value: "true" + - name: OIDC_ENFORCED + value: "true" + - name: OIDC_SCOPES + value: openid profile email + - name: OIDC_USERNAME_CLAIM + value: preferred_username + - name: OIDC_DISPLAY_NAME + value: Atlas SSO + - name: SMTP_SECURE + value: "false" + - name: SMTP_PORT + value: "25" + envFrom: + - secretRef: + name: outline-db + - secretRef: + 
name: outline-secrets + - secretRef: + name: outline-oidc + - secretRef: + name: outline-s3 + - secretRef: + name: outline-smtp + volumeMounts: + - name: app-data + mountPath: /var/lib/outline + readinessProbe: + httpGet: + path: /_health + port: http + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 3 + failureThreshold: 6 + livenessProbe: + httpGet: + path: /_health + port: http + initialDelaySeconds: 30 + periodSeconds: 20 + timeoutSeconds: 3 + failureThreshold: 6 + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: "1" + memory: 2Gi + volumes: + - name: app-data + persistentVolumeClaim: + claimName: outline-app diff --git a/services/outline/ingress.yaml b/services/outline/ingress.yaml new file mode 100644 index 0000000..735baae --- /dev/null +++ b/services/outline/ingress.yaml @@ -0,0 +1,26 @@ +# services/outline/ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: outline + namespace: outline + annotations: + kubernetes.io/ingress.class: traefik + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" + cert-manager.io/cluster-issuer: letsencrypt +spec: + tls: + - hosts: ["notes.bstein.dev"] + secretName: outline-tls + rules: + - host: notes.bstein.dev + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: outline + port: + number: 80 diff --git a/services/outline/kustomization.yaml b/services/outline/kustomization.yaml new file mode 100644 index 0000000..941cb91 --- /dev/null +++ b/services/outline/kustomization.yaml @@ -0,0 +1,12 @@ +# services/outline/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: outline +resources: + - namespace.yaml + - app-pvc.yaml + - redis-deployment.yaml + - redis-service.yaml + - deployment.yaml + - service.yaml + - ingress.yaml diff --git a/services/outline/namespace.yaml b/services/outline/namespace.yaml new file mode 100644 index 
0000000..4172c02 --- /dev/null +++ b/services/outline/namespace.yaml @@ -0,0 +1,5 @@ +# services/outline/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: outline diff --git a/services/outline/redis-deployment.yaml b/services/outline/redis-deployment.yaml new file mode 100644 index 0000000..5e08128 --- /dev/null +++ b/services/outline/redis-deployment.yaml @@ -0,0 +1,47 @@ +# services/outline/redis-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: outline-redis + namespace: outline + labels: + app: outline-redis +spec: + replicas: 1 + selector: + matchLabels: + app: outline-redis + template: + metadata: + labels: + app: outline-redis + spec: + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: ["rpi4", "rpi5"] + containers: + - name: redis + image: redis:7.4.1-alpine + ports: + - name: redis + containerPort: 6379 + resources: + requests: + cpu: 50m + memory: 128Mi + limits: + cpu: 250m + memory: 256Mi + volumeMounts: + - name: data + mountPath: /data + volumes: + - name: data + emptyDir: {} diff --git a/services/outline/redis-service.yaml b/services/outline/redis-service.yaml new file mode 100644 index 0000000..a80def2 --- /dev/null +++ b/services/outline/redis-service.yaml @@ -0,0 +1,15 @@ +# services/outline/redis-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: outline-redis + namespace: outline + labels: + app: outline-redis +spec: + selector: + app: outline-redis + ports: + - name: redis + port: 6379 + targetPort: redis diff --git a/services/outline/service.yaml b/services/outline/service.yaml new file mode 100644 index 0000000..383df0e --- /dev/null +++ b/services/outline/service.yaml @@ -0,0 +1,15 @@ +# services/outline/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: outline + namespace: outline + labels: + app: outline 
+spec: + selector: + app: outline + ports: + - name: http + port: 80 + targetPort: http diff --git a/services/planka/app-pvc.yaml b/services/planka/app-pvc.yaml new file mode 100644 index 0000000..7ef6a91 --- /dev/null +++ b/services/planka/app-pvc.yaml @@ -0,0 +1,12 @@ +# services/planka/app-pvc.yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: planka-app-data + namespace: planka +spec: + accessModes: ["ReadWriteOnce"] + storageClassName: astreae + resources: + requests: + storage: 2Gi diff --git a/services/planka/deployment.yaml b/services/planka/deployment.yaml new file mode 100644 index 0000000..73b6f88 --- /dev/null +++ b/services/planka/deployment.yaml @@ -0,0 +1,95 @@ +# services/planka/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: planka + namespace: planka + labels: + app: planka +spec: + replicas: 1 + selector: + matchLabels: + app: planka + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + template: + metadata: + labels: + app: planka + spec: + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: ["rpi4", "rpi5"] + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + fsGroupChangePolicy: OnRootMismatch + containers: + - name: planka + image: ghcr.io/plankanban/planka:2.0.0-rc.4 + ports: + - name: http + containerPort: 1337 + env: + - name: BASE_URL + value: https://tasks.bstein.dev + - name: TRUST_PROXY + value: "true" + envFrom: + - secretRef: + name: planka-db + - secretRef: + name: planka-secrets + - secretRef: + name: planka-oidc + - secretRef: + name: planka-smtp + volumeMounts: + - name: user-data + mountPath: /app/public + - name: user-data + mountPath: /app/private + - name: app-data + mountPath: /app/.tmp + readinessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 10 + 
periodSeconds: 10 + timeoutSeconds: 3 + failureThreshold: 6 + livenessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 30 + periodSeconds: 20 + timeoutSeconds: 3 + failureThreshold: 6 + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: "1" + memory: 2Gi + volumes: + - name: user-data + persistentVolumeClaim: + claimName: planka-user-data + - name: app-data + persistentVolumeClaim: + claimName: planka-app-data diff --git a/services/planka/ingress.yaml b/services/planka/ingress.yaml new file mode 100644 index 0000000..7bd2912 --- /dev/null +++ b/services/planka/ingress.yaml @@ -0,0 +1,26 @@ +# services/planka/ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: planka + namespace: planka + annotations: + kubernetes.io/ingress.class: traefik + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" + cert-manager.io/cluster-issuer: letsencrypt +spec: + tls: + - hosts: ["tasks.bstein.dev"] + secretName: planka-tls + rules: + - host: tasks.bstein.dev + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: planka + port: + number: 80 diff --git a/services/planka/kustomization.yaml b/services/planka/kustomization.yaml new file mode 100644 index 0000000..ab42954 --- /dev/null +++ b/services/planka/kustomization.yaml @@ -0,0 +1,11 @@ +# services/planka/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: planka +resources: + - namespace.yaml + - user-data-pvc.yaml + - app-pvc.yaml + - deployment.yaml + - service.yaml + - ingress.yaml diff --git a/services/planka/namespace.yaml b/services/planka/namespace.yaml new file mode 100644 index 0000000..6a56e21 --- /dev/null +++ b/services/planka/namespace.yaml @@ -0,0 +1,5 @@ +# services/planka/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: planka diff --git a/services/planka/service.yaml b/services/planka/service.yaml new file mode 
100644 index 0000000..6abf6cf --- /dev/null +++ b/services/planka/service.yaml @@ -0,0 +1,15 @@ +# services/planka/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: planka + namespace: planka + labels: + app: planka +spec: + selector: + app: planka + ports: + - name: http + port: 80 + targetPort: http diff --git a/services/planka/user-data-pvc.yaml b/services/planka/user-data-pvc.yaml new file mode 100644 index 0000000..760f33c --- /dev/null +++ b/services/planka/user-data-pvc.yaml @@ -0,0 +1,12 @@ +# services/planka/user-data-pvc.yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: planka-user-data + namespace: planka +spec: + accessModes: ["ReadWriteOnce"] + storageClassName: asteria + resources: + requests: + storage: 20Gi -- 2.47.2 From 9df1eb85c3f835d774ba395828edf28a4cc40bcf Mon Sep 17 00:00:00 2001 From: flux-bot Date: Tue, 13 Jan 2026 00:29:57 +0000 Subject: [PATCH 663/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index d9a50bd..c2b1be3 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-87 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-88 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From d87d584992f168782042a7b5ea32327a8cc05108 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Tue, 13 Jan 2026 00:30:57 +0000 Subject: [PATCH 664/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- 
services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index f3b3ef6..6b14a3e 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-87 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-88 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index f88cf41..980af46 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-87 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-88 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From d673493f89aa7b5f408afaf40654f7b05c475d9b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 12 Jan 2026 21:40:01 -0300 Subject: [PATCH 665/684] minio: rerun bucket bootstrap job --- services/minio/bucket-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/minio/bucket-job.yaml b/services/minio/bucket-job.yaml index a17193d..3e9fa15 100644 --- a/services/minio/bucket-job.yaml +++ b/services/minio/bucket-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: minio-bucket-bootstrap-1 + 
name: minio-bucket-bootstrap-2 namespace: minio spec: backoffLimit: 1 -- 2.47.2 From 6376beebb1323de02c0db60748415820793b76a9 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 12 Jan 2026 21:45:00 -0300 Subject: [PATCH 666/684] services: fix outline pg ssl and planka init --- services/outline/deployment.yaml | 2 ++ services/planka/deployment.yaml | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/services/outline/deployment.yaml b/services/outline/deployment.yaml index 74f3f35..dd50800 100644 --- a/services/outline/deployment.yaml +++ b/services/outline/deployment.yaml @@ -46,6 +46,8 @@ spec: value: "3000" - name: REDIS_URL value: redis://outline-redis:6379 + - name: PGSSLMODE + value: disable - name: FILE_STORAGE value: s3 - name: AWS_REGION diff --git a/services/planka/deployment.yaml b/services/planka/deployment.yaml index 73b6f88..57ebd6b 100644 --- a/services/planka/deployment.yaml +++ b/services/planka/deployment.yaml @@ -36,6 +36,27 @@ spec: runAsGroup: 1000 fsGroup: 1000 fsGroupChangePolicy: OnRootMismatch + initContainers: + - name: init-user-data + image: docker.io/alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - | + set -e + mkdir -p /app/public/preloaded-favicons \ + /app/public/favicons \ + /app/public/user-avatars \ + /app/public/background-images \ + /app/private/attachments \ + /app/.tmp + chown -R 1000:1000 /app/public /app/private /app/.tmp + volumeMounts: + - name: user-data + mountPath: /app/public + - name: user-data + mountPath: /app/private + - name: app-data + mountPath: /app/.tmp containers: - name: planka image: ghcr.io/plankanban/planka:2.0.0-rc.4 -- 2.47.2 From 7a49e99e62af212afbccda90769e42b2e258dcf0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 12 Jan 2026 22:02:07 -0300 Subject: [PATCH 667/684] planka: fix init permissions --- services/planka/deployment.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/services/planka/deployment.yaml b/services/planka/deployment.yaml index 57ebd6b..236c94a 
100644 --- a/services/planka/deployment.yaml +++ b/services/planka/deployment.yaml @@ -39,6 +39,9 @@ spec: initContainers: - name: init-user-data image: docker.io/alpine:3.20 + securityContext: + runAsUser: 0 + runAsGroup: 0 command: ["/bin/sh", "-c"] args: - | -- 2.47.2 From 5a5766c9b5416a8797d8f60b8a592a7828cfb314 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 12 Jan 2026 22:47:23 -0300 Subject: [PATCH 668/684] planka: avoid mounting over assets --- services/planka/deployment.yaml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/services/planka/deployment.yaml b/services/planka/deployment.yaml index 236c94a..0a3d93d 100644 --- a/services/planka/deployment.yaml +++ b/services/planka/deployment.yaml @@ -46,20 +46,15 @@ spec: args: - | set -e - mkdir -p /app/public/preloaded-favicons \ - /app/public/favicons \ - /app/public/user-avatars \ - /app/public/background-images \ - /app/private/attachments \ - /app/.tmp - chown -R 1000:1000 /app/public /app/private /app/.tmp + mkdir -p /data/public/user-avatars \ + /data/public/background-images \ + /data/private/attachments + chown -R 1000:1000 /data /tmp-data volumeMounts: - name: user-data - mountPath: /app/public - - name: user-data - mountPath: /app/private + mountPath: /data - name: app-data - mountPath: /app/.tmp + mountPath: /tmp-data containers: - name: planka image: ghcr.io/plankanban/planka:2.0.0-rc.4 @@ -82,9 +77,14 @@ spec: name: planka-smtp volumeMounts: - name: user-data - mountPath: /app/public + mountPath: /app/public/user-avatars + subPath: public/user-avatars - name: user-data - mountPath: /app/private + mountPath: /app/public/background-images + subPath: public/background-images + - name: user-data + mountPath: /app/private/attachments + subPath: private/attachments - name: app-data mountPath: /app/.tmp readinessProbe: -- 2.47.2 From e12d020c514c484dbdee393992eae57fa17c9e83 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Tue, 13 Jan 2026 01:57:04 +0000 
Subject: [PATCH 669/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index c2b1be3..f00cb05 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-88 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-89 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From ed9a41bd70f187f8244ae0f90b0fe1ce81560046 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Tue, 13 Jan 2026 01:58:04 +0000 Subject: [PATCH 670/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 6b14a3e..09a7bad 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-88 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-89 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 
980af46..6cdca28 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-88 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-89 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 1a50f51115e908df6a0f66c3bd48f6ea7b65bea1 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 12 Jan 2026 23:13:30 -0300 Subject: [PATCH 671/684] planka: enable project owners via oidc --- services/keycloak/realm-settings-job.yaml | 63 ++++++++++++++++++++++- services/planka/deployment.yaml | 6 +++ 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml index af595c1..108f141 100644 --- a/services/keycloak/realm-settings-job.yaml +++ b/services/keycloak/realm-settings-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-realm-settings-14 + name: keycloak-realm-settings-15 namespace: sso spec: backoffLimit: 0 @@ -251,6 +251,67 @@ spec: if status not in (201, 204): raise SystemExit(f"Unexpected group create response for {group_name}: {status}") + # Ensure Planka client exposes groups in userinfo for role mapping. 
+ status, clients = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/clients?clientId=planka", + access_token, + ) + planka_client = None + if status == 200 and isinstance(clients, list): + for item in clients: + if isinstance(item, dict) and item.get("clientId") == "planka": + planka_client = item + break + + if planka_client: + client_id = planka_client.get("id") + mapper_payload = { + "name": "groups", + "protocol": "openid-connect", + "protocolMapper": "oidc-group-membership-mapper", + "consentRequired": False, + "config": { + "full.path": "false", + "id.token.claim": "true", + "access.token.claim": "true", + "userinfo.token.claim": "true", + "claim.name": "groups", + "jsonType.label": "String", + }, + } + status, mappers = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/clients/{client_id}/protocol-mappers/models", + access_token, + ) + existing = None + if status == 200 and isinstance(mappers, list): + for item in mappers: + if isinstance(item, dict) and item.get("name") == mapper_payload["name"]: + existing = item + break + + if existing and existing.get("id"): + mapper_payload["id"] = existing["id"] + status, _ = http_json( + "PUT", + f"{base_url}/admin/realms/{realm}/clients/{client_id}/protocol-mappers/models/{existing['id']}", + access_token, + mapper_payload, + ) + if status not in (200, 204): + raise SystemExit(f"Unexpected protocol mapper update response: {status}") + else: + status, _ = http_json( + "POST", + f"{base_url}/admin/realms/{realm}/clients/{client_id}/protocol-mappers/models", + access_token, + mapper_payload, + ) + if status not in (201, 204): + raise SystemExit(f"Unexpected protocol mapper create response: {status}") + # Ensure MFA is on by default for newly-created users. 
status, required_actions = http_json( "GET", diff --git a/services/planka/deployment.yaml b/services/planka/deployment.yaml index 0a3d93d..36fe4fd 100644 --- a/services/planka/deployment.yaml +++ b/services/planka/deployment.yaml @@ -66,6 +66,12 @@ spec: value: https://tasks.bstein.dev - name: TRUST_PROXY value: "true" + - name: OIDC_IGNORE_ROLES + value: "false" + - name: OIDC_PROJECT_OWNER_ROLES + value: "*" + - name: OIDC_ROLES_ATTRIBUTE + value: groups envFrom: - secretRef: name: planka-db -- 2.47.2 From 35a19a2f7b21a120104476e51c520a4ca169dee1 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 12 Jan 2026 23:13:53 -0300 Subject: [PATCH 672/684] outline: move to local storage --- .../applications/kustomization.yaml | 1 - .../applications/minio/kustomization.yaml | 24 ------- .../applications/outline/kustomization.yaml | 1 - services/minio/bucket-job.yaml | 50 -------------- services/minio/deployment.yaml | 68 ------------------- services/minio/kustomization.yaml | 10 --- services/minio/namespace.yaml | 5 -- services/minio/pvc.yaml | 12 ---- services/minio/service.yaml | 18 ----- services/outline/deployment.yaml | 20 ++---- services/outline/kustomization.yaml | 2 +- .../outline/{app-pvc.yaml => user-pvc.yaml} | 6 +- 12 files changed, 11 insertions(+), 206 deletions(-) delete mode 100644 clusters/atlas/flux-system/applications/minio/kustomization.yaml delete mode 100644 services/minio/bucket-job.yaml delete mode 100644 services/minio/deployment.yaml delete mode 100644 services/minio/kustomization.yaml delete mode 100644 services/minio/namespace.yaml delete mode 100644 services/minio/pvc.yaml delete mode 100644 services/minio/service.yaml rename services/outline/{app-pvc.yaml => user-pvc.yaml} (64%) diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index cc98f6b..d48cf9e 100644 --- a/clusters/atlas/flux-system/applications/kustomization.yaml +++ 
b/clusters/atlas/flux-system/applications/kustomization.yaml @@ -25,6 +25,5 @@ resources: - ai-llm/kustomization.yaml - nextcloud/kustomization.yaml - nextcloud-mail-sync/kustomization.yaml - - minio/kustomization.yaml - outline/kustomization.yaml - planka/kustomization.yaml diff --git a/clusters/atlas/flux-system/applications/minio/kustomization.yaml b/clusters/atlas/flux-system/applications/minio/kustomization.yaml deleted file mode 100644 index e776a3e..0000000 --- a/clusters/atlas/flux-system/applications/minio/kustomization.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# clusters/atlas/flux-system/applications/minio/kustomization.yaml -apiVersion: kustomize.toolkit.fluxcd.io/v1 -kind: Kustomization -metadata: - name: minio - namespace: flux-system -spec: - interval: 10m - path: ./services/minio - prune: true - sourceRef: - kind: GitRepository - name: flux-system - targetNamespace: minio - healthChecks: - - apiVersion: apps/v1 - kind: Deployment - name: minio - namespace: minio - - apiVersion: v1 - kind: Service - name: minio - namespace: minio - wait: false diff --git a/clusters/atlas/flux-system/applications/outline/kustomization.yaml b/clusters/atlas/flux-system/applications/outline/kustomization.yaml index e01449b..429d093 100644 --- a/clusters/atlas/flux-system/applications/outline/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/outline/kustomization.yaml @@ -15,7 +15,6 @@ spec: dependsOn: - name: keycloak - name: mailu - - name: minio - name: traefik healthChecks: - apiVersion: apps/v1 diff --git a/services/minio/bucket-job.yaml b/services/minio/bucket-job.yaml deleted file mode 100644 index 3e9fa15..0000000 --- a/services/minio/bucket-job.yaml +++ /dev/null @@ -1,50 +0,0 @@ -# services/minio/bucket-job.yaml -apiVersion: batch/v1 -kind: Job -metadata: - name: minio-bucket-bootstrap-2 - namespace: minio -spec: - backoffLimit: 1 - ttlSecondsAfterFinished: 3600 - template: - spec: - restartPolicy: Never - nodeSelector: - 
node-role.kubernetes.io/worker: "true" - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: hardware - operator: In - values: ["rpi4", "rpi5"] - containers: - - name: mc - image: minio/mc:RELEASE.2025-08-13T08-35-41Z - command: ["/bin/sh", "-c"] - args: - - | - set -euo pipefail - mc alias set local http://minio.minio.svc.cluster.local:9000 "${MINIO_ROOT_USER}" "${MINIO_ROOT_PASSWORD}" - mc mb -p local/outline || true - mc mb -p local/planka || true - env: - - name: MINIO_ROOT_USER - valueFrom: - secretKeyRef: - name: minio-credentials - key: rootUser - - name: MINIO_ROOT_PASSWORD - valueFrom: - secretKeyRef: - name: minio-credentials - key: rootPassword - resources: - requests: - cpu: 50m - memory: 64Mi - limits: - cpu: 200m - memory: 128Mi diff --git a/services/minio/deployment.yaml b/services/minio/deployment.yaml deleted file mode 100644 index 4b6d72a..0000000 --- a/services/minio/deployment.yaml +++ /dev/null @@ -1,68 +0,0 @@ -# services/minio/deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: minio - namespace: minio - labels: - app: minio -spec: - replicas: 1 - selector: - matchLabels: - app: minio - strategy: - type: Recreate - template: - metadata: - labels: - app: minio - spec: - nodeSelector: - node-role.kubernetes.io/worker: "true" - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: hardware - operator: In - values: ["rpi4", "rpi5"] - containers: - - name: minio - image: minio/minio:RELEASE.2025-09-07T16-13-09Z - args: - - server - - /data - - --console-address - - ":9001" - env: - - name: MINIO_ROOT_USER - valueFrom: - secretKeyRef: - name: minio-credentials - key: rootUser - - name: MINIO_ROOT_PASSWORD - valueFrom: - secretKeyRef: - name: minio-credentials - key: rootPassword - ports: - - name: api - containerPort: 9000 - - name: console - containerPort: 9001 - volumeMounts: - 
- name: data - mountPath: /data - resources: - requests: - cpu: 200m - memory: 512Mi - limits: - cpu: "1" - memory: 2Gi - volumes: - - name: data - persistentVolumeClaim: - claimName: minio-data diff --git a/services/minio/kustomization.yaml b/services/minio/kustomization.yaml deleted file mode 100644 index 0565e31..0000000 --- a/services/minio/kustomization.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# services/minio/kustomization.yaml -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -namespace: minio -resources: - - namespace.yaml - - pvc.yaml - - deployment.yaml - - bucket-job.yaml - - service.yaml diff --git a/services/minio/namespace.yaml b/services/minio/namespace.yaml deleted file mode 100644 index 3465488..0000000 --- a/services/minio/namespace.yaml +++ /dev/null @@ -1,5 +0,0 @@ -# services/minio/namespace.yaml -apiVersion: v1 -kind: Namespace -metadata: - name: minio diff --git a/services/minio/pvc.yaml b/services/minio/pvc.yaml deleted file mode 100644 index 5c3828b..0000000 --- a/services/minio/pvc.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# services/minio/pvc.yaml -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: minio-data - namespace: minio -spec: - accessModes: ["ReadWriteOnce"] - storageClassName: asteria - resources: - requests: - storage: 100Gi diff --git a/services/minio/service.yaml b/services/minio/service.yaml deleted file mode 100644 index d2edec5..0000000 --- a/services/minio/service.yaml +++ /dev/null @@ -1,18 +0,0 @@ -# services/minio/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: minio - namespace: minio - labels: - app: minio -spec: - selector: - app: minio - ports: - - name: api - port: 9000 - targetPort: api - - name: console - port: 9001 - targetPort: console diff --git a/services/outline/deployment.yaml b/services/outline/deployment.yaml index dd50800..9f8160e 100644 --- a/services/outline/deployment.yaml +++ b/services/outline/deployment.yaml @@ -49,13 +49,9 @@ spec: - name: PGSSLMODE value: 
disable - name: FILE_STORAGE - value: s3 - - name: AWS_REGION - value: us-east-1 - - name: AWS_S3_FORCE_PATH_STYLE - value: "true" - - name: AWS_S3_ACL - value: private + value: local + - name: FILE_STORAGE_LOCAL_ROOT_DIR + value: /var/lib/outline/data - name: FORCE_HTTPS value: "true" - name: OIDC_ENFORCED @@ -77,13 +73,11 @@ spec: name: outline-secrets - secretRef: name: outline-oidc - - secretRef: - name: outline-s3 - secretRef: name: outline-smtp volumeMounts: - - name: app-data - mountPath: /var/lib/outline + - name: user-data + mountPath: /var/lib/outline/data readinessProbe: httpGet: path: /_health @@ -108,6 +102,6 @@ spec: cpu: "1" memory: 2Gi volumes: - - name: app-data + - name: user-data persistentVolumeClaim: - claimName: outline-app + claimName: outline-user-data diff --git a/services/outline/kustomization.yaml b/services/outline/kustomization.yaml index 941cb91..33640f6 100644 --- a/services/outline/kustomization.yaml +++ b/services/outline/kustomization.yaml @@ -4,7 +4,7 @@ kind: Kustomization namespace: outline resources: - namespace.yaml - - app-pvc.yaml + - user-pvc.yaml - redis-deployment.yaml - redis-service.yaml - deployment.yaml diff --git a/services/outline/app-pvc.yaml b/services/outline/user-pvc.yaml similarity index 64% rename from services/outline/app-pvc.yaml rename to services/outline/user-pvc.yaml index 8ac702c..f31426d 100644 --- a/services/outline/app-pvc.yaml +++ b/services/outline/user-pvc.yaml @@ -1,12 +1,12 @@ -# services/outline/app-pvc.yaml +# services/outline/user-pvc.yaml apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: outline-app + name: outline-user-data namespace: outline spec: accessModes: ["ReadWriteOnce"] - storageClassName: astreae + storageClassName: asteria resources: requests: storage: 5Gi -- 2.47.2 From 4d6d0b89b24db382aef65343981deea127cdd341 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 12 Jan 2026 23:24:09 -0300 Subject: [PATCH 673/684] planka: default users to project owners --- 
services/keycloak/realm-settings-job.yaml | 103 +++++++++++++++++++--- services/planka/deployment.yaml | 4 +- 2 files changed, 95 insertions(+), 12 deletions(-) diff --git a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml index 108f141..bdc816d 100644 --- a/services/keycloak/realm-settings-job.yaml +++ b/services/keycloak/realm-settings-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-realm-settings-15 + name: keycloak-realm-settings-16 namespace: sso spec: backoffLimit: 0 @@ -227,21 +227,23 @@ spec: if status not in (200, 204): raise SystemExit(f"Unexpected user-profile update response: {status}") - # Ensure basic realm groups exist for provisioning. - for group_name in ("dev", "admin"): + def find_group(group_name: str): status, groups = http_json( "GET", f"{base_url}/admin/realms/{realm}/groups?search={urllib.parse.quote(group_name)}", access_token, ) - exists = False - if status == 200 and isinstance(groups, list): - for item in groups: - if isinstance(item, dict) and item.get("name") == group_name: - exists = True - break - if exists: - continue + if status != 200 or not isinstance(groups, list): + return None + for item in groups: + if isinstance(item, dict) and item.get("name") == group_name: + return item + return None + + def ensure_group(group_name: str): + group = find_group(group_name) + if group: + return group status, _ = http_json( "POST", f"{base_url}/admin/realms/{realm}/groups", @@ -250,6 +252,85 @@ spec: ) if status not in (201, 204): raise SystemExit(f"Unexpected group create response for {group_name}: {status}") + return find_group(group_name) + + # Ensure basic realm groups exist for provisioning. 
+ ensure_group("dev") + ensure_group("admin") + planka_group = ensure_group("planka-users") + + if planka_group and planka_group.get("id"): + group_id = planka_group["id"] + status, default_groups = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/default-groups", + access_token, + ) + default_ids = set() + if status == 200 and isinstance(default_groups, list): + for item in default_groups: + if isinstance(item, dict) and item.get("id"): + default_ids.add(item["id"]) + + if group_id not in default_ids: + status, _ = http_json( + "PUT", + f"{base_url}/admin/realms/{realm}/default-groups/{group_id}", + access_token, + ) + if status not in (200, 201, 204): + status, _ = http_json( + "POST", + f"{base_url}/admin/realms/{realm}/default-groups/{group_id}", + access_token, + ) + if status not in (200, 201, 204): + raise SystemExit( + f"Unexpected default-group update response for planka-users: {status}" + ) + + # Ensure all existing users are in the planka-users group. + first = 0 + page_size = 100 + while True: + status, users = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/users?first={first}&max={page_size}", + access_token, + ) + if status != 200 or not isinstance(users, list) or not users: + break + for user in users: + user_id = user.get("id") if isinstance(user, dict) else None + if not user_id: + continue + status, groups = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/users/{user_id}/groups", + access_token, + ) + if status == 200 and isinstance(groups, list): + already = any(isinstance(g, dict) and g.get("id") == group_id for g in groups) + if already: + continue + status, _ = http_json( + "PUT", + f"{base_url}/admin/realms/{realm}/users/{user_id}/groups/{group_id}", + access_token, + ) + if status not in (200, 201, 204): + status, _ = http_json( + "POST", + f"{base_url}/admin/realms/{realm}/users/{user_id}/groups/{group_id}", + access_token, + ) + if status not in (200, 201, 204): + raise SystemExit( + f"Unexpected group membership 
update for user {user_id}: {status}" + ) + if len(users) < page_size: + break + first += page_size # Ensure Planka client exposes groups in userinfo for role mapping. status, clients = http_json( diff --git a/services/planka/deployment.yaml b/services/planka/deployment.yaml index 36fe4fd..9524245 100644 --- a/services/planka/deployment.yaml +++ b/services/planka/deployment.yaml @@ -68,8 +68,10 @@ spec: value: "true" - name: OIDC_IGNORE_ROLES value: "false" + - name: OIDC_ADMIN_ROLES + value: admin - name: OIDC_PROJECT_OWNER_ROLES - value: "*" + value: planka-users - name: OIDC_ROLES_ATTRIBUTE value: groups envFrom: -- 2.47.2 From 606718459eac21d93b056cb692ba8aa13235ac37 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Tue, 13 Jan 2026 02:37:08 +0000 Subject: [PATCH 674/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index f00cb05..38dec19 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-89 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-90 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 4d884bfcb1a9dd63f6b60c7509d1ef47759d8f4b Mon Sep 17 00:00:00 2001 From: flux-bot Date: Tue, 13 Jan 2026 02:38:08 +0000 Subject: [PATCH 675/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 09a7bad..e451817 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-89 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-90 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 6cdca28..0e658dc 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-89 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-90 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 27460f8dc338c10251770774838841a42c36f534 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Tue, 13 Jan 2026 11:59:53 +0000 Subject: [PATCH 676/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 38dec19..3da1796 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: 
registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-90 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-91 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From b01ac8da25d13d5ec7e99e73ea958aec06bfb238 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Tue, 13 Jan 2026 12:00:52 +0000 Subject: [PATCH 677/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index e451817..68c4e5f 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-90 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-91 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 0e658dc..725fa64 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-90 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-91 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: 
- python -- 2.47.2 From 6d213e5b2567c2cd3878e1a885ddb946e0280d66 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Tue, 13 Jan 2026 12:47:56 +0000 Subject: [PATCH 678/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/frontend-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 3da1796..3092edb 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -22,7 +22,7 @@ spec: - name: harbor-bstein-robot containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-91 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} imagePullPolicy: Always ports: - name: http -- 2.47.2 From 17b733c65ed803263bb05d7b5dd24770c4cba739 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Tue, 13 Jan 2026 12:48:56 +0000 Subject: [PATCH 679/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/backend-deployment.yaml | 2 +- services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 68c4e5f..2e92443 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -24,7 +24,7 @@ spec: - name: harbor-bstein-robot containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-91 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: ["gunicorn"] args: 
diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 725fa64..5e7c779 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -23,7 +23,7 @@ spec: - name: harbor-bstein-robot containers: - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-91 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} imagePullPolicy: Always command: - python -- 2.47.2 From 6da576a70704f345bedbf98607d538a421d0c8cc Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 13 Jan 2026 09:59:39 -0300 Subject: [PATCH 680/684] iac: externalize ConfigMap scripts --- .../chat-ai-gateway-configmap.yaml | 78 --- services/bstein-dev-home/kustomization.yaml | 11 +- services/bstein-dev-home/scripts/gateway.py | 70 ++ .../scripts}/test_portal_onboarding_flow.py | 0 .../scripts}/vaultwarden_cred_sync.py | 0 services/comms/atlasbot-configmap.yaml | 629 ------------------ services/comms/guest-register-configmap.yaml | 271 -------- services/comms/kustomization.yaml | 32 +- services/comms/scripts/atlasbot/bot.py | 622 +++++++++++++++++ .../comms/scripts/guest-register/server.py | 264 ++++++++ .../synapse/redis/ping_liveness_local.sh | 20 + .../redis/ping_liveness_local_and_master.sh | 5 + .../synapse/redis/ping_liveness_master.sh | 20 + .../synapse/redis/ping_readiness_local.sh | 19 + .../redis/ping_readiness_local_and_master.sh | 5 + .../synapse/redis/ping_readiness_master.sh | 19 + .../scripts/synapse/redis/start-master.sh | 15 + services/comms/scripts/synapse/signing-key.sh | 41 ++ services/comms/synapse-rendered.yaml | 192 ------ services/jenkins/configmap-init-scripts.yaml | 24 - services/jenkins/kustomization.yaml | 9 +- services/jenkins/scripts/theme.groovy | 16 + 
services/logging/kustomization.yaml | 35 +- .../logging/node-image-gc-rpi4-script.yaml | 44 -- .../logging/node-image-prune-rpi5-script.yaml | 34 - .../logging/node-log-rotation-script.yaml | 72 -- .../opensearch-observability-setup-job.yaml | 148 ----- .../logging/opensearch-prune-cronjob.yaml | 85 --- .../logging/scripts/node_image_gc_rpi4.sh | 36 + .../logging/scripts/node_image_prune_rpi5.sh | 26 + services/logging/scripts/node_log_rotation.sh | 64 ++ .../scripts/opensearch_observability_seed.py | 140 ++++ services/logging/scripts/opensearch_prune.py | 77 +++ services/maintenance/kustomization.yaml | 23 +- .../node-image-sweeper-script.yaml | 100 --- services/maintenance/node-nofile-script.yaml | 38 -- services/maintenance/pod-cleaner-script.yaml | 20 - .../maintenance/scripts/node_image_sweeper.sh | 92 +++ services/maintenance/scripts/node_nofile.sh | 30 + services/maintenance/scripts/pod_cleaner.sh | 12 + .../monitoring/grafana-smtp-sync-script.yaml | 39 -- .../jetson-tegrastats-exporter.yaml | 88 --- services/monitoring/kustomization.yaml | 22 +- .../monitoring/postmark-exporter-script.yaml | 156 ----- .../monitoring/scripts/grafana_smtp_sync.sh | 31 + .../scripts/jetson_tegrastats_exporter.py | 80 +++ .../monitoring/scripts/postmark_exporter.py | 149 +++++ 47 files changed, 1971 insertions(+), 2032 deletions(-) delete mode 100644 services/bstein-dev-home/chat-ai-gateway-configmap.yaml create mode 100644 services/bstein-dev-home/scripts/gateway.py rename {scripts/tests => services/bstein-dev-home/scripts}/test_portal_onboarding_flow.py (100%) rename {scripts => services/bstein-dev-home/scripts}/vaultwarden_cred_sync.py (100%) delete mode 100644 services/comms/atlasbot-configmap.yaml delete mode 100644 services/comms/guest-register-configmap.yaml create mode 100644 services/comms/scripts/atlasbot/bot.py create mode 100644 services/comms/scripts/guest-register/server.py create mode 100644 services/comms/scripts/synapse/redis/ping_liveness_local.sh create mode 
100644 services/comms/scripts/synapse/redis/ping_liveness_local_and_master.sh create mode 100644 services/comms/scripts/synapse/redis/ping_liveness_master.sh create mode 100644 services/comms/scripts/synapse/redis/ping_readiness_local.sh create mode 100644 services/comms/scripts/synapse/redis/ping_readiness_local_and_master.sh create mode 100644 services/comms/scripts/synapse/redis/ping_readiness_master.sh create mode 100644 services/comms/scripts/synapse/redis/start-master.sh create mode 100644 services/comms/scripts/synapse/signing-key.sh delete mode 100644 services/jenkins/configmap-init-scripts.yaml create mode 100644 services/jenkins/scripts/theme.groovy delete mode 100644 services/logging/node-image-gc-rpi4-script.yaml delete mode 100644 services/logging/node-image-prune-rpi5-script.yaml delete mode 100644 services/logging/node-log-rotation-script.yaml create mode 100644 services/logging/scripts/node_image_gc_rpi4.sh create mode 100644 services/logging/scripts/node_image_prune_rpi5.sh create mode 100644 services/logging/scripts/node_log_rotation.sh create mode 100644 services/logging/scripts/opensearch_observability_seed.py create mode 100644 services/logging/scripts/opensearch_prune.py delete mode 100644 services/maintenance/node-image-sweeper-script.yaml delete mode 100644 services/maintenance/node-nofile-script.yaml delete mode 100644 services/maintenance/pod-cleaner-script.yaml create mode 100644 services/maintenance/scripts/node_image_sweeper.sh create mode 100644 services/maintenance/scripts/node_nofile.sh create mode 100644 services/maintenance/scripts/pod_cleaner.sh delete mode 100644 services/monitoring/grafana-smtp-sync-script.yaml delete mode 100644 services/monitoring/postmark-exporter-script.yaml create mode 100644 services/monitoring/scripts/grafana_smtp_sync.sh create mode 100644 services/monitoring/scripts/jetson_tegrastats_exporter.py create mode 100644 services/monitoring/scripts/postmark_exporter.py diff --git 
a/services/bstein-dev-home/chat-ai-gateway-configmap.yaml b/services/bstein-dev-home/chat-ai-gateway-configmap.yaml deleted file mode 100644 index 17ed95b..0000000 --- a/services/bstein-dev-home/chat-ai-gateway-configmap.yaml +++ /dev/null @@ -1,78 +0,0 @@ -# services/bstein-dev-home/chat-ai-gateway-configmap.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: chat-ai-gateway - namespace: bstein-dev-home -data: - gateway.py: | - import json - import os - from http.server import BaseHTTPRequestHandler, HTTPServer - from urllib import request, error - - UPSTREAM = os.environ.get("UPSTREAM_URL", "http://bstein-dev-home-backend/api/chat") - KEY_MATRIX = os.environ.get("CHAT_KEY_MATRIX", "") - KEY_HOMEPAGE = os.environ.get("CHAT_KEY_HOMEPAGE", "") - - ALLOWED = {k for k in (KEY_MATRIX, KEY_HOMEPAGE) if k} - - class Handler(BaseHTTPRequestHandler): - def _send_json(self, code: int, payload: dict): - body = json.dumps(payload).encode() - self.send_response(code) - self.send_header("Content-Type", "application/json") - self.send_header("Content-Length", str(len(body))) - self.end_headers() - self.wfile.write(body) - - def do_GET(self): # noqa: N802 - if self.path in ("/healthz", "/"): - return self._send_json(200, {"ok": True}) - return self._send_json(404, {"error": "not_found"}) - - def do_POST(self): # noqa: N802 - if self.path != "/": - return self._send_json(404, {"error": "not_found"}) - - key = self.headers.get("x-api-key", "") - if not key or key not in ALLOWED: - return self._send_json(401, {"error": "unauthorized"}) - - length = int(self.headers.get("content-length", "0") or "0") - raw = self.rfile.read(length) if length else b"{}" - - try: - upstream_req = request.Request( - UPSTREAM, - data=raw, - headers={"Content-Type": "application/json"}, - method="POST", - ) - with request.urlopen(upstream_req, timeout=90) as resp: - data = resp.read() - self.send_response(resp.status) - for k, v in resp.headers.items(): - if k.lower() in ("content-length", 
"connection", "server", "date"): - continue - self.send_header(k, v) - self.send_header("Content-Length", str(len(data))) - self.end_headers() - self.wfile.write(data) - except error.HTTPError as e: - data = e.read() if hasattr(e, "read") else b"" - self.send_response(e.code) - self.send_header("Content-Type", "application/json") - self.send_header("Content-Length", str(len(data))) - self.end_headers() - self.wfile.write(data) - except Exception: - return self._send_json(502, {"error": "bad_gateway"}) - - def main(): - port = int(os.environ.get("PORT", "8080")) - httpd = HTTPServer(("0.0.0.0", port), Handler) - httpd.serve_forever() - - if __name__ == "__main__": - main() diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index 3a423ef..4847d2b 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -7,7 +7,6 @@ resources: - image.yaml - rbac.yaml - portal-e2e-client-secret-sync-rbac.yaml - - chat-ai-gateway-configmap.yaml - chat-ai-gateway-deployment.yaml - chat-ai-gateway-service.yaml - frontend-deployment.yaml @@ -19,15 +18,21 @@ resources: - ingress.yaml configMapGenerator: + - name: chat-ai-gateway + namespace: bstein-dev-home + files: + - gateway.py=scripts/gateway.py + options: + disableNameSuffixHash: true - name: vaultwarden-cred-sync-script namespace: bstein-dev-home files: - - vaultwarden_cred_sync.py=../../scripts/vaultwarden_cred_sync.py + - vaultwarden_cred_sync.py=scripts/vaultwarden_cred_sync.py options: disableNameSuffixHash: true - name: portal-onboarding-e2e-tests namespace: bstein-dev-home files: - - test_portal_onboarding_flow.py=../../scripts/tests/test_portal_onboarding_flow.py + - test_portal_onboarding_flow.py=scripts/test_portal_onboarding_flow.py options: disableNameSuffixHash: true diff --git a/services/bstein-dev-home/scripts/gateway.py b/services/bstein-dev-home/scripts/gateway.py new file mode 100644 index 0000000..3ca2fa1 --- 
/dev/null +++ b/services/bstein-dev-home/scripts/gateway.py @@ -0,0 +1,70 @@ +import json +import os +from http.server import BaseHTTPRequestHandler, HTTPServer +from urllib import request, error + +UPSTREAM = os.environ.get("UPSTREAM_URL", "http://bstein-dev-home-backend/api/chat") +KEY_MATRIX = os.environ.get("CHAT_KEY_MATRIX", "") +KEY_HOMEPAGE = os.environ.get("CHAT_KEY_HOMEPAGE", "") + +ALLOWED = {k for k in (KEY_MATRIX, KEY_HOMEPAGE) if k} + +class Handler(BaseHTTPRequestHandler): + def _send_json(self, code: int, payload: dict): + body = json.dumps(payload).encode() + self.send_response(code) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def do_GET(self): # noqa: N802 + if self.path in ("/healthz", "/"): + return self._send_json(200, {"ok": True}) + return self._send_json(404, {"error": "not_found"}) + + def do_POST(self): # noqa: N802 + if self.path != "/": + return self._send_json(404, {"error": "not_found"}) + + key = self.headers.get("x-api-key", "") + if not key or key not in ALLOWED: + return self._send_json(401, {"error": "unauthorized"}) + + length = int(self.headers.get("content-length", "0") or "0") + raw = self.rfile.read(length) if length else b"{}" + + try: + upstream_req = request.Request( + UPSTREAM, + data=raw, + headers={"Content-Type": "application/json"}, + method="POST", + ) + with request.urlopen(upstream_req, timeout=90) as resp: + data = resp.read() + self.send_response(resp.status) + for k, v in resp.headers.items(): + if k.lower() in ("content-length", "connection", "server", "date"): + continue + self.send_header(k, v) + self.send_header("Content-Length", str(len(data))) + self.end_headers() + self.wfile.write(data) + except error.HTTPError as e: + data = e.read() if hasattr(e, "read") else b"" + self.send_response(e.code) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", 
str(len(data))) + self.end_headers() + self.wfile.write(data) + except Exception: + return self._send_json(502, {"error": "bad_gateway"}) + +def main(): + port = int(os.environ.get("PORT", "8080")) + httpd = HTTPServer(("0.0.0.0", port), Handler) + httpd.serve_forever() + +if __name__ == "__main__": + main() diff --git a/scripts/tests/test_portal_onboarding_flow.py b/services/bstein-dev-home/scripts/test_portal_onboarding_flow.py similarity index 100% rename from scripts/tests/test_portal_onboarding_flow.py rename to services/bstein-dev-home/scripts/test_portal_onboarding_flow.py diff --git a/scripts/vaultwarden_cred_sync.py b/services/bstein-dev-home/scripts/vaultwarden_cred_sync.py similarity index 100% rename from scripts/vaultwarden_cred_sync.py rename to services/bstein-dev-home/scripts/vaultwarden_cred_sync.py diff --git a/services/comms/atlasbot-configmap.yaml b/services/comms/atlasbot-configmap.yaml deleted file mode 100644 index be9640e..0000000 --- a/services/comms/atlasbot-configmap.yaml +++ /dev/null @@ -1,629 +0,0 @@ -# services/comms/atlasbot-configmap.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: atlasbot -data: - bot.py: | - import collections - import json - import os - import re - import ssl - import time - from typing import Any - from urllib import error, parse, request - - BASE = os.environ.get("MATRIX_BASE", "http://othrys-synapse-matrix-synapse:8008") - AUTH_BASE = os.environ.get("AUTH_BASE", "http://matrix-authentication-service:8080") - USER = os.environ["BOT_USER"] - PASSWORD = os.environ["BOT_PASS"] - ROOM_ALIAS = "#othrys:live.bstein.dev" - - OLLAMA_URL = os.environ.get("OLLAMA_URL", "https://chat.ai.bstein.dev/") - MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5-coder:7b-instruct-q4_0") - API_KEY = os.environ.get("CHAT_API_KEY", "") - - KB_DIR = os.environ.get("KB_DIR", "") - VM_URL = os.environ.get("VM_URL", "http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428") - - BOT_MENTIONS = 
os.environ.get("BOT_MENTIONS", f"{USER},atlas") - SERVER_NAME = os.environ.get("MATRIX_SERVER_NAME", "live.bstein.dev") - - MAX_KB_CHARS = int(os.environ.get("ATLASBOT_MAX_KB_CHARS", "2500")) - MAX_TOOL_CHARS = int(os.environ.get("ATLASBOT_MAX_TOOL_CHARS", "2500")) - - TOKEN_RE = re.compile(r"[a-z0-9][a-z0-9_.-]{1,}", re.IGNORECASE) - HOST_RE = re.compile(r"(?i)([a-z0-9-]+(?:\\.[a-z0-9-]+)+)") - STOPWORDS = { - "the", - "and", - "for", - "with", - "this", - "that", - "from", - "into", - "what", - "how", - "why", - "when", - "where", - "which", - "who", - "can", - "could", - "should", - "would", - "please", - "help", - "atlas", - "othrys", - } - - METRIC_HINT_WORDS = { - "health", - "status", - "down", - "slow", - "error", - "unknown_error", - "timeout", - "crash", - "crashloop", - "restart", - "restarts", - "pending", - "unreachable", - "latency", - } - - def _tokens(text: str) -> list[str]: - toks = [t.lower() for t in TOKEN_RE.findall(text or "")] - return [t for t in toks if t not in STOPWORDS and len(t) >= 2] - - - # Mention detection (Matrix rich mentions + plain @atlas). - MENTION_TOKENS = [m.strip() for m in BOT_MENTIONS.split(",") if m.strip()] - MENTION_LOCALPARTS = [m.lstrip("@").split(":", 1)[0] for m in MENTION_TOKENS] - MENTION_RE = re.compile( - r"(? str: - t = token.strip() - if not t: - return "" - if t.startswith("@") and ":" in t: - return t - t = t.lstrip("@") - if ":" in t: - return f"@{t}" - return f"@{t}:{SERVER_NAME}" - - MENTION_USER_IDS = {normalize_user_id(t).lower() for t in MENTION_TOKENS if normalize_user_id(t)} - - def is_mentioned(content: dict, body: str) -> bool: - if MENTION_RE.search(body or "") is not None: - return True - mentions = content.get("m.mentions", {}) - user_ids = mentions.get("user_ids", []) - if not isinstance(user_ids, list): - return False - return any(isinstance(uid, str) and uid.lower() in MENTION_USER_IDS for uid in user_ids) - - - # Matrix HTTP helper. 
- def req(method: str, path: str, token: str | None = None, body=None, timeout=60, base: str | None = None): - url = (base or BASE) + path - data = None - headers = {} - if body is not None: - data = json.dumps(body).encode() - headers["Content-Type"] = "application/json" - if token: - headers["Authorization"] = f"Bearer {token}" - r = request.Request(url, data=data, headers=headers, method=method) - with request.urlopen(r, timeout=timeout) as resp: - raw = resp.read() - return json.loads(raw.decode()) if raw else {} - - def login() -> str: - login_user = normalize_user_id(USER) - payload = { - "type": "m.login.password", - "identifier": {"type": "m.id.user", "user": login_user}, - "password": PASSWORD, - } - res = req("POST", "/_matrix/client/v3/login", body=payload, base=AUTH_BASE) - return res["access_token"] - - def resolve_alias(token: str, alias: str) -> str: - enc = parse.quote(alias) - res = req("GET", f"/_matrix/client/v3/directory/room/{enc}", token) - return res["room_id"] - - def join_room(token: str, room: str): - req("POST", f"/_matrix/client/v3/rooms/{parse.quote(room)}/join", token, body={}) - - def send_msg(token: str, room: str, text: str): - path = f"/_matrix/client/v3/rooms/{parse.quote(room)}/send/m.room.message" - req("POST", path, token, body={"msgtype": "m.text", "body": text}) - - - # Atlas KB loader (no external deps; files are pre-rendered JSON via scripts/knowledge_render_atlas.py). 
- KB = {"catalog": {}, "runbooks": []} - _HOST_INDEX: dict[str, list[dict]] = {} - _NAME_INDEX: set[str] = set() - - def _load_json_file(path: str) -> Any | None: - try: - with open(path, "rb") as f: - return json.loads(f.read().decode("utf-8")) - except Exception: - return None - - def load_kb(): - global KB, _HOST_INDEX, _NAME_INDEX - if not KB_DIR: - return - catalog = _load_json_file(os.path.join(KB_DIR, "catalog", "atlas.json")) or {} - runbooks = _load_json_file(os.path.join(KB_DIR, "catalog", "runbooks.json")) or [] - KB = {"catalog": catalog, "runbooks": runbooks} - - host_index: dict[str, list[dict]] = collections.defaultdict(list) - for ep in catalog.get("http_endpoints", []) if isinstance(catalog, dict) else []: - host = (ep.get("host") or "").lower() - if host: - host_index[host].append(ep) - _HOST_INDEX = {k: host_index[k] for k in sorted(host_index.keys())} - - names: set[str] = set() - for s in catalog.get("services", []) if isinstance(catalog, dict) else []: - if isinstance(s, dict) and s.get("name"): - names.add(str(s["name"]).lower()) - for w in catalog.get("workloads", []) if isinstance(catalog, dict) else []: - if isinstance(w, dict) and w.get("name"): - names.add(str(w["name"]).lower()) - _NAME_INDEX = names - - def kb_retrieve(query: str, *, limit: int = 3) -> str: - q = (query or "").strip() - if not q or not KB.get("runbooks"): - return "" - ql = q.lower() - q_tokens = _tokens(q) - if not q_tokens: - return "" - - scored: list[tuple[int, dict]] = [] - for doc in KB.get("runbooks", []): - if not isinstance(doc, dict): - continue - title = str(doc.get("title") or "") - body = str(doc.get("body") or "") - tags = doc.get("tags") or [] - entrypoints = doc.get("entrypoints") or [] - hay = (title + "\n" + " ".join(tags) + "\n" + " ".join(entrypoints) + "\n" + body).lower() - score = 0 - for t in set(q_tokens): - if t in hay: - score += 3 if t in title.lower() else 1 - for h in entrypoints: - if isinstance(h, str) and h.lower() in ql: - score += 4 - 
if score: - scored.append((score, doc)) - - scored.sort(key=lambda x: x[0], reverse=True) - picked = [d for _, d in scored[:limit]] - if not picked: - return "" - - parts: list[str] = ["Atlas KB (retrieved):"] - used = 0 - for d in picked: - path = d.get("path") or "" - title = d.get("title") or path - body = (d.get("body") or "").strip() - snippet = body[:900].strip() - chunk = f"- {title} ({path})\n{snippet}" - if used + len(chunk) > MAX_KB_CHARS: - break - parts.append(chunk) - used += len(chunk) - return "\n".join(parts).strip() - - def catalog_hints(query: str) -> tuple[str, list[tuple[str, str]]]: - q = (query or "").strip() - if not q or not KB.get("catalog"): - return "", [] - ql = q.lower() - hosts = {m.group(1).lower() for m in HOST_RE.finditer(ql) if m.group(1).lower().endswith("bstein.dev")} - - # Also match by known workload/service names. - for t in _tokens(ql): - if t in _NAME_INDEX: - hosts |= {ep["host"].lower() for ep in KB["catalog"].get("http_endpoints", []) if isinstance(ep, dict) and ep.get("backend", {}).get("service") == t} - - edges: list[tuple[str, str]] = [] - lines: list[str] = [] - for host in sorted(hosts): - for ep in _HOST_INDEX.get(host, []): - backend = ep.get("backend") or {} - ns = backend.get("namespace") or "" - svc = backend.get("service") or "" - path = ep.get("path") or "/" - if not svc: - continue - wk = backend.get("workloads") or [] - wk_str = ", ".join(f"{w.get('kind')}:{w.get('name')}" for w in wk if isinstance(w, dict) and w.get("name")) or "unknown" - lines.append(f"- {host}{path} → {ns}/{svc} → {wk_str}") - for w in wk: - if isinstance(w, dict) and w.get("name"): - edges.append((ns, str(w["name"]))) - if not lines: - return "", [] - return "Atlas endpoints (from GitOps):\n" + "\n".join(lines[:20]), edges - - - # Kubernetes API (read-only). RBAC is provided via ServiceAccount atlasbot. 
- _K8S_TOKEN: str | None = None - _K8S_CTX: ssl.SSLContext | None = None - - def _k8s_context() -> ssl.SSLContext: - global _K8S_CTX - if _K8S_CTX is not None: - return _K8S_CTX - ca_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" - ctx = ssl.create_default_context(cafile=ca_path) - _K8S_CTX = ctx - return ctx - - def _k8s_token() -> str: - global _K8S_TOKEN - if _K8S_TOKEN: - return _K8S_TOKEN - token_path = "/var/run/secrets/kubernetes.io/serviceaccount/token" - with open(token_path, "r", encoding="utf-8") as f: - _K8S_TOKEN = f.read().strip() - return _K8S_TOKEN - - def k8s_get(path: str, timeout: int = 8) -> dict: - host = os.environ.get("KUBERNETES_SERVICE_HOST") - port = os.environ.get("KUBERNETES_SERVICE_PORT_HTTPS") or os.environ.get("KUBERNETES_SERVICE_PORT") or "443" - if not host: - raise RuntimeError("k8s host missing") - url = f"https://{host}:{port}{path}" - headers = {"Authorization": f"Bearer {_k8s_token()}"} - r = request.Request(url, headers=headers, method="GET") - with request.urlopen(r, timeout=timeout, context=_k8s_context()) as resp: - raw = resp.read() - return json.loads(raw.decode()) if raw else {} - - def k8s_pods(namespace: str) -> list[dict]: - data = k8s_get(f"/api/v1/namespaces/{parse.quote(namespace)}/pods?limit=500") - items = data.get("items") or [] - return items if isinstance(items, list) else [] - - def summarize_pods(namespace: str, prefixes: set[str] | None = None) -> str: - try: - pods = k8s_pods(namespace) - except Exception: - return "" - out: list[str] = [] - for p in pods: - md = p.get("metadata") or {} - st = p.get("status") or {} - name = md.get("name") or "" - if prefixes and not any(name.startswith(pref + "-") or name == pref or name.startswith(pref) for pref in prefixes): - continue - phase = st.get("phase") or "?" 
- cs = st.get("containerStatuses") or [] - restarts = 0 - ready = 0 - total = 0 - reason = st.get("reason") or "" - for c in cs if isinstance(cs, list) else []: - if not isinstance(c, dict): - continue - total += 1 - restarts += int(c.get("restartCount") or 0) - if c.get("ready"): - ready += 1 - state = c.get("state") or {} - if not reason and isinstance(state, dict): - waiting = state.get("waiting") or {} - if isinstance(waiting, dict) and waiting.get("reason"): - reason = waiting.get("reason") - extra = f" ({reason})" if reason else "" - out.append(f"- {namespace}/{name}: {phase} {ready}/{total} restarts={restarts}{extra}") - return "\n".join(out[:20]) - - def flux_not_ready() -> str: - try: - data = k8s_get( - "/apis/kustomize.toolkit.fluxcd.io/v1/namespaces/flux-system/kustomizations?limit=200" - ) - except Exception: - return "" - items = data.get("items") or [] - bad: list[str] = [] - for it in items if isinstance(items, list) else []: - md = it.get("metadata") or {} - st = it.get("status") or {} - name = md.get("name") or "" - conds = st.get("conditions") or [] - ready = None - msg = "" - for c in conds if isinstance(conds, list) else []: - if isinstance(c, dict) and c.get("type") == "Ready": - ready = c.get("status") - msg = c.get("message") or "" - if ready not in ("True", True): - bad.append(f"- flux kustomization/{name}: Ready={ready} {msg}".strip()) - return "\n".join(bad[:10]) - - - # VictoriaMetrics (PromQL) helpers. - def vm_query(query: str, timeout: int = 8) -> dict | None: - try: - url = VM_URL.rstrip("/") + "/api/v1/query?" 
+ parse.urlencode({"query": query}) - with request.urlopen(url, timeout=timeout) as resp: - return json.loads(resp.read().decode()) - except Exception: - return None - - def _vm_value_series(res: dict) -> list[dict]: - if not res or (res.get("status") != "success"): - return [] - data = res.get("data") or {} - result = data.get("result") or [] - return result if isinstance(result, list) else [] - - def vm_render_result(res: dict | None, limit: int = 12) -> str: - if not res: - return "" - series = _vm_value_series(res) - if not series: - return "" - out: list[str] = [] - for r in series[:limit]: - if not isinstance(r, dict): - continue - metric = r.get("metric") or {} - value = r.get("value") or [] - val = value[1] if isinstance(value, list) and len(value) > 1 else "" - # Prefer common labels if present. - label_parts = [] - for k in ("namespace", "pod", "container", "node", "instance", "job", "phase"): - if isinstance(metric, dict) and metric.get(k): - label_parts.append(f"{k}={metric.get(k)}") - if not label_parts and isinstance(metric, dict): - for k in sorted(metric.keys()): - if k.startswith("__"): - continue - label_parts.append(f"{k}={metric.get(k)}") - if len(label_parts) >= 4: - break - labels = ", ".join(label_parts) if label_parts else "series" - out.append(f"- {labels}: {val}") - return "\n".join(out) - - def vm_top_restarts(hours: int = 1) -> str: - q = f"topk(5, sum by (namespace,pod) (increase(kube_pod_container_status_restarts_total[{hours}h])))" - res = vm_query(q) - if not res or (res.get("status") != "success"): - return "" - out: list[str] = [] - for r in (res.get("data") or {}).get("result") or []: - if not isinstance(r, dict): - continue - m = r.get("metric") or {} - v = r.get("value") or [] - ns = (m.get("namespace") or "").strip() - pod = (m.get("pod") or "").strip() - val = v[1] if isinstance(v, list) and len(v) > 1 else "" - if pod: - out.append(f"- restarts({hours}h): {ns}/{pod} = {val}") - return "\n".join(out) - - def 
vm_cluster_snapshot() -> str: - parts: list[str] = [] - # Node readiness (kube-state-metrics). - ready = vm_query('sum(kube_node_status_condition{condition="Ready",status="true"})') - not_ready = vm_query('sum(kube_node_status_condition{condition="Ready",status="false"})') - if ready and not_ready: - try: - r = _vm_value_series(ready)[0]["value"][1] - nr = _vm_value_series(not_ready)[0]["value"][1] - parts.append(f"- nodes ready: {r} (not ready: {nr})") - except Exception: - pass - - phases = vm_query("sum by (phase) (kube_pod_status_phase)") - pr = vm_render_result(phases, limit=8) - if pr: - parts.append("Pod phases:") - parts.append(pr) - return "\n".join(parts).strip() - - - # Conversation state. - history = collections.defaultdict(list) # (room_id, sender|None) -> list[str] (short transcript) - - def key_for(room_id: str, sender: str, is_dm: bool): - return (room_id, None) if is_dm else (room_id, sender) - - def build_context(prompt: str, *, allow_tools: bool, targets: list[tuple[str, str]]) -> str: - parts: list[str] = [] - - kb = kb_retrieve(prompt) - if kb: - parts.append(kb) - - endpoints, edges = catalog_hints(prompt) - if endpoints: - parts.append(endpoints) - - if allow_tools: - # Scope pod summaries to relevant namespaces/workloads when possible. 
- prefixes_by_ns: dict[str, set[str]] = collections.defaultdict(set) - for ns, name in (targets or []) + (edges or []): - if ns and name: - prefixes_by_ns[ns].add(name) - pod_lines: list[str] = [] - for ns in sorted(prefixes_by_ns.keys()): - summary = summarize_pods(ns, prefixes_by_ns[ns]) - if summary: - pod_lines.append(f"Pods (live):\n{summary}") - if pod_lines: - parts.append("\n".join(pod_lines)[:MAX_TOOL_CHARS]) - - flux_bad = flux_not_ready() - if flux_bad: - parts.append("Flux (not ready):\n" + flux_bad) - - p_l = (prompt or "").lower() - if any(w in p_l for w in METRIC_HINT_WORDS): - restarts = vm_top_restarts(1) - if restarts: - parts.append("VictoriaMetrics (top restarts 1h):\n" + restarts) - snap = vm_cluster_snapshot() - if snap: - parts.append("VictoriaMetrics (cluster snapshot):\n" + snap) - - return "\n\n".join([p for p in parts if p]).strip() - - def ollama_reply(hist_key, prompt: str, *, context: str) -> str: - try: - system = ( - "System: You are Atlas, the Titan lab assistant for Atlas/Othrys. " - "Be helpful, direct, and concise. " - "Prefer answering with exact repo paths and Kubernetes resource names. " - "Never include or request secret values." - ) - transcript_parts = [system] - if context: - transcript_parts.append("Context (grounded):\n" + context[:MAX_KB_CHARS]) - transcript_parts.extend(history[hist_key][-24:]) - transcript_parts.append(f"User: {prompt}") - transcript = "\n".join(transcript_parts) - - payload = {"model": MODEL, "message": transcript} - headers = {"Content-Type": "application/json"} - if API_KEY: - headers["x-api-key"] = API_KEY - r = request.Request(OLLAMA_URL, data=json.dumps(payload).encode(), headers=headers) - with request.urlopen(r, timeout=20) as resp: - data = json.loads(resp.read().decode()) - reply = data.get("message") or data.get("response") or data.get("reply") or "I'm here to help." 
- history[hist_key].append(f"Atlas: {reply}") - return reply - except Exception: - return "I’m here — but I couldn’t reach the model backend." - - def sync_loop(token: str, room_id: str): - since = None - try: - res = req("GET", "/_matrix/client/v3/sync?timeout=0", token, timeout=10) - since = res.get("next_batch") - except Exception: - pass - - while True: - params = {"timeout": 30000} - if since: - params["since"] = since - query = parse.urlencode(params) - try: - res = req("GET", f"/_matrix/client/v3/sync?{query}", token, timeout=35) - except Exception: - time.sleep(5) - continue - since = res.get("next_batch", since) - - # invites - for rid, data in res.get("rooms", {}).get("invite", {}).items(): - try: - join_room(token, rid) - except Exception: - pass - - # messages - for rid, data in res.get("rooms", {}).get("join", {}).items(): - timeline = data.get("timeline", {}).get("events", []) - joined_count = data.get("summary", {}).get("m.joined_member_count") - is_dm = joined_count is not None and joined_count <= 2 - - for ev in timeline: - if ev.get("type") != "m.room.message": - continue - content = ev.get("content", {}) - body = (content.get("body", "") or "").strip() - if not body: - continue - sender = ev.get("sender", "") - if sender == f"@{USER}:live.bstein.dev": - continue - - mentioned = is_mentioned(content, body) - hist_key = key_for(rid, sender, is_dm) - history[hist_key].append(f"{sender}: {body}") - history[hist_key] = history[hist_key][-80:] - - if not (is_dm or mentioned): - continue - - # Only do live cluster/metrics introspection in DMs. - allow_tools = is_dm - - promql = "" - if allow_tools: - m = re.match(r"(?is)^\\s*promql\\s*(?:\\:|\\s)\\s*(.+?)\\s*$", body) - if m: - promql = m.group(1).strip() - - # Attempt to scope tools to the most likely workloads when hostnames are mentioned. 
- targets: list[tuple[str, str]] = [] - for m in HOST_RE.finditer(body.lower()): - host = m.group(1).lower() - for ep in _HOST_INDEX.get(host, []): - backend = ep.get("backend") or {} - ns = backend.get("namespace") or "" - for w in backend.get("workloads") or []: - if isinstance(w, dict) and w.get("name"): - targets.append((ns, str(w["name"]))) - - context = build_context(body, allow_tools=allow_tools, targets=targets) - if allow_tools and promql: - res = vm_query(promql, timeout=20) - rendered = vm_render_result(res, limit=15) or "(no results)" - extra = "VictoriaMetrics (PromQL result):\n" + rendered - context = (context + "\n\n" + extra).strip() if context else extra - reply = ollama_reply(hist_key, body, context=context) - send_msg(token, rid, reply) - - def login_with_retry(): - last_err = None - for attempt in range(10): - try: - return login() - except Exception as exc: # noqa: BLE001 - last_err = exc - time.sleep(min(30, 2 ** attempt)) - raise last_err - - def main(): - load_kb() - token = login_with_retry() - try: - room_id = resolve_alias(token, ROOM_ALIAS) - join_room(token, room_id) - except Exception: - room_id = None - sync_loop(token, room_id) - - if __name__ == "__main__": - main() diff --git a/services/comms/guest-register-configmap.yaml b/services/comms/guest-register-configmap.yaml deleted file mode 100644 index a40d52c..0000000 --- a/services/comms/guest-register-configmap.yaml +++ /dev/null @@ -1,271 +0,0 @@ -# services/comms/guest-register-configmap.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: matrix-guest-register -data: - server.py: | - import base64 - import json - import os - import random - import secrets - from http.server import BaseHTTPRequestHandler, HTTPServer - from urllib import error, parse, request - - MAS_BASE = os.environ.get("MAS_BASE", "http://matrix-authentication-service:8080").rstrip("/") - MAS_ADMIN_API_BASE = os.environ.get("MAS_ADMIN_API_BASE", 
"http://matrix-authentication-service:8081/api/admin/v1").rstrip("/") - SYNAPSE_BASE = os.environ.get("SYNAPSE_BASE", "http://othrys-synapse-matrix-synapse:8008").rstrip("/") - SERVER_NAME = os.environ.get("MATRIX_SERVER_NAME", "live.bstein.dev") - - MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"] - MAS_ADMIN_CLIENT_SECRET_FILE = os.environ.get("MAS_ADMIN_CLIENT_SECRET_FILE", "/etc/mas/admin-client/client_secret") - MAS_ADMIN_SCOPE = os.environ.get("MAS_ADMIN_SCOPE", "urn:mas:admin") - RATE_WINDOW_SEC = int(os.environ.get("RATE_WINDOW_SEC", "60")) - RATE_MAX = int(os.environ.get("RATE_MAX", "30")) - _rate = {} # ip -> [window_start, count] - - ADJ = [ - "brisk","calm","eager","gentle","merry","nifty","rapid","sunny","witty","zesty", - "amber","bold","bright","crisp","daring","frosty","glad","jolly","lively","mellow", - "quiet","ripe","serene","spry","tidy","vivid","warm","wild","clever","kind", - ] - NOUN = [ - "otter","falcon","comet","ember","grove","harbor","meadow","raven","river","summit", - "breeze","cedar","cinder","cove","delta","forest","glade","lark","marsh","peak", - "pine","quartz","reef","ridge","sable","sage","shore","thunder","vale","zephyr", - ] - - def _json(method, url, *, headers=None, body=None, timeout=20): - hdrs = {"Content-Type": "application/json"} - if headers: - hdrs.update(headers) - data = None - if body is not None: - data = json.dumps(body).encode() - req = request.Request(url, data=data, headers=hdrs, method=method) - try: - with request.urlopen(req, timeout=timeout) as resp: - raw = resp.read() - payload = json.loads(raw.decode()) if raw else {} - return resp.status, payload - except error.HTTPError as e: - raw = e.read() - try: - payload = json.loads(raw.decode()) if raw else {} - except Exception: - payload = {} - return e.code, payload - - def _form(method, url, *, headers=None, fields=None, timeout=20): - hdrs = {"Content-Type": "application/x-www-form-urlencoded"} - if headers: - hdrs.update(headers) - data = 
parse.urlencode(fields or {}).encode() - req = request.Request(url, data=data, headers=hdrs, method=method) - try: - with request.urlopen(req, timeout=timeout) as resp: - raw = resp.read() - payload = json.loads(raw.decode()) if raw else {} - return resp.status, payload - except error.HTTPError as e: - raw = e.read() - try: - payload = json.loads(raw.decode()) if raw else {} - except Exception: - payload = {} - return e.code, payload - - _admin_token = None - _admin_token_at = 0.0 - - def _mas_admin_access_token(now): - global _admin_token, _admin_token_at - if _admin_token and (now - _admin_token_at) < 300: - return _admin_token - - with open(MAS_ADMIN_CLIENT_SECRET_FILE, encoding="utf-8") as fh: - client_secret = fh.read().strip() - basic = base64.b64encode(f"{MAS_ADMIN_CLIENT_ID}:{client_secret}".encode()).decode() - - status, payload = _form( - "POST", - f"{MAS_BASE}/oauth2/token", - headers={"Authorization": f"Basic {basic}"}, - fields={"grant_type": "client_credentials", "scope": MAS_ADMIN_SCOPE}, - timeout=20, - ) - if status != 200 or "access_token" not in payload: - raise RuntimeError("mas_admin_token_failed") - - _admin_token = payload["access_token"] - _admin_token_at = now - return _admin_token - - def _generate_localpart(): - return "guest-" + secrets.token_hex(6) - - def _generate_displayname(): - return f"{random.choice(ADJ)}-{random.choice(NOUN)}" - - def _admin_api(admin_token, method, path, body=None): - return _json( - method, - f"{MAS_ADMIN_API_BASE}{path}", - headers={"Authorization": f"Bearer {admin_token}"}, - body=body, - timeout=20, - ) - - def _create_user(admin_token, username): - status, payload = _admin_api(admin_token, "POST", "/users", {"username": username}) - if status != 201: - return status, None - user = payload.get("data") or {} - return status, user.get("id") - - def _set_password(admin_token, user_id, password): - status, _payload = _admin_api( - admin_token, - "POST", - f"/users/{parse.quote(user_id)}/set-password", - 
{"password": password}, - ) - return status in (200, 204) - - def _login_password(username, password): - payload = { - "type": "m.login.password", - "identifier": {"type": "m.id.user", "user": f"@{username}:{SERVER_NAME}"}, - "password": password, - } - status, data = _json( - "POST", - f"{MAS_BASE}/_matrix/client/v3/login", - body=payload, - timeout=20, - ) - if status != 200: - return None, None - return data.get("access_token"), data.get("device_id") - - def _set_display_name(access_token, user_id, displayname): - _json( - "PUT", - f"{SYNAPSE_BASE}/_matrix/client/v3/profile/{parse.quote(user_id, safe='')}/displayname", - headers={"Authorization": f"Bearer {access_token}"}, - body={"displayname": displayname}, - timeout=20, - ) - - def _rate_check(ip, now): - win, cnt = _rate.get(ip, (now, 0)) - if now - win > RATE_WINDOW_SEC: - _rate[ip] = (now, 1) - return True - if cnt >= RATE_MAX: - return False - _rate[ip] = (win, cnt + 1) - return True - - class Handler(BaseHTTPRequestHandler): - server_version = "matrix-guest-register" - - def _send_json(self, code, payload): - body = json.dumps(payload).encode() - self.send_response(code) - self.send_header("Content-Type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS") - self.send_header("Access-Control-Allow-Headers", "Content-Type, Authorization, X-Requested-With") - self.send_header("Content-Length", str(len(body))) - self.end_headers() - self.wfile.write(body) - - def do_OPTIONS(self): # noqa: N802 - self.send_response(204) - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS") - self.send_header("Access-Control-Allow-Headers", "Content-Type, Authorization, X-Requested-With") - self.end_headers() - - def do_GET(self): # noqa: N802 - parsed = parse.urlparse(self.path) - if parsed.path in ("/healthz", "/"): - return self._send_json(200, {"ok": 
True}) - if parsed.path in ("/_matrix/client/v3/register", "/_matrix/client/r0/register"): - return self._send_json(200, {"flows": [{"stages": []}]}) - return self._send_json(404, {"errcode": "M_NOT_FOUND", "error": "not_found"}) - - def do_POST(self): # noqa: N802 - parsed = parse.urlparse(self.path) - if parsed.path not in ("/_matrix/client/v3/register", "/_matrix/client/r0/register"): - return self._send_json(404, {"errcode": "M_NOT_FOUND", "error": "not_found"}) - - qs = parse.parse_qs(parsed.query) - kind = (qs.get("kind") or ["user"])[0] - if kind != "guest": - return self._send_json( - 403, - { - "errcode": "M_FORBIDDEN", - "error": "Registration is disabled; use https://bstein.dev/request-access for accounts.", - }, - ) - - xfwd = self.headers.get("x-forwarded-for", "") - ip = (xfwd.split(",")[0].strip() if xfwd else "") or self.client_address[0] - now = __import__("time").time() - if not _rate_check(ip, now): - return self._send_json(429, {"errcode": "M_LIMIT_EXCEEDED", "error": "rate_limited"}) - - length = int(self.headers.get("content-length", "0") or "0") - raw = self.rfile.read(length) if length else b"{}" - try: - body = json.loads(raw.decode()) if raw else {} - if not isinstance(body, dict): - body = {} - except Exception: - body = {} - try: - admin_token = _mas_admin_access_token(now) - displayname = _generate_displayname() - - localpart = None - mas_user_id = None - for _ in range(5): - localpart = _generate_localpart() - status, mas_user_id = _create_user(admin_token, localpart) - if status == 201 and mas_user_id: - break - mas_user_id = None - if not mas_user_id or not localpart: - raise RuntimeError("add_user_failed") - - password = secrets.token_urlsafe(18) - if not _set_password(admin_token, mas_user_id, password): - raise RuntimeError("set_password_failed") - access_token, device_id = _login_password(localpart, password) - if not access_token: - raise RuntimeError("login_failed") - try: - _set_display_name(access_token, 
f"@{localpart}:{SERVER_NAME}", displayname) - except Exception: - pass - except Exception: - return self._send_json(502, {"errcode": "M_UNKNOWN", "error": "guest_provision_failed"}) - - resp = { - "user_id": f"@{localpart}:{SERVER_NAME}", - "access_token": access_token, - "device_id": device_id or "guest_device", - "home_server": SERVER_NAME, - } - return self._send_json(200, resp) - - def main(): - port = int(os.environ.get("PORT", "8080")) - HTTPServer(("0.0.0.0", port), Handler).serve_forever() - - if __name__ == "__main__": - main() diff --git a/services/comms/kustomization.yaml b/services/comms/kustomization.yaml index 6b69c1e..2008843 100644 --- a/services/comms/kustomization.yaml +++ b/services/comms/kustomization.yaml @@ -9,10 +9,8 @@ resources: - livekit-config.yaml - element-call-config.yaml - element-call-deployment.yaml - - guest-register-configmap.yaml - guest-register-deployment.yaml - guest-register-service.yaml - - atlasbot-configmap.yaml - atlasbot-deployment.yaml - wellknown.yaml - atlasbot-rbac.yaml @@ -45,6 +43,36 @@ patches: - path: synapse-deployment-strategy-patch.yaml configMapGenerator: + - name: matrix-guest-register + files: + - server.py=scripts/guest-register/server.py + options: + disableNameSuffixHash: true + - name: atlasbot + files: + - bot.py=scripts/atlasbot/bot.py + options: + disableNameSuffixHash: true + - name: othrys-synapse-redis-health + files: + - ping_readiness_local.sh=scripts/synapse/redis/ping_readiness_local.sh + - ping_liveness_local.sh=scripts/synapse/redis/ping_liveness_local.sh + - ping_readiness_master.sh=scripts/synapse/redis/ping_readiness_master.sh + - ping_liveness_master.sh=scripts/synapse/redis/ping_liveness_master.sh + - ping_readiness_local_and_master.sh=scripts/synapse/redis/ping_readiness_local_and_master.sh + - ping_liveness_local_and_master.sh=scripts/synapse/redis/ping_liveness_local_and_master.sh + options: + disableNameSuffixHash: true + - name: othrys-synapse-redis-scripts + files: + - 
start-master.sh=scripts/synapse/redis/start-master.sh + options: + disableNameSuffixHash: true + - name: othrys-synapse-matrix-synapse-scripts + files: + - signing-key.sh=scripts/synapse/signing-key.sh + options: + disableNameSuffixHash: true - name: atlas-kb files: - INDEX.md=knowledge/INDEX.md diff --git a/services/comms/scripts/atlasbot/bot.py b/services/comms/scripts/atlasbot/bot.py new file mode 100644 index 0000000..e8bd1a8 --- /dev/null +++ b/services/comms/scripts/atlasbot/bot.py @@ -0,0 +1,622 @@ +import collections +import json +import os +import re +import ssl +import time +from typing import Any +from urllib import error, parse, request + +BASE = os.environ.get("MATRIX_BASE", "http://othrys-synapse-matrix-synapse:8008") +AUTH_BASE = os.environ.get("AUTH_BASE", "http://matrix-authentication-service:8080") +USER = os.environ["BOT_USER"] +PASSWORD = os.environ["BOT_PASS"] +ROOM_ALIAS = "#othrys:live.bstein.dev" + +OLLAMA_URL = os.environ.get("OLLAMA_URL", "https://chat.ai.bstein.dev/") +MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5-coder:7b-instruct-q4_0") +API_KEY = os.environ.get("CHAT_API_KEY", "") + +KB_DIR = os.environ.get("KB_DIR", "") +VM_URL = os.environ.get("VM_URL", "http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428") + +BOT_MENTIONS = os.environ.get("BOT_MENTIONS", f"{USER},atlas") +SERVER_NAME = os.environ.get("MATRIX_SERVER_NAME", "live.bstein.dev") + +MAX_KB_CHARS = int(os.environ.get("ATLASBOT_MAX_KB_CHARS", "2500")) +MAX_TOOL_CHARS = int(os.environ.get("ATLASBOT_MAX_TOOL_CHARS", "2500")) + +TOKEN_RE = re.compile(r"[a-z0-9][a-z0-9_.-]{1,}", re.IGNORECASE) +HOST_RE = re.compile(r"(?i)([a-z0-9-]+(?:\\.[a-z0-9-]+)+)") +STOPWORDS = { + "the", + "and", + "for", + "with", + "this", + "that", + "from", + "into", + "what", + "how", + "why", + "when", + "where", + "which", + "who", + "can", + "could", + "should", + "would", + "please", + "help", + "atlas", + "othrys", +} + +METRIC_HINT_WORDS = { + "health", + "status", + 
"down", + "slow", + "error", + "unknown_error", + "timeout", + "crash", + "crashloop", + "restart", + "restarts", + "pending", + "unreachable", + "latency", +} + +def _tokens(text: str) -> list[str]: + toks = [t.lower() for t in TOKEN_RE.findall(text or "")] + return [t for t in toks if t not in STOPWORDS and len(t) >= 2] + + +# Mention detection (Matrix rich mentions + plain @atlas). +MENTION_TOKENS = [m.strip() for m in BOT_MENTIONS.split(",") if m.strip()] +MENTION_LOCALPARTS = [m.lstrip("@").split(":", 1)[0] for m in MENTION_TOKENS] +MENTION_RE = re.compile( + r"(? str: + t = token.strip() + if not t: + return "" + if t.startswith("@") and ":" in t: + return t + t = t.lstrip("@") + if ":" in t: + return f"@{t}" + return f"@{t}:{SERVER_NAME}" + +MENTION_USER_IDS = {normalize_user_id(t).lower() for t in MENTION_TOKENS if normalize_user_id(t)} + +def is_mentioned(content: dict, body: str) -> bool: + if MENTION_RE.search(body or "") is not None: + return True + mentions = content.get("m.mentions", {}) + user_ids = mentions.get("user_ids", []) + if not isinstance(user_ids, list): + return False + return any(isinstance(uid, str) and uid.lower() in MENTION_USER_IDS for uid in user_ids) + + +# Matrix HTTP helper. 
def req(method: str, path: str, token: str | None = None, body=None, timeout=60, base: str | None = None):
    """Issue one Matrix client-server API request; returns parsed JSON ({} if empty)."""
    headers: dict[str, str] = {}
    payload = None
    if body is not None:
        payload = json.dumps(body).encode()
        headers["Content-Type"] = "application/json"
    if token:
        headers["Authorization"] = f"Bearer {token}"
    http_req = request.Request((base or BASE) + path, data=payload, headers=headers, method=method)
    with request.urlopen(http_req, timeout=timeout) as resp:
        raw = resp.read()
    if not raw:
        return {}
    return json.loads(raw.decode())

def login() -> str:
    """Password-login against the auth service; returns the access token."""
    payload = {
        "type": "m.login.password",
        "identifier": {"type": "m.id.user", "user": normalize_user_id(USER)},
        "password": PASSWORD,
    }
    return req("POST", "/_matrix/client/v3/login", body=payload, base=AUTH_BASE)["access_token"]

def resolve_alias(token: str, alias: str) -> str:
    """Resolve a room alias (e.g. #othrys:…) to its room id."""
    return req("GET", f"/_matrix/client/v3/directory/room/{parse.quote(alias)}", token)["room_id"]

def join_room(token: str, room: str):
    """Join a room by id or alias."""
    req("POST", f"/_matrix/client/v3/rooms/{parse.quote(room)}/join", token, body={})

def send_msg(token: str, room: str, text: str):
    """Send a plain m.text message to a room."""
    req(
        "POST",
        f"/_matrix/client/v3/rooms/{parse.quote(room)}/send/m.room.message",
        token,
        body={"msgtype": "m.text", "body": text},
    )


# Atlas KB loader (no external deps; files are pre-rendered JSON via scripts/knowledge_render_atlas.py).
+KB = {"catalog": {}, "runbooks": []} +_HOST_INDEX: dict[str, list[dict]] = {} +_NAME_INDEX: set[str] = set() + +def _load_json_file(path: str) -> Any | None: + try: + with open(path, "rb") as f: + return json.loads(f.read().decode("utf-8")) + except Exception: + return None + +def load_kb(): + global KB, _HOST_INDEX, _NAME_INDEX + if not KB_DIR: + return + catalog = _load_json_file(os.path.join(KB_DIR, "catalog", "atlas.json")) or {} + runbooks = _load_json_file(os.path.join(KB_DIR, "catalog", "runbooks.json")) or [] + KB = {"catalog": catalog, "runbooks": runbooks} + + host_index: dict[str, list[dict]] = collections.defaultdict(list) + for ep in catalog.get("http_endpoints", []) if isinstance(catalog, dict) else []: + host = (ep.get("host") or "").lower() + if host: + host_index[host].append(ep) + _HOST_INDEX = {k: host_index[k] for k in sorted(host_index.keys())} + + names: set[str] = set() + for s in catalog.get("services", []) if isinstance(catalog, dict) else []: + if isinstance(s, dict) and s.get("name"): + names.add(str(s["name"]).lower()) + for w in catalog.get("workloads", []) if isinstance(catalog, dict) else []: + if isinstance(w, dict) and w.get("name"): + names.add(str(w["name"]).lower()) + _NAME_INDEX = names + +def kb_retrieve(query: str, *, limit: int = 3) -> str: + q = (query or "").strip() + if not q or not KB.get("runbooks"): + return "" + ql = q.lower() + q_tokens = _tokens(q) + if not q_tokens: + return "" + + scored: list[tuple[int, dict]] = [] + for doc in KB.get("runbooks", []): + if not isinstance(doc, dict): + continue + title = str(doc.get("title") or "") + body = str(doc.get("body") or "") + tags = doc.get("tags") or [] + entrypoints = doc.get("entrypoints") or [] + hay = (title + "\n" + " ".join(tags) + "\n" + " ".join(entrypoints) + "\n" + body).lower() + score = 0 + for t in set(q_tokens): + if t in hay: + score += 3 if t in title.lower() else 1 + for h in entrypoints: + if isinstance(h, str) and h.lower() in ql: + score += 4 + if 
score: + scored.append((score, doc)) + + scored.sort(key=lambda x: x[0], reverse=True) + picked = [d for _, d in scored[:limit]] + if not picked: + return "" + + parts: list[str] = ["Atlas KB (retrieved):"] + used = 0 + for d in picked: + path = d.get("path") or "" + title = d.get("title") or path + body = (d.get("body") or "").strip() + snippet = body[:900].strip() + chunk = f"- {title} ({path})\n{snippet}" + if used + len(chunk) > MAX_KB_CHARS: + break + parts.append(chunk) + used += len(chunk) + return "\n".join(parts).strip() + +def catalog_hints(query: str) -> tuple[str, list[tuple[str, str]]]: + q = (query or "").strip() + if not q or not KB.get("catalog"): + return "", [] + ql = q.lower() + hosts = {m.group(1).lower() for m in HOST_RE.finditer(ql) if m.group(1).lower().endswith("bstein.dev")} + + # Also match by known workload/service names. + for t in _tokens(ql): + if t in _NAME_INDEX: + hosts |= {ep["host"].lower() for ep in KB["catalog"].get("http_endpoints", []) if isinstance(ep, dict) and ep.get("backend", {}).get("service") == t} + + edges: list[tuple[str, str]] = [] + lines: list[str] = [] + for host in sorted(hosts): + for ep in _HOST_INDEX.get(host, []): + backend = ep.get("backend") or {} + ns = backend.get("namespace") or "" + svc = backend.get("service") or "" + path = ep.get("path") or "/" + if not svc: + continue + wk = backend.get("workloads") or [] + wk_str = ", ".join(f"{w.get('kind')}:{w.get('name')}" for w in wk if isinstance(w, dict) and w.get("name")) or "unknown" + lines.append(f"- {host}{path} → {ns}/{svc} → {wk_str}") + for w in wk: + if isinstance(w, dict) and w.get("name"): + edges.append((ns, str(w["name"]))) + if not lines: + return "", [] + return "Atlas endpoints (from GitOps):\n" + "\n".join(lines[:20]), edges + + +# Kubernetes API (read-only). RBAC is provided via ServiceAccount atlasbot. 
+_K8S_TOKEN: str | None = None +_K8S_CTX: ssl.SSLContext | None = None + +def _k8s_context() -> ssl.SSLContext: + global _K8S_CTX + if _K8S_CTX is not None: + return _K8S_CTX + ca_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + ctx = ssl.create_default_context(cafile=ca_path) + _K8S_CTX = ctx + return ctx + +def _k8s_token() -> str: + global _K8S_TOKEN + if _K8S_TOKEN: + return _K8S_TOKEN + token_path = "/var/run/secrets/kubernetes.io/serviceaccount/token" + with open(token_path, "r", encoding="utf-8") as f: + _K8S_TOKEN = f.read().strip() + return _K8S_TOKEN + +def k8s_get(path: str, timeout: int = 8) -> dict: + host = os.environ.get("KUBERNETES_SERVICE_HOST") + port = os.environ.get("KUBERNETES_SERVICE_PORT_HTTPS") or os.environ.get("KUBERNETES_SERVICE_PORT") or "443" + if not host: + raise RuntimeError("k8s host missing") + url = f"https://{host}:{port}{path}" + headers = {"Authorization": f"Bearer {_k8s_token()}"} + r = request.Request(url, headers=headers, method="GET") + with request.urlopen(r, timeout=timeout, context=_k8s_context()) as resp: + raw = resp.read() + return json.loads(raw.decode()) if raw else {} + +def k8s_pods(namespace: str) -> list[dict]: + data = k8s_get(f"/api/v1/namespaces/{parse.quote(namespace)}/pods?limit=500") + items = data.get("items") or [] + return items if isinstance(items, list) else [] + +def summarize_pods(namespace: str, prefixes: set[str] | None = None) -> str: + try: + pods = k8s_pods(namespace) + except Exception: + return "" + out: list[str] = [] + for p in pods: + md = p.get("metadata") or {} + st = p.get("status") or {} + name = md.get("name") or "" + if prefixes and not any(name.startswith(pref + "-") or name == pref or name.startswith(pref) for pref in prefixes): + continue + phase = st.get("phase") or "?" 
+ cs = st.get("containerStatuses") or [] + restarts = 0 + ready = 0 + total = 0 + reason = st.get("reason") or "" + for c in cs if isinstance(cs, list) else []: + if not isinstance(c, dict): + continue + total += 1 + restarts += int(c.get("restartCount") or 0) + if c.get("ready"): + ready += 1 + state = c.get("state") or {} + if not reason and isinstance(state, dict): + waiting = state.get("waiting") or {} + if isinstance(waiting, dict) and waiting.get("reason"): + reason = waiting.get("reason") + extra = f" ({reason})" if reason else "" + out.append(f"- {namespace}/{name}: {phase} {ready}/{total} restarts={restarts}{extra}") + return "\n".join(out[:20]) + +def flux_not_ready() -> str: + try: + data = k8s_get( + "/apis/kustomize.toolkit.fluxcd.io/v1/namespaces/flux-system/kustomizations?limit=200" + ) + except Exception: + return "" + items = data.get("items") or [] + bad: list[str] = [] + for it in items if isinstance(items, list) else []: + md = it.get("metadata") or {} + st = it.get("status") or {} + name = md.get("name") or "" + conds = st.get("conditions") or [] + ready = None + msg = "" + for c in conds if isinstance(conds, list) else []: + if isinstance(c, dict) and c.get("type") == "Ready": + ready = c.get("status") + msg = c.get("message") or "" + if ready not in ("True", True): + bad.append(f"- flux kustomization/{name}: Ready={ready} {msg}".strip()) + return "\n".join(bad[:10]) + + +# VictoriaMetrics (PromQL) helpers. +def vm_query(query: str, timeout: int = 8) -> dict | None: + try: + url = VM_URL.rstrip("/") + "/api/v1/query?" 
+ parse.urlencode({"query": query}) + with request.urlopen(url, timeout=timeout) as resp: + return json.loads(resp.read().decode()) + except Exception: + return None + +def _vm_value_series(res: dict) -> list[dict]: + if not res or (res.get("status") != "success"): + return [] + data = res.get("data") or {} + result = data.get("result") or [] + return result if isinstance(result, list) else [] + +def vm_render_result(res: dict | None, limit: int = 12) -> str: + if not res: + return "" + series = _vm_value_series(res) + if not series: + return "" + out: list[str] = [] + for r in series[:limit]: + if not isinstance(r, dict): + continue + metric = r.get("metric") or {} + value = r.get("value") or [] + val = value[1] if isinstance(value, list) and len(value) > 1 else "" + # Prefer common labels if present. + label_parts = [] + for k in ("namespace", "pod", "container", "node", "instance", "job", "phase"): + if isinstance(metric, dict) and metric.get(k): + label_parts.append(f"{k}={metric.get(k)}") + if not label_parts and isinstance(metric, dict): + for k in sorted(metric.keys()): + if k.startswith("__"): + continue + label_parts.append(f"{k}={metric.get(k)}") + if len(label_parts) >= 4: + break + labels = ", ".join(label_parts) if label_parts else "series" + out.append(f"- {labels}: {val}") + return "\n".join(out) + +def vm_top_restarts(hours: int = 1) -> str: + q = f"topk(5, sum by (namespace,pod) (increase(kube_pod_container_status_restarts_total[{hours}h])))" + res = vm_query(q) + if not res or (res.get("status") != "success"): + return "" + out: list[str] = [] + for r in (res.get("data") or {}).get("result") or []: + if not isinstance(r, dict): + continue + m = r.get("metric") or {} + v = r.get("value") or [] + ns = (m.get("namespace") or "").strip() + pod = (m.get("pod") or "").strip() + val = v[1] if isinstance(v, list) and len(v) > 1 else "" + if pod: + out.append(f"- restarts({hours}h): {ns}/{pod} = {val}") + return "\n".join(out) + +def vm_cluster_snapshot() 
-> str: + parts: list[str] = [] + # Node readiness (kube-state-metrics). + ready = vm_query('sum(kube_node_status_condition{condition="Ready",status="true"})') + not_ready = vm_query('sum(kube_node_status_condition{condition="Ready",status="false"})') + if ready and not_ready: + try: + r = _vm_value_series(ready)[0]["value"][1] + nr = _vm_value_series(not_ready)[0]["value"][1] + parts.append(f"- nodes ready: {r} (not ready: {nr})") + except Exception: + pass + + phases = vm_query("sum by (phase) (kube_pod_status_phase)") + pr = vm_render_result(phases, limit=8) + if pr: + parts.append("Pod phases:") + parts.append(pr) + return "\n".join(parts).strip() + + +# Conversation state. +history = collections.defaultdict(list) # (room_id, sender|None) -> list[str] (short transcript) + +def key_for(room_id: str, sender: str, is_dm: bool): + return (room_id, None) if is_dm else (room_id, sender) + +def build_context(prompt: str, *, allow_tools: bool, targets: list[tuple[str, str]]) -> str: + parts: list[str] = [] + + kb = kb_retrieve(prompt) + if kb: + parts.append(kb) + + endpoints, edges = catalog_hints(prompt) + if endpoints: + parts.append(endpoints) + + if allow_tools: + # Scope pod summaries to relevant namespaces/workloads when possible. 
def build_context(prompt: str, *, allow_tools: bool, targets: list[tuple[str, str]]) -> str:
    """Assemble grounded context: KB snippets, endpoint mapping, and (in DMs) live cluster state."""
    parts: list[str] = []

    kb = kb_retrieve(prompt)
    if kb:
        parts.append(kb)

    endpoints, edges = catalog_hints(prompt)
    if endpoints:
        parts.append(endpoints)

    if allow_tools:
        # Scope pod summaries to relevant namespaces/workloads when possible.
        prefixes_by_ns: dict[str, set[str]] = collections.defaultdict(set)
        for ns, name in (targets or []) + (edges or []):
            if ns and name:
                prefixes_by_ns[ns].add(name)
        pod_lines: list[str] = []
        for ns in sorted(prefixes_by_ns.keys()):
            summary = summarize_pods(ns, prefixes_by_ns[ns])
            if summary:
                pod_lines.append(f"Pods (live):\n{summary}")
        if pod_lines:
            parts.append("\n".join(pod_lines)[:MAX_TOOL_CHARS])

        flux_bad = flux_not_ready()
        if flux_bad:
            parts.append("Flux (not ready):\n" + flux_bad)

        p_l = (prompt or "").lower()
        if any(w in p_l for w in METRIC_HINT_WORDS):
            restarts = vm_top_restarts(1)
            if restarts:
                parts.append("VictoriaMetrics (top restarts 1h):\n" + restarts)
            snap = vm_cluster_snapshot()
            if snap:
                parts.append("VictoriaMetrics (cluster snapshot):\n" + snap)

    return "\n\n".join([p for p in parts if p]).strip()

def ollama_reply(hist_key, prompt: str, *, context: str) -> str:
    """Ask the chat backend for a reply, folding in grounded context and transcript.

    Appends the reply to history on success; returns a fallback string on error.
    """
    try:
        system = (
            "System: You are Atlas, the Titan lab assistant for Atlas/Othrys. "
            "Be helpful, direct, and concise. "
            "Prefer answering with exact repo paths and Kubernetes resource names. "
            "Never include or request secret values."
        )
        transcript_parts = [system]
        if context:
            transcript_parts.append("Context (grounded):\n" + context[:MAX_KB_CHARS])
        transcript_parts.extend(history[hist_key][-24:])
        transcript_parts.append(f"User: {prompt}")
        transcript = "\n".join(transcript_parts)

        payload = {"model": MODEL, "message": transcript}
        headers = {"Content-Type": "application/json"}
        if API_KEY:
            headers["x-api-key"] = API_KEY
        r = request.Request(OLLAMA_URL, data=json.dumps(payload).encode(), headers=headers)
        with request.urlopen(r, timeout=20) as resp:
            data = json.loads(resp.read().decode())
        reply = data.get("message") or data.get("response") or data.get("reply") or "I'm here to help."
        history[hist_key].append(f"Atlas: {reply}")
        return reply
    except Exception:
        return "I’m here — but I couldn’t reach the model backend."

def sync_loop(token: str, room_id: str):
    """Long-poll /sync forever: auto-join invites, answer DMs and mentions."""
    since = None
    try:
        res = req("GET", "/_matrix/client/v3/sync?timeout=0", token, timeout=10)
        since = res.get("next_batch")
    except Exception:
        pass

    while True:
        params = {"timeout": 30000}
        if since:
            params["since"] = since
        query = parse.urlencode(params)
        try:
            res = req("GET", f"/_matrix/client/v3/sync?{query}", token, timeout=35)
        except Exception:
            time.sleep(5)
            continue
        since = res.get("next_batch", since)

        # invites
        for rid, data in res.get("rooms", {}).get("invite", {}).items():
            try:
                join_room(token, rid)
            except Exception:
                pass

        # messages
        for rid, data in res.get("rooms", {}).get("join", {}).items():
            timeline = data.get("timeline", {}).get("events", [])
            joined_count = data.get("summary", {}).get("m.joined_member_count")
            is_dm = joined_count is not None and joined_count <= 2

            for ev in timeline:
                if ev.get("type") != "m.room.message":
                    continue
                content = ev.get("content", {})
                body = (content.get("body", "") or "").strip()
                if not body:
                    continue
                sender = ev.get("sender", "")
                # Fixed: use the configured SERVER_NAME for the self-message
                # check instead of the hard-coded "live.bstein.dev".
                if sender == f"@{USER}:{SERVER_NAME}":
                    continue

                mentioned = is_mentioned(content, body)
                hist_key = key_for(rid, sender, is_dm)
                history[hist_key].append(f"{sender}: {body}")
                history[hist_key] = history[hist_key][-80:]

                if not (is_dm or mentioned):
                    continue

                # Only do live cluster/metrics introspection in DMs.
                allow_tools = is_dm

                promql = ""
                if allow_tools:
                    # Fixed: the committed pattern was double-escaped (r"\\s"),
                    # so "promql: <query>" could never match.
                    m = re.match(r"(?is)^\s*promql\s*(?::|\s)\s*(.+?)\s*$", body)
                    if m:
                        promql = m.group(1).strip()

                # Attempt to scope tools to the most likely workloads when hostnames are mentioned.
                targets: list[tuple[str, str]] = []
                for m in HOST_RE.finditer(body.lower()):
                    host = m.group(1).lower()
                    for ep in _HOST_INDEX.get(host, []):
                        backend = ep.get("backend") or {}
                        ns = backend.get("namespace") or ""
                        for w in backend.get("workloads") or []:
                            if isinstance(w, dict) and w.get("name"):
                                targets.append((ns, str(w["name"])))

                context = build_context(body, allow_tools=allow_tools, targets=targets)
                if allow_tools and promql:
                    # Fixed: don't reuse `res` here — it still holds the /sync response.
                    vm_res = vm_query(promql, timeout=20)
                    rendered = vm_render_result(vm_res, limit=15) or "(no results)"
                    extra = "VictoriaMetrics (PromQL result):\n" + rendered
                    context = (context + "\n\n" + extra).strip() if context else extra
                reply = ollama_reply(hist_key, body, context=context)
                send_msg(token, rid, reply)

def login_with_retry():
    """Login with exponential backoff (up to 10 attempts, 30s sleep cap)."""
    last_err = None
    for attempt in range(10):
        try:
            return login()
        except Exception as exc:  # noqa: BLE001
            last_err = exc
            time.sleep(min(30, 2 ** attempt))
    raise last_err

def main():
    """Load the KB, log in, join the default room (best-effort), and loop forever."""
    load_kb()
    token = login_with_retry()
    try:
        room_id = resolve_alias(token, ROOM_ALIAS)
        join_room(token, room_id)
    except Exception:
        room_id = None
    sync_loop(token, room_id)

if __name__ == "__main__":
    main()

# --- services/comms/scripts/guest-register/server.py ---
# (This patch hunk also carries the module preamble of the guest-register
# service; reproduced here because the hunk boundary splits the file.)
import base64
import json
import os
import random
import secrets
from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib import error, parse, request

MAS_BASE = os.environ.get("MAS_BASE", "http://matrix-authentication-service:8080").rstrip("/")
MAS_ADMIN_API_BASE = os.environ.get("MAS_ADMIN_API_BASE", "http://matrix-authentication-service:8081/api/admin/v1").rstrip("/")
SYNAPSE_BASE = os.environ.get("SYNAPSE_BASE", "http://othrys-synapse-matrix-synapse:8008").rstrip("/")
SERVER_NAME = os.environ.get("MATRIX_SERVER_NAME", "live.bstein.dev")
os.environ.get("MATRIX_SERVER_NAME", "live.bstein.dev")

MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"]
MAS_ADMIN_CLIENT_SECRET_FILE = os.environ.get("MAS_ADMIN_CLIENT_SECRET_FILE", "/etc/mas/admin-client/client_secret")
MAS_ADMIN_SCOPE = os.environ.get("MAS_ADMIN_SCOPE", "urn:mas:admin")
RATE_WINDOW_SEC = int(os.environ.get("RATE_WINDOW_SEC", "60"))
RATE_MAX = int(os.environ.get("RATE_MAX", "30"))
_rate = {}  # ip -> (window_start, count)

# Word lists for throwaway guest display names ("brisk-otter" style).
ADJ = [
    "brisk","calm","eager","gentle","merry","nifty","rapid","sunny","witty","zesty",
    "amber","bold","bright","crisp","daring","frosty","glad","jolly","lively","mellow",
    "quiet","ripe","serene","spry","tidy","vivid","warm","wild","clever","kind",
]
NOUN = [
    "otter","falcon","comet","ember","grove","harbor","meadow","raven","river","summit",
    "breeze","cedar","cinder","cove","delta","forest","glade","lark","marsh","peak",
    "pine","quartz","reef","ridge","sable","sage","shore","thunder","vale","zephyr",
]

def _http_json(method, url, *, headers, data, timeout):
    """Send one HTTP request; return (status, parsed JSON body).

    HTTP error statuses are returned rather than raised; an unparsable error
    body degrades to {}. A malformed success body still raises, as before.
    """
    req_obj = request.Request(url, data=data, headers=headers, method=method)
    try:
        with request.urlopen(req_obj, timeout=timeout) as resp:
            raw = resp.read()
            return resp.status, json.loads(raw.decode()) if raw else {}
    except error.HTTPError as e:
        raw = e.read()
        try:
            return e.code, json.loads(raw.decode()) if raw else {}
        except Exception:
            return e.code, {}

def _json(method, url, *, headers=None, body=None, timeout=20):
    """JSON-encoded request helper; see _http_json for the return contract."""
    hdrs = {"Content-Type": "application/json"}
    if headers:
        hdrs.update(headers)
    data = json.dumps(body).encode() if body is not None else None
    return _http_json(method, url, headers=hdrs, data=data, timeout=timeout)

def _form(method, url, *, headers=None, fields=None, timeout=20):
    """Form-urlencoded request helper; see _http_json for the return contract."""
    hdrs = {"Content-Type": "application/x-www-form-urlencoded"}
    if headers:
        hdrs.update(headers)
    data = parse.urlencode(fields or {}).encode()
    return _http_json(method, url, headers=hdrs, data=data, timeout=timeout)

_admin_token = None
_admin_token_at = 0.0

def _mas_admin_access_token(now):
    """Client-credentials access token for the MAS admin API, cached for 5 minutes."""
    global _admin_token, _admin_token_at
    if _admin_token and (now - _admin_token_at) < 300:
        return _admin_token

    with open(MAS_ADMIN_CLIENT_SECRET_FILE, encoding="utf-8") as fh:
        client_secret = fh.read().strip()
    basic = base64.b64encode(f"{MAS_ADMIN_CLIENT_ID}:{client_secret}".encode()).decode()

    status, payload = _form(
        "POST",
        f"{MAS_BASE}/oauth2/token",
        headers={"Authorization": f"Basic {basic}"},
        fields={"grant_type": "client_credentials", "scope": MAS_ADMIN_SCOPE},
        timeout=20,
    )
    if status != 200 or "access_token" not in payload:
        raise RuntimeError("mas_admin_token_failed")

    _admin_token = payload["access_token"]
    _admin_token_at = now
    return _admin_token

def _generate_localpart():
    """Random guest localpart, e.g. 'guest-a1b2c3d4e5f6'."""
    return "guest-" + secrets.token_hex(6)

def _generate_displayname():
    """Random friendly display name, e.g. 'brisk-otter'."""
    return f"{random.choice(ADJ)}-{random.choice(NOUN)}"

def _admin_api(admin_token, method, path, body=None):
    """Authenticated call against the MAS admin API."""
    return _json(
        method,
        f"{MAS_ADMIN_API_BASE}{path}",
        headers={"Authorization": f"Bearer {admin_token}"},
        body=body,
        timeout=20,
    )

def _create_user(admin_token, username):
    """Create a MAS user; returns (status, user_id-or-None)."""
    status, payload = _admin_api(admin_token, "POST", "/users", {"username": username})
    if status != 201:
        return status, None
    user = payload.get("data") or {}
    return status, user.get("id")

def _set_password(admin_token, user_id, password):
    """Set the user's password via the admin API; True on 200/204."""
    status, _payload = _admin_api(
        admin_token,
        "POST",
        f"/users/{parse.quote(user_id)}/set-password",
        {"password": password},
    )
    return status in (200, 204)
def _login_password(username, password):
    """Password-login as @username:SERVER_NAME; returns (access_token, device_id) or (None, None)."""
    payload = {
        "type": "m.login.password",
        "identifier": {"type": "m.id.user", "user": f"@{username}:{SERVER_NAME}"},
        "password": password,
    }
    status, data = _json(
        "POST",
        f"{MAS_BASE}/_matrix/client/v3/login",
        body=payload,
        timeout=20,
    )
    if status != 200:
        return None, None
    return data.get("access_token"), data.get("device_id")

def _set_display_name(access_token, user_id, displayname):
    """Best-effort displayname update against Synapse (failures handled by caller)."""
    _json(
        "PUT",
        f"{SYNAPSE_BASE}/_matrix/client/v3/profile/{parse.quote(user_id, safe='')}/displayname",
        headers={"Authorization": f"Bearer {access_token}"},
        body={"displayname": displayname},
        timeout=20,
    )

def _rate_check(ip, now):
    """Fixed-window rate limit: up to RATE_MAX requests per RATE_WINDOW_SEC per IP.

    NOTE(review): `_rate` grows without bound (one entry per client IP ever
    seen); fine behind a small ingress, but consider periodic pruning.
    """
    win, cnt = _rate.get(ip, (now, 0))
    if now - win > RATE_WINDOW_SEC:
        _rate[ip] = (now, 1)
        return True
    if cnt >= RATE_MAX:
        return False
    _rate[ip] = (win, cnt + 1)
    return True

class Handler(BaseHTTPRequestHandler):
    """Minimal Matrix /register shim that provisions throwaway guest accounts via MAS."""

    server_version = "matrix-guest-register"

    def _send_json(self, code, payload):
        # CORS is intentionally wide open ("*"): the endpoint only serves guest
        # registration and sets no cookies.
        body = json.dumps(payload).encode()
        self.send_response(code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type, Authorization, X-Requested-With")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def do_OPTIONS(self):  # noqa: N802
        # CORS preflight.
        self.send_response(204)
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type, Authorization, X-Requested-With")
        self.end_headers()

    def do_GET(self):  # noqa: N802
        parsed = parse.urlparse(self.path)
        if parsed.path in ("/healthz", "/"):
            return self._send_json(200, {"ok": True})
        if parsed.path in ("/_matrix/client/v3/register", "/_matrix/client/r0/register"):
            # Advertise a no-stage flow so clients proceed straight to POST.
            return self._send_json(200, {"flows": [{"stages": []}]})
        return self._send_json(404, {"errcode": "M_NOT_FOUND", "error": "not_found"})

    def do_POST(self):  # noqa: N802
        # Local import replaces the original `__import__("time")` hack; the
        # module preamble (outside this hunk) does not import time.
        import time

        parsed = parse.urlparse(self.path)
        if parsed.path not in ("/_matrix/client/v3/register", "/_matrix/client/r0/register"):
            return self._send_json(404, {"errcode": "M_NOT_FOUND", "error": "not_found"})

        qs = parse.parse_qs(parsed.query)
        kind = (qs.get("kind") or ["user"])[0]
        if kind != "guest":
            return self._send_json(
                403,
                {
                    "errcode": "M_FORBIDDEN",
                    "error": "Registration is disabled; use https://bstein.dev/request-access for accounts.",
                },
            )

        # Honor X-Forwarded-For (first hop) so rate limiting keys on the
        # real client behind the ingress.
        xfwd = self.headers.get("x-forwarded-for", "")
        ip = (xfwd.split(",")[0].strip() if xfwd else "") or self.client_address[0]
        now = time.time()
        if not _rate_check(ip, now):
            return self._send_json(429, {"errcode": "M_LIMIT_EXCEEDED", "error": "rate_limited"})

        # Request body is read (to drain the socket) but its contents are unused.
        length = int(self.headers.get("content-length", "0") or "0")
        raw = self.rfile.read(length) if length else b"{}"
        try:
            body = json.loads(raw.decode()) if raw else {}
            if not isinstance(body, dict):
                body = {}
        except Exception:
            body = {}
        try:
            admin_token = _mas_admin_access_token(now)
            displayname = _generate_displayname()

            localpart = None
            mas_user_id = None
            for _ in range(5):  # retry on (unlikely) username collision
                localpart = _generate_localpart()
                status, mas_user_id = _create_user(admin_token, localpart)
                if status == 201 and mas_user_id:
                    break
                mas_user_id = None
            if not mas_user_id or not localpart:
                raise RuntimeError("add_user_failed")

            password = secrets.token_urlsafe(18)
            if not _set_password(admin_token, mas_user_id, password):
                raise RuntimeError("set_password_failed")
            access_token, device_id = _login_password(localpart, password)
            if not access_token:
                raise RuntimeError("login_failed")
            try:
                _set_display_name(access_token, f"@{localpart}:{SERVER_NAME}", displayname)
            except Exception:
                pass  # cosmetic only; the account is already usable
        except Exception:
            return self._send_json(502, {"errcode": "M_UNKNOWN", "error": "guest_provision_failed"})

        resp = {
            "user_id": f"@{localpart}:{SERVER_NAME}",
            "access_token": access_token,
            "device_id": device_id or "guest_device",
            "home_server": SERVER_NAME,
        }
        return self._send_json(200, resp)
f"@{localpart}:{SERVER_NAME}", + "access_token": access_token, + "device_id": device_id or "guest_device", + "home_server": SERVER_NAME, + } + return self._send_json(200, resp) + +def main(): + port = int(os.environ.get("PORT", "8080")) + HTTPServer(("0.0.0.0", port), Handler).serve_forever() + +if __name__ == "__main__": + main() diff --git a/services/comms/scripts/synapse/redis/ping_liveness_local.sh b/services/comms/scripts/synapse/redis/ping_liveness_local.sh new file mode 100644 index 0000000..964e552 --- /dev/null +++ b/services/comms/scripts/synapse/redis/ping_liveness_local.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +[[ -f $REDIS_PASSWORD_FILE ]] && export REDIS_PASSWORD="$(< "${REDIS_PASSWORD_FILE}")" +[[ -n "$REDIS_PASSWORD" ]] && export REDISCLI_AUTH="$REDIS_PASSWORD" +response=$( + timeout -s 15 $1 \ + redis-cli \ + -h localhost \ + -p $REDIS_PORT \ + ping +) +if [ "$?" -eq "124" ]; then + echo "Timed out" + exit 1 +fi +responseFirstWord=$(echo $response | head -n1 | awk '{print $1;}') +if [ "$response" != "PONG" ] && [ "$responseFirstWord" != "LOADING" ] && [ "$responseFirstWord" != "MASTERDOWN" ]; then + echo "$response" + exit 1 +fi \ No newline at end of file diff --git a/services/comms/scripts/synapse/redis/ping_liveness_local_and_master.sh b/services/comms/scripts/synapse/redis/ping_liveness_local_and_master.sh new file mode 100644 index 0000000..c343f82 --- /dev/null +++ b/services/comms/scripts/synapse/redis/ping_liveness_local_and_master.sh @@ -0,0 +1,5 @@ +script_dir="$(dirname "$0")" +exit_status=0 +"$script_dir/ping_liveness_local.sh" $1 || exit_status=$? +"$script_dir/ping_liveness_master.sh" $1 || exit_status=$? 
+exit $exit_status \ No newline at end of file diff --git a/services/comms/scripts/synapse/redis/ping_liveness_master.sh b/services/comms/scripts/synapse/redis/ping_liveness_master.sh new file mode 100644 index 0000000..849982a --- /dev/null +++ b/services/comms/scripts/synapse/redis/ping_liveness_master.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +[[ -f $REDIS_MASTER_PASSWORD_FILE ]] && export REDIS_MASTER_PASSWORD="$(< "${REDIS_MASTER_PASSWORD_FILE}")" +[[ -n "$REDIS_MASTER_PASSWORD" ]] && export REDISCLI_AUTH="$REDIS_MASTER_PASSWORD" +response=$( + timeout -s 15 $1 \ + redis-cli \ + -h $REDIS_MASTER_HOST \ + -p $REDIS_MASTER_PORT_NUMBER \ + ping +) +if [ "$?" -eq "124" ]; then + echo "Timed out" + exit 1 +fi +responseFirstWord=$(echo $response | head -n1 | awk '{print $1;}') +if [ "$response" != "PONG" ] && [ "$responseFirstWord" != "LOADING" ]; then + echo "$response" + exit 1 +fi \ No newline at end of file diff --git a/services/comms/scripts/synapse/redis/ping_readiness_local.sh b/services/comms/scripts/synapse/redis/ping_readiness_local.sh new file mode 100644 index 0000000..080273f --- /dev/null +++ b/services/comms/scripts/synapse/redis/ping_readiness_local.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +[[ -f $REDIS_PASSWORD_FILE ]] && export REDIS_PASSWORD="$(< "${REDIS_PASSWORD_FILE}")" +[[ -n "$REDIS_PASSWORD" ]] && export REDISCLI_AUTH="$REDIS_PASSWORD" +response=$( + timeout -s 15 $1 \ + redis-cli \ + -h localhost \ + -p $REDIS_PORT \ + ping +) +if [ "$?" 
-eq "124" ]; then + echo "Timed out" + exit 1 +fi +if [ "$response" != "PONG" ]; then + echo "$response" + exit 1 +fi \ No newline at end of file diff --git a/services/comms/scripts/synapse/redis/ping_readiness_local_and_master.sh b/services/comms/scripts/synapse/redis/ping_readiness_local_and_master.sh new file mode 100644 index 0000000..0ba63cc --- /dev/null +++ b/services/comms/scripts/synapse/redis/ping_readiness_local_and_master.sh @@ -0,0 +1,5 @@ +script_dir="$(dirname "$0")" +exit_status=0 +"$script_dir/ping_readiness_local.sh" $1 || exit_status=$? +"$script_dir/ping_readiness_master.sh" $1 || exit_status=$? +exit $exit_status \ No newline at end of file diff --git a/services/comms/scripts/synapse/redis/ping_readiness_master.sh b/services/comms/scripts/synapse/redis/ping_readiness_master.sh new file mode 100644 index 0000000..95ced76 --- /dev/null +++ b/services/comms/scripts/synapse/redis/ping_readiness_master.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +[[ -f $REDIS_MASTER_PASSWORD_FILE ]] && export REDIS_MASTER_PASSWORD="$(< "${REDIS_MASTER_PASSWORD_FILE}")" +[[ -n "$REDIS_MASTER_PASSWORD" ]] && export REDISCLI_AUTH="$REDIS_MASTER_PASSWORD" +response=$( + timeout -s 15 $1 \ + redis-cli \ + -h $REDIS_MASTER_HOST \ + -p $REDIS_MASTER_PORT_NUMBER \ + ping +) +if [ "$?" 
-eq "124" ]; then + echo "Timed out" + exit 1 +fi +if [ "$response" != "PONG" ]; then + echo "$response" + exit 1 +fi \ No newline at end of file diff --git a/services/comms/scripts/synapse/redis/start-master.sh b/services/comms/scripts/synapse/redis/start-master.sh new file mode 100644 index 0000000..4284839 --- /dev/null +++ b/services/comms/scripts/synapse/redis/start-master.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +[[ -f $REDIS_PASSWORD_FILE ]] && export REDIS_PASSWORD="$(< "${REDIS_PASSWORD_FILE}")" +if [[ -f /opt/bitnami/redis/mounted-etc/master.conf ]];then + cp /opt/bitnami/redis/mounted-etc/master.conf /opt/bitnami/redis/etc/master.conf +fi +if [[ -f /opt/bitnami/redis/mounted-etc/redis.conf ]];then + cp /opt/bitnami/redis/mounted-etc/redis.conf /opt/bitnami/redis/etc/redis.conf +fi +ARGS=("--port" "${REDIS_PORT}") +ARGS+=("--requirepass" "${REDIS_PASSWORD}") +ARGS+=("--masterauth" "${REDIS_PASSWORD}") +ARGS+=("--include" "/opt/bitnami/redis/etc/redis.conf") +ARGS+=("--include" "/opt/bitnami/redis/etc/master.conf") +exec redis-server "${ARGS[@]}" diff --git a/services/comms/scripts/synapse/signing-key.sh b/services/comms/scripts/synapse/signing-key.sh new file mode 100644 index 0000000..5d1b941 --- /dev/null +++ b/services/comms/scripts/synapse/signing-key.sh @@ -0,0 +1,41 @@ +#!/bin/sh + +set -eu + +check_key() { + set +e + + echo "Checking for existing signing key..." + key="$(kubectl get secret "$SECRET_NAME" -o jsonpath="{.data['signing\.key']}" 2> /dev/null)" + [ $? -ne 0 ] && return 1 + [ -z "$key" ] && return 2 + return 0 +} + +create_key() { + echo "Waiting for new signing key to be generated..." + begin=$(date +%s) + end=$((begin + 300)) # 5 minutes + while true; do + [ -f /synapse/keys/signing.key ] && return 0 + [ "$(date +%s)" -gt $end ] && return 1 + sleep 5 + done +} + +store_key() { + echo "Storing signing key in Kubernetes secret..." 
+ kubectl patch secret "$SECRET_NAME" -p "{\"data\":{\"signing.key\":\"$(base64 /synapse/keys/signing.key | tr -d '\n')\"}}" +} + +if check_key; then + echo "Key already in place, exiting." + exit +fi + +if ! create_key; then + echo "Timed out waiting for a signing key to appear." + exit 1 +fi + +store_key diff --git a/services/comms/synapse-rendered.yaml b/services/comms/synapse-rendered.yaml index 097189a..83fce79 100644 --- a/services/comms/synapse-rendered.yaml +++ b/services/comms/synapse-rendered.yaml @@ -82,140 +82,6 @@ data: rename-command FLUSHALL "" # End of replica configuration --- -# Source: matrix-synapse/charts/redis/templates/health-configmap.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: othrys-synapse-redis-health - labels: - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: redis - helm.sh/chart: redis-17.17.1 -data: - ping_readiness_local.sh: |- - #!/bin/bash - - [[ -f $REDIS_PASSWORD_FILE ]] && export REDIS_PASSWORD="$(< "${REDIS_PASSWORD_FILE}")" - [[ -n "$REDIS_PASSWORD" ]] && export REDISCLI_AUTH="$REDIS_PASSWORD" - response=$( - timeout -s 15 $1 \ - redis-cli \ - -h localhost \ - -p $REDIS_PORT \ - ping - ) - if [ "$?" -eq "124" ]; then - echo "Timed out" - exit 1 - fi - if [ "$response" != "PONG" ]; then - echo "$response" - exit 1 - fi - ping_liveness_local.sh: |- - #!/bin/bash - - [[ -f $REDIS_PASSWORD_FILE ]] && export REDIS_PASSWORD="$(< "${REDIS_PASSWORD_FILE}")" - [[ -n "$REDIS_PASSWORD" ]] && export REDISCLI_AUTH="$REDIS_PASSWORD" - response=$( - timeout -s 15 $1 \ - redis-cli \ - -h localhost \ - -p $REDIS_PORT \ - ping - ) - if [ "$?" 
-eq "124" ]; then - echo "Timed out" - exit 1 - fi - responseFirstWord=$(echo $response | head -n1 | awk '{print $1;}') - if [ "$response" != "PONG" ] && [ "$responseFirstWord" != "LOADING" ] && [ "$responseFirstWord" != "MASTERDOWN" ]; then - echo "$response" - exit 1 - fi - ping_readiness_master.sh: |- - #!/bin/bash - - [[ -f $REDIS_MASTER_PASSWORD_FILE ]] && export REDIS_MASTER_PASSWORD="$(< "${REDIS_MASTER_PASSWORD_FILE}")" - [[ -n "$REDIS_MASTER_PASSWORD" ]] && export REDISCLI_AUTH="$REDIS_MASTER_PASSWORD" - response=$( - timeout -s 15 $1 \ - redis-cli \ - -h $REDIS_MASTER_HOST \ - -p $REDIS_MASTER_PORT_NUMBER \ - ping - ) - if [ "$?" -eq "124" ]; then - echo "Timed out" - exit 1 - fi - if [ "$response" != "PONG" ]; then - echo "$response" - exit 1 - fi - ping_liveness_master.sh: |- - #!/bin/bash - - [[ -f $REDIS_MASTER_PASSWORD_FILE ]] && export REDIS_MASTER_PASSWORD="$(< "${REDIS_MASTER_PASSWORD_FILE}")" - [[ -n "$REDIS_MASTER_PASSWORD" ]] && export REDISCLI_AUTH="$REDIS_MASTER_PASSWORD" - response=$( - timeout -s 15 $1 \ - redis-cli \ - -h $REDIS_MASTER_HOST \ - -p $REDIS_MASTER_PORT_NUMBER \ - ping - ) - if [ "$?" -eq "124" ]; then - echo "Timed out" - exit 1 - fi - responseFirstWord=$(echo $response | head -n1 | awk '{print $1;}') - if [ "$response" != "PONG" ] && [ "$responseFirstWord" != "LOADING" ]; then - echo "$response" - exit 1 - fi - ping_readiness_local_and_master.sh: |- - script_dir="$(dirname "$0")" - exit_status=0 - "$script_dir/ping_readiness_local.sh" $1 || exit_status=$? - "$script_dir/ping_readiness_master.sh" $1 || exit_status=$? - exit $exit_status - ping_liveness_local_and_master.sh: |- - script_dir="$(dirname "$0")" - exit_status=0 - "$script_dir/ping_liveness_local.sh" $1 || exit_status=$? - "$script_dir/ping_liveness_master.sh" $1 || exit_status=$? 
- exit $exit_status ---- -# Source: matrix-synapse/charts/redis/templates/scripts-configmap.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: othrys-synapse-redis-scripts - labels: - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: redis - helm.sh/chart: redis-17.17.1 -data: - start-master.sh: | - #!/bin/bash - - [[ -f $REDIS_PASSWORD_FILE ]] && export REDIS_PASSWORD="$(< "${REDIS_PASSWORD_FILE}")" - if [[ -f /opt/bitnami/redis/mounted-etc/master.conf ]];then - cp /opt/bitnami/redis/mounted-etc/master.conf /opt/bitnami/redis/etc/master.conf - fi - if [[ -f /opt/bitnami/redis/mounted-etc/redis.conf ]];then - cp /opt/bitnami/redis/mounted-etc/redis.conf /opt/bitnami/redis/etc/redis.conf - fi - ARGS=("--port" "${REDIS_PORT}") - ARGS+=("--requirepass" "${REDIS_PASSWORD}") - ARGS+=("--masterauth" "${REDIS_PASSWORD}") - ARGS+=("--include" "/opt/bitnami/redis/etc/redis.conf") - ARGS+=("--include" "/opt/bitnami/redis/etc/master.conf") - exec redis-server "${ARGS[@]}" ---- # Source: matrix-synapse/templates/configuration.yaml apiVersion: v1 kind: ConfigMap @@ -870,64 +736,6 @@ metadata: app.kubernetes.io/component: signingkey-job --- # Source: matrix-synapse/templates/signing-key-job.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: othrys-synapse-matrix-synapse-scripts - labels: - helm.sh/chart: matrix-synapse-3.12.17 - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/version: "1.144.0" - app.kubernetes.io/managed-by: Helm - annotations: - helm.sh/hook: pre-install - helm.sh/hook-delete-policy: hook-succeeded -data: - signing-key.sh: | - #!/bin/sh - - set -eu - - check_key() { - set +e - - echo "Checking for existing signing key..." - key="$(kubectl get secret "$SECRET_NAME" -o jsonpath="{.data['signing\.key']}" 2> /dev/null)" - [ $? 
-ne 0 ] && return 1 - [ -z "$key" ] && return 2 - return 0 - } - - create_key() { - echo "Waiting for new signing key to be generated..." - begin=$(date +%s) - end=$((begin + 300)) # 5 minutes - while true; do - [ -f /synapse/keys/signing.key ] && return 0 - [ "$(date +%s)" -gt $end ] && return 1 - sleep 5 - done - } - - store_key() { - echo "Storing signing key in Kubernetes secret..." - kubectl patch secret "$SECRET_NAME" -p "{\"data\":{\"signing.key\":\"$(base64 /synapse/keys/signing.key | tr -d '\n')\"}}" - } - - if check_key; then - echo "Key already in place, exiting." - exit - fi - - if ! create_key; then - echo "Timed out waiting for a signing key to appear." - exit 1 - fi - - store_key ---- -# Source: matrix-synapse/templates/signing-key-job.yaml apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: diff --git a/services/jenkins/configmap-init-scripts.yaml b/services/jenkins/configmap-init-scripts.yaml deleted file mode 100644 index ed87720..0000000 --- a/services/jenkins/configmap-init-scripts.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# services/jenkins/configmap-init-scripts.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: jenkins-init-scripts - namespace: jenkins -data: - theme.groovy: | - import jenkins.model.Jenkins - import org.codefirst.SimpleThemeDecorator - - def instance = Jenkins.get() - def decorators = instance.getExtensionList(SimpleThemeDecorator.class) - - if (decorators?.size() > 0) { - def theme = decorators[0] - theme.setCssUrl("https://jenkins-contrib-themes.github.io/jenkins-material-theme/dist/material-ocean.css") - theme.setJsUrl("") - theme.setTheme("") - instance.save() - println("Applied simple-theme-plugin dark theme") - } else { - println("simple-theme-plugin not installed; skipping theme configuration") - } diff --git a/services/jenkins/kustomization.yaml b/services/jenkins/kustomization.yaml index c183a4f..acb6fb4 100644 --- a/services/jenkins/kustomization.yaml +++ b/services/jenkins/kustomization.yaml @@ -7,8 
+7,15 @@ resources: - serviceaccount.yaml - pvc.yaml - configmap-jcasc.yaml - - configmap-init-scripts.yaml - configmap-plugins.yaml - deployment.yaml - service.yaml - ingress.yaml + +configMapGenerator: + - name: jenkins-init-scripts + namespace: jenkins + files: + - theme.groovy=scripts/theme.groovy + options: + disableNameSuffixHash: true diff --git a/services/jenkins/scripts/theme.groovy b/services/jenkins/scripts/theme.groovy new file mode 100644 index 0000000..cf171f7 --- /dev/null +++ b/services/jenkins/scripts/theme.groovy @@ -0,0 +1,16 @@ +import jenkins.model.Jenkins +import org.codefirst.SimpleThemeDecorator + +def instance = Jenkins.get() +def decorators = instance.getExtensionList(SimpleThemeDecorator.class) + +if (decorators?.size() > 0) { + def theme = decorators[0] + theme.setCssUrl("https://jenkins-contrib-themes.github.io/jenkins-material-theme/dist/material-ocean.css") + theme.setJsUrl("") + theme.setTheme("") + instance.save() + println("Applied simple-theme-plugin dark theme") +} else { + println("simple-theme-plugin not installed; skipping theme configuration") +} diff --git a/services/logging/kustomization.yaml b/services/logging/kustomization.yaml index 3c40da2..fe010f6 100644 --- a/services/logging/kustomization.yaml +++ b/services/logging/kustomization.yaml @@ -6,11 +6,8 @@ resources: - opensearch-dashboards-objects.yaml - opensearch-observability-objects.yaml - node-log-rotation-serviceaccount.yaml - - node-log-rotation-script.yaml - node-image-gc-rpi4-serviceaccount.yaml - - node-image-gc-rpi4-script.yaml - node-image-prune-rpi5-serviceaccount.yaml - - node-image-prune-rpi5-script.yaml - opensearch-pvc.yaml - opensearch-helmrelease.yaml - opensearch-dashboards-helmrelease.yaml @@ -26,3 +23,35 @@ resources: - node-image-prune-rpi5-daemonset.yaml - oauth2-proxy.yaml - ingress.yaml + +configMapGenerator: + - name: node-log-rotation-script + namespace: logging + files: + - node_log_rotation.sh=scripts/node_log_rotation.sh + options: + 
disableNameSuffixHash: true + - name: node-image-gc-rpi4-script + namespace: logging + files: + - node_image_gc_rpi4.sh=scripts/node_image_gc_rpi4.sh + options: + disableNameSuffixHash: true + - name: node-image-prune-rpi5-script + namespace: logging + files: + - node_image_prune_rpi5.sh=scripts/node_image_prune_rpi5.sh + options: + disableNameSuffixHash: true + - name: opensearch-prune-script + namespace: logging + files: + - prune.py=scripts/opensearch_prune.py + options: + disableNameSuffixHash: true + - name: opensearch-observability-script + namespace: logging + files: + - seed.py=scripts/opensearch_observability_seed.py + options: + disableNameSuffixHash: true diff --git a/services/logging/node-image-gc-rpi4-script.yaml b/services/logging/node-image-gc-rpi4-script.yaml deleted file mode 100644 index 44c4c16..0000000 --- a/services/logging/node-image-gc-rpi4-script.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# services/logging/node-image-gc-rpi4-script.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: node-image-gc-rpi4-script - namespace: logging -data: - node_image_gc_rpi4.sh: | - #!/usr/bin/env bash - set -euo pipefail - - changed=0 - k3s_changed=0 - k3s_agent_changed=0 - - k3s_dropin="/host/etc/systemd/system/k3s.service.d/98-image-gc.conf" - k3s_agent_dropin="/host/etc/systemd/system/k3s-agent.service.d/98-image-gc.conf" - - if [ -f "/host/etc/systemd/system/k3s.service" ] && [ ! -f "${k3s_dropin}" ]; then - mkdir -p "$(dirname "${k3s_dropin}")" - printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=image-gc-high-threshold=70\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-low-threshold=60\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-minimum-available=5Gi\"\n" > "${k3s_dropin}" - changed=1 - k3s_changed=1 - fi - - if [ -f "/host/etc/systemd/system/k3s-agent.service" ] && [ ! 
-f "${k3s_agent_dropin}" ]; then - mkdir -p "$(dirname "${k3s_agent_dropin}")" - printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=image-gc-high-threshold=70\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-low-threshold=60\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-minimum-available=5Gi\"\n" > "${k3s_agent_dropin}" - changed=1 - k3s_agent_changed=1 - fi - - if [ "${changed}" -eq 1 ]; then - sleep "$(( (RANDOM % 300) + 10 ))" - chroot /host /bin/systemctl daemon-reload - if [ "${k3s_changed}" -eq 1 ]; then - chroot /host /bin/systemctl restart k3s - fi - if [ "${k3s_agent_changed}" -eq 1 ]; then - chroot /host /bin/systemctl restart k3s-agent - fi - fi - - sleep infinity diff --git a/services/logging/node-image-prune-rpi5-script.yaml b/services/logging/node-image-prune-rpi5-script.yaml deleted file mode 100644 index ae79ce3..0000000 --- a/services/logging/node-image-prune-rpi5-script.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# services/logging/node-image-prune-rpi5-script.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: node-image-prune-rpi5-script - namespace: logging -data: - node_image_prune_rpi5.sh: | - #!/usr/bin/env bash - set -euo pipefail - - threshold=70 - - sleep "$(( (RANDOM % 300) + 10 ))" - - while true; do - usage=$(df -P /host | awk 'NR==2 {gsub(/%/,"",$5); print $5}') - if [ -z "${usage}" ]; then - sleep 1800 - continue - fi - - if [ "${usage}" -ge "${threshold}" ]; then - chroot /host /bin/sh -c ' - if command -v crictl >/dev/null 2>&1; then - crictl --runtime-endpoint=unix:///run/k3s/containerd/containerd.sock rmi --prune || true - elif [ -x /usr/local/bin/crictl ]; then - /usr/local/bin/crictl --runtime-endpoint=unix:///run/k3s/containerd/containerd.sock rmi --prune || true - fi - ' - fi - - sleep 21600 - done diff --git a/services/logging/node-log-rotation-script.yaml b/services/logging/node-log-rotation-script.yaml deleted file mode 100644 index 7926e0d..0000000 --- a/services/logging/node-log-rotation-script.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# 
services/logging/node-log-rotation-script.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: node-log-rotation-script - namespace: logging -data: - node_log_rotation.sh: | - #!/usr/bin/env bash - set -euo pipefail - - changed=0 - journald_changed=0 - k3s_changed=0 - k3s_agent_changed=0 - - journald_dropin="/host/etc/systemd/journald.conf.d/99-logging.conf" - k3s_dropin="/host/etc/systemd/system/k3s.service.d/99-logging.conf" - k3s_agent_dropin="/host/etc/systemd/system/k3s-agent.service.d/99-logging.conf" - k3s_image_gc_dropin="/host/etc/systemd/system/k3s.service.d/98-image-gc.conf" - k3s_agent_image_gc_dropin="/host/etc/systemd/system/k3s-agent.service.d/98-image-gc.conf" - - if [ ! -f "${journald_dropin}" ]; then - mkdir -p "$(dirname "${journald_dropin}")" - printf "[Journal]\nStorage=volatile\nRuntimeMaxUse=200M\nRuntimeKeepFree=512M\nMaxFileSec=1h\n" > "${journald_dropin}" - changed=1 - journald_changed=1 - fi - - if [ -f "/host/etc/systemd/system/k3s.service" ] && [ ! -f "${k3s_dropin}" ]; then - mkdir -p "$(dirname "${k3s_dropin}")" - printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=container-log-max-size=10Mi\"\nEnvironment=\"K3S_KUBELET_ARG=container-log-max-files=2\"\n" > "${k3s_dropin}" - changed=1 - k3s_changed=1 - fi - - if [ -f "/host/etc/systemd/system/k3s.service" ] && [ ! -f "${k3s_image_gc_dropin}" ]; then - mkdir -p "$(dirname "${k3s_image_gc_dropin}")" - printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=image-gc-high-threshold=70\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-low-threshold=60\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-minimum-available=5Gi\"\n" > "${k3s_image_gc_dropin}" - changed=1 - k3s_changed=1 - fi - - if [ -f "/host/etc/systemd/system/k3s-agent.service" ] && [ ! 
-f "${k3s_agent_dropin}" ]; then - mkdir -p "$(dirname "${k3s_agent_dropin}")" - printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=container-log-max-size=10Mi\"\nEnvironment=\"K3S_KUBELET_ARG=container-log-max-files=2\"\n" > "${k3s_agent_dropin}" - changed=1 - k3s_agent_changed=1 - fi - - if [ -f "/host/etc/systemd/system/k3s-agent.service" ] && [ ! -f "${k3s_agent_image_gc_dropin}" ]; then - mkdir -p "$(dirname "${k3s_agent_image_gc_dropin}")" - printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=image-gc-high-threshold=70\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-low-threshold=60\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-minimum-available=5Gi\"\n" > "${k3s_agent_image_gc_dropin}" - changed=1 - k3s_agent_changed=1 - fi - - if [ "${changed}" -eq 1 ]; then - sleep "$(( (RANDOM % 300) + 10 ))" - chroot /host /bin/systemctl daemon-reload - if [ "${journald_changed}" -eq 1 ]; then - chroot /host /bin/systemctl restart systemd-journald - fi - if [ "${k3s_changed}" -eq 1 ]; then - chroot /host /bin/systemctl restart k3s - fi - if [ "${k3s_agent_changed}" -eq 1 ]; then - chroot /host /bin/systemctl restart k3s-agent - fi - fi - - sleep infinity diff --git a/services/logging/opensearch-observability-setup-job.yaml b/services/logging/opensearch-observability-setup-job.yaml index 75e65b2..e4590fb 100644 --- a/services/logging/opensearch-observability-setup-job.yaml +++ b/services/logging/opensearch-observability-setup-job.yaml @@ -1,152 +1,4 @@ # services/logging/opensearch-observability-setup-job.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: opensearch-observability-script - namespace: logging -data: - seed.py: | - import json - import os - import time - import urllib.error - import urllib.request - - OSD_URL = os.environ.get( - "OSD_URL", - "http://opensearch-dashboards.logging.svc.cluster.local:5601", - ).rstrip("/") - OBJECT_DIR = "/config" - - def request_json(method, path, payload=None): - url = f"{OSD_URL}{path}" - data = None - headers = {"osd-xsrf": "true"} - 
if payload is not None: - data = json.dumps(payload).encode("utf-8") - headers["Content-Type"] = "application/json" - - req = urllib.request.Request(url, data=data, method=method) - for key, value in headers.items(): - req.add_header(key, value) - - try: - with urllib.request.urlopen(req, timeout=30) as response: - body = response.read().decode("utf-8") - except urllib.error.HTTPError as exc: - detail = exc.read().decode("utf-8") - raise SystemExit(f"{method} {path} failed: {exc.code} {detail}") - - if not body: - return {} - return json.loads(body) - - - def wait_ready(): - for _ in range(60): - try: - request_json("GET", "/api/status") - return - except Exception: - time.sleep(5) - raise SystemExit("OpenSearch Dashboards did not become ready in time") - - - def load_payload(name): - path = os.path.join(OBJECT_DIR, name) - with open(path, "r", encoding="utf-8") as handle: - return json.load(handle) - - - def index_by_name(items, key): - lookup = {} - for item in items: - obj = item.get(key, {}) - name = obj.get("name") - if not name: - continue - lookup.setdefault(name, item) - return lookup - - - def ensure_applications(apps): - existing = request_json("GET", "/api/observability/application/").get("data", []) - existing_by_name = {app.get("name"): app for app in existing if app.get("name")} - - for app in apps: - name = app.get("name") - if not name: - continue - current = existing_by_name.get(name) - if not current: - request_json("POST", "/api/observability/application/", app) - print(f"created application: {name}") - continue - - if app.get("baseQuery") != current.get("baseQuery"): - print(f"baseQuery differs for {name}; skipping update") - - update_body = {} - for key in ("description", "servicesEntities", "traceGroups"): - if app.get(key, "") != current.get(key, ""): - update_body[key] = app.get(key, "") - - if update_body: - request_json( - "PUT", - "/api/observability/application/", - {"appId": current["id"], "updateBody": update_body}, - ) - 
print(f"updated application: {name}") - - - def ensure_saved_objects(objects, object_type, endpoint): - existing = request_json( - "GET", - f"/api/observability/event_analytics/saved_objects?objectType={object_type}", - ).get("observabilityObjectList", []) - key = "savedQuery" if object_type == "savedQuery" else "savedVisualization" - existing_by_name = index_by_name(existing, key) - - for obj in objects: - name = obj.get("name") - if not name: - continue - current = existing_by_name.get(name) - if not current: - request_json("POST", endpoint, {"object": obj}) - print(f"created {object_type}: {name}") - continue - - current_body = current.get(key, {}) - if current_body != obj: - request_json( - "PUT", - endpoint, - {"object_id": current["objectId"], "object": obj}, - ) - print(f"updated {object_type}: {name}") - - - def main(): - wait_ready() - - applications = load_payload("applications.json") - queries = load_payload("saved_queries.json") - visualizations = load_payload("saved_visualizations.json") - - ensure_applications(applications) - ensure_saved_objects(queries, "savedQuery", "/api/observability/event_analytics/saved_objects/query") - ensure_saved_objects( - visualizations, - "savedVisualization", - "/api/observability/event_analytics/saved_objects/vis", - ) - - - if __name__ == "__main__": - main() ---- apiVersion: batch/v1 kind: Job metadata: diff --git a/services/logging/opensearch-prune-cronjob.yaml b/services/logging/opensearch-prune-cronjob.yaml index 83aee1a..75e72db 100644 --- a/services/logging/opensearch-prune-cronjob.yaml +++ b/services/logging/opensearch-prune-cronjob.yaml @@ -1,89 +1,4 @@ # services/logging/opensearch-prune-cronjob.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: opensearch-prune-script - namespace: logging -data: - prune.py: | - import json - import os - import re - import sys - import urllib.error - import urllib.request - - os_url = os.environ.get("OPENSEARCH_URL", 
"http://opensearch-master.logging.svc.cluster.local:9200").rstrip("/") - limit_bytes = int(os.environ.get("LOG_LIMIT_BYTES", str(1024**4))) - patterns = [p.strip() for p in os.environ.get("LOG_INDEX_PATTERNS", "kube-*,journald-*").split(",") if p.strip()] - - UNITS = { - "b": 1, - "kb": 1024, - "mb": 1024**2, - "gb": 1024**3, - "tb": 1024**4, - } - - def parse_size(value: str) -> int: - if not value: - return 0 - text = value.strip().lower() - if text in ("-", "0"): - return 0 - match = re.match(r"^([0-9.]+)([a-z]+)$", text) - if not match: - return 0 - number = float(match.group(1)) - unit = match.group(2) - if unit not in UNITS: - return 0 - return int(number * UNITS[unit]) - - def request_json(path: str): - url = f"{os_url}{path}" - with urllib.request.urlopen(url, timeout=30) as response: - payload = response.read().decode("utf-8") - return json.loads(payload) - - def delete_index(index: str) -> None: - url = f"{os_url}/{index}" - req = urllib.request.Request(url, method="DELETE") - with urllib.request.urlopen(req, timeout=30) as response: - _ = response.read() - print(f"deleted {index}") - - indices = [] - for pattern in patterns: - try: - data = request_json(f"/_cat/indices/{pattern}?format=json&h=index,store.size,creation.date") - except urllib.error.HTTPError as exc: - if exc.code == 404: - continue - raise - for item in data: - index = item.get("index") - if not index or index.startswith("."): - continue - size = parse_size(item.get("store.size", "")) - created = int(item.get("creation.date", "0") or 0) - indices.append({"index": index, "size": size, "created": created}) - - total = sum(item["size"] for item in indices) - print(f"total_log_bytes={total}") - if total <= limit_bytes: - print("within limit") - sys.exit(0) - - indices.sort(key=lambda item: item["created"]) - for item in indices: - if total <= limit_bytes: - break - delete_index(item["index"]) - total -= item["size"] - - print(f"remaining_log_bytes={total}") ---- apiVersion: batch/v1 kind: 
CronJob metadata: diff --git a/services/logging/scripts/node_image_gc_rpi4.sh b/services/logging/scripts/node_image_gc_rpi4.sh new file mode 100644 index 0000000..81f27b1 --- /dev/null +++ b/services/logging/scripts/node_image_gc_rpi4.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +set -euo pipefail + +changed=0 +k3s_changed=0 +k3s_agent_changed=0 + +k3s_dropin="/host/etc/systemd/system/k3s.service.d/98-image-gc.conf" +k3s_agent_dropin="/host/etc/systemd/system/k3s-agent.service.d/98-image-gc.conf" + +if [ -f "/host/etc/systemd/system/k3s.service" ] && [ ! -f "${k3s_dropin}" ]; then + mkdir -p "$(dirname "${k3s_dropin}")" + printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=image-gc-high-threshold=70\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-low-threshold=60\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-minimum-available=5Gi\"\n" > "${k3s_dropin}" + changed=1 + k3s_changed=1 +fi + +if [ -f "/host/etc/systemd/system/k3s-agent.service" ] && [ ! -f "${k3s_agent_dropin}" ]; then + mkdir -p "$(dirname "${k3s_agent_dropin}")" + printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=image-gc-high-threshold=70\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-low-threshold=60\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-minimum-available=5Gi\"\n" > "${k3s_agent_dropin}" + changed=1 + k3s_agent_changed=1 +fi + +if [ "${changed}" -eq 1 ]; then + sleep "$(( (RANDOM % 300) + 10 ))" + chroot /host /bin/systemctl daemon-reload + if [ "${k3s_changed}" -eq 1 ]; then + chroot /host /bin/systemctl restart k3s + fi + if [ "${k3s_agent_changed}" -eq 1 ]; then + chroot /host /bin/systemctl restart k3s-agent + fi +fi + +sleep infinity diff --git a/services/logging/scripts/node_image_prune_rpi5.sh b/services/logging/scripts/node_image_prune_rpi5.sh new file mode 100644 index 0000000..eb54b77 --- /dev/null +++ b/services/logging/scripts/node_image_prune_rpi5.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +set -euo pipefail + +threshold=70 + +sleep "$(( (RANDOM % 300) + 10 ))" + +while true; do + usage=$(df -P /host | awk 
'NR==2 {gsub(/%/,"",$5); print $5}') + if [ -z "${usage}" ]; then + sleep 1800 + continue + fi + + if [ "${usage}" -ge "${threshold}" ]; then + chroot /host /bin/sh -c ' + if command -v crictl >/dev/null 2>&1; then + crictl --runtime-endpoint=unix:///run/k3s/containerd/containerd.sock rmi --prune || true + elif [ -x /usr/local/bin/crictl ]; then + /usr/local/bin/crictl --runtime-endpoint=unix:///run/k3s/containerd/containerd.sock rmi --prune || true + fi + ' + fi + + sleep 21600 +done diff --git a/services/logging/scripts/node_log_rotation.sh b/services/logging/scripts/node_log_rotation.sh new file mode 100644 index 0000000..534806f --- /dev/null +++ b/services/logging/scripts/node_log_rotation.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +set -euo pipefail + +changed=0 +journald_changed=0 +k3s_changed=0 +k3s_agent_changed=0 + +journald_dropin="/host/etc/systemd/journald.conf.d/99-logging.conf" +k3s_dropin="/host/etc/systemd/system/k3s.service.d/99-logging.conf" +k3s_agent_dropin="/host/etc/systemd/system/k3s-agent.service.d/99-logging.conf" +k3s_image_gc_dropin="/host/etc/systemd/system/k3s.service.d/98-image-gc.conf" +k3s_agent_image_gc_dropin="/host/etc/systemd/system/k3s-agent.service.d/98-image-gc.conf" + +if [ ! -f "${journald_dropin}" ]; then + mkdir -p "$(dirname "${journald_dropin}")" + printf "[Journal]\nStorage=volatile\nRuntimeMaxUse=200M\nRuntimeKeepFree=512M\nMaxFileSec=1h\n" > "${journald_dropin}" + changed=1 + journald_changed=1 +fi + +if [ -f "/host/etc/systemd/system/k3s.service" ] && [ ! -f "${k3s_dropin}" ]; then + mkdir -p "$(dirname "${k3s_dropin}")" + printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=container-log-max-size=10Mi\"\nEnvironment=\"K3S_KUBELET_ARG=container-log-max-files=2\"\n" > "${k3s_dropin}" + changed=1 + k3s_changed=1 +fi + +if [ -f "/host/etc/systemd/system/k3s.service" ] && [ ! 
-f "${k3s_image_gc_dropin}" ]; then + mkdir -p "$(dirname "${k3s_image_gc_dropin}")" + printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=image-gc-high-threshold=70\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-low-threshold=60\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-minimum-available=5Gi\"\n" > "${k3s_image_gc_dropin}" + changed=1 + k3s_changed=1 +fi + +if [ -f "/host/etc/systemd/system/k3s-agent.service" ] && [ ! -f "${k3s_agent_dropin}" ]; then + mkdir -p "$(dirname "${k3s_agent_dropin}")" + printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=container-log-max-size=10Mi\"\nEnvironment=\"K3S_KUBELET_ARG=container-log-max-files=2\"\n" > "${k3s_agent_dropin}" + changed=1 + k3s_agent_changed=1 +fi + +if [ -f "/host/etc/systemd/system/k3s-agent.service" ] && [ ! -f "${k3s_agent_image_gc_dropin}" ]; then + mkdir -p "$(dirname "${k3s_agent_image_gc_dropin}")" + printf "[Service]\nEnvironment=\"K3S_KUBELET_ARG=image-gc-high-threshold=70\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-low-threshold=60\"\nEnvironment=\"K3S_KUBELET_ARG=image-gc-minimum-available=5Gi\"\n" > "${k3s_agent_image_gc_dropin}" + changed=1 + k3s_agent_changed=1 +fi + +if [ "${changed}" -eq 1 ]; then + sleep "$(( (RANDOM % 300) + 10 ))" + chroot /host /bin/systemctl daemon-reload + if [ "${journald_changed}" -eq 1 ]; then + chroot /host /bin/systemctl restart systemd-journald + fi + if [ "${k3s_changed}" -eq 1 ]; then + chroot /host /bin/systemctl restart k3s + fi + if [ "${k3s_agent_changed}" -eq 1 ]; then + chroot /host /bin/systemctl restart k3s-agent + fi +fi + +sleep infinity diff --git a/services/logging/scripts/opensearch_observability_seed.py b/services/logging/scripts/opensearch_observability_seed.py new file mode 100644 index 0000000..d7bf808 --- /dev/null +++ b/services/logging/scripts/opensearch_observability_seed.py @@ -0,0 +1,140 @@ +import json +import os +import time +import urllib.error +import urllib.request + +OSD_URL = os.environ.get( + "OSD_URL", + 
"http://opensearch-dashboards.logging.svc.cluster.local:5601", +).rstrip("/") +OBJECT_DIR = "/config" + +def request_json(method, path, payload=None): + url = f"{OSD_URL}{path}" + data = None + headers = {"osd-xsrf": "true"} + if payload is not None: + data = json.dumps(payload).encode("utf-8") + headers["Content-Type"] = "application/json" + + req = urllib.request.Request(url, data=data, method=method) + for key, value in headers.items(): + req.add_header(key, value) + + try: + with urllib.request.urlopen(req, timeout=30) as response: + body = response.read().decode("utf-8") + except urllib.error.HTTPError as exc: + detail = exc.read().decode("utf-8") + raise SystemExit(f"{method} {path} failed: {exc.code} {detail}") + + if not body: + return {} + return json.loads(body) + + +def wait_ready(): + for _ in range(60): + try: + request_json("GET", "/api/status") + return + except Exception: + time.sleep(5) + raise SystemExit("OpenSearch Dashboards did not become ready in time") + + +def load_payload(name): + path = os.path.join(OBJECT_DIR, name) + with open(path, "r", encoding="utf-8") as handle: + return json.load(handle) + + +def index_by_name(items, key): + lookup = {} + for item in items: + obj = item.get(key, {}) + name = obj.get("name") + if not name: + continue + lookup.setdefault(name, item) + return lookup + + +def ensure_applications(apps): + existing = request_json("GET", "/api/observability/application/").get("data", []) + existing_by_name = {app.get("name"): app for app in existing if app.get("name")} + + for app in apps: + name = app.get("name") + if not name: + continue + current = existing_by_name.get(name) + if not current: + request_json("POST", "/api/observability/application/", app) + print(f"created application: {name}") + continue + + if app.get("baseQuery") != current.get("baseQuery"): + print(f"baseQuery differs for {name}; skipping update") + + update_body = {} + for key in ("description", "servicesEntities", "traceGroups"): + if app.get(key, 
"") != current.get(key, ""): + update_body[key] = app.get(key, "") + + if update_body: + request_json( + "PUT", + "/api/observability/application/", + {"appId": current["id"], "updateBody": update_body}, + ) + print(f"updated application: {name}") + + +def ensure_saved_objects(objects, object_type, endpoint): + existing = request_json( + "GET", + f"/api/observability/event_analytics/saved_objects?objectType={object_type}", + ).get("observabilityObjectList", []) + key = "savedQuery" if object_type == "savedQuery" else "savedVisualization" + existing_by_name = index_by_name(existing, key) + + for obj in objects: + name = obj.get("name") + if not name: + continue + current = existing_by_name.get(name) + if not current: + request_json("POST", endpoint, {"object": obj}) + print(f"created {object_type}: {name}") + continue + + current_body = current.get(key, {}) + if current_body != obj: + request_json( + "PUT", + endpoint, + {"object_id": current["objectId"], "object": obj}, + ) + print(f"updated {object_type}: {name}") + + +def main(): + wait_ready() + + applications = load_payload("applications.json") + queries = load_payload("saved_queries.json") + visualizations = load_payload("saved_visualizations.json") + + ensure_applications(applications) + ensure_saved_objects(queries, "savedQuery", "/api/observability/event_analytics/saved_objects/query") + ensure_saved_objects( + visualizations, + "savedVisualization", + "/api/observability/event_analytics/saved_objects/vis", + ) + + +if __name__ == "__main__": + main() diff --git a/services/logging/scripts/opensearch_prune.py b/services/logging/scripts/opensearch_prune.py new file mode 100644 index 0000000..ad84d5b --- /dev/null +++ b/services/logging/scripts/opensearch_prune.py @@ -0,0 +1,77 @@ +import json +import os +import re +import sys +import urllib.error +import urllib.request + +os_url = os.environ.get("OPENSEARCH_URL", "http://opensearch-master.logging.svc.cluster.local:9200").rstrip("/") +limit_bytes = 
int(os.environ.get("LOG_LIMIT_BYTES", str(1024**4))) +patterns = [p.strip() for p in os.environ.get("LOG_INDEX_PATTERNS", "kube-*,journald-*").split(",") if p.strip()] + +UNITS = { + "b": 1, + "kb": 1024, + "mb": 1024**2, + "gb": 1024**3, + "tb": 1024**4, +} + +def parse_size(value: str) -> int: + if not value: + return 0 + text = value.strip().lower() + if text in ("-", "0"): + return 0 + match = re.match(r"^([0-9.]+)([a-z]+)$", text) + if not match: + return 0 + number = float(match.group(1)) + unit = match.group(2) + if unit not in UNITS: + return 0 + return int(number * UNITS[unit]) + +def request_json(path: str): + url = f"{os_url}{path}" + with urllib.request.urlopen(url, timeout=30) as response: + payload = response.read().decode("utf-8") + return json.loads(payload) + +def delete_index(index: str) -> None: + url = f"{os_url}/{index}" + req = urllib.request.Request(url, method="DELETE") + with urllib.request.urlopen(req, timeout=30) as response: + _ = response.read() + print(f"deleted {index}") + +indices = [] +for pattern in patterns: + try: + data = request_json(f"/_cat/indices/{pattern}?format=json&h=index,store.size,creation.date") + except urllib.error.HTTPError as exc: + if exc.code == 404: + continue + raise + for item in data: + index = item.get("index") + if not index or index.startswith("."): + continue + size = parse_size(item.get("store.size", "")) + created = int(item.get("creation.date", "0") or 0) + indices.append({"index": index, "size": size, "created": created}) + +total = sum(item["size"] for item in indices) +print(f"total_log_bytes={total}") +if total <= limit_bytes: + print("within limit") + sys.exit(0) + +indices.sort(key=lambda item: item["created"]) +for item in indices: + if total <= limit_bytes: + break + delete_index(item["index"]) + total -= item["size"] + +print(f"remaining_log_bytes={total}") diff --git a/services/maintenance/kustomization.yaml b/services/maintenance/kustomization.yaml index ccb5e7e..ce34afb 100644 --- 
a/services/maintenance/kustomization.yaml +++ b/services/maintenance/kustomization.yaml @@ -5,11 +5,28 @@ resources: - namespace.yaml - node-nofile-serviceaccount.yaml - pod-cleaner-rbac.yaml - - node-nofile-script.yaml - - pod-cleaner-script.yaml - node-nofile-daemonset.yaml - pod-cleaner-cronjob.yaml - node-image-sweeper-serviceaccount.yaml - - node-image-sweeper-script.yaml - node-image-sweeper-daemonset.yaml - image-sweeper-cronjob.yaml + +configMapGenerator: + - name: node-nofile-script + namespace: maintenance + files: + - node_nofile.sh=scripts/node_nofile.sh + options: + disableNameSuffixHash: true + - name: pod-cleaner-script + namespace: maintenance + files: + - pod_cleaner.sh=scripts/pod_cleaner.sh + options: + disableNameSuffixHash: true + - name: node-image-sweeper-script + namespace: maintenance + files: + - node_image_sweeper.sh=scripts/node_image_sweeper.sh + options: + disableNameSuffixHash: true diff --git a/services/maintenance/node-image-sweeper-script.yaml b/services/maintenance/node-image-sweeper-script.yaml deleted file mode 100644 index 6e3b02c..0000000 --- a/services/maintenance/node-image-sweeper-script.yaml +++ /dev/null @@ -1,100 +0,0 @@ -# services/maintenance/node-image-sweeper-script.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: node-image-sweeper-script - namespace: maintenance -data: - node_image_sweeper.sh: | - #!/bin/sh - set -eu - - ONE_SHOT=${ONE_SHOT:-false} - THRESHOLD_DAYS=14 - - usage=$(df -P /host | awk 'NR==2 {gsub(/%/,"",$5); print $5}') || usage="" - if [ -n "${usage}" ] && [ "${usage}" -ge 70 ]; then - THRESHOLD_DAYS=3 - fi - - cutoff=$(python3 - <<'PY' - import time, os - print(int(time.time()) - int(os.environ.get("THRESHOLD_DAYS", "14")) * 86400) - PY - ) - - RUNNING=$(chroot /host /bin/sh -c "crictl ps -a --quiet 2>/dev/null" | tr -s ' ' '\n' | sort -u | tr '\n' ' ') - IMAGES_JSON=$(chroot /host /bin/sh -c "crictl images -o json 2>/dev/null" || echo '{}') - - SKIP="registry.k8s.io/pause k8s.gcr.io/pause 
rancher/mirrored-pause" - - prune_list=$(printf "%s" "${IMAGES_JSON}" | CUTOFF="${cutoff}" RUNNING="${RUNNING}" SKIP="${SKIP}" python3 - <<'PY' - import json, os, sys, time - - try: - data = json.load(sys.stdin) - except Exception: - print("", end="") - sys.exit(0) - - cutoff = int(os.environ.get("CUTOFF", "0")) - running = set(os.environ.get("RUNNING", "").split()) - skip = os.environ.get("SKIP", "").split() - now = int(time.time()) - prune = [] - - - def is_skip(tags): - if not tags: - return False - for t in tags: - for prefix in skip: - if prefix and t.startswith(prefix): - return True - return False - - - for img in data.get("images", []): - image_id = img.get("id", "") - if not image_id: - continue - if image_id in running: - continue - tags = img.get("repoTags") or [] - if is_skip(tags): - continue - created = img.get("createdAt") or 0 - try: - created = int(str(created)) // 1000000000 - except Exception: - created = 0 - if created and created > now: - created = now - if cutoff and created and created < cutoff: - prune.append(image_id) - - seen = set() - for p in prune: - if p in seen: - continue - seen.add(p) - print(p) - PY - ) - - if [ -n "${prune_list}" ]; then - printf "%s" "${prune_list}" | while read -r image_id; do - if [ -n "${image_id}" ]; then - chroot /host /bin/sh -c "crictl rmi --prune ${image_id}" || true - fi - done - fi - - find /host/var/lib/rancher/k3s/agent/images -type f -name "*.tar" -mtime +7 -print -delete 2>/dev/null || true - find /host/var/lib/rancher/k3s/agent/containerd -maxdepth 1 -type f -mtime +7 -print -delete 2>/dev/null || true - - if [ "${ONE_SHOT}" = "true" ]; then - exit 0 - fi - - sleep infinity diff --git a/services/maintenance/node-nofile-script.yaml b/services/maintenance/node-nofile-script.yaml deleted file mode 100644 index 2e2b440..0000000 --- a/services/maintenance/node-nofile-script.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# services/maintenance/node-nofile-script.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - 
name: node-nofile-script - namespace: maintenance -data: - node_nofile.sh: | - #!/usr/bin/env bash - set -euo pipefail - - limit_line="LimitNOFILE=1048576" - changed=0 - - for unit in k3s k3s-agent; do - unit_file="/host/etc/systemd/system/${unit}.service" - if [ -f "${unit_file}" ]; then - dropin_dir="/host/etc/systemd/system/${unit}.service.d" - dropin_file="${dropin_dir}/99-nofile.conf" - if [ ! -f "${dropin_file}" ] || ! grep -q "${limit_line}" "${dropin_file}"; then - mkdir -p "${dropin_dir}" - printf "[Service]\n%s\n" "${limit_line}" > "${dropin_file}" - changed=1 - fi - fi - done - - if [ "${changed}" -eq 1 ]; then - sleep "$(( (RANDOM % 300) + 10 ))" - chroot /host /bin/systemctl daemon-reload - for unit in k3s k3s-agent; do - if [ -f "/host/etc/systemd/system/${unit}.service" ]; then - chroot /host /bin/systemctl restart "${unit}" - fi - done - fi - - sleep infinity diff --git a/services/maintenance/pod-cleaner-script.yaml b/services/maintenance/pod-cleaner-script.yaml deleted file mode 100644 index 909a37c..0000000 --- a/services/maintenance/pod-cleaner-script.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# services/maintenance/pod-cleaner-script.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: pod-cleaner-script - namespace: maintenance -data: - pod_cleaner.sh: | - #!/usr/bin/env bash - set -euo pipefail - - for phase in Succeeded Failed; do - kubectl get pods -A --field-selector="status.phase=${phase}" \ - -o jsonpath='{range .items[*]}{.metadata.namespace}{" "}{.metadata.name}{"\n"}{end}' \ - | while read -r namespace name; do - if [ -n "${namespace}" ] && [ -n "${name}" ]; then - kubectl delete pod -n "${namespace}" "${name}" --ignore-not-found --grace-period=0 --wait=false - fi - done - done diff --git a/services/maintenance/scripts/node_image_sweeper.sh b/services/maintenance/scripts/node_image_sweeper.sh new file mode 100644 index 0000000..2ad7b47 --- /dev/null +++ b/services/maintenance/scripts/node_image_sweeper.sh @@ -0,0 +1,92 @@ +#!/bin/sh +set 
-eu + +ONE_SHOT=${ONE_SHOT:-false} +THRESHOLD_DAYS=14 + +usage=$(df -P /host | awk 'NR==2 {gsub(/%/,"",$5); print $5}') || usage="" +if [ -n "${usage}" ] && [ "${usage}" -ge 70 ]; then + THRESHOLD_DAYS=3 +fi + +cutoff=$(python3 - <<'PY' +import time, os +print(int(time.time()) - int(os.environ.get("THRESHOLD_DAYS", "14")) * 86400) +PY +) + +RUNNING=$(chroot /host /bin/sh -c "crictl ps -a --quiet 2>/dev/null" | tr -s ' ' '\n' | sort -u | tr '\n' ' ') +IMAGES_JSON=$(chroot /host /bin/sh -c "crictl images -o json 2>/dev/null" || echo '{}') + +SKIP="registry.k8s.io/pause k8s.gcr.io/pause rancher/mirrored-pause" + +prune_list=$(printf "%s" "${IMAGES_JSON}" | CUTOFF="${cutoff}" RUNNING="${RUNNING}" SKIP="${SKIP}" python3 - <<'PY' +import json, os, sys, time + +try: + data = json.load(sys.stdin) +except Exception: + print("", end="") + sys.exit(0) + +cutoff = int(os.environ.get("CUTOFF", "0")) +running = set(os.environ.get("RUNNING", "").split()) +skip = os.environ.get("SKIP", "").split() +now = int(time.time()) +prune = [] + + +def is_skip(tags): + if not tags: + return False + for t in tags: + for prefix in skip: + if prefix and t.startswith(prefix): + return True + return False + + +for img in data.get("images", []): + image_id = img.get("id", "") + if not image_id: + continue + if image_id in running: + continue + tags = img.get("repoTags") or [] + if is_skip(tags): + continue + created = img.get("createdAt") or 0 + try: + created = int(str(created)) // 1000000000 + except Exception: + created = 0 + if created and created > now: + created = now + if cutoff and created and created < cutoff: + prune.append(image_id) + +seen = set() +for p in prune: + if p in seen: + continue + seen.add(p) + print(p) +PY +) + +if [ -n "${prune_list}" ]; then + printf "%s" "${prune_list}" | while read -r image_id; do + if [ -n "${image_id}" ]; then + chroot /host /bin/sh -c "crictl rmi --prune ${image_id}" || true + fi + done +fi + +find /host/var/lib/rancher/k3s/agent/images -type f 
-name "*.tar" -mtime +7 -print -delete 2>/dev/null || true +find /host/var/lib/rancher/k3s/agent/containerd -maxdepth 1 -type f -mtime +7 -print -delete 2>/dev/null || true + +if [ "${ONE_SHOT}" = "true" ]; then + exit 0 +fi + +sleep infinity diff --git a/services/maintenance/scripts/node_nofile.sh b/services/maintenance/scripts/node_nofile.sh new file mode 100644 index 0000000..cf6c5d9 --- /dev/null +++ b/services/maintenance/scripts/node_nofile.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -euo pipefail + +limit_line="LimitNOFILE=1048576" +changed=0 + +for unit in k3s k3s-agent; do + unit_file="/host/etc/systemd/system/${unit}.service" + if [ -f "${unit_file}" ]; then + dropin_dir="/host/etc/systemd/system/${unit}.service.d" + dropin_file="${dropin_dir}/99-nofile.conf" + if [ ! -f "${dropin_file}" ] || ! grep -q "${limit_line}" "${dropin_file}"; then + mkdir -p "${dropin_dir}" + printf "[Service]\n%s\n" "${limit_line}" > "${dropin_file}" + changed=1 + fi + fi +done + +if [ "${changed}" -eq 1 ]; then + sleep "$(( (RANDOM % 300) + 10 ))" + chroot /host /bin/systemctl daemon-reload + for unit in k3s k3s-agent; do + if [ -f "/host/etc/systemd/system/${unit}.service" ]; then + chroot /host /bin/systemctl restart "${unit}" + fi + done +fi + +sleep infinity diff --git a/services/maintenance/scripts/pod_cleaner.sh b/services/maintenance/scripts/pod_cleaner.sh new file mode 100644 index 0000000..2ec043e --- /dev/null +++ b/services/maintenance/scripts/pod_cleaner.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +set -euo pipefail + +for phase in Succeeded Failed; do + kubectl get pods -A --field-selector="status.phase=${phase}" \ + -o jsonpath='{range .items[*]}{.metadata.namespace}{" "}{.metadata.name}{"\n"}{end}' \ + | while read -r namespace name; do + if [ -n "${namespace}" ] && [ -n "${name}" ]; then + kubectl delete pod -n "${namespace}" "${name}" --ignore-not-found --grace-period=0 --wait=false + fi + done +done diff --git 
a/services/monitoring/grafana-smtp-sync-script.yaml b/services/monitoring/grafana-smtp-sync-script.yaml deleted file mode 100644 index 0a58a3c..0000000 --- a/services/monitoring/grafana-smtp-sync-script.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# services/monitoring/grafana-smtp-sync-script.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: grafana-smtp-sync-script - namespace: monitoring -data: - sync.sh: | - #!/bin/sh - set -euo pipefail - - SOURCE_NS=${SOURCE_NS:-mailu-mailserver} - SOURCE_SECRET=${SOURCE_SECRET:-mailu-postmark-relay} - TARGET_NS=${TARGET_NS:-monitoring} - TARGET_SECRET=${TARGET_SECRET:-grafana-smtp} - - tmp=$(mktemp) - cleanup() { rm -f "$tmp"; } - trap cleanup EXIT - - kubectl -n "$SOURCE_NS" get secret "$SOURCE_SECRET" -o json > "$tmp" - - pass=$(jq -r '.data["relay-password"]' "$tmp") - user=$pass - - if [ -z "$user" ] || [ -z "$pass" ] || [ "$user" = "null" ] || [ "$pass" = "null" ]; then - echo "missing credentials from $SOURCE_NS/$SOURCE_SECRET" >&2 - exit 1 - fi - - cat < dict: - today = dt.date.today() - fromdate = today - dt.timedelta(days=window.days) - params = {"fromdate": fromdate.isoformat(), "todate": today.isoformat()} - headers = { - "Accept": "application/json", - "X-Postmark-Server-Token": token, - } - response = requests.get( - f"{API_BASE}/stats/outbound", - headers=headers, - params=params, - timeout=15, - ) - response.raise_for_status() - return response.json() - - - def update_metrics(token: str) -> None: - sent_by_window = {} - for window in WINDOWS: - data = fetch_outbound_stats(token, window) - sent = int(data.get("Sent", 0) or 0) - bounced = int(data.get("Bounced", 0) or 0) - rate = (bounced / sent * 100.0) if sent else 0.0 - sent_by_window[window.label] = sent - POSTMARK_OUTBOUND_SENT.labels(window=window.label).set(sent) - POSTMARK_OUTBOUND_BOUNCED.labels(window=window.label).set(bounced) - POSTMARK_OUTBOUND_BOUNCE_RATE.labels(window=window.label).set(rate) - - POSTMARK_SENDING_LIMIT_GAUGE.set(SENDING_LIMIT) - 
limit_window_sent = sent_by_window.get(LIMIT_WINDOW, 0) - POSTMARK_SENDING_LIMIT_USED.set(limit_window_sent) - if SENDING_LIMIT: - POSTMARK_SENDING_LIMIT_USED_PERCENT.set(limit_window_sent / SENDING_LIMIT * 100.0) - else: - POSTMARK_SENDING_LIMIT_USED_PERCENT.set(0.0) - - - def main() -> None: - if not PRIMARY_TOKEN and not FALLBACK_TOKEN: - raise SystemExit("POSTMARK_SERVER_TOKEN or POSTMARK_SERVER_TOKEN_FALLBACK is required") - - start_http_server(LISTEN_PORT, addr=LISTEN_ADDRESS) - - tokens = [token for token in (PRIMARY_TOKEN, FALLBACK_TOKEN) if token] - token_index = 0 - - while True: - token = tokens[token_index % len(tokens)] - token_index += 1 - try: - update_metrics(token) - POSTMARK_API_UP.set(1) - POSTMARK_LAST_SUCCESS.set(time.time()) - except Exception as exc: # noqa: BLE001 - POSTMARK_API_UP.set(0) - POSTMARK_REQUEST_ERRORS.inc() - print(f"postmark_exporter: refresh failed: {exc}", flush=True) - time.sleep(POLL_INTERVAL_SECONDS) - - - if __name__ == "__main__": - main() diff --git a/services/monitoring/scripts/grafana_smtp_sync.sh b/services/monitoring/scripts/grafana_smtp_sync.sh new file mode 100644 index 0000000..c8207ad --- /dev/null +++ b/services/monitoring/scripts/grafana_smtp_sync.sh @@ -0,0 +1,31 @@ +#!/bin/sh +set -euo pipefail + +SOURCE_NS=${SOURCE_NS:-mailu-mailserver} +SOURCE_SECRET=${SOURCE_SECRET:-mailu-postmark-relay} +TARGET_NS=${TARGET_NS:-monitoring} +TARGET_SECRET=${TARGET_SECRET:-grafana-smtp} + +tmp=$(mktemp) +cleanup() { rm -f "$tmp"; } +trap cleanup EXIT + +kubectl -n "$SOURCE_NS" get secret "$SOURCE_SECRET" -o json > "$tmp" + +pass=$(jq -r '.data["relay-password"]' "$tmp") +user=$pass + +if [ -z "$user" ] || [ -z "$pass" ] || [ "$user" = "null" ] || [ "$pass" = "null" ]; then + echo "missing credentials from $SOURCE_NS/$SOURCE_SECRET" >&2 + exit 1 +fi + +cat < dict: + today = dt.date.today() + fromdate = today - dt.timedelta(days=window.days) + params = {"fromdate": fromdate.isoformat(), "todate": today.isoformat()} + headers 
= { + "Accept": "application/json", + "X-Postmark-Server-Token": token, + } + response = requests.get( + f"{API_BASE}/stats/outbound", + headers=headers, + params=params, + timeout=15, + ) + response.raise_for_status() + return response.json() + + +def update_metrics(token: str) -> None: + sent_by_window = {} + for window in WINDOWS: + data = fetch_outbound_stats(token, window) + sent = int(data.get("Sent", 0) or 0) + bounced = int(data.get("Bounced", 0) or 0) + rate = (bounced / sent * 100.0) if sent else 0.0 + sent_by_window[window.label] = sent + POSTMARK_OUTBOUND_SENT.labels(window=window.label).set(sent) + POSTMARK_OUTBOUND_BOUNCED.labels(window=window.label).set(bounced) + POSTMARK_OUTBOUND_BOUNCE_RATE.labels(window=window.label).set(rate) + + POSTMARK_SENDING_LIMIT_GAUGE.set(SENDING_LIMIT) + limit_window_sent = sent_by_window.get(LIMIT_WINDOW, 0) + POSTMARK_SENDING_LIMIT_USED.set(limit_window_sent) + if SENDING_LIMIT: + POSTMARK_SENDING_LIMIT_USED_PERCENT.set(limit_window_sent / SENDING_LIMIT * 100.0) + else: + POSTMARK_SENDING_LIMIT_USED_PERCENT.set(0.0) + + +def main() -> None: + if not PRIMARY_TOKEN and not FALLBACK_TOKEN: + raise SystemExit("POSTMARK_SERVER_TOKEN or POSTMARK_SERVER_TOKEN_FALLBACK is required") + + start_http_server(LISTEN_PORT, addr=LISTEN_ADDRESS) + + tokens = [token for token in (PRIMARY_TOKEN, FALLBACK_TOKEN) if token] + token_index = 0 + + while True: + token = tokens[token_index % len(tokens)] + token_index += 1 + try: + update_metrics(token) + POSTMARK_API_UP.set(1) + POSTMARK_LAST_SUCCESS.set(time.time()) + except Exception as exc: # noqa: BLE001 + POSTMARK_API_UP.set(0) + POSTMARK_REQUEST_ERRORS.inc() + print(f"postmark_exporter: refresh failed: {exc}", flush=True) + time.sleep(POLL_INTERVAL_SECONDS) + + +if __name__ == "__main__": + main() -- 2.47.2 From 3fc9f7bbdba369765c09a9b3cdfc560cf4338424 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 13 Jan 2026 12:07:03 -0300 Subject: [PATCH 681/684] iac: localize configmap scripts 
--- scripts/tests/test_mailu_sync.py | 8 +++++++- services/keycloak/kustomization.yaml | 6 +++--- .../scripts}/sso_portal_e2e_client_secret_sync.sh | 0 .../scripts}/tests/test_keycloak_execute_actions_email.py | 0 .../keycloak/scripts}/tests/test_portal_token_exchange.py | 0 services/mailu/kustomization.yaml | 4 ++-- {scripts => services/mailu/scripts}/mailu_sync.py | 0 .../mailu/scripts}/mailu_sync_listener.py | 0 services/nextcloud-mail-sync/kustomization.yaml | 2 +- .../nextcloud-mail-sync/scripts}/nextcloud-mail-sync.sh | 0 services/nextcloud/kustomization.yaml | 2 +- .../nextcloud/scripts}/nextcloud-maintenance.sh | 0 12 files changed, 14 insertions(+), 8 deletions(-) rename {scripts => services/keycloak/scripts}/sso_portal_e2e_client_secret_sync.sh (100%) rename {scripts => services/keycloak/scripts}/tests/test_keycloak_execute_actions_email.py (100%) rename {scripts => services/keycloak/scripts}/tests/test_portal_token_exchange.py (100%) rename {scripts => services/mailu/scripts}/mailu_sync.py (100%) rename {scripts => services/mailu/scripts}/mailu_sync_listener.py (100%) rename {scripts => services/nextcloud-mail-sync/scripts}/nextcloud-mail-sync.sh (100%) rename {scripts => services/nextcloud/scripts}/nextcloud-maintenance.sh (100%) diff --git a/scripts/tests/test_mailu_sync.py b/scripts/tests/test_mailu_sync.py index 9e5f383..49bd2e4 100644 --- a/scripts/tests/test_mailu_sync.py +++ b/scripts/tests/test_mailu_sync.py @@ -20,7 +20,13 @@ def load_sync_module(monkeypatch): } for k, v in env.items(): monkeypatch.setenv(k, v) - module_path = pathlib.Path(__file__).resolve().parents[1] / "mailu_sync.py" + module_path = ( + pathlib.Path(__file__).resolve().parents[2] + / "services" + / "mailu" + / "scripts" + / "mailu_sync.py" + ) spec = importlib.util.spec_from_file_location("mailu_sync_testmod", module_path) module = importlib.util.module_from_spec(spec) assert spec.loader is not None diff --git a/services/keycloak/kustomization.yaml 
b/services/keycloak/kustomization.yaml index 05639e5..ddb4ab2 100644 --- a/services/keycloak/kustomization.yaml +++ b/services/keycloak/kustomization.yaml @@ -26,8 +26,8 @@ generatorOptions: configMapGenerator: - name: portal-e2e-tests files: - - test_portal_token_exchange.py=../../scripts/tests/test_portal_token_exchange.py - - test_keycloak_execute_actions_email.py=../../scripts/tests/test_keycloak_execute_actions_email.py + - test_portal_token_exchange.py=scripts/tests/test_portal_token_exchange.py + - test_keycloak_execute_actions_email.py=scripts/tests/test_keycloak_execute_actions_email.py - name: portal-e2e-client-secret-sync-script files: - - sso_portal_e2e_client_secret_sync.sh=../../scripts/sso_portal_e2e_client_secret_sync.sh + - sso_portal_e2e_client_secret_sync.sh=scripts/sso_portal_e2e_client_secret_sync.sh diff --git a/scripts/sso_portal_e2e_client_secret_sync.sh b/services/keycloak/scripts/sso_portal_e2e_client_secret_sync.sh similarity index 100% rename from scripts/sso_portal_e2e_client_secret_sync.sh rename to services/keycloak/scripts/sso_portal_e2e_client_secret_sync.sh diff --git a/scripts/tests/test_keycloak_execute_actions_email.py b/services/keycloak/scripts/tests/test_keycloak_execute_actions_email.py similarity index 100% rename from scripts/tests/test_keycloak_execute_actions_email.py rename to services/keycloak/scripts/tests/test_keycloak_execute_actions_email.py diff --git a/scripts/tests/test_portal_token_exchange.py b/services/keycloak/scripts/tests/test_portal_token_exchange.py similarity index 100% rename from scripts/tests/test_portal_token_exchange.py rename to services/keycloak/scripts/tests/test_portal_token_exchange.py diff --git a/services/mailu/kustomization.yaml b/services/mailu/kustomization.yaml index 9e9359b..af4b2b1 100644 --- a/services/mailu/kustomization.yaml +++ b/services/mailu/kustomization.yaml @@ -19,10 +19,10 @@ configMapGenerator: - name: mailu-sync-script namespace: mailu-mailserver files: - - 
sync.py=../../scripts/mailu_sync.py + - sync.py=scripts/mailu_sync.py options: disableNameSuffixHash: true - name: mailu-sync-listener namespace: mailu-mailserver files: - - listener.py=../../scripts/mailu_sync_listener.py + - listener.py=scripts/mailu_sync_listener.py diff --git a/scripts/mailu_sync.py b/services/mailu/scripts/mailu_sync.py similarity index 100% rename from scripts/mailu_sync.py rename to services/mailu/scripts/mailu_sync.py diff --git a/scripts/mailu_sync_listener.py b/services/mailu/scripts/mailu_sync_listener.py similarity index 100% rename from scripts/mailu_sync_listener.py rename to services/mailu/scripts/mailu_sync_listener.py diff --git a/services/nextcloud-mail-sync/kustomization.yaml b/services/nextcloud-mail-sync/kustomization.yaml index fb18550..fb2a077 100644 --- a/services/nextcloud-mail-sync/kustomization.yaml +++ b/services/nextcloud-mail-sync/kustomization.yaml @@ -8,6 +8,6 @@ resources: configMapGenerator: - name: nextcloud-mail-sync-script files: - - sync.sh=../../scripts/nextcloud-mail-sync.sh + - sync.sh=scripts/nextcloud-mail-sync.sh options: disableNameSuffixHash: true diff --git a/scripts/nextcloud-mail-sync.sh b/services/nextcloud-mail-sync/scripts/nextcloud-mail-sync.sh similarity index 100% rename from scripts/nextcloud-mail-sync.sh rename to services/nextcloud-mail-sync/scripts/nextcloud-mail-sync.sh diff --git a/services/nextcloud/kustomization.yaml b/services/nextcloud/kustomization.yaml index 8192690..14e0ec1 100644 --- a/services/nextcloud/kustomization.yaml +++ b/services/nextcloud/kustomization.yaml @@ -15,6 +15,6 @@ resources: configMapGenerator: - name: nextcloud-maintenance-script files: - - maintenance.sh=../../scripts/nextcloud-maintenance.sh + - maintenance.sh=scripts/nextcloud-maintenance.sh options: disableNameSuffixHash: true diff --git a/scripts/nextcloud-maintenance.sh b/services/nextcloud/scripts/nextcloud-maintenance.sh similarity index 100% rename from scripts/nextcloud-maintenance.sh rename to 
services/nextcloud/scripts/nextcloud-maintenance.sh -- 2.47.2 From 5aeec67bfb8ef37868df65347bb79af023bf79c0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 13 Jan 2026 12:35:59 -0300 Subject: [PATCH 682/684] postgres: add flux + vault csi --- .../applications/kustomization.yaml | 1 + .../applications/postgres/kustomization.yaml | 24 +++++++++++++++++++ services/postgres/kustomization.yaml | 2 ++ services/postgres/secretproviderclass.yaml | 15 ++++++++++++ services/postgres/serviceaccount.yaml | 6 +++++ services/postgres/statefulset.yaml | 18 ++++++++++---- 6 files changed, 61 insertions(+), 5 deletions(-) create mode 100644 clusters/atlas/flux-system/applications/postgres/kustomization.yaml create mode 100644 services/postgres/secretproviderclass.yaml create mode 100644 services/postgres/serviceaccount.yaml diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index d48cf9e..6788653 100644 --- a/clusters/atlas/flux-system/applications/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/kustomization.yaml @@ -25,5 +25,6 @@ resources: - ai-llm/kustomization.yaml - nextcloud/kustomization.yaml - nextcloud-mail-sync/kustomization.yaml + - postgres/kustomization.yaml - outline/kustomization.yaml - planka/kustomization.yaml diff --git a/clusters/atlas/flux-system/applications/postgres/kustomization.yaml b/clusters/atlas/flux-system/applications/postgres/kustomization.yaml new file mode 100644 index 0000000..07df4c7 --- /dev/null +++ b/clusters/atlas/flux-system/applications/postgres/kustomization.yaml @@ -0,0 +1,24 @@ +# clusters/atlas/flux-system/applications/postgres/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: postgres + namespace: flux-system +spec: + interval: 10m + path: ./services/postgres + prune: true + force: true + sourceRef: + kind: GitRepository + name: flux-system + targetNamespace: postgres + dependsOn: 
+ - name: vault + - name: vault-csi + healthChecks: + - apiVersion: apps/v1 + kind: StatefulSet + name: postgres + namespace: postgres + wait: true diff --git a/services/postgres/kustomization.yaml b/services/postgres/kustomization.yaml index 1d7c8c0..e9d2c98 100644 --- a/services/postgres/kustomization.yaml +++ b/services/postgres/kustomization.yaml @@ -4,5 +4,7 @@ kind: Kustomization namespace: postgres resources: - namespace.yaml + - serviceaccount.yaml + - secretproviderclass.yaml - service.yaml - statefulset.yaml diff --git a/services/postgres/secretproviderclass.yaml b/services/postgres/secretproviderclass.yaml new file mode 100644 index 0000000..31d247e --- /dev/null +++ b/services/postgres/secretproviderclass.yaml @@ -0,0 +1,15 @@ +# services/postgres/secretproviderclass.yaml +apiVersion: secrets-store.csi.x-k8s.io/v1 +kind: SecretProviderClass +metadata: + name: postgres-vault + namespace: postgres +spec: + provider: vault + parameters: + vaultAddress: "http://vault.vault.svc.cluster.local:8200" + roleName: "postgres" + objects: | + - objectName: "postgres_password" + secretPath: "kv/data/postgres" + secretKey: "POSTGRES_PASSWORD" diff --git a/services/postgres/serviceaccount.yaml b/services/postgres/serviceaccount.yaml new file mode 100644 index 0000000..0c3db0c --- /dev/null +++ b/services/postgres/serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/postgres/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: postgres-vault + namespace: postgres diff --git a/services/postgres/statefulset.yaml b/services/postgres/statefulset.yaml index 014567b..aa96003 100644 --- a/services/postgres/statefulset.yaml +++ b/services/postgres/statefulset.yaml @@ -22,6 +22,7 @@ spec: labels: app: postgres spec: + serviceAccountName: postgres-vault nodeSelector: node-role.kubernetes.io/worker: "true" affinity: @@ -47,16 +48,23 @@ spec: value: /var/lib/postgresql/data/pgdata - name: POSTGRES_USER value: postgres - - name: POSTGRES_PASSWORD - valueFrom: - 
secretKeyRef: - name: postgres-auth - key: POSTGRES_PASSWORD + - name: POSTGRES_PASSWORD_FILE + value: /mnt/vault/postgres_password - name: POSTGRES_DB value: postgres volumeMounts: - name: postgres-data mountPath: /var/lib/postgresql/data + - name: vault-secrets + mountPath: /mnt/vault + readOnly: true + volumes: + - name: vault-secrets + csi: + driver: secrets-store.csi.k8s.io + readOnly: true + volumeAttributes: + secretProviderClass: postgres-vault volumeClaimTemplates: - metadata: name: postgres-data -- 2.47.2 From 073b44e0c351a3d52f15ca484c9d1384dbca54f2 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 13 Jan 2026 12:47:41 -0300 Subject: [PATCH 683/684] gitea: auto-link oidc accounts --- services/gitea/deployment.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/gitea/deployment.yaml b/services/gitea/deployment.yaml index d17a007..ed2cd63 100644 --- a/services/gitea/deployment.yaml +++ b/services/gitea/deployment.yaml @@ -125,6 +125,8 @@ spec: value: "true" - name: GITEA__oauth2_client__ENABLE_AUTO_REGISTRATION value: "true" + - name: GITEA__oauth2_client__ACCOUNT_LINKING + value: "auto" - name: GITEA__service__ALLOW_ONLY_EXTERNAL_REGISTRATION value: "true" - name: GITEA__service__DISABLE_REGISTRATION -- 2.47.2 From b09100e787ea7d430ff46304c223f78468445598 Mon Sep 17 00:00:00 2001 From: flux-bot Date: Tue, 13 Jan 2026 15:57:24 +0000 Subject: [PATCH 684/684] chore(bstein-dev-home): automated image update --- services/bstein-dev-home/kustomization.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index ae8cf4b..81220e8 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -16,13 +16,11 @@ resources: - vaultwarden-cred-sync-cronjob.yaml - portal-onboarding-e2e-test-job.yaml - ingress.yaml - images: - name: registry.bstein.dev/bstein/bstein-dev-home-frontend - newTag: 
0.1.1-0 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + newTag: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} - name: registry.bstein.dev/bstein/bstein-dev-home-backend - newTag: 0.1.1-0 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} - + newTag: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} configMapGenerator: - name: chat-ai-gateway namespace: bstein-dev-home -- 2.47.2