From f3bed1938beaf94c8abc184c80c91cfb5a059934 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 02:02:21 -0300 Subject: [PATCH 01/18] monero ingress + move pegasus to arm64 --- services/crypto/monerod/ingress.yaml | 25 ++++++++++++++++++++++ services/crypto/monerod/kustomization.yaml | 1 + services/pegasus/deployment.yaml | 3 ++- 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 services/crypto/monerod/ingress.yaml diff --git a/services/crypto/monerod/ingress.yaml b/services/crypto/monerod/ingress.yaml new file mode 100644 index 00000000..1ac06c7d --- /dev/null +++ b/services/crypto/monerod/ingress.yaml @@ -0,0 +1,25 @@ +# services/crypto/monerod/ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: monerod + namespace: crypto + annotations: + kubernetes.io/ingress.class: traefik + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" + cert-manager.io/cluster-issuer: letsencrypt +spec: + tls: + - hosts: [ "monero.bstein.dev" ] + secretName: monero-bstein-dev-tls + rules: + - host: monero.bstein.dev + http: + paths: + - pathType: Prefix + path: / + backend: + service: + name: monerod + port: { number: 18081 } diff --git a/services/crypto/monerod/kustomization.yaml b/services/crypto/monerod/kustomization.yaml index 73b6cf6b..d6c1b19a 100644 --- a/services/crypto/monerod/kustomization.yaml +++ b/services/crypto/monerod/kustomization.yaml @@ -6,3 +6,4 @@ resources: - cm-release-keys.yaml - deployment.yaml - service.yaml + - ingress.yaml diff --git a/services/pegasus/deployment.yaml b/services/pegasus/deployment.yaml index 3a10dfe0..34270b0f 100644 --- a/services/pegasus/deployment.yaml +++ b/services/pegasus/deployment.yaml @@ -16,7 +16,8 @@ spec: metadata: { labels: { app: pegasus } } spec: nodeSelector: - kubernetes.io/arch: amd64 + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" securityContext: runAsNonRoot: true runAsUser: 65532 From 21d77d000dcca03ed2af8a3e75ccd3e5cf9e8538 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 02:19:59 -0300 Subject: [PATCH 02/18] point flux to feature/bstein-dev-home branch --- clusters/atlas/flux-system/gotk-sync.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusters/atlas/flux-system/gotk-sync.yaml b/clusters/atlas/flux-system/gotk-sync.yaml index 006bdd31..5318f120 100644 --- a/clusters/atlas/flux-system/gotk-sync.yaml +++ b/clusters/atlas/flux-system/gotk-sync.yaml @@ -8,7 +8,7 @@ metadata: spec: interval: 1m0s ref: - branch: feature/ci-gitops + branch: feature/bstein-dev-home secretRef: name: flux-system-gitea url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git From c26c23c1d527130850021782ea984eb95c06997e Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 02:50:31 -0300 Subject: [PATCH 03/18] Jenkins: add dark theme via simple-theme-plugin --- services/jenkins/helmrelease.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index c94c7fbd..34d5a016 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -50,6 +50,7 @@ spec: - oic-auth - job-dsl - configuration-as-code-support + - simple-theme-plugin containerEnv: - name: ENABLE_OIDC value: "true" @@ -287,6 +288,11 @@ spec: } } } + theme.yaml: | + unclassified: + simple-theme-plugin: + elements: + - cssUrl: "https://jenkins-contrib-themes.github.io/jenkins-material-theme/dist/material-ocean.css" pipelineJob('bstein-dev-home') { triggers { scm('H/2 * * * *') From d7b77d82e9e4d1679f5586042a119ec9fad6732d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 02:55:25 -0300 Subject: [PATCH 04/18] Fix Jenkins JCasC theme/job blocks --- services/jenkins/helmrelease.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index 34d5a016..a6d1f4a5 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -288,11 +288,6 @@ spec: } } } - theme.yaml: | - unclassified: - simple-theme-plugin: - elements: - - cssUrl: "https://jenkins-contrib-themes.github.io/jenkins-material-theme/dist/material-ocean.css" pipelineJob('bstein-dev-home') { triggers { scm('H/2 * * * *') @@ -312,6 +307,11 @@ spec: } } } + theme: | + unclassified: + simple-theme-plugin: + elements: + - cssUrl: "https://jenkins-contrib-themes.github.io/jenkins-material-theme/dist/material-ocean.css" persistence: enabled: true storageClass: astreae From 9ed31c3927864d697ad3ff8315f565aa4cc73552 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 03:01:58 -0300 Subject: [PATCH 05/18] Jenkins theme: merge into base config to avoid JCasC conflict --- services/jenkins/helmrelease.yaml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index a6d1f4a5..a12e9a93 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -237,6 +237,9 @@ spec: tokenGenerationOnCreationEnabled: false usageStatisticsEnabled: true unclassified: + simple-theme-plugin: + elements: + - cssUrl: "https://jenkins-contrib-themes.github.io/jenkins-material-theme/dist/material-ocean.css" creds.yaml: | credentials: system: @@ -307,11 +310,6 @@ spec: } } } - theme: | - unclassified: - simple-theme-plugin: - elements: - - cssUrl: "https://jenkins-contrib-themes.github.io/jenkins-material-theme/dist/material-ocean.css" persistence: enabled: true storageClass: astreae From 14c1535ad10d79be3ecdd0175f1cc0292485f460 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 03:12:22 -0300 Subject: [PATCH 06/18] Jenkins: add startupProbe delay to avoid premature restarts --- services/jenkins/helmrelease.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index a12e9a93..74587db3 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -30,6 +30,14 @@ spec: controller: nodeSelector: hardware: rpi4 + startupProbe: + httpGet: + path: /login + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 20 jenkinsUrl: https://ci.bstein.dev ingress: enabled: true From 9320e776f0f7bd9014f24c3f213deb100d468c2f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 10:41:37 -0300 Subject: [PATCH 07/18] Apply Jenkins theme via init script --- services/jenkins/helmrelease.yaml | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index 74587db3..a5c2af3f 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -170,6 +170,23 @@ spec: println("Failed to configure OIDC realm: ${e}") throw e } + theme.groovy: | + import jenkins.model.Jenkins + import org.codefirst.SimpleThemeDecorator + + def instance = Jenkins.get() + def decorators = instance.getExtensionList(SimpleThemeDecorator.class) + + if (decorators?.size() > 0) { + def theme = decorators[0] + theme.setCssUrl("https://jenkins-contrib-themes.github.io/jenkins-material-theme/dist/material-ocean.css") + theme.setJsUrl("") + theme.setTheme("") + instance.save() + println("Applied simple-theme-plugin dark theme") + } else { + println("simple-theme-plugin not installed; skipping theme configuration") + } JCasC: defaultConfig: false securityRealm: "" @@ -244,10 +261,6 @@ spec: creationOfLegacyTokenEnabled: false tokenGenerationOnCreationEnabled: false usageStatisticsEnabled: true - unclassified: - simple-theme-plugin: - elements: - - cssUrl: "https://jenkins-contrib-themes.github.io/jenkins-material-theme/dist/material-ocean.css" creds.yaml: | credentials: system: From e8c6d3635d0a546907b54d18288c05f72fef5b25 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 10:56:43 -0300 Subject: [PATCH 08/18] jenkins: prefer rpi5 and bump controller resources --- services/jenkins/helmrelease.yaml | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index a5c2af3f..343c67b5 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -29,7 +29,36 @@ spec: values: controller: nodeSelector: - hardware: rpi4 + kubernetes.io/arch: arm64 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 90 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5"] + - weight: 50 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi4"] + resources: + requests: + cpu: 750m + memory: 1.5Gi + limits: + cpu: 1500m + memory: 3Gi + javaOpts: "-Xms512m -Xmx2048m" startupProbe: httpGet: path: /login From 79a5d1f23e432fed046fe0333919e8c8b4650114 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 11:01:08 -0300 Subject: [PATCH 09/18] Add pipeline utility steps plugin --- services/jenkins/helmrelease.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index 343c67b5..6b3b38ec 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -83,6 +83,7 @@ spec: - kubernetes - workflow-aggregator - git + - pipeline-utility-steps - configuration-as-code - oic-auth - job-dsl From 09f4494c82c20f3731cb95d7a6df8dd09d4373fa Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 11:47:40 -0300 Subject: [PATCH 10/18] Align bstein-dev-home image automation with current branch --- .../applications/bstein-dev-home/image-automation.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml index 889aa2d6..6245fb0d 100644 --- a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml +++ b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml @@ -13,14 +13,14 @@ spec: git: checkout: ref: - branch: feature/ci-gitops + branch: feature/bstein-dev-home commit: author: email: ops@bstein.dev name: flux-bot messageTemplate: "chore(bstein-dev-home): update images to {{range .Updated.Images}}{{.}}{{end}}" push: - branch: feature/ci-gitops + branch: feature/bstein-dev-home update: strategy: Setters path: services/bstein-dev-home From 83c782796c8a379a4d759402193935a309ef32b3 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 11:53:46 -0300 Subject: [PATCH 11/18] Allow prerelease semver tags for dev-home images --- services/bstein-dev-home/image.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/image.yaml b/services/bstein-dev-home/image.yaml index 4462ba57..ba2e050a 100644 --- a/services/bstein-dev-home/image.yaml +++ b/services/bstein-dev-home/image.yaml @@ -21,7 +21,7 @@ spec: extract: '$version' policy: semver: - range: ">=0.1.0" + range: ">=0.1.0-0" --- apiVersion: image.toolkit.fluxcd.io/v1beta2 kind: ImageRepository @@ -45,4 +45,4 @@ spec: extract: '$version' policy: semver: - range: ">=0.1.0" + range: ">=0.1.0-0" From bb27caa3766f6a8d3b4d1785e58768c34289b62f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 11:56:02 -0300 Subject: [PATCH 12/18] Loosen image tag regex for dev-home policies --- services/bstein-dev-home/image.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/image.yaml b/services/bstein-dev-home/image.yaml index ba2e050a..64f399de 100644 --- a/services/bstein-dev-home/image.yaml +++ b/services/bstein-dev-home/image.yaml @@ -17,7 +17,7 @@ spec: imageRepositoryRef: name: bstein-dev-home-frontend filterTags: - pattern: '^v?(?P[0-9]+\\.[0-9]+\\.[0-9]+(?:[-.][0-9A-Za-z]+)?)$' + pattern: '^(?P[0-9]+\\.[0-9]+\\.[0-9]+.*)$' extract: '$version' policy: semver: @@ -41,7 +41,7 @@ spec: imageRepositoryRef: name: bstein-dev-home-backend filterTags: - pattern: '^v?(?P[0-9]+\\.[0-9]+\\.[0-9]+(?:[-.][0-9A-Za-z]+)?)$' + pattern: '^(?P[0-9]+\\.[0-9]+\\.[0-9]+.*)$' extract: '$version' policy: semver: From 7eb1839b10c35cf986da65d7c8985e17ebf69adb Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 11:59:29 -0300 Subject: [PATCH 13/18] Let ImagePolicies rely on semver parsing without regex filters --- services/bstein-dev-home/image.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/services/bstein-dev-home/image.yaml b/services/bstein-dev-home/image.yaml index 64f399de..3b6c7579 100644 --- a/services/bstein-dev-home/image.yaml +++ b/services/bstein-dev-home/image.yaml @@ -16,9 +16,6 @@ metadata: spec: imageRepositoryRef: name: bstein-dev-home-frontend - filterTags: - pattern: '^(?P[0-9]+\\.[0-9]+\\.[0-9]+.*)$' - extract: '$version' policy: semver: range: ">=0.1.0-0" @@ -40,9 +37,6 @@ metadata: spec: imageRepositoryRef: name: bstein-dev-home-backend - filterTags: - pattern: '^(?P[0-9]+\\.[0-9]+\\.[0-9]+.*)$' - extract: '$version' policy: semver: range: ">=0.1.0-0" From 926eb272dcf806377475a4d27927fa38ba84cf1f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 12:03:01 -0300 Subject: [PATCH 14/18] Pin dev-home deploys to current semver tag --- services/bstein-dev-home/kustomization.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index dd4d6e7c..3268f6d5 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -12,6 +12,6 @@ resources: - ingress.yaml images: - name: registry.bstein.dev/bstein/bstein-dev-home-frontend - newTag: latest # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + newTag: 0.1.0-11 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} - name: registry.bstein.dev/bstein/bstein-dev-home-backend - newTag: latest # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + newTag: 0.1.0-11 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} From 403a652e2ba9a4e25df2c68b51ca81c35849dd55 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 12:14:07 -0300 Subject: [PATCH 15/18] Work around occasional OIDC DNS failures with hostAlias --- services/jenkins/helmrelease.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index 6b3b38ec..4cdede0c 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -79,6 +79,10 @@ spec: - secretName: jenkins-tls hosts: - ci.bstein.dev + hostAliases: + - ip: 38.28.125.112 + hostnames: + - sso.bstein.dev installPlugins: - kubernetes - workflow-aggregator From 2f6988189bd19fb70e7a04e6dbb4f9d5cf0a66dc Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 13:46:34 -0300 Subject: [PATCH 16/18] Expand Atlas availability window to 1y --- scripts/dashboards_render_atlas.py | 4 ++-- services/monitoring/dashboards/atlas-overview.json | 4 ++-- services/monitoring/grafana-dashboard-overview.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index a14f3967..54a33ee7 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -204,7 +204,7 @@ STUCK_TERMINATING_EXPR = ( ')) ' "or on() vector(0)" ) -UPTIME_WINDOW = "30d" +UPTIME_WINDOW = "365d" TRAEFIK_READY_EXPR = ( "(" 'sum(kube_deployment_status_replicas_available{namespace=~"traefik|kube-system",deployment="traefik"})' @@ -735,7 +735,7 @@ def build_overview(): }, { "id": 27, - "title": "Atlas Availability (30d)", + "title": "Atlas Availability", "expr": UPTIME_PERCENT_EXPR, "kind": "stat", "thresholds": UPTIME_PERCENT_THRESHOLDS, diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 9088ea1d..46ced4de 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -213,7 +213,7 @@ { "id": 27, "type": "stat", - "title": "Atlas Availability (30d)", + "title": "Atlas Availability", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -226,7 +226,7 @@ }, "targets": [ { - "expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[30d:5m])", + "expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[365d:5m])", "refId": "A" } ], diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 5fe8b6fc..3f2a3ede 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -222,7 +222,7 @@ data: { "id": 27, "type": "stat", - "title": "Atlas Availability (30d)", + "title": "Atlas Availability", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -235,7 +235,7 @@ data: }, "targets": [ { - "expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[30d:5m])", + "expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[365d:5m])", "refId": "A" } ], From 2ab38d620596180923648f014c45fb0d515f8690 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 14:56:29 -0300 Subject: [PATCH 17/18] Reduce Atlas availability query density --- scripts/dashboards_render_atlas.py | 4 +++- services/monitoring/dashboards/atlas-overview.json | 2 +- services/monitoring/grafana-dashboard-overview.yaml | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 54a33ee7..882cdde2 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -205,6 +205,8 @@ STUCK_TERMINATING_EXPR = ( "or on() vector(0)" ) UPTIME_WINDOW = "365d" +# Keep the subquery step coarse so we don't request an excessive number of points. +UPTIME_STEP = "1h" TRAEFIK_READY_EXPR = ( "(" 'sum(kube_deployment_status_replicas_available{namespace=~"traefik|kube-system",deployment="traefik"})' @@ -225,7 +227,7 @@ NODE_TIEBREAKER = " + ".join( f"({node_filter(node)}) * 1e-6 * {idx}" for idx, node in enumerate(CONTROL_ALL + WORKER_NODES, start=1) ) -UPTIME_AVG_EXPR = f"avg_over_time(({UPTIME_AVAIL_EXPR})[{UPTIME_WINDOW}:5m])" +UPTIME_AVG_EXPR = f"avg_over_time(({UPTIME_AVAIL_EXPR})[{UPTIME_WINDOW}:{UPTIME_STEP}])" UPTIME_PERCENT_EXPR = UPTIME_AVG_EXPR UPTIME_NINES_EXPR = f"-log10(1 - clamp_max({UPTIME_AVG_EXPR}, 0.999999999))" UPTIME_THRESHOLDS = { diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 46ced4de..96c271b2 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -226,7 +226,7 @@ }, "targets": [ { - "expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[365d:5m])", + "expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[365d:1h])", "refId": "A" } ], diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 3f2a3ede..7df2f3e2 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -235,7 +235,7 @@ data: }, "targets": [ { - "expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[365d:5m])", + "expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[365d:1h])", "refId": "A" } ], From 1b57ea7adb1ace9b86c0582798463ff2b62ac55f Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 15:18:14 -0300 Subject: [PATCH 18/18] Increase Atlas availability stat to 4 decimals --- scripts/dashboards_render_atlas.py | 2 +- services/monitoring/dashboards/atlas-overview.json | 2 +- services/monitoring/grafana-dashboard-overview.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 882cdde2..7ad117b9 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -742,7 +742,7 @@ def build_overview(): "kind": "stat", "thresholds": UPTIME_PERCENT_THRESHOLDS, "unit": "percentunit", - "decimals": 3, + "decimals": 4, "text_mode": "value", }, { diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 96c271b2..70062e09 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -265,7 +265,7 @@ "custom": { "displayMode": "auto" }, - "decimals": 3 + "decimals": 4 }, "overrides": [] }, diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 7df2f3e2..cfd2cd6e 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -274,7 +274,7 @@ data: "custom": { "displayMode": "auto" }, - "decimals": 3 + "decimals": 4 }, "overrides": [] },