From b1f8981b6c7ce1162b7a00e980e9d7711b7cfc47 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 02:02:21 -0300 Subject: [PATCH 01/18] monero ingress + move pegasus to arm64 --- services/crypto/monerod/ingress.yaml | 25 ++++++++++++++++++++++ services/crypto/monerod/kustomization.yaml | 1 + services/pegasus/deployment.yaml | 3 ++- 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 services/crypto/monerod/ingress.yaml diff --git a/services/crypto/monerod/ingress.yaml b/services/crypto/monerod/ingress.yaml new file mode 100644 index 0000000..1ac06c7 --- /dev/null +++ b/services/crypto/monerod/ingress.yaml @@ -0,0 +1,25 @@ +# services/crypto/monerod/ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: monerod + namespace: crypto + annotations: + kubernetes.io/ingress.class: traefik + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" + cert-manager.io/cluster-issuer: letsencrypt +spec: + tls: + - hosts: [ "monero.bstein.dev" ] + secretName: monero-bstein-dev-tls + rules: + - host: monero.bstein.dev + http: + paths: + - pathType: Prefix + path: / + backend: + service: + name: monerod + port: { number: 18081 } diff --git a/services/crypto/monerod/kustomization.yaml b/services/crypto/monerod/kustomization.yaml index 73b6cf6..d6c1b19 100644 --- a/services/crypto/monerod/kustomization.yaml +++ b/services/crypto/monerod/kustomization.yaml @@ -6,3 +6,4 @@ resources: - cm-release-keys.yaml - deployment.yaml - service.yaml + - ingress.yaml diff --git a/services/pegasus/deployment.yaml b/services/pegasus/deployment.yaml index 3a10dfe..34270b0 100644 --- a/services/pegasus/deployment.yaml +++ b/services/pegasus/deployment.yaml @@ -16,7 +16,8 @@ spec: metadata: { labels: { app: pegasus } } spec: nodeSelector: - kubernetes.io/arch: amd64 + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" securityContext: runAsNonRoot: true runAsUser: 65532 From 
85cc80525c4df63f285ce0f72c2681a996444372 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 02:19:59 -0300 Subject: [PATCH 02/18] point flux to feature/bstein-dev-home branch --- clusters/atlas/flux-system/gotk-sync.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusters/atlas/flux-system/gotk-sync.yaml b/clusters/atlas/flux-system/gotk-sync.yaml index 006bdd3..5318f12 100644 --- a/clusters/atlas/flux-system/gotk-sync.yaml +++ b/clusters/atlas/flux-system/gotk-sync.yaml @@ -8,7 +8,7 @@ metadata: spec: interval: 1m0s ref: - branch: feature/ci-gitops + branch: feature/bstein-dev-home secretRef: name: flux-system-gitea url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git From 8e977f162cf6c6d5f87d5eaa9c00149d97df3164 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 02:50:31 -0300 Subject: [PATCH 03/18] Jenkins: add dark theme via simple-theme-plugin --- services/jenkins/helmrelease.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index c94c7fb..34d5a01 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -50,6 +50,7 @@ spec: - oic-auth - job-dsl - configuration-as-code-support + - simple-theme-plugin containerEnv: - name: ENABLE_OIDC value: "true" @@ -287,6 +288,11 @@ spec: } } } + theme.yaml: | + unclassified: + simple-theme-plugin: + elements: + - cssUrl: "https://jenkins-contrib-themes.github.io/jenkins-material-theme/dist/material-ocean.css" pipelineJob('bstein-dev-home') { triggers { scm('H/2 * * * *') From 6d8dbfc214f4e872186b97e00c48118352b598c0 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 02:55:25 -0300 Subject: [PATCH 04/18] Fix Jenkins JCasC theme/job blocks --- services/jenkins/helmrelease.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index 34d5a01..a6d1f4a 
100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -288,11 +288,6 @@ spec: } } } - theme.yaml: | - unclassified: - simple-theme-plugin: - elements: - - cssUrl: "https://jenkins-contrib-themes.github.io/jenkins-material-theme/dist/material-ocean.css" pipelineJob('bstein-dev-home') { triggers { scm('H/2 * * * *') @@ -312,6 +307,11 @@ spec: } } } + theme: | + unclassified: + simple-theme-plugin: + elements: + - cssUrl: "https://jenkins-contrib-themes.github.io/jenkins-material-theme/dist/material-ocean.css" persistence: enabled: true storageClass: astreae From ac41f612408de7d858104038b5fbbcefe585f950 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 03:01:58 -0300 Subject: [PATCH 05/18] Jenkins theme: merge into base config to avoid JCasC conflict --- services/jenkins/helmrelease.yaml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index a6d1f4a..a12e9a9 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -237,6 +237,9 @@ spec: tokenGenerationOnCreationEnabled: false usageStatisticsEnabled: true unclassified: + simple-theme-plugin: + elements: + - cssUrl: "https://jenkins-contrib-themes.github.io/jenkins-material-theme/dist/material-ocean.css" creds.yaml: | credentials: system: @@ -307,11 +310,6 @@ spec: } } } - theme: | - unclassified: - simple-theme-plugin: - elements: - - cssUrl: "https://jenkins-contrib-themes.github.io/jenkins-material-theme/dist/material-ocean.css" persistence: enabled: true storageClass: astreae From 5531ec045d0d0dbd16a8670c597d05a9b8875251 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 03:12:22 -0300 Subject: [PATCH 06/18] Jenkins: add startupProbe delay to avoid premature restarts --- services/jenkins/helmrelease.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/services/jenkins/helmrelease.yaml 
b/services/jenkins/helmrelease.yaml index a12e9a9..74587db 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -30,6 +30,14 @@ spec: controller: nodeSelector: hardware: rpi4 + startupProbe: + httpGet: + path: /login + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 20 jenkinsUrl: https://ci.bstein.dev ingress: enabled: true From b7e45457345f6c8eaeb3d773fca8208776e2695a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 10:41:37 -0300 Subject: [PATCH 07/18] Apply Jenkins theme via init script --- services/jenkins/helmrelease.yaml | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index 74587db..a5c2af3 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -170,6 +170,23 @@ spec: println("Failed to configure OIDC realm: ${e}") throw e } + theme.groovy: | + import jenkins.model.Jenkins + import org.codefirst.SimpleThemeDecorator + + def instance = Jenkins.get() + def decorators = instance.getExtensionList(SimpleThemeDecorator.class) + + if (decorators?.size() > 0) { + def theme = decorators[0] + theme.setCssUrl("https://jenkins-contrib-themes.github.io/jenkins-material-theme/dist/material-ocean.css") + theme.setJsUrl("") + theme.setTheme("") + instance.save() + println("Applied simple-theme-plugin dark theme") + } else { + println("simple-theme-plugin not installed; skipping theme configuration") + } JCasC: defaultConfig: false securityRealm: "" @@ -244,10 +261,6 @@ spec: creationOfLegacyTokenEnabled: false tokenGenerationOnCreationEnabled: false usageStatisticsEnabled: true - unclassified: - simple-theme-plugin: - elements: - - cssUrl: "https://jenkins-contrib-themes.github.io/jenkins-material-theme/dist/material-ocean.css" creds.yaml: | credentials: system: From 25ec2b03541b9dc34e07ee2d4de625b4131012a3 Mon Sep 17 00:00:00 
2001 From: Brad Stein Date: Thu, 18 Dec 2025 10:56:43 -0300 Subject: [PATCH 08/18] jenkins: prefer rpi5 and bump controller resources --- services/jenkins/helmrelease.yaml | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index a5c2af3..343c67b 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -29,7 +29,36 @@ spec: values: controller: nodeSelector: - hardware: rpi4 + kubernetes.io/arch: arm64 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 90 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5"] + - weight: 50 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi4"] + resources: + requests: + cpu: 750m + memory: 1.5Gi + limits: + cpu: 1500m + memory: 3Gi + javaOpts: "-Xms512m -Xmx2048m" startupProbe: httpGet: path: /login From f3c96b7f8d81a51c68959dd28bffee55b1c78a90 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 11:01:08 -0300 Subject: [PATCH 09/18] Add pipeline utility steps plugin --- services/jenkins/helmrelease.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index 343c67b..6b3b38e 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -83,6 +83,7 @@ spec: - kubernetes - workflow-aggregator - git + - pipeline-utility-steps - configuration-as-code - oic-auth - job-dsl From b6bea73ab798b307eb4aa4e70704c49ff88a4991 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 11:47:40 -0300 Subject: [PATCH 10/18] Align bstein-dev-home image automation with current branch --- 
.../applications/bstein-dev-home/image-automation.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml index 889aa2d..6245fb0 100644 --- a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml +++ b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml @@ -13,14 +13,14 @@ spec: git: checkout: ref: - branch: feature/ci-gitops + branch: feature/bstein-dev-home commit: author: email: ops@bstein.dev name: flux-bot messageTemplate: "chore(bstein-dev-home): update images to {{range .Updated.Images}}{{.}}{{end}}" push: - branch: feature/ci-gitops + branch: feature/bstein-dev-home update: strategy: Setters path: services/bstein-dev-home From d8863d84de6eda1d010947764e593bb8dad8d659 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 11:53:46 -0300 Subject: [PATCH 11/18] Allow prerelease semver tags for dev-home images --- services/bstein-dev-home/image.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/image.yaml b/services/bstein-dev-home/image.yaml index 4462ba5..ba2e050 100644 --- a/services/bstein-dev-home/image.yaml +++ b/services/bstein-dev-home/image.yaml @@ -21,7 +21,7 @@ spec: extract: '$version' policy: semver: - range: ">=0.1.0" + range: ">=0.1.0-0" --- apiVersion: image.toolkit.fluxcd.io/v1beta2 kind: ImageRepository @@ -45,4 +45,4 @@ spec: extract: '$version' policy: semver: - range: ">=0.1.0" + range: ">=0.1.0-0" From d4b64067261ae4655cfd2fd363c56aee5eceab72 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 11:56:02 -0300 Subject: [PATCH 12/18] Loosen image tag regex for dev-home policies --- services/bstein-dev-home/image.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/image.yaml 
b/services/bstein-dev-home/image.yaml index ba2e050..64f399d 100644 --- a/services/bstein-dev-home/image.yaml +++ b/services/bstein-dev-home/image.yaml @@ -17,7 +17,7 @@ spec: imageRepositoryRef: name: bstein-dev-home-frontend filterTags: - pattern: '^v?(?P<version>[0-9]+\\.[0-9]+\\.[0-9]+(?:[-.][0-9A-Za-z]+)?)$' + pattern: '^(?P<version>[0-9]+\\.[0-9]+\\.[0-9]+.*)$' extract: '$version' policy: semver: @@ -41,7 +41,7 @@ spec: imageRepositoryRef: name: bstein-dev-home-backend filterTags: - pattern: '^v?(?P<version>[0-9]+\\.[0-9]+\\.[0-9]+(?:[-.][0-9A-Za-z]+)?)$' + pattern: '^(?P<version>[0-9]+\\.[0-9]+\\.[0-9]+.*)$' extract: '$version' policy: semver: From eac965b2836fa88b1967d7d82858d3125aab8042 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 11:59:29 -0300 Subject: [PATCH 13/18] Let ImagePolicies rely on semver parsing without regex filters --- services/bstein-dev-home/image.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/services/bstein-dev-home/image.yaml b/services/bstein-dev-home/image.yaml index 64f399d..3b6c757 100644 --- a/services/bstein-dev-home/image.yaml +++ b/services/bstein-dev-home/image.yaml @@ -16,9 +16,6 @@ metadata: spec: imageRepositoryRef: name: bstein-dev-home-frontend - filterTags: - pattern: '^(?P<version>[0-9]+\\.[0-9]+\\.[0-9]+.*)$' - extract: '$version' policy: semver: range: ">=0.1.0-0" @@ -40,9 +37,6 @@ metadata: spec: imageRepositoryRef: name: bstein-dev-home-backend - filterTags: - pattern: '^(?P<version>[0-9]+\\.[0-9]+\\.[0-9]+.*)$' - extract: '$version' policy: semver: range: ">=0.1.0-0" From 5b113d798b411a4f506e41903dcd5b198c005f5b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 12:03:01 -0300 Subject: [PATCH 14/18] Pin dev-home deploys to current semver tag --- services/bstein-dev-home/kustomization.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index dd4d6e7..3268f6d 100644 ---
a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -12,6 +12,6 @@ resources: - ingress.yaml images: - name: registry.bstein.dev/bstein/bstein-dev-home-frontend - newTag: latest # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + newTag: 0.1.0-11 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} - name: registry.bstein.dev/bstein/bstein-dev-home-backend - newTag: latest # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + newTag: 0.1.0-11 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} From 6aadbadb9aae80ccd8c014298c04079523f2ff16 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 18 Dec 2025 12:14:07 -0300 Subject: [PATCH 15/18] Work around occasional OIDC DNS failures with hostAlias --- services/jenkins/helmrelease.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/services/jenkins/helmrelease.yaml b/services/jenkins/helmrelease.yaml index 6b3b38e..4cdede0 100644 --- a/services/jenkins/helmrelease.yaml +++ b/services/jenkins/helmrelease.yaml @@ -79,6 +79,10 @@ spec: - secretName: jenkins-tls hosts: - ci.bstein.dev + hostAliases: + - ip: 38.28.125.112 + hostnames: + - sso.bstein.dev installPlugins: - kubernetes - workflow-aggregator From 8be89cbd531f58512fb2b85c93b5475b402cf889 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 13:46:34 -0300 Subject: [PATCH 16/18] Expand Atlas availability window to 1y --- scripts/dashboards_render_atlas.py | 4 ++-- services/monitoring/dashboards/atlas-overview.json | 4 ++-- services/monitoring/grafana-dashboard-overview.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index a14f396..54a33ee 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -204,7 +204,7 @@ STUCK_TERMINATING_EXPR = ( ')) ' "or on() vector(0)" ) -UPTIME_WINDOW = "30d" +UPTIME_WINDOW = "365d" 
TRAEFIK_READY_EXPR = ( "(" 'sum(kube_deployment_status_replicas_available{namespace=~"traefik|kube-system",deployment="traefik"})' @@ -735,7 +735,7 @@ def build_overview(): }, { "id": 27, - "title": "Atlas Availability (30d)", + "title": "Atlas Availability", "expr": UPTIME_PERCENT_EXPR, "kind": "stat", "thresholds": UPTIME_PERCENT_THRESHOLDS, diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 9088ea1..46ced4d 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -213,7 +213,7 @@ { "id": 27, "type": "stat", - "title": "Atlas Availability (30d)", + "title": "Atlas Availability", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -226,7 +226,7 @@ }, "targets": [ { - "expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[30d:5m])", + "expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[365d:5m])", "refId": "A" } ], diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 5fe8b6f..3f2a3ed 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -222,7 +222,7 @@ data: { "id": 27, "type": "stat", - "title": "Atlas Availability (30d)", + "title": "Atlas Availability", 
"datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -235,7 +235,7 @@ data: }, "targets": [ { - "expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[30d:5m])", + "expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[365d:5m])", "refId": "A" } ], From 89f95157d8cbc21e5fc98a7ba9079eed8aee7756 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 14:56:29 -0300 Subject: [PATCH 17/18] Reduce Atlas availability query density --- scripts/dashboards_render_atlas.py | 4 +++- services/monitoring/dashboards/atlas-overview.json | 2 +- services/monitoring/grafana-dashboard-overview.yaml | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 54a33ee..882cdde 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -205,6 +205,8 @@ STUCK_TERMINATING_EXPR = ( "or on() vector(0)" ) UPTIME_WINDOW = "365d" +# Keep the subquery step coarse so we don't request an excessive number of points. 
+UPTIME_STEP = "1h" TRAEFIK_READY_EXPR = ( "(" 'sum(kube_deployment_status_replicas_available{namespace=~"traefik|kube-system",deployment="traefik"})' @@ -225,7 +227,7 @@ NODE_TIEBREAKER = " + ".join( f"({node_filter(node)}) * 1e-6 * {idx}" for idx, node in enumerate(CONTROL_ALL + WORKER_NODES, start=1) ) -UPTIME_AVG_EXPR = f"avg_over_time(({UPTIME_AVAIL_EXPR})[{UPTIME_WINDOW}:5m])" +UPTIME_AVG_EXPR = f"avg_over_time(({UPTIME_AVAIL_EXPR})[{UPTIME_WINDOW}:{UPTIME_STEP}])" UPTIME_PERCENT_EXPR = UPTIME_AVG_EXPR UPTIME_NINES_EXPR = f"-log10(1 - clamp_max({UPTIME_AVG_EXPR}, 0.999999999))" UPTIME_THRESHOLDS = { diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 46ced4d..96c271b 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -226,7 +226,7 @@ }, "targets": [ { - "expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[365d:5m])", + "expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[365d:1h])", "refId": "A" } ], diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 3f2a3ed..7df2f3e 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -235,7 +235,7 @@ data: }, "targets": [ { - 
"expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[365d:5m])", + "expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[365d:1h])", "refId": "A" } ], From a2b34c571280784ce74e12e7a2e8018b0b3bdffc Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 19 Dec 2025 15:18:14 -0300 Subject: [PATCH 18/18] Increase Atlas availability stat to 4 decimals --- scripts/dashboards_render_atlas.py | 2 +- services/monitoring/dashboards/atlas-overview.json | 2 +- services/monitoring/grafana-dashboard-overview.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 882cdde..7ad117b 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -742,7 +742,7 @@ def build_overview(): "kind": "stat", "thresholds": UPTIME_PERCENT_THRESHOLDS, "unit": "percentunit", - "decimals": 3, + "decimals": 4, "text_mode": "value", }, { diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 96c271b..70062e0 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -265,7 +265,7 @@ "custom": { "displayMode": "auto" }, - "decimals": 3 + "decimals": 4 }, "overrides": [] }, diff --git a/services/monitoring/grafana-dashboard-overview.yaml 
b/services/monitoring/grafana-dashboard-overview.yaml index 7df2f3e..cfd2cd6 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -274,7 +274,7 @@ data: "custom": { "displayMode": "auto" }, - "decimals": 3 + "decimals": 4 }, "overrides": [] },