Compare commits

..

No commits in common. "main" and "fea/titan24-gpu" have entirely different histories.

634 changed files with 4360 additions and 81663 deletions

10
.gitignore vendored
View File

@ -1,10 +0,0 @@
*.md
!README.md
!knowledge/**/*.md
!services/comms/knowledge/**/*.md
__pycache__/
*.py[cod]
.pytest_cache
.venv
.venv-ci
tmp/

77
Jenkinsfile vendored
View File

@ -1,77 +0,0 @@
// Mirror of ci/Jenkinsfile.titan-iac for multibranch discovery.
pipeline {
agent {
kubernetes {
defaultContainer 'python'
yaml """
apiVersion: v1
kind: Pod
spec:
nodeSelector:
hardware: rpi5
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: python
image: python:3.12-slim
command:
- cat
tty: true
"""
}
}
environment {
PIP_DISABLE_PIP_VERSION_CHECK = '1'
PYTHONUNBUFFERED = '1'
}
stages {
stage('Checkout') {
steps {
checkout scm
}
}
stage('Install deps') {
steps {
sh 'pip install --no-cache-dir -r ci/requirements.txt'
}
}
stage('Glue tests') {
steps {
sh 'pytest -q ci/tests/glue'
}
}
stage('Resolve Flux branch') {
steps {
script {
env.FLUX_BRANCH = sh(
returnStdout: true,
script: "awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml"
).trim()
if (!env.FLUX_BRANCH) {
error('Flux branch not found in gotk-sync.yaml')
}
echo "Flux branch: ${env.FLUX_BRANCH}"
}
}
}
stage('Promote') {
when {
expression {
def branch = env.BRANCH_NAME ?: (env.GIT_BRANCH ?: '').replaceFirst('origin/', '')
return env.FLUX_BRANCH && branch == env.FLUX_BRANCH
}
}
steps {
withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
sh '''
set +x
git config user.email "jenkins@bstein.dev"
git config user.name "jenkins"
git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git
git push origin HEAD:${FLUX_BRANCH}
'''
}
}
}
}
}

View File

@ -1,3 +0,0 @@
# titan-iac
Flux-managed Kubernetes cluster for bstein.dev services.

View File

@ -1,76 +0,0 @@
pipeline {
agent {
kubernetes {
defaultContainer 'python'
yaml """
apiVersion: v1
kind: Pod
spec:
nodeSelector:
hardware: rpi5
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: python
image: python:3.12-slim
command:
- cat
tty: true
"""
}
}
environment {
PIP_DISABLE_PIP_VERSION_CHECK = '1'
PYTHONUNBUFFERED = '1'
}
stages {
stage('Checkout') {
steps {
checkout scm
}
}
stage('Install deps') {
steps {
sh 'pip install --no-cache-dir -r ci/requirements.txt'
}
}
stage('Glue tests') {
steps {
sh 'pytest -q ci/tests/glue'
}
}
stage('Resolve Flux branch') {
steps {
script {
env.FLUX_BRANCH = sh(
returnStdout: true,
script: "awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml"
).trim()
if (!env.FLUX_BRANCH) {
error('Flux branch not found in gotk-sync.yaml')
}
echo "Flux branch: ${env.FLUX_BRANCH}"
}
}
}
stage('Promote') {
when {
expression {
def branch = env.BRANCH_NAME ?: (env.GIT_BRANCH ?: '').replaceFirst('origin/', '')
return env.FLUX_BRANCH && branch == env.FLUX_BRANCH
}
}
steps {
withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
sh '''
set +x
git config user.email "jenkins@bstein.dev"
git config user.name "jenkins"
git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git
git push origin HEAD:${FLUX_BRANCH}
'''
}
}
}
}
}

View File

@ -1,4 +0,0 @@
pytest==8.3.4
kubernetes==30.1.0
PyYAML==6.0.2
requests==2.32.3

View File

@ -1,16 +0,0 @@
max_success_age_hours: 48
allow_suspended:
- bstein-dev-home/vaultwarden-cred-sync
- comms/othrys-room-reset
- comms/pin-othrys-invite
- comms/seed-othrys-room
- finance/firefly-user-sync
- health/wger-admin-ensure
- health/wger-user-sync
- mailu-mailserver/mailu-sync-nightly
- nextcloud/nextcloud-mail-sync
ariadne_schedule_tasks:
- schedule.mailu_sync
- schedule.nextcloud_sync
- schedule.vaultwarden_sync
- schedule.wger_admin

View File

@ -1,46 +0,0 @@
from __future__ import annotations
from datetime import datetime, timezone
from pathlib import Path
import yaml
from kubernetes import client, config
CONFIG_PATH = Path(__file__).with_name("config.yaml")
def _load_config() -> dict:
with CONFIG_PATH.open("r", encoding="utf-8") as handle:
return yaml.safe_load(handle) or {}
def _load_kube():
try:
config.load_incluster_config()
except config.ConfigException:
config.load_kube_config()
def test_glue_cronjobs_recent_success():
cfg = _load_config()
max_age_hours = int(cfg.get("max_success_age_hours", 48))
allow_suspended = set(cfg.get("allow_suspended", []))
_load_kube()
batch = client.BatchV1Api()
cronjobs = batch.list_cron_job_for_all_namespaces(label_selector="atlas.bstein.dev/glue=true").items
assert cronjobs, "No glue cronjobs found with atlas.bstein.dev/glue=true"
now = datetime.now(timezone.utc)
for cronjob in cronjobs:
name = f"{cronjob.metadata.namespace}/{cronjob.metadata.name}"
if cronjob.spec.suspend:
assert name in allow_suspended, f"{name} is suspended but not in allow_suspended"
continue
last_success = cronjob.status.last_successful_time
assert last_success is not None, f"{name} has no lastSuccessfulTime"
age_hours = (now - last_success).total_seconds() / 3600
assert age_hours <= max_age_hours, f"{name} last success {age_hours:.1f}h ago"

View File

@ -1,48 +0,0 @@
from __future__ import annotations
import os
from pathlib import Path
import requests
import yaml
VM_URL = os.environ.get("VM_URL", "http://victoria-metrics-single-server:8428").rstrip("/")
CONFIG_PATH = Path(__file__).with_name("config.yaml")
def _load_config() -> dict:
with CONFIG_PATH.open("r", encoding="utf-8") as handle:
return yaml.safe_load(handle) or {}
def _query(promql: str) -> list[dict]:
response = requests.get(f"{VM_URL}/api/v1/query", params={"query": promql}, timeout=10)
response.raise_for_status()
payload = response.json()
return payload.get("data", {}).get("result", [])
def test_glue_metrics_present():
series = _query('kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}')
assert series, "No glue cronjob label series found"
def test_glue_metrics_success_join():
query = (
"kube_cronjob_status_last_successful_time "
'and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}'
)
series = _query(query)
assert series, "No glue cronjob last success series found"
def test_ariadne_schedule_metrics_present():
cfg = _load_config()
expected = cfg.get("ariadne_schedule_tasks", [])
if not expected:
return
series = _query("ariadne_schedule_next_run_timestamp_seconds")
tasks = {item.get("metric", {}).get("task") for item in series}
missing = [task for task in expected if task not in tasks]
assert not missing, f"Missing Ariadne schedule metrics for: {', '.join(missing)}"

View File

@ -1,17 +0,0 @@
# clusters/atlas/flux-system/applications/bstein-dev-home-migrations/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: bstein-dev-home-migrations
namespace: flux-system
spec:
interval: 10m
path: ./services/bstein-dev-home/oneoffs/migrations
prune: true
force: true
sourceRef:
kind: GitRepository
name: flux-system
targetNamespace: bstein-dev-home
wait: false
suspend: true

View File

@ -1,26 +0,0 @@
# clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImageUpdateAutomation
metadata:
name: bstein-dev-home
namespace: bstein-dev-home
spec:
interval: 1m0s
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
git:
checkout:
ref:
branch: feature/ariadne
commit:
author:
email: ops@bstein.dev
name: flux-bot
messageTemplate: "chore(bstein-dev-home): automated image update"
push:
branch: feature/ariadne
update:
strategy: Setters
path: services/bstein-dev-home

View File

@ -1,15 +0,0 @@
# clusters/atlas/flux-system/applications/bstein-dev-home/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: bstein-dev-home
namespace: flux-system
spec:
interval: 10m
path: ./services/bstein-dev-home
prune: true
sourceRef:
kind: GitRepository
name: flux-system
targetNamespace: bstein-dev-home
wait: false

View File

@ -1,17 +0,0 @@
# clusters/atlas/flux-system/applications/comms/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: comms
namespace: flux-system
spec:
interval: 10m
prune: true
sourceRef:
kind: GitRepository
name: flux-system
path: ./services/comms
targetNamespace: comms
timeout: 2m
dependsOn:
- name: traefik

View File

@ -1,24 +0,0 @@
# clusters/atlas/flux-system/applications/finance/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: finance
namespace: flux-system
spec:
interval: 10m
path: ./services/finance
prune: true
sourceRef:
kind: GitRepository
name: flux-system
targetNamespace: finance
healthChecks:
- apiVersion: apps/v1
kind: Deployment
name: actual-budget
namespace: finance
- apiVersion: apps/v1
kind: Deployment
name: firefly
namespace: finance
wait: false

View File

@ -1,27 +0,0 @@
# clusters/atlas/flux-system/applications/harbor/image-automation.yaml
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImageUpdateAutomation
metadata:
name: harbor
namespace: harbor
spec:
suspend: true
interval: 5m0s
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
git:
checkout:
ref:
branch: feature/ci-gitops
commit:
author:
email: ops@bstein.dev
name: flux-bot
messageTemplate: "chore(harbor): apply image updates"
push:
branch: feature/ci-gitops
update:
strategy: Setters
path: ./services/harbor

View File

@ -1,25 +0,0 @@
# clusters/atlas/flux-system/applications/health/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: health
namespace: flux-system
spec:
interval: 10m
path: ./services/health
prune: true
sourceRef:
kind: GitRepository
name: flux-system
targetNamespace: health
dependsOn:
- name: keycloak
- name: postgres
- name: traefik
- name: vault
healthChecks:
- apiVersion: apps/v1
kind: Deployment
name: wger
namespace: health
wait: false

View File

@ -1,27 +0,0 @@
# clusters/atlas/flux-system/applications/jenkins/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: jenkins
namespace: flux-system
spec:
interval: 10m
path: ./services/jenkins
prune: true
sourceRef:
kind: GitRepository
name: flux-system
targetNamespace: jenkins
dependsOn:
- name: helm
- name: traefik
healthChecks:
- apiVersion: apps/v1
kind: Deployment
name: jenkins
namespace: jenkins
- apiVersion: v1
kind: Service
name: jenkins
namespace: jenkins
wait: false

View File

@ -1,15 +0,0 @@
# clusters/atlas/flux-system/applications/keycloak/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: keycloak
namespace: flux-system
spec:
interval: 10m
prune: true
sourceRef:
kind: GitRepository
name: flux-system
path: ./services/keycloak
targetNamespace: sso
timeout: 2m

View File

@ -1,33 +0,0 @@
# clusters/atlas/flux-system/applications/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- gitea/kustomization.yaml
- vault/kustomization.yaml
- vaultwarden/kustomization.yaml
- comms/kustomization.yaml
- crypto/kustomization.yaml
- monerod/kustomization.yaml
- pegasus/kustomization.yaml
- pegasus/image-automation.yaml
- bstein-dev-home/kustomization.yaml
- bstein-dev-home/image-automation.yaml
- bstein-dev-home-migrations/kustomization.yaml
- harbor/kustomization.yaml
- harbor/image-automation.yaml
- jellyfin/kustomization.yaml
- xmr-miner/kustomization.yaml
- wallet-monero-temp/kustomization.yaml
- sui-metrics/kustomization.yaml
- openldap/kustomization.yaml
- keycloak/kustomization.yaml
- oauth2-proxy/kustomization.yaml
- mailu/kustomization.yaml
- jenkins/kustomization.yaml
- ai-llm/kustomization.yaml
- nextcloud/kustomization.yaml
- nextcloud-mail-sync/kustomization.yaml
- outline/kustomization.yaml
- planka/kustomization.yaml
- finance/kustomization.yaml
- health/kustomization.yaml

View File

@ -1,18 +0,0 @@
# clusters/atlas/flux-system/applications/mailu/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: mailu
namespace: flux-system
spec:
interval: 10m
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
path: ./services/mailu
targetNamespace: mailu-mailserver
prune: true
wait: true
dependsOn:
- name: helm

View File

@ -1,17 +0,0 @@
# clusters/atlas/flux-system/applications/nextcloud-mail-sync/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: nextcloud-mail-sync
namespace: flux-system
spec:
interval: 10m
prune: true
sourceRef:
kind: GitRepository
name: flux-system
path: ./services/nextcloud-mail-sync
targetNamespace: nextcloud
timeout: 2m
dependsOn:
- name: keycloak

View File

@ -1,16 +0,0 @@
# clusters/atlas/flux-system/applications/nextcloud/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
kind: Kustomization
metadata:
name: nextcloud
namespace: flux-system
spec:
interval: 10m
path: ./services/nextcloud
targetNamespace: nextcloud
prune: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
wait: true

View File

@ -1,15 +0,0 @@
# clusters/atlas/flux-system/applications/oauth2-proxy/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: oauth2-proxy
namespace: flux-system
spec:
interval: 10m
prune: true
sourceRef:
kind: GitRepository
name: flux-system
path: ./services/oauth2-proxy
targetNamespace: sso
timeout: 2m

View File

@ -1,28 +0,0 @@
# clusters/atlas/flux-system/applications/outline/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: outline
namespace: flux-system
spec:
interval: 10m
path: ./services/outline
prune: true
sourceRef:
kind: GitRepository
name: flux-system
targetNamespace: outline
dependsOn:
- name: keycloak
- name: mailu
- name: traefik
healthChecks:
- apiVersion: apps/v1
kind: Deployment
name: outline
namespace: outline
- apiVersion: v1
kind: Service
name: outline
namespace: outline
wait: false

View File

@ -1,26 +0,0 @@
# clusters/atlas/flux-system/applications/pegasus/image-automation.yaml
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImageUpdateAutomation
metadata:
name: pegasus
namespace: jellyfin
spec:
interval: 1m0s
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
git:
checkout:
ref:
branch: feature/ci-gitops
commit:
author:
email: ops@bstein.dev
name: flux-bot
messageTemplate: "chore(pegasus): apply image updates"
push:
branch: feature/ci-gitops
update:
strategy: Setters
path: services/pegasus

View File

@ -1,28 +0,0 @@
# clusters/atlas/flux-system/applications/planka/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: planka
namespace: flux-system
spec:
interval: 10m
path: ./services/planka
prune: true
sourceRef:
kind: GitRepository
name: flux-system
targetNamespace: planka
dependsOn:
- name: keycloak
- name: mailu
- name: traefik
healthChecks:
- apiVersion: apps/v1
kind: Deployment
name: planka
namespace: planka
- apiVersion: v1
kind: Service
name: planka
namespace: planka
wait: false

View File

@ -1,19 +0,0 @@
# clusters/atlas/flux-system/applications/sui-metrics/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: sui-metrics
namespace: flux-system
spec:
interval: 10m
path: ./services/sui-metrics/overlays/atlas
prune: true
dependsOn:
- name: monitoring
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
wait: true
timeout: 5m
targetNamespace: sui-metrics

View File

@ -1,20 +0,0 @@
# clusters/atlas/flux-system/applications/vaultwarden/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: vaultwarden
namespace: flux-system
spec:
interval: 10m
suspend: false
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
path: ./services/vaultwarden
targetNamespace: vaultwarden
prune: true
wait: true
dependsOn:
- name: helm
- name: traefik

View File

@ -1,19 +0,0 @@
# clusters/atlas/flux-system/applications/wallet-monero-temp/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: wallet-monero-temp
namespace: flux-system
spec:
interval: 10m
path: ./services/crypto/wallet-monero-temp
targetNamespace: crypto
prune: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
dependsOn:
- name: crypto
- name: xmr-miner
wait: true

View File

@ -1,8 +0,0 @@
# clusters/atlas/flux-system/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- gotk-components.yaml
- gotk-sync.yaml
- platform
- applications

View File

@ -1,17 +0,0 @@
# clusters/atlas/flux-system/platform/cert-manager-cleanup/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: cert-manager-cleanup
namespace: flux-system
spec:
interval: 30m
path: ./infrastructure/cert-manager/cleanup
prune: true
force: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
targetNamespace: cert-manager
wait: true

View File

@ -1,19 +0,0 @@
# clusters/atlas/flux-system/platform/cert-manager/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: cert-manager
namespace: flux-system
spec:
interval: 30m
path: ./infrastructure/cert-manager
prune: true
force: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
targetNamespace: cert-manager
dependsOn:
- name: helm
wait: true

View File

@ -1,15 +0,0 @@
# clusters/atlas/flux-system/platform/core/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: core
namespace: flux-system
spec:
interval: 10m
path: ./infrastructure/core
prune: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
wait: false

View File

@ -1,20 +0,0 @@
# clusters/atlas/flux-system/platform/gitops-ui/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: gitops-ui
namespace: flux-system
spec:
interval: 10m
timeout: 10m
path: ./services/gitops-ui
prune: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
targetNamespace: flux-system
dependsOn:
- name: helm
- name: traefik
wait: true

View File

@ -1,20 +0,0 @@
# clusters/atlas/flux-system/platform/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- core/kustomization.yaml
- helm/kustomization.yaml
- cert-manager/kustomization.yaml
- metallb/kustomization.yaml
- traefik/kustomization.yaml
- gitops-ui/kustomization.yaml
- monitoring/kustomization.yaml
- logging/kustomization.yaml
- maintenance/kustomization.yaml
- maintenance/image-automation.yaml
- longhorn-adopt/kustomization.yaml
- longhorn/kustomization.yaml
- longhorn-ui/kustomization.yaml
- postgres/kustomization.yaml
- ../platform/vault-csi/kustomization.yaml
- ../platform/vault-injector/kustomization.yaml

View File

@ -1,14 +0,0 @@
# clusters/atlas/flux-system/platform/logging/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: logging
namespace: flux-system
spec:
interval: 10m
path: ./services/logging
prune: true
sourceRef:
kind: GitRepository
name: flux-system
wait: false

View File

@ -1,17 +0,0 @@
# clusters/atlas/flux-system/platform/longhorn-adopt/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: longhorn-adopt
namespace: flux-system
spec:
interval: 30m
path: ./infrastructure/longhorn/adopt
prune: true
force: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
targetNamespace: longhorn-system
wait: true

View File

@ -1,20 +0,0 @@
# clusters/atlas/flux-system/platform/longhorn/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: longhorn
namespace: flux-system
spec:
interval: 30m
path: ./infrastructure/longhorn/core
prune: true
force: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
targetNamespace: longhorn-system
dependsOn:
- name: helm
- name: longhorn-adopt
wait: false

View File

@ -1,26 +0,0 @@
# clusters/atlas/flux-system/platform/maintenance/image-automation.yaml
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImageUpdateAutomation
metadata:
name: maintenance
namespace: maintenance
spec:
interval: 1m0s
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
git:
checkout:
ref:
branch: feature/ariadne
commit:
author:
email: ops@bstein.dev
name: flux-bot
messageTemplate: "chore(maintenance): automated image update"
push:
branch: feature/ariadne
update:
strategy: Setters
path: services/maintenance

View File

@ -1,15 +0,0 @@
# clusters/atlas/flux-system/platform/maintenance/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: maintenance
namespace: flux-system
spec:
interval: 10m
path: ./services/maintenance
prune: true
force: true
sourceRef:
kind: GitRepository
name: flux-system
wait: false

View File

@ -1,16 +0,0 @@
# clusters/atlas/flux-system/platform/metallb/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: metallb
namespace: flux-system
spec:
interval: 30m
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
path: ./infrastructure/metallb
prune: true
wait: true
targetNamespace: metallb-system

View File

@ -1,24 +0,0 @@
# clusters/atlas/flux-system/platform/postgres/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: postgres
namespace: flux-system
spec:
interval: 10m
path: ./infrastructure/postgres
prune: true
force: true
sourceRef:
kind: GitRepository
name: flux-system
targetNamespace: postgres
dependsOn:
- name: vault
- name: vault-csi
healthChecks:
- apiVersion: apps/v1
kind: StatefulSet
name: postgres
namespace: postgres
wait: true

View File

@ -1,16 +0,0 @@
# clusters/atlas/flux-system/platform/vault-csi/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: vault-csi
namespace: flux-system
spec:
interval: 30m
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
path: ./infrastructure/vault-csi
prune: true
wait: true
targetNamespace: kube-system

View File

@ -1,16 +0,0 @@
# clusters/atlas/flux-system/platform/vault-injector/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: vault-injector
namespace: flux-system
spec:
interval: 30m
path: ./infrastructure/vault-injector
targetNamespace: vault
prune: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
wait: true

View File

@ -1,4 +0,0 @@
# clusters/oceanus/applications/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources: []

View File

@ -1,9 +0,0 @@
# clusters/oceanus/flux-system/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
# Populate when oceanus cluster is bootstrapped with Flux.
# - gotk-components.yaml
# - gotk-sync.yaml
- ../platform
- ../applications

View File

@ -1,6 +0,0 @@
# clusters/oceanus/platform/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../infrastructure/modules/base
- ../../infrastructure/modules/profiles/oceanus-validator

View File

@ -1,5 +0,0 @@
FROM python:3.11-slim
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
RUN pip install --no-cache-dir requests psycopg2-binary

View File

@ -1,16 +0,0 @@
FROM --platform=$BUILDPLATFORM opensearchproject/data-prepper:2.8.0 AS source
FROM --platform=$TARGETPLATFORM eclipse-temurin:17-jre
ENV DATA_PREPPER_PATH=/usr/share/data-prepper
RUN useradd -u 10001 -M -U -d / -s /usr/sbin/nologin data_prepper \
&& mkdir -p /var/log/data-prepper
COPY --from=source /usr/share/data-prepper /usr/share/data-prepper
RUN chown -R 10001:10001 /usr/share/data-prepper /var/log/data-prepper
USER 10001
WORKDIR /usr/share/data-prepper
CMD ["bin/data-prepper"]

View File

@ -1,9 +0,0 @@
FROM registry.bstein.dev/infra/harbor-core:v2.14.1-arm64
USER root
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
USER harbor
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/harbor/entrypoint.sh"]

View File

@ -1,9 +0,0 @@
FROM registry.bstein.dev/infra/harbor-jobservice:v2.14.1-arm64
USER root
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
USER harbor
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/harbor/entrypoint.sh"]

View File

@ -1,9 +0,0 @@
FROM registry.bstein.dev/infra/harbor-registry:v2.14.1-arm64
USER root
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
USER harbor
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/home/harbor/entrypoint.sh"]

View File

@ -1,9 +0,0 @@
FROM registry.bstein.dev/infra/harbor-registryctl:v2.14.1-arm64
USER root
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
USER harbor
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/home/harbor/start.sh"]

View File

@ -1,10 +0,0 @@
FROM ghcr.io/element-hq/lk-jwt-service:0.3.0 AS base
FROM alpine:3.20
RUN apk add --no-cache ca-certificates
COPY --from=base /lk-jwt-service /lk-jwt-service
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/lk-jwt-service"]

View File

@ -1,10 +0,0 @@
FROM quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 AS base
FROM alpine:3.20
RUN apk add --no-cache ca-certificates
COPY --from=base /bin/oauth2-proxy /bin/oauth2-proxy
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/bin/oauth2-proxy"]

View File

@ -1,10 +0,0 @@
FROM registry.bstein.dev/streaming/pegasus:1.2.32 AS base
FROM alpine:3.20
RUN apk add --no-cache ca-certificates
COPY --from=base /pegasus /pegasus
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/pegasus"]

View File

@ -1,34 +0,0 @@
#!/bin/sh
set -eu
if [ -n "${VAULT_ENV_FILE:-}" ]; then
if [ -f "${VAULT_ENV_FILE}" ]; then
# shellcheck disable=SC1090
. "${VAULT_ENV_FILE}"
else
echo "Vault env file not found: ${VAULT_ENV_FILE}" >&2
exit 1
fi
fi
if [ -n "${VAULT_COPY_FILES:-}" ]; then
old_ifs="$IFS"
IFS=','
for pair in ${VAULT_COPY_FILES}; do
src="${pair%%:*}"
dest="${pair#*:}"
if [ -z "${src}" ] || [ -z "${dest}" ]; then
echo "Vault copy entry malformed: ${pair}" >&2
exit 1
fi
if [ ! -f "${src}" ]; then
echo "Vault file not found: ${src}" >&2
exit 1
fi
mkdir -p "$(dirname "${dest}")"
cp "${src}" "${dest}"
done
IFS="$old_ifs"
fi
exec "$@"

View File

@ -1,2 +0,0 @@
# hosts/group_vars/all.yaml
validator_version: latest

View File

@ -1,2 +0,0 @@
# hosts/host_vars/titan-24.yaml
validator_compose_path: /opt/sui-validator

View File

@ -1,28 +0,0 @@
# hosts/inventory/lab.yaml
# Replace ansible_host and ansible_user values with real connectivity details.
all:
children:
atlas:
hosts:
titan-24:
ansible_host: REPLACE_ME
ansible_user: ubuntu
roleset: tethys_hybrid
titan-22:
ansible_host: REPLACE_ME
ansible_user: debian
roleset: minipc_gpu
baremetal:
hosts:
titan-db:
ansible_host: REPLACE_ME
ansible_user: postgres
roleset: database
titan-jh:
ansible_host: REPLACE_ME
ansible_user: jump
roleset: jumphost
oceanus:
ansible_host: REPLACE_ME
ansible_user: validator
roleset: validator

View File

@ -1,29 +0,0 @@
# hosts/playbooks/site.yaml
---
- name: Configure titan-db
hosts: titan-db
gather_facts: true
roles:
- common
- titan_db
- name: Configure titan-jh
hosts: titan-jh
gather_facts: true
roles:
- common
- titan_jh
- name: Configure oceanus validator host
hosts: oceanus
gather_facts: true
roles:
- common
- oceanus_base
- name: Prepare hybrid tethys node
hosts: titan-24
gather_facts: true
roles:
- common
- tethys_canary

View File

@ -1,9 +0,0 @@
# hosts/roles/common/tasks/main.yaml
---
- name: Ensure base packages present
ansible.builtin.package:
name:
- curl
- vim
state: present
tags: ['common', 'packages']

View File

@ -1,6 +0,0 @@
# hosts/roles/oceanus_base/tasks/main.yaml
---
- name: Placeholder for oceanus base configuration
ansible.builtin.debug:
msg: "Install validator prerequisites and monitoring exporters here."
tags: ['oceanus']

View File

@ -1,6 +0,0 @@
# hosts/roles/tethys_canary/tasks/main.yaml
---
- name: Placeholder for SUI validator container runtime setup
ansible.builtin.debug:
msg: "Configure container runtime and validator compose stack here."
tags: ['tethys', 'validator']

View File

@ -1,6 +0,0 @@
# hosts/roles/titan_db/tasks/main.yaml
---
- name: Placeholder for titan-db provisioning
ansible.builtin.debug:
msg: "Install database packages, configure backups, and manage users here."
tags: ['titan_db']

View File

@ -1,19 +0,0 @@
# hosts/roles/titan_jh/tasks/main.yaml
---
- name: Install node exporter
ansible.builtin.package:
name: prometheus-node-exporter
state: present
tags: ['jumphost', 'monitoring']
- name: Enable node exporter
ansible.builtin.service:
name: prometheus-node-exporter
enabled: true
state: started
tags: ['jumphost', 'monitoring']
- name: Placeholder for jumphost hardening
ansible.builtin.debug:
msg: "Harden SSH, manage bastion tooling, and configure audit logging here."
tags: ['jumphost']

View File

@ -1,40 +0,0 @@
# infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml
apiVersion: batch/v1
kind: Job
metadata:
name: cert-manager-cleanup-2
namespace: cert-manager
spec:
backoffLimit: 1
template:
spec:
serviceAccountName: cert-manager-cleanup
restartPolicy: Never
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node-role.kubernetes.io/worker
operator: Exists
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/arch
operator: In
values: ["arm64"]
containers:
- name: cleanup
image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
command: ["/usr/bin/env", "bash"]
args: ["/scripts/cert_manager_cleanup.sh"]
volumeMounts:
- name: script
mountPath: /scripts
readOnly: true
volumes:
- name: script
configMap:
name: cert-manager-cleanup-script
defaultMode: 0555

View File

@ -1,58 +0,0 @@
# infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
name: cert-manager-cleanup
namespace: cert-manager
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: cert-manager-cleanup
rules:
- apiGroups: [""]
resources:
- pods
- services
- endpoints
- configmaps
- secrets
- serviceaccounts
verbs: ["get", "list", "watch", "delete"]
- apiGroups: ["apps"]
resources:
- deployments
- daemonsets
- statefulsets
- replicasets
verbs: ["get", "list", "watch", "delete"]
- apiGroups: ["batch"]
resources:
- jobs
- cronjobs
verbs: ["get", "list", "watch", "delete"]
- apiGroups: ["rbac.authorization.k8s.io"]
resources:
- roles
- rolebindings
- clusterroles
- clusterrolebindings
verbs: ["get", "list", "watch", "delete"]
- apiGroups: ["admissionregistration.k8s.io"]
resources:
- validatingwebhookconfigurations
- mutatingwebhookconfigurations
verbs: ["get", "list", "watch", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: cert-manager-cleanup
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cert-manager-cleanup
subjects:
- kind: ServiceAccount
name: cert-manager-cleanup
namespace: cert-manager

View File

@ -1,15 +0,0 @@
# infrastructure/cert-manager/cleanup/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- namespace.yaml
- cert-manager-cleanup-rbac.yaml
- cert-manager-cleanup-job.yaml
configMapGenerator:
- name: cert-manager-cleanup-script
namespace: cert-manager
files:
- cert_manager_cleanup.sh=scripts/cert_manager_cleanup.sh
options:
disableNameSuffixHash: true

View File

@ -1,5 +0,0 @@
# infrastructure/cert-manager/cleanup/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
name: cert-manager

View File

@ -1,37 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
namespace="cert-manager"
selectors=(
"app.kubernetes.io/name=cert-manager"
"app.kubernetes.io/instance=cert-manager"
"app.kubernetes.io/instance=certmanager-prod"
)
delete_namespaced() {
local selector="$1"
kubectl -n "${namespace}" delete deployment,daemonset,statefulset,replicaset \
--selector "${selector}" --ignore-not-found --wait=false
kubectl -n "${namespace}" delete pod,service,endpoints,serviceaccount,configmap,secret \
--selector "${selector}" --ignore-not-found --wait=false
kubectl -n "${namespace}" delete role,rolebinding \
--selector "${selector}" --ignore-not-found --wait=false
kubectl -n "${namespace}" delete job,cronjob \
--selector "${selector}" --ignore-not-found --wait=false
}
delete_cluster_scoped() {
local selector="$1"
kubectl delete clusterrole,clusterrolebinding \
--selector "${selector}" --ignore-not-found --wait=false
kubectl delete mutatingwebhookconfiguration,validatingwebhookconfiguration \
--selector "${selector}" --ignore-not-found --wait=false
}
for selector in "${selectors[@]}"; do
delete_namespaced "${selector}"
delete_cluster_scoped "${selector}"
done
kubectl delete mutatingwebhookconfiguration cert-manager-webhook --ignore-not-found --wait=false
kubectl delete validatingwebhookconfiguration cert-manager-webhook --ignore-not-found --wait=false

View File

@ -1,67 +0,0 @@
# infrastructure/cert-manager/helmrelease.yaml
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: cert-manager
namespace: cert-manager
spec:
interval: 30m
chart:
spec:
chart: cert-manager
version: v1.17.0
sourceRef:
kind: HelmRepository
name: jetstack
namespace: flux-system
install:
crds: CreateReplace
remediation: { retries: 3 }
timeout: 10m
upgrade:
crds: CreateReplace
remediation:
retries: 3
remediateLastFailure: true
cleanupOnFail: true
timeout: 10m
values:
installCRDs: true
nodeSelector:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- rpi4
webhook:
nodeSelector:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- rpi4
cainjector:
nodeSelector:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- rpi4

View File

@ -1,6 +0,0 @@
# infrastructure/cert-manager/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- namespace.yaml
- helmrelease.yaml

View File

@ -1,5 +0,0 @@
# infrastructure/cert-manager/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
name: cert-manager

View File

@ -1,4 +1,4 @@
# infrastructure/modules/base/kustomization.yaml # infrastructure/core/base/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1 apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:

View File

@ -1,4 +1,4 @@
# infrastructure/modules/base/priorityclass/kustomization.yaml # infrastructure/core/base/priorityclass/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1 apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:

View File

@ -1,4 +1,4 @@
# infrastructure/modules/base/priorityclass/scavenger.yaml # infrastructure/core/base/priorityclass/scavenger.yaml
apiVersion: scheduling.k8s.io/v1 apiVersion: scheduling.k8s.io/v1
kind: PriorityClass kind: PriorityClass
metadata: metadata:

View File

@ -1,4 +1,4 @@
# infrastructure/modules/base/runtimeclass/kustomization.yaml # infrastructure/core/base/storageclass/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1 apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:

View File

@ -1,4 +1,4 @@
# infrastructure/modules/base/runtimeclass/runtimeclass.yaml # services/jellyfin/runtimeclass.yaml
apiVersion: node.k8s.io/v1 apiVersion: node.k8s.io/v1
kind: RuntimeClass kind: RuntimeClass
metadata: metadata:

View File

@ -1,4 +1,4 @@
# infrastructure/modules/base/storageclass/asteria.yaml # infrastructure/core/base/storageclass/asteria.yaml
apiVersion: storage.k8s.io/v1 apiVersion: storage.k8s.io/v1
kind: StorageClass kind: StorageClass
metadata: metadata:

View File

@ -1,4 +1,4 @@
# infrastructure/modules/base/storageclass/astreae.yaml # infrastructure/core/base/storageclass/astreae.yaml
apiVersion: storage.k8s.io/v1 apiVersion: storage.k8s.io/v1
kind: StorageClass kind: StorageClass
metadata: metadata:

View File

@ -1,7 +1,6 @@
# infrastructure/modules/base/storageclass/kustomization.yaml # infrastructure/core/base/storageclass/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1 apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:
- asteria.yaml - asteria.yaml
- asteria-encrypted.yaml
- astreae.yaml - astreae.yaml

View File

@ -1,47 +0,0 @@
# infrastructure/core/coredns-custom.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: coredns-custom
namespace: kube-system
data:
bstein-dev.server: |
bstein.dev:53 {
errors
cache 30
hosts {
192.168.22.9 alerts.bstein.dev
192.168.22.9 auth.bstein.dev
192.168.22.9 bstein.dev
10.43.6.87 budget.bstein.dev
192.168.22.9 call.live.bstein.dev
192.168.22.9 cd.bstein.dev
192.168.22.9 chat.ai.bstein.dev
192.168.22.9 ci.bstein.dev
192.168.22.9 cloud.bstein.dev
192.168.22.9 health.bstein.dev
192.168.22.9 kit.live.bstein.dev
192.168.22.9 live.bstein.dev
192.168.22.9 logs.bstein.dev
192.168.22.9 longhorn.bstein.dev
192.168.22.4 mail.bstein.dev
192.168.22.9 matrix.live.bstein.dev
192.168.22.9 metrics.bstein.dev
192.168.22.9 monero.bstein.dev
10.43.6.87 money.bstein.dev
192.168.22.9 notes.bstein.dev
192.168.22.9 office.bstein.dev
192.168.22.9 pegasus.bstein.dev
3.136.224.193 pm-bounces.bstein.dev
3.150.68.49 pm-bounces.bstein.dev
18.189.137.81 pm-bounces.bstein.dev
192.168.22.9 registry.bstein.dev
192.168.22.9 scm.bstein.dev
192.168.22.9 secret.bstein.dev
192.168.22.9 sso.bstein.dev
192.168.22.9 stream.bstein.dev
192.168.22.9 tasks.bstein.dev
192.168.22.9 vault.bstein.dev
fallthrough
}
}

View File

@ -1,141 +0,0 @@
# infrastructure/core/coredns-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: coredns
namespace: kube-system
labels:
k8s-app: kube-dns
kubernetes.io/name: CoreDNS
spec:
progressDeadlineSeconds: 600
replicas: 2
revisionHistoryLimit: 0
selector:
matchLabels:
k8s-app: kube-dns
strategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 25%
maxUnavailable: 1
template:
metadata:
labels:
k8s-app: kube-dns
spec:
containers:
- name: coredns
image: registry.bstein.dev/infra/coredns:1.12.1
imagePullPolicy: IfNotPresent
args:
- -conf
- /etc/coredns/Corefile
ports:
- containerPort: 53
name: dns
protocol: UDP
- containerPort: 53
name: dns-tcp
protocol: TCP
- containerPort: 9153
name: metrics
protocol: TCP
livenessProbe:
httpGet:
path: /health
port: 8080
scheme: HTTP
initialDelaySeconds: 60
periodSeconds: 10
timeoutSeconds: 1
successThreshold: 1
failureThreshold: 3
readinessProbe:
httpGet:
path: /ready
port: 8181
scheme: HTTP
periodSeconds: 2
timeoutSeconds: 1
successThreshold: 1
failureThreshold: 3
resources:
limits:
memory: 170Mi
requests:
cpu: 100m
memory: 70Mi
securityContext:
allowPrivilegeEscalation: false
capabilities:
add:
- NET_BIND_SERVICE
drop:
- all
readOnlyRootFilesystem: true
volumeMounts:
- name: config-volume
mountPath: /etc/coredns
readOnly: true
- name: custom-config-volume
mountPath: /etc/coredns/custom
readOnly: true
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- rpi4
- key: node-role.kubernetes.io/worker
operator: In
values:
- "true"
dnsPolicy: Default
nodeSelector:
kubernetes.io/os: linux
priorityClassName: system-cluster-critical
restartPolicy: Always
schedulerName: default-scheduler
serviceAccountName: coredns
tolerations:
- key: CriticalAddonsOnly
operator: Exists
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
- key: node-role.kubernetes.io/master
operator: Exists
effect: NoSchedule
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: DoNotSchedule
labelSelector:
matchLabels:
k8s-app: kube-dns
- maxSkew: 1
topologyKey: topology.kubernetes.io/zone
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
k8s-app: kube-dns
volumes:
- name: config-volume
configMap:
name: coredns
defaultMode: 420
items:
- key: Corefile
path: Corefile
- key: NodeHosts
path: NodeHosts
- name: custom-config-volume
configMap:
name: coredns-custom
optional: true
defaultMode: 420

View File

@ -1,4 +1,4 @@
# infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml # infrastructure/core/gpu/daemonsets/device-plugin-jetson/daemonset.yaml
apiVersion: apps/v1 apiVersion: apps/v1
kind: DaemonSet kind: DaemonSet
metadata: metadata:
@ -30,8 +30,7 @@ spec:
imagePullPolicy: IfNotPresent imagePullPolicy: IfNotPresent
args: args:
- "--fail-on-init-error=false" - "--fail-on-init-error=false"
- "--device-list-strategy=envvar" - "--device-list-strategy=envvar,cdi"
- "--config-file=/config/config.yaml"
securityContext: securityContext:
privileged: true privileged: true
env: env:
@ -42,12 +41,7 @@ spec:
volumeMounts: volumeMounts:
- name: device-plugin - name: device-plugin
mountPath: /var/lib/kubelet/device-plugins mountPath: /var/lib/kubelet/device-plugins
- name: config
mountPath: /config
volumes: volumes:
- name: device-plugin - name: device-plugin
hostPath: hostPath:
path: /var/lib/kubelet/device-plugins path: /var/lib/kubelet/device-plugins
- name: config
configMap:
name: nvidia-device-plugin-config

View File

@ -1,4 +1,4 @@
# infrastructure/modules/profiles/components/device-plugin-jetson/kustomization.yaml # infrastructure/core/gpu/daemonsets/device-plugin-jetson/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1 apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:

View File

@ -1,4 +1,4 @@
# infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml # infrastructure/core/gpu/daemonsets/device-plugin-minipc/daemonset.yaml
apiVersion: apps/v1 apiVersion: apps/v1
kind: DaemonSet kind: DaemonSet
metadata: metadata:
@ -24,6 +24,7 @@ spec:
tolerations: tolerations:
- operator: Exists - operator: Exists
priorityClassName: system-node-critical priorityClassName: system-node-critical
runtimeClassName: nvidia
containers: containers:
- name: nvidia-device-plugin-ctr - name: nvidia-device-plugin-ctr
image: nvcr.io/nvidia/k8s-device-plugin:v0.16.2 image: nvcr.io/nvidia/k8s-device-plugin:v0.16.2
@ -32,7 +33,6 @@ spec:
- "--fail-on-init-error=false" - "--fail-on-init-error=false"
- "--device-list-strategy=envvar" - "--device-list-strategy=envvar"
- "--mig-strategy=none" - "--mig-strategy=none"
- "--config-file=/config/config.yaml"
securityContext: securityContext:
privileged: true privileged: true
env: env:
@ -43,12 +43,7 @@ spec:
volumeMounts: volumeMounts:
- name: device-plugin - name: device-plugin
mountPath: /var/lib/kubelet/device-plugins mountPath: /var/lib/kubelet/device-plugins
- name: config
mountPath: /config
volumes: volumes:
- name: device-plugin - name: device-plugin
hostPath: hostPath:
path: /var/lib/kubelet/device-plugins path: /var/lib/kubelet/device-plugins
- name: config
configMap:
name: nvidia-device-plugin-config

View File

@ -1,4 +1,4 @@
# infrastructure/modules/profiles/components/device-plugin-minipc/kustomization.yaml # infrastructure/core/gpu/daemonsets/device-plugin-minipc/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1 apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:

View File

@ -1,4 +1,4 @@
# infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml # infrastructure/core/gpu/daemonsets/device-plugin-tethys/daemonset.yaml
apiVersion: apps/v1 apiVersion: apps/v1
kind: DaemonSet kind: DaemonSet
metadata: metadata:
@ -33,7 +33,6 @@ spec:
- "--fail-on-init-error=false" - "--fail-on-init-error=false"
- "--device-list-strategy=envvar" - "--device-list-strategy=envvar"
- "--mig-strategy=none" - "--mig-strategy=none"
- "--config-file=/config/config.yaml"
securityContext: securityContext:
privileged: true privileged: true
env: env:
@ -44,12 +43,7 @@ spec:
volumeMounts: volumeMounts:
- name: device-plugin - name: device-plugin
mountPath: /var/lib/kubelet/device-plugins mountPath: /var/lib/kubelet/device-plugins
- name: config
mountPath: /config
volumes: volumes:
- name: device-plugin - name: device-plugin
hostPath: hostPath:
path: /var/lib/kubelet/device-plugins path: /var/lib/kubelet/device-plugins
- name: config
configMap:
name: nvidia-device-plugin-config

View File

@ -1,4 +1,4 @@
# infrastructure/modules/profiles/components/device-plugin-tethys/kustomization.yaml # infrastructure/core/gpu/daemonsets/device-plugin-tethys/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1 apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:

View File

@ -0,0 +1,6 @@
# infrastructure/core/gpu/daemonsets/profiles/jetson-and-tethys/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../device-plugin-jetson
- ../../device-plugin-tethys

View File

@ -0,0 +1,5 @@
# infrastructure/core/gpu/daemonsets/profiles/jetson-only/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../device-plugin-jetson

View File

@ -0,0 +1,7 @@
# infrastructure/core/gpu/daemonsets/profiles/minipc-and-jetson-and-tethys/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../device-plugin-minipc
- ../../device-plugin-tethys
- ../../device-plugin-jetson

View File

@ -0,0 +1,6 @@
# infrastructure/core/gpu/daemonsets/profiles/minipc-and-jetson/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../device-plugin-minipc
- ../../device-plugin-jetson

View File

@ -0,0 +1,6 @@
# infrastructure/core/gpu/daemonsets/profiles/minipc-and-tethys/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../device-plugin-minipc
- ../../device-plugin-tethys

View File

@ -1,6 +1,5 @@
# infrastructure/vault-csi/kustomization.yaml # infrastructure/core/gpu/daemonsets/profiles/minipc-only/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1 apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:
- secrets-store-csi-driver.yaml - ../../device-plugin-minipc
- vault-csi-provider.yaml

View File

@ -0,0 +1,5 @@
# infrastructure/core/gpu/daemonsets/profiles/tethys-only/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../device-plugin-tethys

View File

@ -0,0 +1,5 @@
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
name: nvidia
handler: nvidia

View File

@ -2,10 +2,8 @@
apiVersion: kustomize.config.k8s.io/v1beta1 apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:
- ../modules/base - base
- ../modules/profiles/atlas-ha # - gpu/profiles/jetson-only
- coredns-custom.yaml # - gpu/profiles/minipc-and-jetson
- coredns-deployment.yaml # - gpu/profiles/minipc-only
- ntp-sync-daemonset.yaml - gpu/profiles/tethys-only
- ../sources/cert-manager/letsencrypt.yaml
- ../sources/cert-manager/letsencrypt-prod.yaml

View File

@ -1,50 +0,0 @@
# infrastructure/core/ntp-sync-daemonset.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: ntp-sync
namespace: kube-system
labels:
app: ntp-sync
spec:
selector:
matchLabels:
app: ntp-sync
template:
metadata:
labels:
app: ntp-sync
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node-role.kubernetes.io/control-plane
operator: DoesNotExist
- key: node-role.kubernetes.io/master
operator: DoesNotExist
containers:
- name: ntp-sync
image: public.ecr.aws/docker/library/busybox:1.36.1
imagePullPolicy: IfNotPresent
command: ["/bin/sh", "-c"]
args:
- |
set -eu
while true; do
ntpd -q -p pool.ntp.org || true
sleep 300
done
securityContext:
capabilities:
add: ["SYS_TIME"]
runAsUser: 0
runAsGroup: 0
resources:
requests:
cpu: 10m
memory: 16Mi
limits:
cpu: 50m
memory: 64Mi

Some files were not shown because too many files have changed in this diff Show More