veles: stage atlas infrastructure
This commit is contained in:
parent
e1d091eb14
commit
654900b8a2
@ -28,6 +28,7 @@ resources:
|
|||||||
- ai-llm/kustomization.yaml
|
- ai-llm/kustomization.yaml
|
||||||
- openclaw/kustomization.yaml
|
- openclaw/kustomization.yaml
|
||||||
- game-stream/kustomization.yaml
|
- game-stream/kustomization.yaml
|
||||||
|
- veles/kustomization.yaml
|
||||||
- typhon/kustomization.yaml
|
- typhon/kustomization.yaml
|
||||||
- nextcloud/kustomization.yaml
|
- nextcloud/kustomization.yaml
|
||||||
- nextcloud-mail-sync/kustomization.yaml
|
- nextcloud-mail-sync/kustomization.yaml
|
||||||
|
|||||||
@ -0,0 +1,29 @@
|
|||||||
|
# clusters/atlas/flux-system/applications/veles/image-automation.yaml
# Staged for the first Veles image rollout. Add this file to the parent
# applications kustomization after the namespace exists and the Harbor repos
# have initial tags.
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImageUpdateAutomation
metadata:
  name: veles
  namespace: veles
spec:
  interval: 1m0s
  # The Git source lives in flux-system, not in the veles namespace.
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  git:
    # Check out and push back to the same branch (main).
    checkout:
      ref:
        branch: main
    commit:
      author:
        email: ops@bstein.dev
        name: flux-bot
      messageTemplate: "chore(veles): automated image update"
    push:
      branch: main
  # Only setter markers found under services/veles are rewritten.
  update:
    strategy: Setters
    path: services/veles
|
||||||
@ -0,0 +1,28 @@
|
|||||||
|
# clusters/atlas/flux-system/applications/veles/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: veles
  namespace: flux-system
  annotations:
    # NOTE(review): this annotation sits on the Kustomization object itself;
    # confirm that create-only behaviour for this object is intended.
    kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
  interval: 10m
  # Manifests come from ./services/veles of the flux-system GitRepository and
  # are applied with targetNamespace veles.
  path: ./services/veles
  targetNamespace: veles
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  # Platform pieces that must be reconciled before Veles.
  dependsOn:
    - name: cert-manager
    - name: core
    - name: keycloak
    - name: longhorn
    - name: traefik
    - name: vault
    - name: vault-csi
    - name: vault-injector
  wait: false
  timeout: 20m
|
||||||
@ -55,6 +55,20 @@ spec:
|
|||||||
k label node titan-22 atlas.bstein.dev/general-compute=last-resort --overwrite=true || true
|
k label node titan-22 atlas.bstein.dev/general-compute=last-resort --overwrite=true || true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if k get node titan-23 >/dev/null 2>&1; then
|
||||||
|
k label node titan-23 \
|
||||||
|
veles.bstein.dev/simulation=true \
|
||||||
|
veles.bstein.dev/node-pool=oceanus \
|
||||||
|
node-role.kubernetes.io/veles-sim=true \
|
||||||
|
longhorn-host=true \
|
||||||
|
hardware=oceanus \
|
||||||
|
--overwrite=true || true
|
||||||
|
k label node titan-23 node-role.kubernetes.io/worker- || true
|
||||||
|
k taint node titan-23 veles.bstein.dev/simulation=true:NoSchedule --overwrite=true || true
|
||||||
|
else
|
||||||
|
echo "skipping missing node titan-23"
|
||||||
|
fi
|
||||||
|
|
||||||
for node in titan-13 titan-15 titan-17 titan-19; do
|
for node in titan-13 titan-15 titan-17 titan-19; do
|
||||||
if k get node "${node}" >/dev/null 2>&1; then
|
if k get node "${node}" >/dev/null 2>&1; then
|
||||||
k label node "${node}" atlas.bstein.dev/spillover=true longhorn-host=true --overwrite=true || true
|
k label node "${node}" atlas.bstein.dev/spillover=true longhorn-host=true --overwrite=true || true
|
||||||
|
|||||||
@ -81,7 +81,13 @@ spec:
|
|||||||
tag: v2.16.0
|
tag: v2.16.0
|
||||||
defaultSettings:
|
defaultSettings:
|
||||||
systemManagedPodsImagePullPolicy: Always
|
systemManagedPodsImagePullPolicy: Always
|
||||||
|
taintToleration: veles.bstein.dev/simulation=true:NoSchedule
|
||||||
longhornManager:
|
longhornManager:
|
||||||
|
tolerations:
|
||||||
|
- key: veles.bstein.dev/simulation
|
||||||
|
operator: Equal
|
||||||
|
value: "true"
|
||||||
|
effect: NoSchedule
|
||||||
nodeSelector:
|
nodeSelector:
|
||||||
longhorn-host: "true"
|
longhorn-host: "true"
|
||||||
longhornDriver:
|
longhornDriver:
|
||||||
|
|||||||
@ -7,6 +7,7 @@ resources:
|
|||||||
- secretproviderclass.yaml
|
- secretproviderclass.yaml
|
||||||
- vault-sync-deployment.yaml
|
- vault-sync-deployment.yaml
|
||||||
- helmrelease.yaml
|
- helmrelease.yaml
|
||||||
|
- veles-recurring-jobs.yaml
|
||||||
- longhorn-settings-ensure-job.yaml
|
- longhorn-settings-ensure-job.yaml
|
||||||
- longhorn-disk-tags-ensure-job.yaml
|
- longhorn-disk-tags-ensure-job.yaml
|
||||||
|
|
||||||
|
|||||||
@ -2,7 +2,7 @@
|
|||||||
apiVersion: batch/v1
|
apiVersion: batch/v1
|
||||||
kind: Job
|
kind: Job
|
||||||
metadata:
|
metadata:
|
||||||
name: longhorn-disk-tags-ensure-1
|
name: longhorn-disk-tags-ensure-3
|
||||||
namespace: longhorn-system
|
namespace: longhorn-system
|
||||||
spec:
|
spec:
|
||||||
backoffLimit: 0
|
backoffLimit: 0
|
||||||
|
|||||||
@ -17,10 +17,28 @@ import urllib.request
|
|||||||
|
|
||||||
LONGHORN_NS = "longhorn-system"
|
LONGHORN_NS = "longhorn-system"
|
||||||
LONGHORN_API = "/apis/longhorn.io/v1beta2/namespaces/{namespace}/nodes"
|
LONGHORN_API = "/apis/longhorn.io/v1beta2/namespaces/{namespace}/nodes"
|
||||||
DESIRED_TAGS = {
|
DESIRED_DISK_TAGS = {
|
||||||
"/mnt/astreae": "astreae",
|
"/mnt/astreae": ["astreae"],
|
||||||
"/mnt/asteria": "asteria",
|
"/mnt/asteria": ["asteria"],
|
||||||
|
"/mnt/veles": ["veles-oceanus", "veles-db", "veles-artifacts"],
|
||||||
|
"/mnt/veles-db": ["veles-oceanus", "veles-db"],
|
||||||
|
"/mnt/veles-artifacts": ["veles-oceanus", "veles-artifacts"],
|
||||||
}
|
}
|
||||||
|
DESIRED_NODE_TAGS = {
|
||||||
|
"titan-23": ["veles-oceanus"],
|
||||||
|
}
|
||||||
|
DESIRED_NODE_DISKS = {
|
||||||
|
"titan-23": {
|
||||||
|
"veles-oceanus": {
|
||||||
|
"path": "/mnt/veles",
|
||||||
|
"allowScheduling": True,
|
||||||
|
"evictionRequested": False,
|
||||||
|
"storageReserved": 0,
|
||||||
|
"tags": ["veles-oceanus", "veles-db", "veles-artifacts"],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
DISABLE_DEFAULT_DISK_NODES = {"titan-23"}
|
||||||
|
|
||||||
|
|
||||||
def api_base() -> str:
|
def api_base() -> str:
|
||||||
@ -63,8 +81,30 @@ def list_nodes() -> list[dict]:
|
|||||||
return data.get("items", [])
|
return data.get("items", [])
|
||||||
|
|
||||||
|
|
||||||
def patch_disk_tags(node_name: str, disk_name: str, desired_tag: str) -> None:
|
def merged_tags(current_tags: list[str], desired_tags: list[str]) -> list[str]:
|
||||||
body = {"spec": {"disks": {disk_name: {"tags": [desired_tag]}}}}
|
return sorted(dict.fromkeys([*current_tags, *desired_tags]))
|
||||||
|
|
||||||
|
|
||||||
|
def patch_node_tags(node_name: str, desired_tags: list[str]) -> None:
    """Send a PATCH that sets ``spec.tags`` of one Longhorn node.

    Args:
        node_name: name of the Longhorn node object to patch.
        desired_tags: the complete tag list written to ``spec.tags``.
    """
    nodes_path = LONGHORN_API.format(namespace=LONGHORN_NS)
    tag_patch = {"spec": {"tags": desired_tags}}
    request_json("PATCH", f"{nodes_path}/{node_name}", body=tag_patch)
|
||||||
|
|
||||||
|
|
||||||
|
def patch_disk_tags(node_name: str, disk_name: str, desired_tags: list[str]) -> None:
    """Send a PATCH that sets the tag list of one disk on a Longhorn node.

    Args:
        node_name: name of the Longhorn node object that owns the disk.
        disk_name: key of the disk under ``spec.disks``.
        desired_tags: the complete tag list written to that disk's ``tags``.
    """
    nodes_path = LONGHORN_API.format(namespace=LONGHORN_NS)
    disk_patch = {"spec": {"disks": {disk_name: {"tags": desired_tags}}}}
    request_json("PATCH", f"{nodes_path}/{node_name}", body=disk_patch)
|
||||||
|
|
||||||
|
|
||||||
|
def patch_disks(node_name: str, disks: dict) -> None:
|
||||||
|
body = {"spec": {"disks": disks}}
|
||||||
request_json(
|
request_json(
|
||||||
"PATCH",
|
"PATCH",
|
||||||
f"{LONGHORN_API.format(namespace=LONGHORN_NS)}/{node_name}",
|
f"{LONGHORN_API.format(namespace=LONGHORN_NS)}/{node_name}",
|
||||||
@ -78,18 +118,52 @@ def main() -> int:
|
|||||||
|
|
||||||
for node in list_nodes():
|
for node in list_nodes():
|
||||||
name = node.get("metadata", {}).get("name", "")
|
name = node.get("metadata", {}).get("name", "")
|
||||||
|
desired_node_tags = DESIRED_NODE_TAGS.get(name)
|
||||||
|
if desired_node_tags:
|
||||||
|
current_node_tags = node.get("spec", {}).get("tags") or []
|
||||||
|
next_node_tags = merged_tags(current_node_tags, desired_node_tags)
|
||||||
|
if current_node_tags != next_node_tags:
|
||||||
|
print(f"patching {name} node tags={current_node_tags!r} -> {next_node_tags!r}")
|
||||||
|
patch_node_tags(name, next_node_tags)
|
||||||
|
changed += 1
|
||||||
|
else:
|
||||||
|
skipped += 1
|
||||||
|
|
||||||
spec_disks = node.get("spec", {}).get("disks", {}) or {}
|
spec_disks = node.get("spec", {}).get("disks", {}) or {}
|
||||||
|
desired_disks = DESIRED_NODE_DISKS.get(name, {})
|
||||||
|
missing_disks = {
|
||||||
|
disk_name: disk_spec
|
||||||
|
for disk_name, disk_spec in desired_disks.items()
|
||||||
|
if disk_name not in spec_disks
|
||||||
|
}
|
||||||
|
if missing_disks:
|
||||||
|
print(f"adding {name} disks={sorted(missing_disks)}")
|
||||||
|
patch_disks(name, missing_disks)
|
||||||
|
changed += len(missing_disks)
|
||||||
|
spec_disks = {**spec_disks, **missing_disks}
|
||||||
|
|
||||||
|
if name in DISABLE_DEFAULT_DISK_NODES:
|
||||||
|
disable_patch = {}
|
||||||
|
for disk_name, disk in spec_disks.items():
|
||||||
|
disk_path = (disk.get("path") or "").rstrip("/")
|
||||||
|
if disk_path == "/var/lib/longhorn" and disk.get("allowScheduling", True):
|
||||||
|
disable_patch[disk_name] = {"allowScheduling": False}
|
||||||
|
if disable_patch:
|
||||||
|
print(f"disabling default Longhorn scheduling on {name} disks={sorted(disable_patch)}")
|
||||||
|
patch_disks(name, disable_patch)
|
||||||
|
changed += len(disable_patch)
|
||||||
|
|
||||||
for disk_name, disk in spec_disks.items():
|
for disk_name, disk in spec_disks.items():
|
||||||
disk_path = disk.get("path")
|
disk_path = disk.get("path")
|
||||||
desired_tag = DESIRED_TAGS.get(disk_path)
|
desired_disk_tags = DESIRED_DISK_TAGS.get(disk_path)
|
||||||
if not desired_tag:
|
if not desired_disk_tags:
|
||||||
continue
|
continue
|
||||||
current_tags = disk.get("tags") or []
|
current_tags = disk.get("tags") or []
|
||||||
if current_tags == [desired_tag]:
|
if current_tags == desired_disk_tags:
|
||||||
skipped += 1
|
skipped += 1
|
||||||
continue
|
continue
|
||||||
print(f"patching {name}:{disk_name} path={disk_path} tags={current_tags!r} -> {[desired_tag]!r}")
|
print(f"patching {name}:{disk_name} path={disk_path} tags={current_tags!r} -> {desired_disk_tags!r}")
|
||||||
patch_disk_tags(name, disk_name, desired_tag)
|
patch_disk_tags(name, disk_name, desired_disk_tags)
|
||||||
changed += 1
|
changed += 1
|
||||||
|
|
||||||
print(f"done: changed={changed} skipped={skipped}")
|
print(f"done: changed={changed} skipped={skipped}")
|
||||||
|
|||||||
28
infrastructure/longhorn/core/veles-recurring-jobs.yaml
Normal file
28
infrastructure/longhorn/core/veles-recurring-jobs.yaml
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
# infrastructure/longhorn/core/veles-recurring-jobs.yaml
# Daily backup at 05:30 for volumes in the veles / veles-postgres groups;
# the seven most recent backups are retained.
apiVersion: longhorn.io/v1beta2
kind: RecurringJob
metadata:
  name: veles-postgres-backup
  namespace: longhorn-system
spec:
  cron: "30 5 * * *"
  task: backup
  groups:
    - veles
    - veles-postgres
  retain: 7
  concurrency: 1
---
# Snapshot every 30 minutes for the same groups; eight snapshots are retained.
apiVersion: longhorn.io/v1beta2
kind: RecurringJob
metadata:
  name: veles-postgres-snapshot
  namespace: longhorn-system
spec:
  cron: "*/30 * * * *"
  task: snapshot
  groups:
    - veles
    - veles-postgres
  retain: 8
  concurrency: 1
|
||||||
@ -3,3 +3,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1
|
|||||||
kind: Kustomization
|
kind: Kustomization
|
||||||
resources:
|
resources:
|
||||||
- scavenger.yaml
|
- scavenger.yaml
|
||||||
|
- veles.yaml
|
||||||
|
|||||||
17
infrastructure/modules/base/priorityclass/veles.yaml
Normal file
17
infrastructure/modules/base/priorityclass/veles.yaml
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# infrastructure/modules/base/priorityclass/veles.yaml
# Core Veles workloads (database, API, controllers).
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: veles-core
value: 500
globalDefault: false
description: "For Veles core database, API, and controller workloads"
---
# Simulation jobs: lower value than veles-core, and preemptionPolicy Never.
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: veles-sim
value: 50
globalDefault: false
preemptionPolicy: Never
description: "For Veles simulation jobs; lower than core and non-preempting"
|
||||||
@ -5,3 +5,6 @@ resources:
|
|||||||
- asteria.yaml
|
- asteria.yaml
|
||||||
- asteria-encrypted.yaml
|
- asteria-encrypted.yaml
|
||||||
- astreae.yaml
|
- astreae.yaml
|
||||||
|
- veles-oceanus-db.yaml
|
||||||
|
- veles-oceanus-artifacts.yaml
|
||||||
|
- veles-oceanus-policy.yaml
|
||||||
|
|||||||
@ -0,0 +1,20 @@
|
|||||||
|
# infrastructure/modules/base/storageclass/veles-oceanus-artifacts.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: veles-oceanus-artifacts
  annotations:
    veles.bstein.dev/allowed-namespace: veles
provisioner: driver.longhorn.io
parameters:
  # Placement: node tag veles-oceanus, disk tags veles-oceanus and
  # veles-artifacts.
  nodeSelector: veles-oceanus
  diskSelector: veles-oceanus,veles-artifacts
  fromBackup: ""
  # Single replica, strict-local data locality, no auto-balancing.
  numberOfReplicas: "1"
  staleReplicaTimeout: "30"
  fsType: ext4
  replicaAutoBalance: disabled
  dataLocality: strict-local
reclaimPolicy: Retain
allowVolumeExpansion: true
volumeBindingMode: WaitForFirstConsumer
|
||||||
@ -0,0 +1,21 @@
|
|||||||
|
# infrastructure/modules/base/storageclass/veles-oceanus-db.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: veles-oceanus-db
  annotations:
    veles.bstein.dev/allowed-namespace: veles
provisioner: driver.longhorn.io
parameters:
  # Placement: node tag veles-oceanus, disk tags veles-oceanus and veles-db.
  nodeSelector: veles-oceanus
  diskSelector: veles-oceanus,veles-db
  fromBackup: ""
  # Single replica, strict-local data locality, no auto-balancing.
  numberOfReplicas: "1"
  staleReplicaTimeout: "30"
  fsType: ext4
  replicaAutoBalance: disabled
  dataLocality: strict-local
  # Attaches the two recurring jobs by name (isGroup is false for both).
  recurringJobSelector: '[{"name":"veles-postgres-backup","isGroup":false},{"name":"veles-postgres-snapshot","isGroup":false}]'
reclaimPolicy: Retain
allowVolumeExpansion: true
volumeBindingMode: WaitForFirstConsumer
|
||||||
@ -0,0 +1,25 @@
|
|||||||
|
# infrastructure/modules/base/storageclass/veles-oceanus-policy.yaml
# Rejects PersistentVolumeClaims that name one of the Veles Oceanus storage
# classes unless the claim is in namespace veles.
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingAdmissionPolicy
metadata:
  name: veles-oceanus-storage-namespace
spec:
  failurePolicy: Fail
  matchConstraints:
    resourceRules:
      - apiGroups:
          - ""
        apiVersions:
          - "v1"
        operations:
          - "CREATE"
          - "UPDATE"
        resources:
          - "persistentvolumeclaims"
  validations:
    # Passes when no class is set, when the class is not a reserved one, or
    # when the claim's namespace is veles.
    - expression: "!has(object.spec.storageClassName) || !(object.spec.storageClassName in ['veles-oceanus-db', 'veles-oceanus-artifacts']) || object.metadata.namespace == 'veles'"
      message: "Veles Oceanus storage classes are reserved for namespace veles"
---
# Binds the policy above with action Deny.
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingAdmissionPolicyBinding
metadata:
  name: veles-oceanus-storage-namespace
spec:
  policyName: veles-oceanus-storage-namespace
  validationActions:
    - Deny
|
||||||
@ -429,6 +429,24 @@ data:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Veles image pipeline, created disabled; it runs the Jenkinsfile from the
// main branch of the veles repository.
pipelineJob('veles') {
  disabled(true)
  description('Staged Veles alpha image pipeline. Backend/frontend should build linux/amd64 and linux/arm64; sim-worker may begin amd64-only if Forge dependencies require it.')
  definition {
    cpsScm {
      scriptPath('Jenkinsfile')
      scm {
        git {
          branches('*/main')
          remote {
            url('https://scm.bstein.dev/bstein/veles.git')
            credentials('gitea-pat')
          }
        }
      }
    }
  }
}
|
||||||
multibranchPipelineJob('titan-iac-quality-gate') {
|
multibranchPipelineJob('titan-iac-quality-gate') {
|
||||||
branchSources {
|
branchSources {
|
||||||
branchSource {
|
branchSource {
|
||||||
|
|||||||
@ -27,6 +27,7 @@ resources:
|
|||||||
- oneoffs/soteria-oidc-secret-ensure-job.yaml
|
- oneoffs/soteria-oidc-secret-ensure-job.yaml
|
||||||
- oneoffs/quality-oidc-secret-ensure-job.yaml
|
- oneoffs/quality-oidc-secret-ensure-job.yaml
|
||||||
- oneoffs/agent-oidc-secret-ensure-job.yaml
|
- oneoffs/agent-oidc-secret-ensure-job.yaml
|
||||||
|
- oneoffs/veles-realm-ensure-job.yaml
|
||||||
- oneoffs/metis-ssh-keys-secret-ensure-job.yaml
|
- oneoffs/metis-ssh-keys-secret-ensure-job.yaml
|
||||||
- oneoffs/metis-node-passwords-secret-ensure-job.yaml
|
- oneoffs/metis-node-passwords-secret-ensure-job.yaml
|
||||||
- oneoffs/harbor-oidc-secret-ensure-job.yaml
|
- oneoffs/harbor-oidc-secret-ensure-job.yaml
|
||||||
|
|||||||
332
services/keycloak/oneoffs/veles-realm-ensure-job.yaml
Normal file
332
services/keycloak/oneoffs/veles-realm-ensure-job.yaml
Normal file
@ -0,0 +1,332 @@
|
|||||||
|
# services/keycloak/oneoffs/veles-realm-ensure-job.yaml
# One-off job for sso/veles-realm-ensure-1.
# Purpose: create the Veles realm, groups, OIDC client, SMTP settings, and Vault client secret.
# Keep suspended until Veles Vault paths/policies have reconciled, then unsuspend once.
apiVersion: batch/v1
kind: Job
metadata:
  name: veles-realm-ensure-1
  namespace: sso
spec:
  suspend: true
  backoffLimit: 0
  ttlSecondsAfterFinished: 3600
  template:
    metadata:
      annotations:
        vault.hashicorp.com/agent-inject: "true"
        vault.hashicorp.com/agent-pre-populate-only: "true"
        vault.hashicorp.com/role: "sso-secrets"
        vault.hashicorp.com/agent-inject-secret-keycloak-admin-env.sh: "kv/data/atlas/shared/keycloak-admin"
        # Renders a shell file that the container sources before running Python.
        # The SMTP user and password are both taken from the relay "apikey".
        # NOTE(review): values are interpolated inside double quotes and the
        # file is sourced by /bin/sh, so a secret containing $, `, " or \
        # would be altered by the shell -- confirm the stored values avoid them.
        vault.hashicorp.com/agent-inject-template-keycloak-admin-env.sh: |
          {{ with secret "kv/data/atlas/shared/keycloak-admin" }}
          export KEYCLOAK_ADMIN="{{ .Data.data.username }}"
          export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}"
          export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}"
          {{ end }}
          {{ with secret "kv/data/atlas/shared/postmark-relay" }}
          export KEYCLOAK_SMTP_USER="{{ index .Data.data "apikey" }}"
          export KEYCLOAK_SMTP_PASSWORD="{{ index .Data.data "apikey" }}"
          {{ end }}
    spec:
      serviceAccountName: mas-secrets-ensure
      restartPolicy: Never
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/worker
                    operator: Exists
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: kubernetes.io/arch
                    operator: In
                    values: ["arm64"]
      containers:
        - name: configure
          image: python:3.11-alpine
          env:
            - name: KEYCLOAK_SERVER
              value: http://keycloak.sso.svc.cluster.local
            - name: KEYCLOAK_REALM
              value: veles
            - name: KEYCLOAK_CLIENT_ID
              value: veles-web
            - name: KEYCLOAK_PUBLIC_ISSUER
              value: https://sso.bstein.dev/realms/veles
            - name: VELES_BASE_URL
              value: https://veles.bstein.dev
            - name: KEYCLOAK_SMTP_HOST
              value: mail.bstein.dev
            - name: KEYCLOAK_SMTP_PORT
              value: "587"
            - name: KEYCLOAK_SMTP_FROM
              value: no-reply-veles@bstein.dev
            - name: KEYCLOAK_SMTP_FROM_NAME
              value: Veles
          command: ["/bin/sh", "-c"]
          args:
            - |
              set -eu
              . /vault/secrets/keycloak-admin-env.sh
              python - <<'PY'
              import json
              import os
              import time
              import urllib.error
              import urllib.parse
              import urllib.request

              base_url = os.environ["KEYCLOAK_SERVER"].rstrip("/")
              realm = os.environ["KEYCLOAK_REALM"]
              client_id = os.environ["KEYCLOAK_CLIENT_ID"]
              issuer = os.environ["KEYCLOAK_PUBLIC_ISSUER"]
              veles_base_url = os.environ["VELES_BASE_URL"].rstrip("/")
              admin_user = os.environ["KEYCLOAK_ADMIN_USER"]
              admin_password = os.environ["KEYCLOAK_ADMIN_PASSWORD"]

              def decode_body(raw):
                  """Decode an HTTP body: None if empty, parsed JSON, else {"raw": text}."""
                  if not raw:
                      return None
                  try:
                      return json.loads(raw.decode())
                  except Exception:
                      return {"raw": raw.decode(errors="replace")}

              def request(method, url, token=None, payload=None, headers=None, timeout=30):
                  """Send one HTTP request and return (status, decoded_body).

                  HTTP error statuses are returned, not raised, so callers can
                  branch on the code. Success and error bodies are decoded the
                  same way, so a non-JSON body never raises here. Connection
                  failures (URLError) still propagate.
                  """
                  data = None
                  req_headers = headers.copy() if headers else {}
                  if token:
                      req_headers["Authorization"] = f"Bearer {token}"
                  if payload is not None:
                      data = json.dumps(payload).encode()
                      req_headers["Content-Type"] = "application/json"
                  req = urllib.request.Request(url, data=data, headers=req_headers, method=method)
                  try:
                      with urllib.request.urlopen(req, timeout=timeout) as resp:
                          return resp.status, decode_body(resp.read())
                  except urllib.error.HTTPError as exc:
                      return exc.code, decode_body(exc.read())

              # Obtain an admin token from the master realm, retrying while
              # Keycloak is unreachable or answering with a transient status.
              token_body = None
              form = urllib.parse.urlencode(
                  {
                      "grant_type": "password",
                      "client_id": "admin-cli",
                      "username": admin_user,
                      "password": admin_password,
                  }
              ).encode()
              for attempt in range(1, 11):
                  req = urllib.request.Request(
                      f"{base_url}/realms/master/protocol/openid-connect/token",
                      data=form,
                      headers={"Content-Type": "application/x-www-form-urlencoded"},
                      method="POST",
                  )
                  try:
                      with urllib.request.urlopen(req, timeout=10) as resp:
                          token_body = json.loads(resp.read().decode())
                      break
                  except urllib.error.URLError as exc:
                      # HTTPError is a URLError subclass. A 4xx answer (e.g. bad
                      # credentials) will not heal by waiting, so fail at once
                      # instead of repeating the rejected login ten times.
                      code = getattr(exc, "code", None)
                      if code is not None and code < 500 and code not in (408, 429):
                          raise SystemExit(f"Keycloak token request rejected: status={code}")
                      if attempt == 10:
                          raise SystemExit(f"Keycloak token request failed after retries: {exc}")
                      time.sleep(attempt * 2)
              token = token_body["access_token"]

              smtp = {
                  "host": os.environ["KEYCLOAK_SMTP_HOST"],
                  "port": os.environ["KEYCLOAK_SMTP_PORT"],
                  "from": os.environ["KEYCLOAK_SMTP_FROM"],
                  "fromDisplayName": os.environ["KEYCLOAK_SMTP_FROM_NAME"],
                  "replyTo": os.environ["KEYCLOAK_SMTP_FROM"],
                  "replyToDisplayName": os.environ["KEYCLOAK_SMTP_FROM_NAME"],
                  "user": os.environ["KEYCLOAK_SMTP_USER"],
                  "password": os.environ["KEYCLOAK_SMTP_PASSWORD"],
                  "auth": "true",
                  "starttls": "true",
                  "ssl": "false",
              }

              # Create the realm when missing, then always push the desired
              # settings so an existing realm converges as well.
              status, realm_rep = request("GET", f"{base_url}/admin/realms/{realm}", token)
              if status == 404:
                  create_payload = {
                      "realm": realm,
                      "enabled": True,
                      "registrationAllowed": False,
                      "resetPasswordAllowed": True,
                      "verifyEmail": True,
                      "loginWithEmailAllowed": True,
                      "duplicateEmailsAllowed": False,
                      "smtpServer": smtp,
                  }
                  status, body = request("POST", f"{base_url}/admin/realms", token, create_payload)
                  if status not in (201, 204, 409):
                      raise SystemExit(f"Realm create failed: status={status} body={body}")
                  status, realm_rep = request("GET", f"{base_url}/admin/realms/{realm}", token)
              if status != 200 or not isinstance(realm_rep, dict):
                  raise SystemExit(f"Realm fetch failed: status={status}")

              realm_rep.update(
                  {
                      "enabled": True,
                      "registrationAllowed": False,
                      "resetPasswordAllowed": True,
                      "verifyEmail": True,
                      "loginWithEmailAllowed": True,
                      "duplicateEmailsAllowed": False,
                      "smtpServer": smtp,
                  }
              )
              status, body = request("PUT", f"{base_url}/admin/realms/{realm}", token, realm_rep)
              if status not in (200, 204):
                  raise SystemExit(f"Realm update failed: status={status} body={body}")

              def ensure_group(name):
                  """Create the realm group `name` unless the search already returns it."""
                  status, groups = request(
                      "GET",
                      f"{base_url}/admin/realms/{realm}/groups?search={urllib.parse.quote(name)}",
                      token,
                  )
                  if status != 200:
                      raise SystemExit(f"Group search failed for {name}: status={status}")
                  # The search may return other groups too; require an exact name.
                  if any(group.get("name") == name for group in groups or []):
                      return
                  status, body = request("POST", f"{base_url}/admin/realms/{realm}/groups", token, {"name": name})
                  if status not in (201, 204, 409):
                      raise SystemExit(f"Group create failed for {name}: status={status} body={body}")

              ensure_group("alpha")
              ensure_group("admin")

              # Ensure the confidential OIDC client exists and matches client_payload.
              status, clients = request(
                  "GET",
                  f"{base_url}/admin/realms/{realm}/clients?clientId={urllib.parse.quote(client_id)}",
                  token,
              )
              if status != 200:
                  raise SystemExit(f"Client lookup failed: status={status}")
              client_uuid = clients[0]["id"] if clients else None
              client_payload = {
                  "clientId": client_id,
                  "enabled": True,
                  "protocol": "openid-connect",
                  "publicClient": False,
                  "standardFlowEnabled": True,
                  "implicitFlowEnabled": False,
                  "directAccessGrantsEnabled": False,
                  "serviceAccountsEnabled": False,
                  "redirectUris": [f"{veles_base_url}/*"],
                  "webOrigins": [veles_base_url],
                  "rootUrl": veles_base_url,
                  "baseUrl": "/",
                  "attributes": {
                      "pkce.code.challenge.method": "S256",
                      "post.logout.redirect.uris": f"{veles_base_url}/*",
                  },
              }
              if not client_uuid:
                  status, body = request("POST", f"{base_url}/admin/realms/{realm}/clients", token, client_payload)
                  if status not in (201, 204, 409):
                      raise SystemExit(f"Client create failed: status={status} body={body}")
                  status, clients = request(
                      "GET",
                      f"{base_url}/admin/realms/{realm}/clients?clientId={urllib.parse.quote(client_id)}",
                      token,
                  )
                  # Check the status before indexing: on an error response
                  # `clients` is an error body (or None), not a list.
                  if status != 200:
                      raise SystemExit(f"Client lookup failed: status={status}")
                  client_uuid = clients[0]["id"] if clients else None
              if not client_uuid:
                  raise SystemExit("Client veles-web not found after create")
              status, body = request(
                  "PUT",
                  f"{base_url}/admin/realms/{realm}/clients/{client_uuid}",
                  token,
                  client_payload,
              )
              if status not in (200, 204):
                  raise SystemExit(f"Client update failed: status={status} body={body}")

              # Ensure a "groups" protocol mapper exists on the client.
              mapper_payload = {
                  "name": "groups",
                  "protocol": "openid-connect",
                  "protocolMapper": "oidc-group-membership-mapper",
                  "consentRequired": False,
                  "config": {
                      "full.path": "false",
                      "id.token.claim": "true",
                      "access.token.claim": "true",
                      "userinfo.token.claim": "true",
                      "claim.name": "groups",
                  },
              }
              status, mappers = request(
                  "GET",
                  f"{base_url}/admin/realms/{realm}/clients/{client_uuid}/protocol-mappers/models",
                  token,
              )
              if status != 200:
                  raise SystemExit(f"Mapper lookup failed: status={status}")
              mapper_id = next((mapper.get("id") for mapper in mappers or [] if mapper.get("name") == "groups"), None)
              if mapper_id:
                  status, body = request(
                      "PUT",
                      f"{base_url}/admin/realms/{realm}/clients/{client_uuid}/protocol-mappers/models/{mapper_id}",
                      token,
                      mapper_payload,
                  )
              else:
                  status, body = request(
                      "POST",
                      f"{base_url}/admin/realms/{realm}/clients/{client_uuid}/protocol-mappers/models",
                      token,
                      mapper_payload,
                  )
              if status not in (200, 201, 204):
                  raise SystemExit(f"Mapper ensure failed: status={status} body={body}")

              # Read the client secret so it can be stored in Vault.
              status, secret = request(
                  "GET",
                  f"{base_url}/admin/realms/{realm}/clients/{client_uuid}/client-secret",
                  token,
              )
              client_secret = (secret or {}).get("value")
              if status != 200 or not client_secret:
                  raise SystemExit(f"Client secret fetch failed: status={status}")

              # Log in to Vault with the pod's service-account token, then
              # write the OIDC settings to kv/data/atlas/veles/veles-oidc.
              vault_addr = os.environ.get("VAULT_ADDR", "http://vault.vault.svc.cluster.local:8200")
              with open("/var/run/secrets/kubernetes.io/serviceaccount/token", encoding="utf-8") as token_file:
                  jwt = token_file.read().strip()
              login_payload = json.dumps({"jwt": jwt, "role": os.environ.get("VAULT_ROLE", "sso-secrets")}).encode()
              req = urllib.request.Request(
                  f"{vault_addr}/v1/auth/kubernetes/login",
                  data=login_payload,
                  headers={"Content-Type": "application/json"},
                  method="POST",
              )
              with urllib.request.urlopen(req, timeout=20) as resp:
                  vault_token = json.loads(resp.read().decode())["auth"]["client_token"]

              payload = {
                  "data": {
                      "client_id": client_id,
                      "client_secret": client_secret,
                      "issuer": issuer,
                      "realm": realm,
                      "required_groups": "alpha,admin",
                  }
              }
              req = urllib.request.Request(
                  f"{vault_addr}/v1/kv/data/atlas/veles/veles-oidc",
                  data=json.dumps(payload).encode(),
                  headers={"X-Vault-Token": vault_token, "Content-Type": "application/json"},
                  method="POST",
              )
              with urllib.request.urlopen(req, timeout=20) as resp:
                  if resp.status not in (200, 204):
                      raise SystemExit(f"Vault write returned {resp.status}")

              print("Veles Keycloak realm/client ready")
              PY
|
||||||
@ -9,7 +9,7 @@ data:
|
|||||||
METIS_INVENTORY_PATH: /app/inventory.titan-rpi4.yaml
|
METIS_INVENTORY_PATH: /app/inventory.titan-rpi4.yaml
|
||||||
METIS_DATA_DIR: /var/lib/metis
|
METIS_DATA_DIR: /var/lib/metis
|
||||||
METIS_DEFAULT_FLASH_HOST: titan-20
|
METIS_DEFAULT_FLASH_HOST: titan-20
|
||||||
METIS_FLASH_HOSTS: titan-20,titan-21,titan-22,titan-24,titan-19,titan-17,titan-15,titan-14,titan-12,titan-11,titan-10,titan-09,titan-08,titan-07,titan-06,titan-05,titan-04,titan-0c,titan-0b,titan-0a
|
METIS_FLASH_HOSTS: titan-20,titan-21,titan-22,titan-23,titan-24,titan-19,titan-17,titan-15,titan-14,titan-12,titan-11,titan-10,titan-09,titan-08,titan-07,titan-06,titan-05,titan-04,titan-0c,titan-0b,titan-0a
|
||||||
METIS_LOCAL_HOST: titan-20
|
METIS_LOCAL_HOST: titan-20
|
||||||
METIS_ALLOWED_GROUPS: admin,maintenance
|
METIS_ALLOWED_GROUPS: admin,maintenance
|
||||||
METIS_MAX_DEVICE_BYTES: "1000000000000"
|
METIS_MAX_DEVICE_BYTES: "1000000000000"
|
||||||
|
|||||||
@ -38,6 +38,12 @@ spec:
|
|||||||
operator: NotIn
|
operator: NotIn
|
||||||
values:
|
values:
|
||||||
- "true"
|
- "true"
|
||||||
|
- key: veles.bstein.dev/node-pool
|
||||||
|
operator: NotIn
|
||||||
|
values:
|
||||||
|
- oceanus
|
||||||
|
- key: node-role.kubernetes.io/accelerator
|
||||||
|
operator: Exists
|
||||||
tolerations:
|
tolerations:
|
||||||
- operator: Exists
|
- operator: Exists
|
||||||
containers:
|
containers:
|
||||||
|
|||||||
@ -50,6 +50,15 @@ spec:
|
|||||||
upgrade:
|
upgrade:
|
||||||
disableWait: true
|
disableWait: true
|
||||||
values:
|
values:
|
||||||
|
affinity:
|
||||||
|
nodeAffinity:
|
||||||
|
requiredDuringSchedulingIgnoredDuringExecution:
|
||||||
|
nodeSelectorTerms:
|
||||||
|
- matchExpressions:
|
||||||
|
- key: veles.bstein.dev/node-pool
|
||||||
|
operator: NotIn
|
||||||
|
values:
|
||||||
|
- oceanus
|
||||||
rbac:
|
rbac:
|
||||||
pspEnabled: false
|
pspEnabled: false
|
||||||
service:
|
service:
|
||||||
|
|||||||
@ -240,6 +240,11 @@ write_policy_and_role "game-stream" "game-stream" "game-stream-vault" \
|
|||||||
"game-stream/*" ""
|
"game-stream/*" ""
|
||||||
write_policy_and_role "openclaw" "openclaw" "agent-vault" \
|
write_policy_and_role "openclaw" "openclaw" "agent-vault" \
|
||||||
"openclaw/*" ""
|
"openclaw/*" ""
|
||||||
|
write_policy_and_role "veles" "veles" "veles-backend,veles-postgres,veles-vault-sync" \
|
||||||
|
"veles/* shared/harbor-pull shared/postmark-relay" ""
|
||||||
|
write_policy_and_role "veles-secrets" "veles" "veles-secrets-ensure" \
|
||||||
|
"shared/postmark-relay" \
|
||||||
|
"veles/*"
|
||||||
write_policy_and_role "maintenance" "maintenance" "ariadne,maintenance-vault-sync,metis" \
|
write_policy_and_role "maintenance" "maintenance" "ariadne,maintenance-vault-sync,metis" \
|
||||||
"maintenance/ariadne-db maintenance/metis-oidc maintenance/soteria-oidc maintenance/metis-ssh-keys maintenance/metis-runtime portal/atlas-portal-db portal/bstein-dev-home-keycloak-admin mailu/mailu-db-secret mailu/mailu-initial-account-secret nextcloud/nextcloud-db nextcloud/nextcloud-admin health/wger-admin finance/firefly-secrets comms/mas-admin-client-runtime comms/atlasbot-credentials-runtime comms/synapse-db comms/synapse-admin vault/vault-oidc-config shared/harbor-pull shared/soteria-restic harbor/harbor-core" "" \
|
"maintenance/ariadne-db maintenance/metis-oidc maintenance/soteria-oidc maintenance/metis-ssh-keys maintenance/metis-runtime portal/atlas-portal-db portal/bstein-dev-home-keycloak-admin mailu/mailu-db-secret mailu/mailu-initial-account-secret nextcloud/nextcloud-db nextcloud/nextcloud-admin health/wger-admin finance/firefly-secrets comms/mas-admin-client-runtime comms/atlasbot-credentials-runtime comms/synapse-db comms/synapse-admin vault/vault-oidc-config shared/harbor-pull shared/soteria-restic harbor/harbor-core" "" \
|
||||||
'
|
'
|
||||||
@ -266,8 +271,8 @@ write_policy_and_role "vault" "vault" "vault" \
|
|||||||
"vault/*" ""
|
"vault/*" ""
|
||||||
|
|
||||||
write_policy_and_role "sso-secrets" "sso" "mas-secrets-ensure" \
|
write_policy_and_role "sso-secrets" "sso" "mas-secrets-ensure" \
|
||||||
"shared/keycloak-admin maintenance/metis-ssh-keys" \
|
"shared/keycloak-admin shared/postmark-relay maintenance/metis-ssh-keys" \
|
||||||
"harbor/harbor-oidc vault/vault-oidc-config comms/synapse-oidc logging/oauth2-proxy-logs-oidc finance/actual-oidc maintenance/metis-oidc maintenance/soteria-oidc maintenance/metis-ssh-keys openclaw/agent-oidc" \
|
"harbor/harbor-oidc vault/vault-oidc-config comms/synapse-oidc logging/oauth2-proxy-logs-oidc finance/actual-oidc maintenance/metis-oidc maintenance/soteria-oidc maintenance/metis-ssh-keys openclaw/agent-oidc veles/veles-oidc" \
|
||||||
'
|
'
|
||||||
path "kv/data/atlas/nodes/*" {
|
path "kv/data/atlas/nodes/*" {
|
||||||
capabilities = ["create", "update", "read"]
|
capabilities = ["create", "update", "read"]
|
||||||
|
|||||||
64
services/veles/NOTES.md
Normal file
64
services/veles/NOTES.md
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
# Veles Infrastructure Contract
|
||||||
|
|
||||||
|
This stack is staged for Flux and intentionally starts the app deployments at `replicas: 0` until images and the app-side runtime contract are ready.
|
||||||
|
|
||||||
|
## Cluster Contract
|
||||||
|
|
||||||
|
- Namespace: `veles`
|
||||||
|
- Hostname: `https://veles.bstein.dev`
|
||||||
|
- Alpha runs in the same `veles` namespace; no alternate alpha namespace is used.
|
||||||
|
- Backend service: `veles-backend.veles.svc.cluster.local:80`
|
||||||
|
- Frontend service: `veles-frontend.veles.svc.cluster.local:80`
|
||||||
|
- Postgres service: `veles-postgres.veles.svc.cluster.local:5432`
|
||||||
|
- Artifact PVC: `veles-artifacts`, mounted at `/data/veles-artifacts`
|
||||||
|
- Storage classes: `veles-oceanus-db`, `veles-oceanus-artifacts`
|
||||||
|
- Images:
|
||||||
|
- `registry.bstein.dev/veles/veles-backend`
|
||||||
|
- `registry.bstein.dev/veles/veles-frontend`
|
||||||
|
- `registry.bstein.dev/veles/veles-sim-worker`
|
||||||
|
|
||||||
|
## Runtime Env
|
||||||
|
|
||||||
|
Veles should consume:
|
||||||
|
|
||||||
|
- `VELES_PUBLIC_BASE_URL=https://veles.bstein.dev`
|
||||||
|
- `VELES_OIDC_ISSUER=https://sso.bstein.dev/realms/veles`
|
||||||
|
- `VELES_OIDC_CLIENT_ID=veles-web`
|
||||||
|
- `VELES_OIDC_REQUIRED_GROUPS=alpha,admin`
|
||||||
|
- `DATABASE_URL` from `kv/data/atlas/veles/veles-db`
|
||||||
|
- `VELES_SESSION_SECRET` from `kv/data/atlas/veles/app-secrets`
|
||||||
|
- `VELES_BYOK_ENCRYPTION_KEY` from `kv/data/atlas/veles/app-secrets`
|
||||||
|
|
||||||
|
User OpenAI API keys must stay in the Veles database encrypted with `VELES_BYOK_ENCRYPTION_KEY`; do not store per-user BYOK secrets in Vault.
|
||||||
|
|
||||||
|
## Simulation Jobs
|
||||||
|
|
||||||
|
The backend service account can create, watch, and delete Jobs only inside the `veles` namespace. Simulation pods should use service account `veles-sim`, set `automountServiceAccountToken: false`, and use:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
priorityClassName: veles-sim
|
||||||
|
nodeSelector:
|
||||||
|
veles.bstein.dev/simulation: "true"
|
||||||
|
tolerations:
|
||||||
|
- key: veles.bstein.dev/simulation
|
||||||
|
operator: Equal
|
||||||
|
value: "true"
|
||||||
|
effect: NoSchedule
|
||||||
|
```
|
||||||
|
|
||||||
|
## Staged Operator Steps
|
||||||
|
|
||||||
|
1. Join `titan-23`/Oceanus to Atlas as a worker.
|
||||||
|
2. Use Metis with `titan-23` in `METIS_FLASH_HOSTS`; the existing node secret placeholder uses `192.168.22.23`.
|
||||||
|
3. Confirm the node normalizer applies the Veles labels and taint.
|
||||||
|
4. Add Oceanus Longhorn disks at paths tagged by the Longhorn tag ensure job.
|
||||||
|
5. Let Vault policy reconciliation run, then unsuspend `veles-secrets-ensure-1`.
|
||||||
|
6. Unsuspend `veles-realm-ensure-1` in `services/keycloak` to create the realm/client secret.
|
||||||
|
7. Create the Harbor `veles` project or robot access before image automation is enabled in production.
|
||||||
|
8. Scale `veles-postgres`, then backend/frontend once app images exist.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- `veles-oceanus-artifacts` is RWO for alpha; simulation workers should either run on Oceanus with the backend or stream logs to the backend, which owns writes.
|
||||||
|
- Postgres uses Longhorn backup recurring jobs off Oceanus. This is not a substitute for a tested restore drill.
|
||||||
|
- The Jenkins job skeleton points at the Veles repo but stays disabled until that repo provides a Jenkinsfile.
|
||||||
16
services/veles/artifacts-pvc.yaml
Normal file
16
services/veles/artifacts-pvc.yaml
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
# services/veles/artifacts-pvc.yaml
# ReadWriteOnce claim for the Veles artifact volume.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: veles
  name: veles-artifacts
  labels:
    app.kubernetes.io/component: artifacts
    app.kubernetes.io/name: veles
spec:
  storageClassName: veles-oceanus-artifacts
  accessModes: ["ReadWriteOnce"]
  resources:
    requests:
      storage: 200Gi
|
||||||
89
services/veles/backend-deployment.yaml
Normal file
89
services/veles/backend-deployment.yaml
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
# services/veles/backend-deployment.yaml
# Veles backend Deployment, shipped at zero replicas. The Vault agent renders
# veles-env.sh; the container sources it (when present) before exec'ing the
# backend binary.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: veles
  name: veles-backend
  labels:
    app: veles-backend
spec:
  revisionHistoryLimit: 2
  replicas: 0
  selector:
    matchLabels:
      app: veles-backend
  template:
    metadata:
      labels:
        app: veles-backend
      annotations:
        vault.hashicorp.com/agent-inject: "true"
        vault.hashicorp.com/agent-pre-populate-only: "true"
        vault.hashicorp.com/role: "veles"
        vault.hashicorp.com/agent-inject-secret-veles-env.sh: "kv/data/atlas/veles/veles-db"
        # The rendered file exports database, OIDC, and app secrets as shell
        # variables.
        vault.hashicorp.com/agent-inject-template-veles-env.sh: |
          {{- with secret "kv/data/atlas/veles/veles-db" }}
          export DATABASE_URL="{{ .Data.data.DATABASE_URL }}"
          export VELES_DATABASE_USER="{{ .Data.data.POSTGRES_USER }}"
          export VELES_DATABASE_PASSWORD="{{ .Data.data.POSTGRES_PASSWORD }}"
          {{- end }}
          {{- with secret "kv/data/atlas/veles/veles-oidc" }}
          export VELES_OIDC_CLIENT_SECRET="{{ .Data.data.client_secret }}"
          {{- end }}
          {{- with secret "kv/data/atlas/veles/app-secrets" }}
          export VELES_SESSION_SECRET="{{ .Data.data.VELES_SESSION_SECRET }}"
          export VELES_BYOK_ENCRYPTION_KEY="{{ .Data.data.VELES_BYOK_ENCRYPTION_KEY }}"
          {{- end }}
    spec:
      serviceAccountName: veles-backend
      priorityClassName: veles-core
      # Pinned to the Oceanus pool; the toleration admits the simulation taint.
      nodeSelector:
        veles.bstein.dev/node-pool: oceanus
      tolerations:
        - effect: NoSchedule
          key: veles.bstein.dev/simulation
          operator: Equal
          value: "true"
      securityContext:
        seccompProfile:
          type: RuntimeDefault
        fsGroup: 1000
      volumes:
        - name: artifacts
          persistentVolumeClaim:
            claimName: veles-artifacts
      containers:
        - name: backend
          image: registry.bstein.dev/veles/veles-backend:0.1.0-0 # {"$imagepolicy": "veles:veles-backend"}
          imagePullPolicy: IfNotPresent
          command:
            - /bin/sh
            - "-c"
          args:
            - |
              if [ -f /vault/secrets/veles-env.sh ]; then
                . /vault/secrets/veles-env.sh
              fi
              exec /app/veles-backend
          envFrom:
            - configMapRef:
                name: veles-app-config
          ports:
            - containerPort: 8080
              name: http
              protocol: TCP
          volumeMounts:
            - mountPath: /data/veles-artifacts
              name: artifacts
          resources:
            limits:
              cpu: "2"
              memory: 4Gi
            requests:
              cpu: 500m
              memory: 1Gi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            runAsNonRoot: true
            runAsUser: 1000
            runAsGroup: 1000
|
||||||
23
services/veles/configmap.yaml
Normal file
23
services/veles/configmap.yaml
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# services/veles/configmap.yaml
# Non-secret runtime settings shared by the Veles workloads via envFrom.
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: veles
  name: veles-app-config
data:
  # General
  VELES_ENV: alpha
  VELES_PUBLIC_BASE_URL: https://veles.bstein.dev
  VELES_LOG_RETENTION_DAYS: "30"
  VELES_ARTIFACTS_PATH: /data/veles-artifacts
  # OIDC
  VELES_OIDC_ISSUER: https://sso.bstein.dev/realms/veles
  VELES_OIDC_CLIENT_ID: veles-web
  VELES_OIDC_REQUIRED_GROUPS: alpha,admin
  # Database (credentials are not stored here)
  VELES_DATABASE_HOST: veles-postgres.veles.svc.cluster.local
  VELES_DATABASE_PORT: "5432"
  VELES_DATABASE_NAME: veles
  # Simulation job placement
  VELES_SIM_NAMESPACE: veles
  VELES_SIM_SERVICE_ACCOUNT: veles-sim
  VELES_SIM_PRIORITY_CLASS: veles-sim
  VELES_SIM_NODE_SELECTOR: veles.bstein.dev/simulation=true
  VELES_SIM_TOLERATION_KEY: veles.bstein.dev/simulation
  VELES_SIM_TOLERATION_VALUE: "true"
|
||||||
72
services/veles/frontend-deployment.yaml
Normal file
72
services/veles/frontend-deployment.yaml
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
# services/veles/frontend-deployment.yaml
# Veles frontend Deployment, shipped at zero replicas.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: veles
  name: veles-frontend
  labels:
    app: veles-frontend
spec:
  revisionHistoryLimit: 2
  replicas: 0
  selector:
    matchLabels:
      app: veles-frontend
  template:
    metadata:
      labels:
        app: veles-frontend
    spec:
      serviceAccountName: veles-frontend
      priorityClassName: veles-core
      affinity:
        nodeAffinity:
          # Hard requirement: a worker node with one of the listed hardware
          # labels.
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/worker
                    operator: Exists
                  - key: hardware
                    operator: In
                    values:
                      - rpi5
                      - rpi4
                      - amd64
          # Soft preferences: nodes without the spillover label, then rpi5.
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: atlas.bstein.dev/spillover
                    operator: DoesNotExist
            - weight: 90
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values:
                      - rpi5
      securityContext:
        seccompProfile:
          type: RuntimeDefault
        fsGroup: 1000
      containers:
        - name: frontend
          image: registry.bstein.dev/veles/veles-frontend:0.1.0-0 # {"$imagepolicy": "veles:veles-frontend"}
          imagePullPolicy: IfNotPresent
          envFrom:
            - configMapRef:
                name: veles-app-config
          ports:
            - containerPort: 8080
              name: http
              protocol: TCP
          resources:
            limits:
              cpu: 500m
              memory: 512Mi
            requests:
              cpu: 100m
              memory: 256Mi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            runAsNonRoot: true
            runAsUser: 1000
            runAsGroup: 1000
|
||||||
69
services/veles/image.yaml
Normal file
69
services/veles/image.yaml
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
# services/veles/image.yaml
# One ImageRepository/ImagePolicy pair per Veles image. Every policy selects
# by semver range ">=0.1.0-0".

# --- backend ---
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImageRepository
metadata:
  namespace: veles
  name: veles-backend
spec:
  interval: 1m0s
  image: registry.bstein.dev/veles/veles-backend
  secretRef:
    name: harbor-regcred
---
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImagePolicy
metadata:
  namespace: veles
  name: veles-backend
spec:
  policy:
    semver:
      range: ">=0.1.0-0"
  imageRepositoryRef:
    name: veles-backend
---
# --- frontend ---
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImageRepository
metadata:
  namespace: veles
  name: veles-frontend
spec:
  interval: 1m0s
  image: registry.bstein.dev/veles/veles-frontend
  secretRef:
    name: harbor-regcred
---
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImagePolicy
metadata:
  namespace: veles
  name: veles-frontend
spec:
  policy:
    semver:
      range: ">=0.1.0-0"
  imageRepositoryRef:
    name: veles-frontend
---
# --- simulation worker ---
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImageRepository
metadata:
  namespace: veles
  name: veles-sim-worker
spec:
  interval: 1m0s
  image: registry.bstein.dev/veles/veles-sim-worker
  secretRef:
    name: harbor-regcred
---
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImagePolicy
metadata:
  namespace: veles
  name: veles-sim-worker
spec:
  policy:
    semver:
      range: ">=0.1.0-0"
  imageRepositoryRef:
    name: veles-sim-worker
|
||||||
47
services/veles/ingress.yaml
Normal file
47
services/veles/ingress.yaml
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
# services/veles/ingress.yaml
# Routes veles.bstein.dev: /api, /events and /ws go to the backend service,
# everything else to the frontend service.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  namespace: veles
  name: veles
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
spec:
  ingressClassName: traefik
  tls:
    - secretName: veles-tls
      hosts:
        - veles.bstein.dev
  rules:
    - host: veles.bstein.dev
      http:
        paths:
          # Backend routes
          - pathType: Prefix
            path: /api
            backend:
              service:
                name: veles-backend
                port:
                  number: 80
          - pathType: Prefix
            path: /events
            backend:
              service:
                name: veles-backend
                port:
                  number: 80
          - pathType: Prefix
            path: /ws
            backend:
              service:
                name: veles-backend
                port:
                  number: 80
          # Catch-all: frontend
          - pathType: Prefix
            path: /
            backend:
              service:
                name: veles-frontend
                port:
                  number: 80
|
||||||
22
services/veles/kustomization.yaml
Normal file
22
services/veles/kustomization.yaml
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
# services/veles/kustomization.yaml
# Kustomize entry point for the Veles stack; all resources are placed in the
# veles namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: veles
resources:
  # Namespace, identities, and Vault wiring
  - namespace.yaml
  - serviceaccounts.yaml
  - secretproviderclass.yaml
  - vault-sync-deployment.yaml
  # Namespace guardrails and shared config
  - resourcequota.yaml
  - limitrange.yaml
  - configmap.yaml
  - rbac.yaml
  # Storage and database
  - artifacts-pvc.yaml
  - postgres-service.yaml
  - postgres-statefulset.yaml
  # Application workloads and routing
  - services.yaml
  - backend-deployment.yaml
  - frontend-deployment.yaml
  - image.yaml
  - ingress.yaml
  # One-off jobs
  - oneoffs/veles-secrets-ensure-job.yaml
|
||||||
21
services/veles/limitrange.yaml
Normal file
21
services/veles/limitrange.yaml
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# services/veles/limitrange.yaml
# Per-container defaults and bounds for the veles namespace.
apiVersion: v1
kind: LimitRange
metadata:
  namespace: veles
  name: veles-container-limits
spec:
  limits:
    - type: Container
      # Bounds every container must stay within.
      min:
        cpu: 10m
        memory: 32Mi
      max:
        cpu: "16"
        memory: 32Gi
      # Applied when a container omits requests / limits.
      defaultRequest:
        cpu: 100m
        memory: 256Mi
      default:
        cpu: 500m
        memory: 512Mi
|
||||||
8
services/veles/namespace.yaml
Normal file
8
services/veles/namespace.yaml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
# services/veles/namespace.yaml
# Namespace that holds every Veles resource.
apiVersion: v1
kind: Namespace
metadata:
  name: veles
  labels:
    app.kubernetes.io/part-of: veles
    app.kubernetes.io/name: veles
|
||||||
142
services/veles/oneoffs/veles-secrets-ensure-job.yaml
Normal file
142
services/veles/oneoffs/veles-secrets-ensure-job.yaml
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
# services/veles/oneoffs/veles-secrets-ensure-job.yaml
# One-off job for veles/veles-secrets-ensure-1.
# Purpose: seed Veles Vault paths before app/Postgres pods are scaled up.
# Keep suspended until the veles Vault role has reconciled, then unsuspend once.
#
# The script is idempotent: existing values in Vault are read back and reused,
# and only missing values are generated.
apiVersion: batch/v1
kind: Job
metadata:
  name: veles-secrets-ensure-1
  namespace: veles
spec:
  suspend: true
  backoffLimit: 0
  ttlSecondsAfterFinished: 3600
  template:
    spec:
      serviceAccountName: veles-secrets-ensure
      restartPolicy: Never
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/worker
                    operator: Exists
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: kubernetes.io/arch
                    operator: In
                    values: ["arm64"]
      containers:
        - name: apply
          image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
          # bash rather than sh: the script enables `pipefail`, which a POSIX
          # sh such as dash rejects as an illegal option, aborting the job on
          # its first line.
          command: ["/bin/bash", "-c"]
          args:
            - |
              set -euo pipefail

              vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}"
              vault_role="${VAULT_ROLE:-veles-secrets}"

              # Log in to Vault with the pod's service-account token.
              jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
              login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')"
              vault_token="$(curl -sS --request POST --data "${login_payload}" \
                "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')"
              if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then
                echo "vault login failed" >&2
                exit 1
              fi

              # read_secret PATH OUTFILE: writes the response body to OUTFILE
              # and prints the HTTP status code. A curl failure is tolerated
              # here; callers decide based on the printed status.
              read_secret() {
                path="$1"
                out="$2"
                curl -sS -o "${out}" -w "%{http_code}" \
                  -H "X-Vault-Token: ${vault_token}" \
                  "${vault_addr}/v1/kv/data/atlas/${path}" || true
              }

              # write_secret PATH JSON: posts JSON to the KV path and exits
              # the script unless Vault answers 200 or 204.
              write_secret() {
                path="$1"
                payload="$2"
                out="$(mktemp)"
                status="$(curl -sS -o "${out}" -w "%{http_code}" -X POST \
                  -H "X-Vault-Token: ${vault_token}" \
                  -H "Content-Type: application/json" \
                  -d "${payload}" \
                  "${vault_addr}/v1/kv/data/atlas/${path}")"
                if [ "${status}" != "200" ] && [ "${status}" != "204" ]; then
                  echo "Vault write failed for ${path} (status ${status})" >&2
                  cat "${out}" >&2 || true
                  exit 1
                fi
              }

              # rand_b64 N: N random bytes, base64-encoded on a single line.
              rand_b64() {
                bytes="$1"
                openssl rand -base64 "${bytes}" | tr -d '\n'
              }

              # --- veles/veles-db: keep an existing password, else generate.
              status="$(read_secret veles/veles-db /tmp/veles-db.json)"
              if [ "${status}" = "200" ]; then
                db_password="$(jq -r '.data.data.POSTGRES_PASSWORD // empty' /tmp/veles-db.json)"
              elif [ "${status}" = "404" ]; then
                db_password=""
              else
                echo "Vault read failed for veles-db (status ${status})" >&2
                cat /tmp/veles-db.json >&2 || true
                exit 1
              fi
              if [ -z "${db_password}" ]; then
                db_password="$(rand_b64 36)"
              fi
              # The password is base64 and may contain "/" or "+", which would
              # corrupt the URI authority if embedded raw. Percent-encode the
              # credentials inside DATABASE_URL; POSTGRES_PASSWORD stays raw.
              db_payload="$(jq -nc \
                --arg host "veles-postgres.veles.svc.cluster.local" \
                --arg port "5432" \
                --arg db "veles" \
                --arg user "veles" \
                --arg password "${db_password}" \
                '{data:{POSTGRES_HOST:$host,POSTGRES_PORT:$port,POSTGRES_DB:$db,POSTGRES_USER:$user,POSTGRES_PASSWORD:$password,DATABASE_URL:("postgresql://"+($user|@uri)+":"+($password|@uri)+"@"+$host+":"+$port+"/"+$db+"?sslmode=disable")}}')"
              write_secret veles/veles-db "${db_payload}"

              # --- veles/app-secrets: keep existing values, else generate.
              status="$(read_secret veles/app-secrets /tmp/app-secrets.json)"
              if [ "${status}" = "200" ]; then
                session_secret="$(jq -r '.data.data.VELES_SESSION_SECRET // empty' /tmp/app-secrets.json)"
                byok_key="$(jq -r '.data.data.VELES_BYOK_ENCRYPTION_KEY // empty' /tmp/app-secrets.json)"
              elif [ "${status}" = "404" ]; then
                session_secret=""
                byok_key=""
              else
                echo "Vault read failed for app-secrets (status ${status})" >&2
                cat /tmp/app-secrets.json >&2 || true
                exit 1
              fi
              if [ -z "${session_secret}" ]; then
                session_secret="$(rand_b64 48)"
              fi
              if [ -z "${byok_key}" ]; then
                byok_key="$(rand_b64 32)"
              fi
              app_payload="$(jq -nc \
                --arg session_secret "${session_secret}" \
                --arg byok_key "${byok_key}" \
                '{data:{VELES_SESSION_SECRET:$session_secret,VELES_BYOK_ENCRYPTION_KEY:$byok_key}}')"
              write_secret veles/app-secrets "${app_payload}"

              # --- veles/smtp: written only when the shared Postmark relay
              # secret exists and carries an apikey. The same key is used as
              # both SMTP user and password.
              postmark_status="$(read_secret shared/postmark-relay /tmp/postmark.json)"
              if [ "${postmark_status}" = "200" ]; then
                smtp_password="$(jq -r '.data.data.apikey // empty' /tmp/postmark.json)"
                if [ -n "${smtp_password}" ]; then
                  smtp_payload="$(jq -nc \
                    --arg host "mail.bstein.dev" \
                    --arg port "587" \
                    --arg user "${smtp_password}" \
                    --arg password "${smtp_password}" \
                    --arg from "no-reply-veles@bstein.dev" \
                    --arg from_name "Veles" \
                    '{data:{SMTP_HOST:$host,SMTP_PORT:$port,SMTP_USER:$user,SMTP_PASSWORD:$password,SMTP_FROM:$from,SMTP_FROM_NAME:$from_name,SMTP_STARTTLS:"true"}}')"
                  write_secret veles/smtp "${smtp_payload}"
                fi
              fi

              echo "Veles Vault paths ready: veles-db, app-secrets, smtp when Postmark relay exists"
|
||||||
17
services/veles/postgres-service.yaml
Normal file
17
services/veles/postgres-service.yaml
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# services/veles/postgres-service.yaml
# Headless Service (clusterIP: None) in front of the veles-postgres pods.
apiVersion: v1
kind: Service
metadata:
  namespace: veles
  name: veles-postgres
  labels:
    app: veles-postgres
spec:
  clusterIP: None
  selector:
    app: veles-postgres
  ports:
    - name: postgres
      protocol: TCP
      port: 5432
      targetPort: 5432
|
||||||
88
services/veles/postgres-statefulset.yaml
Normal file
88
services/veles/postgres-statefulset.yaml
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
# services/veles/postgres-statefulset.yaml
# Postgres for Veles, shipped at zero replicas. The password is rendered by
# the Vault agent into /vault/secrets/postgres-password and consumed through
# POSTGRES_PASSWORD_FILE.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: veles-postgres
  namespace: veles
  labels:
    app: veles-postgres
spec:
  serviceName: veles-postgres
  replicas: 0
  selector:
    matchLabels:
      app: veles-postgres
  # Never garbage-collect the data volume on scale-down or delete.
  persistentVolumeClaimRetentionPolicy:
    whenDeleted: Retain
    whenScaled: Retain
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: veles-postgres
      annotations:
        vault.hashicorp.com/agent-inject: "true"
        vault.hashicorp.com/agent-pre-populate-only: "true"
        vault.hashicorp.com/role: "veles"
        vault.hashicorp.com/agent-inject-secret-postgres-password: "kv/data/atlas/veles/veles-db"
        vault.hashicorp.com/agent-inject-template-postgres-password: |
          {{- with secret "kv/data/atlas/veles/veles-db" -}}
          {{ .Data.data.POSTGRES_PASSWORD }}
          {{- end -}}
    spec:
      serviceAccountName: veles-postgres
      priorityClassName: veles-core
      nodeSelector:
        veles.bstein.dev/node-pool: oceanus
      tolerations:
        - key: veles.bstein.dev/simulation
          operator: Equal
          value: "true"
          effect: NoSchedule
      securityContext:
        fsGroup: 999
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: postgres
          image: postgres:15
          ports:
            - name: postgres
              containerPort: 5432
              protocol: TCP
          env:
            - name: PGDATA
              value: /var/lib/postgresql/data/pgdata
            - name: POSTGRES_USER
              value: veles
            - name: POSTGRES_PASSWORD_FILE
              value: /vault/secrets/postgres-password
            - name: POSTGRES_DB
              value: veles
          resources:
            requests:
              cpu: "2"
              memory: 8Gi
            limits:
              cpu: "4"
              memory: 16Gi
          securityContext:
            # Run directly as the image's postgres user (999, matching
            # fsGroup). With every capability dropped, the entrypoint's root
            # path (chown of PGDATA, then stepping down to postgres) cannot
            # succeed, so the container must not start as root. Set on the
            # container rather than the pod so injected Vault agent
            # containers keep their own user.
            runAsNonRoot: true
            runAsUser: 999
            runAsGroup: 999
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
          volumeMounts:
            - name: postgres-data
              mountPath: /var/lib/postgresql/data
  volumeClaimTemplates:
    - metadata:
        name: postgres-data
        labels:
          app: veles-postgres
          veles.bstein.dev/backup: longhorn
      spec:
        accessModes: ["ReadWriteOnce"]
        storageClassName: veles-oceanus-db
        resources:
          requests:
            storage: 100Gi
|
||||||
36
services/veles/rbac.yaml
Normal file
36
services/veles/rbac.yaml
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
# services/veles/rbac.yaml
# Namespaced permissions for the veles-backend service account: manage Jobs,
# read and delete pods, read pod logs and events.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  namespace: veles
  name: veles-backend-jobs
rules:
  # Job lifecycle
  - apiGroups:
      - batch
    resources:
      - jobs
    verbs:
      - create
      - delete
      - deletecollection
      - get
      - list
      - patch
      - watch
  # Pods
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - delete
      - get
      - list
      - watch
  # Pod logs (read-only)
  - apiGroups:
      - ""
    resources:
      - pods/log
    verbs:
      - get
      - list
      - watch
  # Events, core and events.k8s.io groups (read-only)
  - apiGroups:
      - ""
    resources:
      - events
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - events.k8s.io
    resources:
      - events
    verbs:
      - get
      - list
      - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  namespace: veles
  name: veles-backend-jobs
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: veles-backend-jobs
subjects:
  - kind: ServiceAccount
    namespace: veles
    name: veles-backend
|
||||||
54
services/veles/resourcequota.yaml
Normal file
54
services/veles/resourcequota.yaml
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
# services/veles/resourcequota.yaml
|
||||||
|
# Unscoped quota: caps the total compute, object counts and storage requested
# by everything in the veles namespace.
apiVersion: v1
kind: ResourceQuota
metadata:
  name: veles-namespace-quota
  namespace: veles
spec:
  hard:
    # Compute requests and limits.
    requests.cpu: "12"
    # NOTE(review): the veles-core (12Gi) and veles-sim (16Gi) quotas in this
    # file add up to 28Gi of memory requests, which exceeds this 24Gi cap,
    # whereas the other shared limits sum exactly - confirm this is intended.
    requests.memory: 24Gi
    limits.cpu: "40"
    limits.memory: 96Gi
    # Object counts.
    pods: "60"
    count/jobs.batch: "100"
    # Persistent storage.
    persistentvolumeclaims: "8"
    requests.storage: 300Gi
|
||||||
|
---
|
||||||
|
# Quota applied only to pods whose PriorityClass is veles-core.
apiVersion: v1
kind: ResourceQuota
metadata:
  name: veles-core-quota
  namespace: veles
spec:
  # Restrict this quota to pods running with the veles-core PriorityClass.
  scopeSelector:
    matchExpressions:
      - scopeName: PriorityClass
        operator: In
        values: ["veles-core"]
  hard:
    requests.cpu: "4"
    requests.memory: 12Gi
    limits.cpu: "8"
    limits.memory: 24Gi
    pods: "12"
|
||||||
|
---
|
||||||
|
# Quota applied only to pods whose PriorityClass is veles-sim.
apiVersion: v1
kind: ResourceQuota
metadata:
  name: veles-sim-quota
  namespace: veles
spec:
  # Restrict this quota to pods running with the veles-sim PriorityClass.
  scopeSelector:
    matchExpressions:
      - scopeName: PriorityClass
        operator: In
        values: ["veles-sim"]
  hard:
    requests.cpu: "8"
    requests.memory: 16Gi
    limits.cpu: "32"
    limits.memory: 72Gi
    pods: "48"
|
||||||
21
services/veles/secretproviderclass.yaml
Normal file
21
services/veles/secretproviderclass.yaml
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# services/veles/secretproviderclass.yaml
|
||||||
|
# Secrets Store CSI provider class that reads the shared Harbor pull
# credential from Vault and maps it onto a dockerconfigjson Secret named
# harbor-regcred.
apiVersion: secrets-store.csi.x-k8s.io/v1
kind: SecretProviderClass
metadata:
  name: veles-vault
  namespace: veles
spec:
  provider: vault
  parameters:
    vaultAddress: "http://vault.vault.svc.cluster.local:8200"
    roleName: "veles"
    # Provider-specific object list, passed to the Vault provider as a string.
    objects: |
      - objectName: "harbor-pull__dockerconfigjson"
        secretPath: "kv/data/atlas/shared/harbor-pull"
        secretKey: "dockerconfigjson"
  # Mirror the fetched object into a Kubernetes Secret usable as an
  # imagePullSecret.
  secretObjects:
    - secretName: harbor-regcred
      type: kubernetes.io/dockerconfigjson
      data:
        - objectName: harbor-pull__dockerconfigjson
          key: .dockerconfigjson
|
||||||
45
services/veles/serviceaccounts.yaml
Normal file
45
services/veles/serviceaccounts.yaml
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
# services/veles/serviceaccounts.yaml
|
||||||
|
# ServiceAccount veles-backend; pods using it pull images with the
# harbor-regcred secret.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: veles-backend
  namespace: veles
imagePullSecrets:
  - name: harbor-regcred
|
||||||
|
---
|
||||||
|
# ServiceAccount veles-frontend; pods using it pull images with the
# harbor-regcred secret.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: veles-frontend
  namespace: veles
imagePullSecrets:
  - name: harbor-regcred
|
||||||
|
---
|
||||||
|
# ServiceAccount veles-postgres; pods using it pull images with the
# harbor-regcred secret.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: veles-postgres
  namespace: veles
imagePullSecrets:
  - name: harbor-regcred
|
||||||
|
---
|
||||||
|
# ServiceAccount veles-vault-sync. No imagePullSecrets are attached here.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: veles-vault-sync
  namespace: veles
|
||||||
|
---
|
||||||
|
# ServiceAccount veles-secrets-ensure. No imagePullSecrets are attached here.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: veles-secrets-ensure
  namespace: veles
|
||||||
|
---
|
||||||
|
# ServiceAccount veles-sim; pods using it pull images with the harbor-regcred
# secret.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: veles-sim
  namespace: veles
# Pods using this account do not get an API token mounted by default.
automountServiceAccountToken: false
imagePullSecrets:
  - name: harbor-regcred
|
||||||
32
services/veles/services.yaml
Normal file
32
services/veles/services.yaml
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
# services/veles/services.yaml
|
||||||
|
# Service in front of pods labelled app=veles-backend: port 80 on the Service
# forwards to port 8080 on the pods.
apiVersion: v1
kind: Service
metadata:
  name: veles-backend
  namespace: veles
  labels:
    app: veles-backend
spec:
  selector:
    app: veles-backend
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 8080
|
||||||
|
---
|
||||||
|
# Service in front of pods labelled app=veles-frontend: port 80 on the Service
# forwards to port 8080 on the pods.
apiVersion: v1
kind: Service
metadata:
  name: veles-frontend
  namespace: veles
  labels:
    app: veles-frontend
spec:
  selector:
    app: veles-frontend
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 8080
|
||||||
43
services/veles/vault-sync-deployment.yaml
Normal file
43
services/veles/vault-sync-deployment.yaml
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
# services/veles/vault-sync-deployment.yaml
|
||||||
|
# Single-replica Deployment whose only job is to keep the veles-vault
# SecretProviderClass mounted through the Secrets Store CSI driver; the
# container itself just sleeps.
# NOTE(review): presumably this mount is what keeps the driver-synced
# harbor-regcred Secret present in the namespace - confirm against the
# Secrets Store CSI driver's secret-sync behaviour.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: veles-vault-sync
  namespace: veles
  labels:
    app: veles-vault-sync
spec:
  replicas: 1
  selector:
    matchLabels:
      app: veles-vault-sync
  template:
    metadata:
      labels:
        app: veles-vault-sync
    spec:
      serviceAccountName: veles-vault-sync
      containers:
        - name: sync
          image: alpine:3.20
          command: ["/bin/sh", "-c"]
          args:
            - "sleep infinity"
          # Same hardening as the veles-postgres container: the sleeper needs
          # neither privilege escalation nor any Linux capability.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
          resources:
            requests:
              cpu: 10m
              memory: 16Mi
            limits:
              cpu: 50m
              memory: 64Mi
          volumeMounts:
            - name: vault-secrets
              mountPath: /vault/secrets
              readOnly: true
      volumes:
        # CSI volume backed by the veles-vault SecretProviderClass.
        - name: vault-secrets
          csi:
            driver: secrets-store.csi.k8s.io
            readOnly: true
            volumeAttributes:
              secretProviderClass: veles-vault
|
||||||
Loading…
x
Reference in New Issue
Block a user