titan-iac/services/bstein-dev-home/backend-deployment.yaml

65 lines
2.0 KiB
YAML

# services/bstein-dev-home/backend-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: bstein-dev-home-backend
namespace: bstein-dev-home
spec:
replicas: 1
revisionHistoryLimit: 3
selector:
matchLabels:
app: bstein-dev-home-backend
template:
metadata:
labels:
app: bstein-dev-home-backend
spec:
automountServiceAccountToken: true
serviceAccountName: bstein-dev-home
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
imagePullSecrets:
- name: harbor-bstein-robot
containers:
- name: backend
image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-30 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"}
imagePullPolicy: Always
env:
- name: AI_CHAT_API
value: http://ollama.ai.svc.cluster.local:11434
- name: AI_CHAT_MODEL
value: qwen2.5-coder:7b-instruct-q4_0
- name: AI_CHAT_TIMEOUT_SEC
value: "60"
- name: AI_NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: AI_NODE_GPU_MAP
value: |
{"titan-20": "Jetson Xavier (edge GPU)", "titan-21": "Jetson Xavier (edge GPU)", "titan-22": "RTX 3050 8GB (local GPU)", "titan-24": "RTX 3080 8GB (local GPU)"}
ports:
- name: http
containerPort: 8080
readinessProbe:
httpGet:
path: /api/healthz
port: http
initialDelaySeconds: 2
periodSeconds: 5
livenessProbe:
httpGet:
path: /api/healthz
port: http
initialDelaySeconds: 10
periodSeconds: 10
resources:
requests:
cpu: 50m
memory: 64Mi
limits:
cpu: 300m
memory: 256Mi