titan-iac/services/openclaw/ollama-deployment.yaml
2026-05-19 19:45:10 -03:00

112 lines
3.2 KiB
YAML

# services/openclaw/ollama-deployment.yaml
#
# Single-replica Ollama server for the openclaw namespace.
# An init container pulls the model into an emptyDir volume that the serving
# container then mounts at the same path.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: openclaw-ollama
  namespace: openclaw
  labels:
    app: openclaw-ollama
spec:
  replicas: 1
  revisionHistoryLimit: 2
  # Recreate terminates the old pod before creating the new one.
  # NOTE(review): presumably chosen so two pods never compete for the same
  # shared GPU slot during a rollout - confirm.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: openclaw-ollama
  template:
    metadata:
      labels:
        app: openclaw-ollama
      annotations:
        ai.bstein.dev/model: qwen2.5:1.5b-instruct-q4_0
        ai.bstein.dev/gpu: Jetson pool (titan-20/21)
    spec:
      runtimeClassName: nvidia
      # Hard-pin scheduling to the two hosts named in the gpu annotation.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: In
                    values:
                      - titan-20
                      - titan-21
      volumes:
        # emptyDir lives only as long as the pod, so the model is pulled
        # again by the init container every time a new pod is created.
        - name: models
          emptyDir: {}
      initContainers:
        # Pre-pulls OLLAMA_MODEL into the shared volume so the serving
        # container starts with the model already on disk.
        - name: warm-model
          image: ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d
          imagePullPolicy: IfNotPresent
          env:
            - name: OLLAMA_HOST
              value: 0.0.0.0
            - name: OLLAMA_MODELS
              value: /root/.ollama
            # Keep in sync with the ai.bstein.dev/model annotation above.
            - name: OLLAMA_MODEL
              value: qwen2.5:1.5b-instruct-q4_0
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
          command:
            - /bin/sh
            - -c
            - |
              set -e
              # A temporary server is needed because `ollama pull` talks to
              # the HTTP API rather than writing the model store directly.
              ollama serve >/tmp/ollama.log 2>&1 &

              # Poll until the server answers instead of sleeping a fixed
              # interval: a fixed sleep races with slow startups and wastes
              # time on fast ones.
              attempts=0
              max_attempts=60
              until ollama list >/dev/null 2>&1; do
                attempts=$((attempts + 1))
                if [ "${attempts}" -ge "${max_attempts}" ]; then
                  echo "ollama serve not ready after ${max_attempts}s" >&2
                  # The server log is otherwise invisible to `kubectl logs`.
                  cat /tmp/ollama.log >&2 || true
                  exit 1
                fi
                sleep 1
              done

              if ! ollama pull "${OLLAMA_MODEL}"; then
                echo "failed to pull ${OLLAMA_MODEL}" >&2
                cat /tmp/ollama.log >&2 || true
                exit 1
              fi

              # Best-effort shutdown of the temporary server; the init
              # container exits right after, so a miss here is harmless.
              pkill ollama || true
          volumeMounts:
            - name: models
              mountPath: /root/.ollama
          resources:
            requests:
              cpu: "1"
              memory: 4Gi
              nvidia.com/gpu.shared: 1
            limits:
              cpu: "4"
              memory: 10Gi
              nvidia.com/gpu.shared: 1
      containers:
        # Long-running Ollama API server, reading models from the volume
        # populated by the warm-model init container.
        - name: ollama
          image: ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: 11434
          env:
            - name: OLLAMA_HOST
              value: 0.0.0.0
            - name: OLLAMA_KEEP_ALIVE
              value: 6h
            # Must match the init container's OLLAMA_MODELS so the server
            # finds the pre-pulled model.
            - name: OLLAMA_MODELS
              value: /root/.ollama
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
          volumeMounts:
            - name: models
              mountPath: /root/.ollama
          # The pod is marked Ready once the API answers GET /api/tags.
          readinessProbe:
            httpGet:
              path: /api/tags
              port: 11434
            initialDelaySeconds: 15
            periodSeconds: 10
            timeoutSeconds: 5
          resources:
            requests:
              cpu: "2"
              memory: 8Gi
              nvidia.com/gpu.shared: 1
            limits:
              cpu: "6"
              memory: 12Gi
              nvidia.com/gpu.shared: 1