# services/ai-llm/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
  namespace: ai
spec:
  replicas: 1
  revisionHistoryLimit: 2
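  # With one replica, maxSurge: 0 / maxUnavailable: 1 removes the old pod
  # before the replacement schedules, so two pods never contend for the
  # shared GPU slot and the models PVC at the same time.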
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 0
      maxUnavailable: 1
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
      annotations:
        ai.bstein.dev/model: qwen2.5:7b-instruct-q4_0
        ai.bstein.dev/gpu: GPU pool (titan-22/24)
        # Bumping this timestamp changes the pod template and forces a rollout.
        ai.bstein.dev/restartedAt: "2026-01-26T12:00:00Z"
    spec:
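      # Pin scheduling to the two GPU nodes; the nvidia RuntimeClass selects
      # the NVIDIA container runtime so the pod can access the devices.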
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: In
                    values:
                      - titan-22
                      - titan-24
      runtimeClassName: nvidia
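      # Model weights live on a PVC so a completed pull survives pod restarts.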
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: ollama-models
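      # The init container pre-pulls the model into the shared PVC so the
      # main container can serve its first request without downloading.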
      initContainers:
        - name: warm-model
          image: ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d
          env:
            - name: OLLAMA_HOST
              value: "0.0.0.0"
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
            - name: OLLAMA_MODELS
              value: /root/.ollama
            - name: OLLAMA_MODEL
              value: qwen2.5:7b-instruct-q4_0
          command:
            - /bin/sh
            - -c
            - |
              set -e
              # Start a temporary server so `ollama pull` has an API to talk
              # to, fetch the model, then stop the server before handing off.
              ollama serve >/tmp/ollama.log 2>&1 &
              sleep 6
              ollama pull "${OLLAMA_MODEL}"
              pkill ollama || true
          volumeMounts:
            - name: models
              mountPath: /root/.ollama
          resources:
            requests:
              cpu: 500m
              memory: 2Gi
              nvidia.com/gpu.shared: 1
            limits:
              nvidia.com/gpu.shared: 1
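      # Main server, pinned to the same image digest as the warm-up container
      # so the runtime version matches the cached model blobs.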
      containers:
        - name: ollama
          image: ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: 11434
          env:
            - name: OLLAMA_HOST
              value: "0.0.0.0"
            # Keep the loaded model resident for 6h between requests instead
            # of Ollama's default 5m, avoiding reload latency.
            - name: OLLAMA_KEEP_ALIVE
              value: 6h
            - name: OLLAMA_MODELS
              value: /root/.ollama
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
          volumeMounts:
            - name: models
              mountPath: /root/.ollama
          resources:
            requests:
              cpu: "4"
              memory: 16Gi
              # Fractional slot of the shared GPU pool (requests must equal
              # limits for extended resources).
              nvidia.com/gpu.shared: 1
            limits:
              cpu: "8"
              memory: 24Gi
              nvidia.com/gpu.shared: 1