---
# services/ai-llm/deployment.yaml
#
# Ollama LLM server, pinned to the GPU node "titan-24".
# An init container pre-pulls the model into the shared "ollama-models"
# PVC so the main container starts with a warm model cache.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
  namespace: ai
spec:
  replicas: 1
  revisionHistoryLimit: 2
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      # Single-node pinning: both the GPU and the model-cache PVC live on
      # titan-24, so the pod must schedule there.
      nodeSelector:
        kubernetes.io/hostname: titan-24
      # Required so the NVIDIA container runtime exposes the GPU.
      runtimeClassName: nvidia
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: ollama-models
      initContainers:
        # Warm the model cache: start a throwaway ollama server, pull the
        # model into the shared PVC, then stop the server.
        - name: warm-model
          # NOTE(review): ":latest" is mutable — consider pinning a tag or
          # digest so init and main containers can't silently diverge.
          image: ollama/ollama:latest
          env:
            - name: OLLAMA_HOST
              value: "0.0.0.0"
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
            # Store models on the PVC mount so the pull survives restarts
            # and is visible to the main container.
            - name: OLLAMA_MODELS
              value: /root/.ollama
            # Quoted: the value contains a ":" and should never be
            # re-typed by YAML tooling.
            - name: OLLAMA_MODEL
              value: "qwen2.5-coder:7b-instruct-q4_0"
          command:
            - /bin/sh
            - -c
            - |
              set -e
              ollama serve >/tmp/ollama.log 2>&1 &
              # Poll until the server answers instead of a fixed sleep;
              # fail the init (and thus the pod) after ~60s.
              tries=0
              until ollama list >/dev/null 2>&1; do
                tries=$((tries + 1))
                if [ "$tries" -ge 60 ]; then
                  echo "ollama server did not become ready" >&2
                  cat /tmp/ollama.log >&2 || true
                  exit 1
                fi
                sleep 1
              done
              ollama pull "${OLLAMA_MODEL}"
              pkill ollama || true
          volumeMounts:
            - name: models
              mountPath: /root/.ollama
          resources:
            requests:
              cpu: 250m
              memory: 1Gi
              # Extended resources (nvidia.com/gpu) must have
              # requests == limits.
              nvidia.com/gpu: 1
            limits:
              nvidia.com/gpu: 1
      containers:
        - name: ollama
          image: ollama/ollama:latest
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: 11434
          env:
            - name: OLLAMA_HOST
              value: "0.0.0.0"
            # Keep loaded models resident for 6h between requests.
            - name: OLLAMA_KEEP_ALIVE
              value: 6h
            - name: OLLAMA_MODELS
              value: /root/.ollama
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
          volumeMounts:
            - name: models
              mountPath: /root/.ollama
          resources:
            requests:
              cpu: "2"
              memory: 8Gi
              nvidia.com/gpu: 1
            limits:
              cpu: "4"
              memory: 12Gi
              nvidia.com/gpu: 1