ai: add ollama service and wire chat backend

Brad Stein 2025-12-20 14:10:34 -03:00
parent f68668f987
commit c8adca5a5b
8 changed files with 156 additions and 0 deletions


@@ -0,0 +1,23 @@
# clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: ai-llm
  namespace: flux-system
spec:
  interval: 10m
  path: ./services/ai-llm
  targetNamespace: ai
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  wait: true
  healthChecks:
    - apiVersion: apps/v1
      kind: Deployment
      name: ollama
      namespace: ai
  dependsOn:
    - name: core
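
As a quick check once this lands, Flux reconciliation and the Deployment health check can be verified from a workstation with cluster access; a minimal sketch using the standard flux and kubectl subcommands:

  # Pull the latest commit and reconcile the new Kustomization immediately
  flux reconcile kustomization ai-llm -n flux-system --with-source
  # Confirm it reports Ready (wait + healthChecks gate on the ollama Deployment)
  flux get kustomizations ai-llm -n flux-system
  # Watch the Deployment the health check points at
  kubectl -n ai rollout status deployment/ollama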


@@ -22,3 +22,4 @@ resources:
- jenkins/kustomization.yaml
- ci-demo/kustomization.yaml
- ci-demo/image-automation.yaml
- ai-llm/kustomization.yaml


@@ -0,0 +1,84 @@
# services/ai-llm/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
  namespace: ai
spec:
  replicas: 1
  revisionHistoryLimit: 2
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      nodeSelector:
        kubernetes.io/hostname: titan-24
      runtimeClassName: nvidia
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: ollama-models
      initContainers:
        - name: warm-model
          image: ollama/ollama:latest
          env:
            - name: OLLAMA_HOST
              value: 0.0.0.0
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
            - name: OLLAMA_MODELS
              value: /root/.ollama
            - name: OLLAMA_MODEL
              value: phi3:mini-4k-instruct-q4_0
          command:
            - /bin/sh
            - -c
            - |
              set -e
              ollama serve >/tmp/ollama.log 2>&1 &
              sleep 6
              ollama pull "${OLLAMA_MODEL}"
              pkill ollama || true
          volumeMounts:
            - name: models
              mountPath: /root/.ollama
          resources:
            requests:
              cpu: 250m
              memory: 1Gi
      containers:
        - name: ollama
          image: ollama/ollama:latest
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: 11434
          env:
            - name: OLLAMA_HOST
              value: 0.0.0.0
            - name: OLLAMA_KEEP_ALIVE
              value: 6h
            - name: OLLAMA_MODELS
              value: /root/.ollama
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
          volumeMounts:
            - name: models
              mountPath: /root/.ollama
          resources:
            requests:
              cpu: "2"
              memory: 8Gi
              nvidia.com/gpu: 1
            limits:
              cpu: "4"
              memory: 12Gi
              nvidia.com/gpu: 1
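
To confirm the warm-model init container actually pulled phi3:mini-4k-instruct-q4_0 into the ollama-models PVC and that the serving container answers on 11434, something like the following works; a sketch, assuming kubectl access to the ai namespace:

  # Models persisted under /root/.ollama (the PVC mount)
  kubectl -n ai exec deploy/ollama -- ollama list
  # Reach the API through a temporary port-forward
  kubectl -n ai port-forward deploy/ollama 11434:11434 &
  curl -s http://localhost:11434/api/tags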


@@ -0,0 +1,9 @@
# services/ai-llm/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: ai
resources:
- namespace.yaml
- pvc.yaml
- deployment.yaml
- service.yaml


@@ -0,0 +1,5 @@
# services/ai-llm/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
  name: ai


@@ -0,0 +1,13 @@
# services/ai-llm/pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ollama-models
  namespace: ai
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 30Gi
  storageClassName: astreae


@@ -0,0 +1,14 @@
# services/ai-llm/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: ollama
  namespace: ai
spec:
  type: ClusterIP
  selector:
    app: ollama
  ports:
    - name: http
      port: 11434
      targetPort: 11434
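
The ClusterIP Service is what gives the backend a stable DNS name; an in-cluster smoke test could look like the sketch below (curlimages/curl is an arbitrary choice here, any image with curl does):

  kubectl run ollama-smoke --rm -it --restart=Never --image=curlimages/curl -- \
    curl -s http://ollama.ai.svc.cluster.local:11434/api/generate \
    -d '{"model":"phi3:mini-4k-instruct-q4_0","prompt":"ping","stream":false}'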


@@ -24,6 +24,13 @@ spec:
        - name: backend
          image: registry.bstein.dev/bstein/bstein-dev-home-backend:latest
          imagePullPolicy: Always
          env:
            - name: AI_CHAT_API
              value: http://ollama.ai.svc.cluster.local:11434
            - name: AI_CHAT_MODEL
              value: phi3:mini-4k-instruct-q4_0
            - name: AI_CHAT_TIMEOUT_SEC
              value: "20"
          ports:
            - name: http
              containerPort: 8080
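
These three env vars are the whole contract between the backend and the model service; the client code is not part of this commit, but the request it presumably issues reduces to an Ollama /api/chat call bounded by AI_CHAT_TIMEOUT_SEC, roughly:

  # Rough equivalent of the backend's chat call, expressed with curl
  curl -s --max-time "${AI_CHAT_TIMEOUT_SEC:-20}" \
    "${AI_CHAT_API:-http://ollama.ai.svc.cluster.local:11434}/api/chat" \
    -d '{"model":"phi3:mini-4k-instruct-q4_0","messages":[{"role":"user","content":"hello"}],"stream":false}'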