ai: add ollama service and wire chat backend

parent f68668f987
commit c8adca5a5b

@@ -0,0 +1,23 @@
# clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: ai-llm
  namespace: flux-system
spec:
  interval: 10m
  path: ./services/ai-llm
  targetNamespace: ai
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  wait: true
  healthChecks:
    - apiVersion: apps/v1
      kind: Deployment
      name: ollama
      namespace: ai
  dependsOn:
    - name: core
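
As a quick sanity check (not part of the commit), the new Kustomization can be reconciled and inspected with the Flux and kubectl CLIs; the names and namespaces below are taken from the manifest above:

# ask Flux to re-fetch the repository and apply ai-llm immediately
flux reconcile kustomization ai-llm -n flux-system --with-source

# with wait: true and the Deployment health check, this only reports Ready
# once the ollama Deployment in the ai namespace is available
flux get kustomizations ai-llm -n flux-system
kubectl -n ai rollout status deployment/ollama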

@@ -22,3 +22,4 @@ resources:
  - jenkins/kustomization.yaml
  - ci-demo/kustomization.yaml
  - ci-demo/image-automation.yaml
  - ai-llm/kustomization.yaml

84  services/ai-llm/deployment.yaml  Normal file
@@ -0,0 +1,84 @@
# services/ai-llm/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
  namespace: ai
spec:
  replicas: 1
  revisionHistoryLimit: 2
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      nodeSelector:
        kubernetes.io/hostname: titan-24
      runtimeClassName: nvidia
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: ollama-models
      initContainers:
        - name: warm-model
          image: ollama/ollama:latest
          env:
            - name: OLLAMA_HOST
              value: 0.0.0.0
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
            - name: OLLAMA_MODELS
              value: /root/.ollama
            - name: OLLAMA_MODEL
              value: phi3:mini-4k-instruct-q4_0
          command:
            - /bin/sh
            - -c
            - |
              set -e
              ollama serve >/tmp/ollama.log 2>&1 &
              sleep 6
              ollama pull "${OLLAMA_MODEL}"
              pkill ollama || true
          volumeMounts:
            - name: models
              mountPath: /root/.ollama
          resources:
            requests:
              cpu: 250m
              memory: 1Gi
      containers:
        - name: ollama
          image: ollama/ollama:latest
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: 11434
          env:
            - name: OLLAMA_HOST
              value: 0.0.0.0
            - name: OLLAMA_KEEP_ALIVE
              value: 6h
            - name: OLLAMA_MODELS
              value: /root/.ollama
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
          volumeMounts:
            - name: models
              mountPath: /root/.ollama
          resources:
            requests:
              cpu: "2"
              memory: 8Gi
              nvidia.com/gpu: 1
            limits:
              cpu: "4"
              memory: 12Gi
              nvidia.com/gpu: 1
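
A rough way to confirm the warm-model init container actually pre-pulled the model and that the GPU is visible in the running pod; the ollama list and nvidia-smi calls assume the upstream ollama/ollama image and that the NVIDIA runtime injects the utility binaries requested via NVIDIA_DRIVER_CAPABILITIES:

# logs of the completed init container (model pull progress ends up here)
kubectl -n ai logs deployment/ollama -c warm-model

# the pre-pulled model should show up via the shared /root/.ollama volume
kubectl -n ai exec deployment/ollama -c ollama -- ollama list

# nvidia-smi is normally injected by the NVIDIA container runtime
kubectl -n ai exec deployment/ollama -c ollama -- nvidia-smi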

9  services/ai-llm/kustomization.yaml  Normal file
@@ -0,0 +1,9 @@
# services/ai-llm/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: ai
resources:
  - namespace.yaml
  - pvc.yaml
  - deployment.yaml
  - service.yaml
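
To preview roughly what Flux's kustomize-controller will apply from this overlay, the output can be rendered locally (path relative to the repository root); this is only a convenience check, not part of the commit:

kubectl kustomize services/ai-llm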

5  services/ai-llm/namespace.yaml  Normal file
@@ -0,0 +1,5 @@
# services/ai-llm/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
  name: ai

13  services/ai-llm/pvc.yaml  Normal file
@@ -0,0 +1,13 @@
# services/ai-llm/pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ollama-models
  namespace: ai
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 30Gi
  storageClassName: astreae
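
The ollama pod mounts this claim, so it is worth confirming that the astreae StorageClass actually binds it before debugging anything else:

kubectl -n ai get pvc ollama-models
kubectl -n ai get events --field-selector involvedObject.name=ollama-models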

14  services/ai-llm/service.yaml  Normal file
@@ -0,0 +1,14 @@
# services/ai-llm/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: ollama
  namespace: ai
spec:
  type: ClusterIP
  selector:
    app: ollama
  ports:
    - name: http
      port: 11434
      targetPort: 11434
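
Once the Service is up, the Ollama HTTP API can be smoke-tested from inside the cluster; the throwaway pod name and the curlimages/curl image are illustrative choices, and /api/tags and /api/generate are standard Ollama endpoints:

kubectl -n ai run ollama-smoke --rm -i --restart=Never --image=curlimages/curl -- \
  sh -c 'curl -s http://ollama:11434/api/tags && \
         curl -s http://ollama:11434/api/generate \
           -d "{\"model\":\"phi3:mini-4k-instruct-q4_0\",\"prompt\":\"Say hi\",\"stream\":false}"'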

@@ -24,6 +24,13 @@ spec:
        - name: backend
          image: registry.bstein.dev/bstein/bstein-dev-home-backend:latest
          imagePullPolicy: Always
          env:
            - name: AI_CHAT_API
              value: http://ollama.ai.svc.cluster.local:11434
            - name: AI_CHAT_MODEL
              value: phi3:mini-4k-instruct-q4_0
            - name: AI_CHAT_TIMEOUT_SEC
              value: "20"
          ports:
            - name: http
              containerPort: 8080
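
The backend code is not part of this diff, but given the AI_CHAT_API, AI_CHAT_MODEL, and AI_CHAT_TIMEOUT_SEC values above, the call it issues presumably looks roughly like Ollama's /api/chat request below (the payload shape is Ollama's standard chat API; whether the backend uses /api/chat or /api/generate is an assumption):

# --max-time 20 mirrors AI_CHAT_TIMEOUT_SEC
curl -s --max-time 20 http://ollama.ai.svc.cluster.local:11434/api/chat \
  -d '{
        "model": "phi3:mini-4k-instruct-q4_0",
        "messages": [{"role": "user", "content": "Hello from the site chat"}],
        "stream": false
      }'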