From c8adca5a5b1790f608a6103311130840d621ebbc Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Sat, 20 Dec 2025 14:10:34 -0300
Subject: [PATCH] ai: add ollama service and wire chat backend

---
 .../applications/ai-llm/kustomization.yaml    | 23 +++++
 .../applications/kustomization.yaml           |  1 +
 services/ai-llm/deployment.yaml               | 84 +++++++++++++++++++
 services/ai-llm/kustomization.yaml            |  9 ++
 services/ai-llm/namespace.yaml                |  5 ++
 services/ai-llm/pvc.yaml                      | 13 +++
 services/ai-llm/service.yaml                  | 14 ++++
 .../bstein-dev-home/backend-deployment.yaml   |  7 ++
 8 files changed, 156 insertions(+)
 create mode 100644 clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml
 create mode 100644 services/ai-llm/deployment.yaml
 create mode 100644 services/ai-llm/kustomization.yaml
 create mode 100644 services/ai-llm/namespace.yaml
 create mode 100644 services/ai-llm/pvc.yaml
 create mode 100644 services/ai-llm/service.yaml

diff --git a/clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml b/clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml
new file mode 100644
index 0000000..3572a6c
--- /dev/null
+++ b/clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml
@@ -0,0 +1,23 @@
+# clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml
+apiVersion: kustomize.toolkit.fluxcd.io/v1
+kind: Kustomization
+metadata:
+  name: ai-llm
+  namespace: flux-system
+spec:
+  interval: 10m
+  path: ./services/ai-llm
+  targetNamespace: ai
+  prune: true
+  sourceRef:
+    kind: GitRepository
+    name: flux-system
+    namespace: flux-system
+  wait: true
+  healthChecks:
+    - apiVersion: apps/v1
+      kind: Deployment
+      name: ollama
+      namespace: ai
+  dependsOn:
+    - name: core
diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml
index 5825734..b5a5e62 100644
--- a/clusters/atlas/flux-system/applications/kustomization.yaml
+++ b/clusters/atlas/flux-system/applications/kustomization.yaml
@@ -22,3 +22,4 @@ resources:
   - jenkins/kustomization.yaml
   - ci-demo/kustomization.yaml
   - ci-demo/image-automation.yaml
+  - ai-llm/kustomization.yaml
diff --git a/services/ai-llm/deployment.yaml b/services/ai-llm/deployment.yaml
new file mode 100644
index 0000000..f9098db
--- /dev/null
+++ b/services/ai-llm/deployment.yaml
@@ -0,0 +1,84 @@
+# services/ai-llm/deployment.yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ollama
+  namespace: ai
+spec:
+  replicas: 1
+  revisionHistoryLimit: 2
+  selector:
+    matchLabels:
+      app: ollama
+  template:
+    metadata:
+      labels:
+        app: ollama
+    spec:
+      nodeSelector:
+        kubernetes.io/hostname: titan-24
+      runtimeClassName: nvidia
+      volumes:
+        - name: models
+          persistentVolumeClaim:
+            claimName: ollama-models
+      initContainers:
+        - name: warm-model
+          image: ollama/ollama:latest
+          env:
+            - name: OLLAMA_HOST
+              value: 0.0.0.0
+            - name: NVIDIA_VISIBLE_DEVICES
+              value: all
+            - name: NVIDIA_DRIVER_CAPABILITIES
+              value: compute,utility
+            - name: OLLAMA_MODELS
+              value: /root/.ollama
+            - name: OLLAMA_MODEL
+              value: phi3:mini-4k-instruct-q4_0
+          command:
+            - /bin/sh
+            - -c
+            - |
+              set -e
+              ollama serve >/tmp/ollama.log 2>&1 &
+              sleep 6
+              ollama pull "${OLLAMA_MODEL}"
+              pkill ollama || true
+          volumeMounts:
+            - name: models
+              mountPath: /root/.ollama
+          resources:
+            requests:
+              cpu: 250m
+              memory: 1Gi
+      containers:
+        - name: ollama
+          image: ollama/ollama:latest
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: http
+              containerPort: 11434
+          env:
+            - name: OLLAMA_HOST
+              value: 0.0.0.0
+            - name: OLLAMA_KEEP_ALIVE
+              value: 6h
+            - name: OLLAMA_MODELS
+              value: /root/.ollama
+            - name: NVIDIA_VISIBLE_DEVICES
+              value: all
+            - name: NVIDIA_DRIVER_CAPABILITIES
+              value: compute,utility
+          volumeMounts:
+            - name: models
+              mountPath: /root/.ollama
+          resources:
+            requests:
+              cpu: "2"
+              memory: 8Gi
+              nvidia.com/gpu: 1
+            limits:
+              cpu: "4"
+              memory: 12Gi
+              nvidia.com/gpu: 1
diff --git a/services/ai-llm/kustomization.yaml b/services/ai-llm/kustomization.yaml
new file mode 100644
index 0000000..46ea286
--- /dev/null
+++ b/services/ai-llm/kustomization.yaml
@@ -0,0 +1,9 @@
+# services/ai-llm/kustomization.yaml
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+namespace: ai
+resources:
+  - namespace.yaml
+  - pvc.yaml
+  - deployment.yaml
+  - service.yaml
diff --git a/services/ai-llm/namespace.yaml b/services/ai-llm/namespace.yaml
new file mode 100644
index 0000000..96f5a81
--- /dev/null
+++ b/services/ai-llm/namespace.yaml
@@ -0,0 +1,5 @@
+# services/ai-llm/namespace.yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: ai
diff --git a/services/ai-llm/pvc.yaml b/services/ai-llm/pvc.yaml
new file mode 100644
index 0000000..51c0384
--- /dev/null
+++ b/services/ai-llm/pvc.yaml
@@ -0,0 +1,13 @@
+# services/ai-llm/pvc.yaml
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: ollama-models
+  namespace: ai
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 30Gi
+  storageClassName: astreae
diff --git a/services/ai-llm/service.yaml b/services/ai-llm/service.yaml
new file mode 100644
index 0000000..f086a90
--- /dev/null
+++ b/services/ai-llm/service.yaml
@@ -0,0 +1,14 @@
+# services/ai-llm/service.yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama
+  namespace: ai
+spec:
+  type: ClusterIP
+  selector:
+    app: ollama
+  ports:
+    - name: http
+      port: 11434
+      targetPort: 11434
diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml
index 1159487..c80a9ac 100644
--- a/services/bstein-dev-home/backend-deployment.yaml
+++ b/services/bstein-dev-home/backend-deployment.yaml
@@ -24,6 +24,13 @@ spec:
         - name: backend
           image: registry.bstein.dev/bstein/bstein-dev-home-backend:latest
           imagePullPolicy: Always
+          env:
+            - name: AI_CHAT_API
+              value: http://ollama.ai.svc.cluster.local:11434
+            - name: AI_CHAT_MODEL
+              value: phi3:mini-4k-instruct-q4_0
+            - name: AI_CHAT_TIMEOUT_SEC
+              value: "20"
           ports:
             - name: http
               containerPort: 8080
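
A quick smoke test once Flux reconciles, as a suggested follow-up rather than
part of the patch itself (assumes kubectl access to the cluster; /api/tags and
/api/generate are Ollama's stock HTTP endpoints):

    kubectl -n ai rollout status deploy/ollama
    kubectl -n ai port-forward svc/ollama 11434:11434 &
    # the tag list should include the model warmed by the init container
    curl -s http://localhost:11434/api/tags
    # one-shot generation against the same model the backend is wired to
    curl -s http://localhost:11434/api/generate \
      -d '{"model": "phi3:mini-4k-instruct-q4_0", "prompt": "ping", "stream": false}'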