From c8adca5a5b1790f608a6103311130840d621ebbc Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Sat, 20 Dec 2025 14:10:34 -0300
Subject: [PATCH] ai: add ollama service and wire chat backend

---
 .../applications/ai-llm/kustomization.yaml    | 23 +++++
 .../applications/kustomization.yaml           |  1 +
 services/ai-llm/deployment.yaml               | 84 +++++++++++++++++++
 services/ai-llm/kustomization.yaml            |  9 ++
 services/ai-llm/namespace.yaml                |  5 ++
 services/ai-llm/pvc.yaml                      | 13 +++
 services/ai-llm/service.yaml                  | 14 ++++
 .../bstein-dev-home/backend-deployment.yaml   |  7 ++
 8 files changed, 156 insertions(+)
 create mode 100644 clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml
 create mode 100644 services/ai-llm/deployment.yaml
 create mode 100644 services/ai-llm/kustomization.yaml
 create mode 100644 services/ai-llm/namespace.yaml
 create mode 100644 services/ai-llm/pvc.yaml
 create mode 100644 services/ai-llm/service.yaml

diff --git a/clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml b/clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml
new file mode 100644
index 0000000..3572a6c
--- /dev/null
+++ b/clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml
@@ -0,0 +1,23 @@
+# clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml
+apiVersion: kustomize.toolkit.fluxcd.io/v1
+kind: Kustomization
+metadata:
+  name: ai-llm
+  namespace: flux-system
+spec:
+  interval: 10m
+  path: ./services/ai-llm
+  targetNamespace: ai
+  prune: true
+  sourceRef:
+    kind: GitRepository
+    name: flux-system
+    namespace: flux-system
+  wait: true
+  healthChecks:
+    - apiVersion: apps/v1
+      kind: Deployment
+      name: ollama
+      namespace: ai
+  dependsOn:
+    - name: core
diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml
index 5825734..b5a5e62 100644
--- a/clusters/atlas/flux-system/applications/kustomization.yaml
+++ b/clusters/atlas/flux-system/applications/kustomization.yaml
@@ -22,3 +22,4 @@ resources:
   - jenkins/kustomization.yaml
   - ci-demo/kustomization.yaml
   - ci-demo/image-automation.yaml
+  - ai-llm/kustomization.yaml
diff --git a/services/ai-llm/deployment.yaml b/services/ai-llm/deployment.yaml
new file mode 100644
index 0000000..f9098db
--- /dev/null
+++ b/services/ai-llm/deployment.yaml
@@ -0,0 +1,84 @@
+# services/ai-llm/deployment.yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ollama
+  namespace: ai
+spec:
+  replicas: 1
+  revisionHistoryLimit: 2
+  selector:
+    matchLabels:
+      app: ollama
+  template:
+    metadata:
+      labels:
+        app: ollama
+    spec:
+      nodeSelector:
+        kubernetes.io/hostname: titan-24
+      runtimeClassName: nvidia
+      volumes:
+        - name: models
+          persistentVolumeClaim:
+            claimName: ollama-models
+      initContainers:
+        - name: warm-model
+          image: ollama/ollama:latest
+          env:
+            - name: OLLAMA_HOST
+              value: 0.0.0.0
+            - name: NVIDIA_VISIBLE_DEVICES
+              value: all
+            - name: NVIDIA_DRIVER_CAPABILITIES
+              value: compute,utility
+            - name: OLLAMA_MODELS
+              value: /root/.ollama
+            - name: OLLAMA_MODEL
+              value: phi3:mini-4k-instruct-q4_0
+          command:
+            - /bin/sh
+            - -c
+            - |
+              set -e
+              ollama serve >/tmp/ollama.log 2>&1 &
+              sleep 6
+              ollama pull "${OLLAMA_MODEL}"
+              pkill ollama || true
+          volumeMounts:
+            - name: models
+              mountPath: /root/.ollama
+          resources:
+            requests:
+              cpu: 250m
+              memory: 1Gi
+      containers:
+        - name: ollama
+          image: ollama/ollama:latest
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: http
+              containerPort: 11434
+          env:
+            - name: OLLAMA_HOST
+              value: 0.0.0.0
+            - name: OLLAMA_KEEP_ALIVE
+              value: 6h
+            - name: OLLAMA_MODELS
+              value: /root/.ollama
+            - name: NVIDIA_VISIBLE_DEVICES
+              value: all
+            - name: NVIDIA_DRIVER_CAPABILITIES
+              value: compute,utility
+          volumeMounts:
+            - name: models
+              mountPath: /root/.ollama
+          resources:
+            requests:
+              cpu: "2"
+              memory: 8Gi
+              nvidia.com/gpu: 1
+            limits:
+              cpu: "4"
+              memory: 12Gi
+              nvidia.com/gpu: 1
diff --git a/services/ai-llm/kustomization.yaml b/services/ai-llm/kustomization.yaml
new file mode 100644
index 0000000..46ea286
--- /dev/null
+++ b/services/ai-llm/kustomization.yaml
@@ -0,0 +1,9 @@
+# services/ai-llm/kustomization.yaml
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+namespace: ai
+resources:
+  - namespace.yaml
+  - pvc.yaml
+  - deployment.yaml
+  - service.yaml
diff --git a/services/ai-llm/namespace.yaml b/services/ai-llm/namespace.yaml
new file mode 100644
index 0000000..96f5a81
--- /dev/null
+++ b/services/ai-llm/namespace.yaml
@@ -0,0 +1,5 @@
+# services/ai-llm/namespace.yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: ai
diff --git a/services/ai-llm/pvc.yaml b/services/ai-llm/pvc.yaml
new file mode 100644
index 0000000..51c0384
--- /dev/null
+++ b/services/ai-llm/pvc.yaml
@@ -0,0 +1,13 @@
+# services/ai-llm/pvc.yaml
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: ollama-models
+  namespace: ai
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 30Gi
+  storageClassName: astreae
diff --git a/services/ai-llm/service.yaml b/services/ai-llm/service.yaml
new file mode 100644
index 0000000..f086a90
--- /dev/null
+++ b/services/ai-llm/service.yaml
@@ -0,0 +1,14 @@
+# services/ai-llm/service.yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama
+  namespace: ai
+spec:
+  type: ClusterIP
+  selector:
+    app: ollama
+  ports:
+    - name: http
+      port: 11434
+      targetPort: 11434
diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml
index 1159487..c80a9ac 100644
--- a/services/bstein-dev-home/backend-deployment.yaml
+++ b/services/bstein-dev-home/backend-deployment.yaml
@@ -24,6 +24,13 @@ spec:
         - name: backend
           image: registry.bstein.dev/bstein/bstein-dev-home-backend:latest
           imagePullPolicy: Always
+          env:
+            - name: AI_CHAT_API
+              value: http://ollama.ai.svc.cluster.local:11434
+            - name: AI_CHAT_MODEL
+              value: phi3:mini-4k-instruct-q4_0
+            - name: AI_CHAT_TIMEOUT_SEC
+              value: "20"
           ports:
             - name: http
               containerPort: 8080
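
A quick smoke test once Flux reconciles, as a suggested follow-up rather than
part of the patch itself (assumes kubectl access to the cluster; /api/tags and
/api/generate are Ollama's stock HTTP endpoints):

    kubectl -n ai rollout status deploy/ollama
    kubectl -n ai port-forward svc/ollama 11434:11434 &
    # the tag list should include the model warmed by the init container
    curl -s http://localhost:11434/api/tags
    # one-shot generation against the same model the backend is wired to
    curl -s http://localhost:11434/api/generate \
      -d '{"model": "phi3:mini-4k-instruct-q4_0", "prompt": "ping", "stream": false}'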