ai: add ollama service and wire chat backend

Brad Stein 2025-12-20 14:10:34 -03:00
parent f68668f987
commit c8adca5a5b
8 changed files with 156 additions and 0 deletions


@@ -0,0 +1,23 @@
# clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: ai-llm
  namespace: flux-system
spec:
  interval: 10m
  path: ./services/ai-llm
  targetNamespace: ai
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  wait: true
  healthChecks:
    - apiVersion: apps/v1
      kind: Deployment
      name: ollama
      namespace: ai
  dependsOn:
    - name: core
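
As a quick check once this lands, Flux reconciliation and the Deployment health check can be verified from a workstation with cluster access; a minimal sketch using the standard flux and kubectl subcommands:

  # Pull the latest commit and reconcile the new Kustomization immediately
  flux reconcile kustomization ai-llm -n flux-system --with-source
  # Confirm it reports Ready (wait + healthChecks gate on the ollama Deployment)
  flux get kustomizations ai-llm -n flux-system
  # Watch the Deployment the health check points at
  kubectl -n ai rollout status deployment/ollama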


@@ -22,3 +22,4 @@ resources:
- jenkins/kustomization.yaml
- ci-demo/kustomization.yaml
- ci-demo/image-automation.yaml
- ai-llm/kustomization.yaml


@@ -0,0 +1,84 @@
# services/ai-llm/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
  namespace: ai
spec:
  replicas: 1
  revisionHistoryLimit: 2
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      nodeSelector:
        kubernetes.io/hostname: titan-24
      runtimeClassName: nvidia
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: ollama-models
      initContainers:
        - name: warm-model
          image: ollama/ollama:latest
          env:
            - name: OLLAMA_HOST
              value: 0.0.0.0
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
            - name: OLLAMA_MODELS
              value: /root/.ollama
            - name: OLLAMA_MODEL
              value: phi3:mini-4k-instruct-q4_0
          command:
            - /bin/sh
            - -c
            - |
              set -e
              ollama serve >/tmp/ollama.log 2>&1 &
              sleep 6
              ollama pull "${OLLAMA_MODEL}"
              pkill ollama || true
          volumeMounts:
            - name: models
              mountPath: /root/.ollama
          resources:
            requests:
              cpu: 250m
              memory: 1Gi
      containers:
        - name: ollama
          image: ollama/ollama:latest
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: 11434
          env:
            - name: OLLAMA_HOST
              value: 0.0.0.0
            - name: OLLAMA_KEEP_ALIVE
              value: 6h
            - name: OLLAMA_MODELS
              value: /root/.ollama
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
          volumeMounts:
            - name: models
              mountPath: /root/.ollama
          resources:
            requests:
              cpu: "2"
              memory: 8Gi
              nvidia.com/gpu: 1
            limits:
              cpu: "4"
              memory: 12Gi
              nvidia.com/gpu: 1
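
To confirm the warm-model init container actually pulled phi3:mini-4k-instruct-q4_0 into the ollama-models PVC and that the serving container answers on 11434, something like the following works; a sketch, assuming kubectl access to the ai namespace:

  # Models persisted under /root/.ollama (the PVC mount)
  kubectl -n ai exec deploy/ollama -- ollama list
  # Reach the API through a temporary port-forward
  kubectl -n ai port-forward deploy/ollama 11434:11434 &
  curl -s http://localhost:11434/api/tags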


@@ -0,0 +1,9 @@
# services/ai-llm/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: ai
resources:
- namespace.yaml
- pvc.yaml
- deployment.yaml
- service.yaml


@@ -0,0 +1,5 @@
# services/ai-llm/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
  name: ai


@@ -0,0 +1,13 @@
# services/ai-llm/pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ollama-models
  namespace: ai
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 30Gi
  storageClassName: astreae


@@ -0,0 +1,14 @@
# services/ai-llm/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: ollama
  namespace: ai
spec:
  type: ClusterIP
  selector:
    app: ollama
  ports:
    - name: http
      port: 11434
      targetPort: 11434
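
The ClusterIP Service is what gives the backend a stable DNS name; an in-cluster smoke test could look like the sketch below (curlimages/curl is an arbitrary choice here, any image with curl does):

  kubectl run ollama-smoke --rm -it --restart=Never --image=curlimages/curl -- \
    curl -s http://ollama.ai.svc.cluster.local:11434/api/generate \
    -d '{"model":"phi3:mini-4k-instruct-q4_0","prompt":"ping","stream":false}'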


@@ -24,6 +24,13 @@ spec:
        - name: backend
          image: registry.bstein.dev/bstein/bstein-dev-home-backend:latest
          imagePullPolicy: Always
          env:
            - name: AI_CHAT_API
              value: http://ollama.ai.svc.cluster.local:11434
            - name: AI_CHAT_MODEL
              value: phi3:mini-4k-instruct-q4_0
            - name: AI_CHAT_TIMEOUT_SEC
              value: "20"
          ports:
            - name: http
              containerPort: 8080
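
These three env vars are the whole contract between the backend and the model service; the client code is not part of this commit, but the request it presumably issues reduces to an Ollama /api/chat call bounded by AI_CHAT_TIMEOUT_SEC, roughly:

  # Rough equivalent of the backend's chat call, expressed with curl
  curl -s --max-time "${AI_CHAT_TIMEOUT_SEC:-20}" \
    "${AI_CHAT_API:-http://ollama.ai.svc.cluster.local:11434}/api/chat" \
    -d '{"model":"phi3:mini-4k-instruct-q4_0","messages":[{"role":"user","content":"hello"}],"stream":false}'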