From 2c3ffdbf955fbdc5ef8da59cd28efab2324c5857 Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Mon, 26 Jan 2026 11:44:28 -0300
Subject: [PATCH] ai-llm: tighten gpu placement and resources

---
 services/ai-llm/deployment.yaml | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/services/ai-llm/deployment.yaml b/services/ai-llm/deployment.yaml
index dfa1bdd..4f34d86 100644
--- a/services/ai-llm/deployment.yaml
+++ b/services/ai-llm/deployment.yaml
@@ -21,8 +21,8 @@ spec:
         app: ollama
       annotations:
         ai.bstein.dev/model: qwen2.5-coder:7b-instruct-q4_0
-        ai.bstein.dev/gpu: GPU pool (titan-20/21/22/24)
-        ai.bstein.dev/restartedAt: "2026-01-25T19:10:00Z"
+        ai.bstein.dev/gpu: GPU pool (titan-22/24)
+        ai.bstein.dev/restartedAt: "2026-01-26T12:00:00Z"
     spec:
       affinity:
         nodeAffinity:
@@ -32,8 +32,6 @@ spec:
              - key: kubernetes.io/hostname
                operator: In
                values:
-               - titan-20
-               - titan-21
                - titan-22
                - titan-24
      runtimeClassName: nvidia
@@ -69,8 +67,8 @@ spec:
          mountPath: /root/.ollama
        resources:
          requests:
-           cpu: 250m
-           memory: 1Gi
+           cpu: 500m
+           memory: 2Gi
            nvidia.com/gpu.shared: 1
          limits:
            nvidia.com/gpu.shared: 1
@@ -97,10 +95,10 @@ spec:
          mountPath: /root/.ollama
        resources:
          requests:
-           cpu: "2"
-           memory: 8Gi
+           cpu: "4"
+           memory: 16Gi
            nvidia.com/gpu.shared: 1
          limits:
-           cpu: "4"
-           memory: 12Gi
+           cpu: "8"
+           memory: 24Gi
            nvidia.com/gpu.shared: 1
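
Post-apply sanity check (a sketch, not part of the commit): the commands below
could confirm that pods now land only on titan-22/24 and pick up the bumped
requests/limits. The "ai-llm" namespace and "ollama" Deployment name are
assumptions inferred from the repo path and the pod label; substitute the real
names if they differ.

  # Namespace/deployment name are assumed (see note above).
  # The NODE column should show only titan-22 or titan-24.
  $ kubectl -n ai-llm get pods -l app=ollama -o wide

  # Resources on the pod template should match the new values in the patch.
  $ kubectl -n ai-llm get deploy ollama \
      -o jsonpath='{.spec.template.spec.containers[*].resources}'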