ai-llm: tighten gpu placement and resources

This commit is contained in:
Brad Stein 2026-01-26 11:44:28 -03:00
parent fec7713049
commit 2c3ffdbf95

View File

@@ -21,8 +21,8 @@ spec:
         app: ollama
       annotations:
         ai.bstein.dev/model: qwen2.5-coder:7b-instruct-q4_0
-        ai.bstein.dev/gpu: GPU pool (titan-20/21/22/24)
-        ai.bstein.dev/restartedAt: "2026-01-25T19:10:00Z"
+        ai.bstein.dev/gpu: GPU pool (titan-22/24)
+        ai.bstein.dev/restartedAt: "2026-01-26T12:00:00Z"
     spec:
       affinity:
         nodeAffinity:
@@ -32,8 +32,6 @@ spec:
               - key: kubernetes.io/hostname
                 operator: In
                 values:
-                  - titan-20
-                  - titan-21
                   - titan-22
                   - titan-24
       runtimeClassName: nvidia
@@ -69,8 +67,8 @@ spec:
             mountPath: /root/.ollama
           resources:
             requests:
-              cpu: 250m
-              memory: 1Gi
+              cpu: 500m
+              memory: 2Gi
               nvidia.com/gpu.shared: 1
             limits:
               nvidia.com/gpu.shared: 1
@@ -97,10 +95,10 @@ spec:
             mountPath: /root/.ollama
           resources:
             requests:
-              cpu: "2"
-              memory: 8Gi
+              cpu: "4"
+              memory: 16Gi
               nvidia.com/gpu.shared: 1
             limits:
-              cpu: "4"
-              memory: 12Gi
+              cpu: "8"
+              memory: 24Gi
               nvidia.com/gpu.shared: 1