From 2c3ffdbf955fbdc5ef8da59cd28efab2324c5857 Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Mon, 26 Jan 2026 11:44:28 -0300
Subject: [PATCH] ai-llm: tighten gpu placement and resources

---
 services/ai-llm/deployment.yaml | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/services/ai-llm/deployment.yaml b/services/ai-llm/deployment.yaml
index dfa1bdd..4f34d86 100644
--- a/services/ai-llm/deployment.yaml
+++ b/services/ai-llm/deployment.yaml
@@ -21,8 +21,8 @@ spec:
         app: ollama
       annotations:
         ai.bstein.dev/model: qwen2.5-coder:7b-instruct-q4_0
-        ai.bstein.dev/gpu: GPU pool (titan-20/21/22/24)
-        ai.bstein.dev/restartedAt: "2026-01-25T19:10:00Z"
+        ai.bstein.dev/gpu: GPU pool (titan-22/24)
+        ai.bstein.dev/restartedAt: "2026-01-26T12:00:00Z"
     spec:
       affinity:
         nodeAffinity:
@@ -32,8 +32,6 @@ spec:
              - key: kubernetes.io/hostname
                operator: In
                values:
-               - titan-20
-               - titan-21
                - titan-22
                - titan-24
      runtimeClassName: nvidia
@@ -69,8 +67,8 @@ spec:
          mountPath: /root/.ollama
        resources:
          requests:
-           cpu: 250m
-           memory: 1Gi
+           cpu: 500m
+           memory: 2Gi
            nvidia.com/gpu.shared: 1
          limits:
            nvidia.com/gpu.shared: 1
@@ -97,10 +95,10 @@ spec:
          mountPath: /root/.ollama
        resources:
          requests:
-           cpu: "2"
-           memory: 8Gi
+           cpu: "4"
+           memory: 16Gi
            nvidia.com/gpu.shared: 1
          limits:
-           cpu: "4"
-           memory: 12Gi
+           cpu: "8"
+           memory: 24Gi
            nvidia.com/gpu.shared: 1
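
Post-apply sanity check (a sketch, not part of the commit): the commands below
could confirm that pods now land only on titan-22/24 and pick up the bumped
requests/limits. The "ai-llm" namespace and "ollama" Deployment name are
assumptions inferred from the repo path and the pod label; substitute the real
names if they differ.

  # Namespace/deployment name are assumed (see note above).
  # The NODE column should show only titan-22 or titan-24.
  $ kubectl -n ai-llm get pods -l app=ollama -o wide

  # Resources on the pod template should match the new values in the patch.
  $ kubectl -n ai-llm get deploy ollama \
      -o jsonpath='{.spec.template.spec.containers[*].resources}'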